@@ -309,10 +309,6 @@ public class OrderedBytes {
309309
310310 public static final Charset UTF8 = Charset .forName ("UTF-8" );
311311 private static final byte TERM = 0x00 ;
312- private static final BigDecimal E8 = BigDecimal .valueOf (1e8 );
313- private static final BigDecimal E32 = BigDecimal .valueOf (1e32 );
314- private static final BigDecimal EN2 = BigDecimal .valueOf (1e-2 );
315- private static final BigDecimal EN10 = BigDecimal .valueOf (1e-10 );
316312
317313 /**
318314 * Max precision guaranteed to fit into a {@code long}.
@@ -637,7 +633,7 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
637633 byte [] a = dst .getBytes ();
638634 boolean isNeg = val .signum () == -1 ;
639635 final int offset = dst .getOffset (), start = dst .getPosition ();
640- int e = 0 , d , startM ;
636+ int e = 0 , startM ;
641637
642638 if (isNeg ) { /* Small negative number: 0x14, -E, ~M */
643639 dst .put (NEG_SMALL );
@@ -646,21 +642,17 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
646642 }
647643
648644 // normalize abs(val) to determine E
649- while (abs .compareTo (EN10 ) < 0 ) { abs = abs .movePointRight (8 ); e += 4 ; }
650- while (abs .compareTo (EN2 ) < 0 ) { abs = abs .movePointRight (2 ); e ++; }
645+ int zerosBeforeFirstNonZero = abs .scale () - abs .precision ();
646+ int lengthToMoveRight = zerosBeforeFirstNonZero % 2 ==
647+ 0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1 ;
648+ e = lengthToMoveRight / 2 ;
649+ abs = abs .movePointRight (lengthToMoveRight );
651650
652651 putVaruint64 (dst , e , !isNeg ); // encode appropriate E value.
653652
654653 // encode M by peeling off centimal digits, encoding x as 2x+1
655654 startM = dst .getPosition ();
656- // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
657- // numeric scale.
658- for (int i = 0 ; i < 18 && abs .compareTo (BigDecimal .ZERO ) != 0 ; i ++) {
659- abs = abs .movePointRight (2 );
660- d = abs .intValue ();
661- dst .put ((byte ) ((2 * d + 1 ) & 0xff ));
662- abs = abs .subtract (BigDecimal .valueOf (d ));
663- }
655+ encodeToCentimal (dst , abs );
664656 // terminal digit should be 2x
665657 a [offset + dst .getPosition () - 1 ] = (byte ) (a [offset + dst .getPosition () - 1 ] & 0xfe );
666658 if (isNeg ) {
@@ -712,7 +704,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
712704 byte [] a = dst .getBytes ();
713705 boolean isNeg = val .signum () == -1 ;
714706 final int start = dst .getPosition (), offset = dst .getOffset ();
715- int e = 0 , d , startM ;
707+ int e = 0 , startM ;
716708
717709 if (isNeg ) { /* Large negative number: 0x08, ~E, ~M */
718710 dst .put (NEG_LARGE );
@@ -721,9 +713,10 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
721713 }
722714
723715 // normalize abs(val) to determine E
724- while (abs .compareTo (E32 ) >= 0 && e <= 350 ) { abs = abs .movePointLeft (32 ); e +=16 ; }
725- while (abs .compareTo (E8 ) >= 0 && e <= 350 ) { abs = abs .movePointLeft (8 ); e += 4 ; }
726- while (abs .compareTo (BigDecimal .ONE ) >= 0 && e <= 350 ) { abs = abs .movePointLeft (2 ); e ++; }
716+ int integerDigits = abs .precision () - abs .scale ();
717+ int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1 ;
718+ e = lengthToMoveLeft / 2 ;
719+ abs = abs .movePointLeft (lengthToMoveLeft );
727720
728721 // encode appropriate header byte and/or E value.
729722 if (e > 10 ) { /* large number, write out {~,}E */
@@ -738,14 +731,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
738731
739732 // encode M by peeling off centimal digits, encoding x as 2x+1
740733 startM = dst .getPosition ();
741- // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
742- // numeric scale.
743- for (int i = 0 ; i < 18 && abs .compareTo (BigDecimal .ZERO ) != 0 ; i ++) {
744- abs = abs .movePointRight (2 );
745- d = abs .intValue ();
746- dst .put ((byte ) (2 * d + 1 ));
747- abs = abs .subtract (BigDecimal .valueOf (d ));
748- }
734+ encodeToCentimal (dst , abs );
749735 // terminal digit should be 2x
750736 a [offset + dst .getPosition () - 1 ] = (byte ) (a [offset + dst .getPosition () - 1 ] & 0xfe );
751737 if (isNeg ) {
@@ -755,6 +741,32 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
755741 return dst .getPosition () - start ;
756742 }
757743
744+ /**
745+ * Encode the fractional digits of {@code val} as centimal (base-100) digits: one byte per pair of decimal digits, each digit x written as 2x+1.
746+ * Helper for {@link #encodeNumericLarge(PositionedByteRange, BigDecimal)} and {@link #encodeNumericSmall(PositionedByteRange, BigDecimal)}.
747+ * @param dst The destination to which encoded digits are written.
748+ * @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0); fractional digits past the precision limit computed in the body are not written.
749+ */
750+ private static void encodeToCentimal (PositionedByteRange dst , BigDecimal val ) {
751+ // Precondition: val is in [0.01, 1.0); only the digits after the decimal point are used below.
752+ String stringOfAbs = val .stripTrailingZeros ().toPlainString ();
753+ String value = stringOfAbs .substring (stringOfAbs .indexOf ('.' ) + 1 );
754+ int d ;
755+
756+ // Encode at most MAX_PRECISION significant digits. When the first fractional digit is 0,
757+ // that leading zero is not significant, so one extra digit (MAX_PRECISION + 1) is allowed.
758+ // The limit is then capped at the number of fractional digits actually present.
759+ int maxPrecision = value .charAt (0 ) == '0' ? MAX_PRECISION + 1 : MAX_PRECISION ;
760+ maxPrecision = Math .min (maxPrecision , value .length ());
761+ for (int i = 0 ; i < maxPrecision ; i += 2 ) {
762+ d = (value .charAt (i ) - '0' ) * 10 ; // tens digit of this centimal
763+ if (i + 1 < maxPrecision ) { // an unpaired final digit keeps 0 as its units digit
764+ d += (value .charAt (i + 1 ) - '0' );
765+ }
766+ dst .put ((byte ) (2 * d + 1 )); // centimal digit x is stored as 2x+1
767+ }
768+ }
769+
758770 /**
759771 * Encode a numerical value using the variable-length encoding.
760772 * @param dst The destination to which encoded digits are written.
@@ -795,6 +807,8 @@ public static int encodeNumeric(PositionedByteRange dst, double val, Order ord)
795807
796808 /**
797809 * Encode a numerical value using the variable-length encoding.
810+ * If the value has more significant digits than
811+ * {@link OrderedBytes#MAX_PRECISION}, the excess digits are lost.
798812 * @param dst The destination to which encoded digits are written.
799813 * @param val The value to encode.
800814 * @param ord The {@link Order} to respect while encoding {@code val}.
0 commit comments