Skip to content

Commit 93a68f9

Browse files
YutSeanApache9
authored and committed
HBASE-26566 Optimize encodeNumeric in OrderedBytes (#3940)
Signed-off-by: Reid Chan <reidchan@apache.org>
1 parent afabefb commit 93a68f9

2 files changed

Lines changed: 53 additions & 30 deletions

File tree

hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java

Lines changed: 41 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,6 @@ public class OrderedBytes {
309309

310310
public static final Charset UTF8 = Charset.forName("UTF-8");
311311
private static final byte TERM = 0x00;
312-
private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
313-
private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
314-
private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
315-
private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
316312

317313
/**
318314
* Max precision guaranteed to fit into a {@code long}.
@@ -637,7 +633,7 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
637633
byte[] a = dst.getBytes();
638634
boolean isNeg = val.signum() == -1;
639635
final int offset = dst.getOffset(), start = dst.getPosition();
640-
int e = 0, d, startM;
636+
int e = 0, startM;
641637

642638
if (isNeg) { /* Small negative number: 0x14, -E, ~M */
643639
dst.put(NEG_SMALL);
@@ -646,21 +642,17 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
646642
}
647643

648644
// normalize abs(val) to determine E
649-
while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
650-
while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
645+
int zerosBeforeFirstNonZero = abs.scale() - abs.precision();
646+
int lengthToMoveRight = zerosBeforeFirstNonZero % 2 ==
647+
0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1;
648+
e = lengthToMoveRight / 2;
649+
abs = abs.movePointRight(lengthToMoveRight);
651650

652651
putVaruint64(dst, e, !isNeg); // encode appropriate E value.
653652

654653
// encode M by peeling off centimal digits, encoding x as 2x+1
655654
startM = dst.getPosition();
656-
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
657-
// numeric scale.
658-
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
659-
abs = abs.movePointRight(2);
660-
d = abs.intValue();
661-
dst.put((byte) ((2 * d + 1) & 0xff));
662-
abs = abs.subtract(BigDecimal.valueOf(d));
663-
}
655+
encodeToCentimal(dst, abs);
664656
// terminal digit should be 2x
665657
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
666658
if (isNeg) {
@@ -712,7 +704,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
712704
byte[] a = dst.getBytes();
713705
boolean isNeg = val.signum() == -1;
714706
final int start = dst.getPosition(), offset = dst.getOffset();
715-
int e = 0, d, startM;
707+
int e = 0, startM;
716708

717709
if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
718710
dst.put(NEG_LARGE);
@@ -721,9 +713,10 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
721713
}
722714

723715
// normalize abs(val) to determine E
724-
while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
725-
while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
726-
while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
716+
int integerDigits = abs.precision() - abs.scale();
717+
int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1;
718+
e = lengthToMoveLeft / 2;
719+
abs = abs.movePointLeft(lengthToMoveLeft);
727720

728721
// encode appropriate header byte and/or E value.
729722
if (e > 10) { /* large number, write out {~,}E */
@@ -738,14 +731,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
738731

739732
// encode M by peeling off centimal digits, encoding x as 2x+1
740733
startM = dst.getPosition();
741-
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
742-
// numeric scale.
743-
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
744-
abs = abs.movePointRight(2);
745-
d = abs.intValue();
746-
dst.put((byte) (2 * d + 1));
747-
abs = abs.subtract(BigDecimal.valueOf(d));
748-
}
734+
encodeToCentimal(dst, abs);
749735
// terminal digit should be 2x
750736
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
751737
if (isNeg) {
@@ -755,6 +741,32 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
755741
return dst.getPosition() - start;
756742
}
757743

744+
/**
745+
* Encode a value {@code val} in [0.01, 1.0) into centimal (base-100) digits.
746+
* Utility function for {@link #encodeNumericLarge} and {@link #encodeNumericSmall}.
747+
* @param dst The destination to which encoded digits are written.
748+
* @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0).
749+
*/
750+
private static void encodeToCentimal(PositionedByteRange dst, BigDecimal val) {
751+
// The input value val must be in [0.01, 1.0)
752+
String stringOfAbs = val.stripTrailingZeros().toPlainString();
753+
String value = stringOfAbs.substring(stringOfAbs.indexOf('.') + 1);
754+
int d;
755+
756+
// The input value has already been normalized, so we encode at most MAX_PRECISION
757+
// significant digits into centimals. If the first fractional digit is 0, it is not
758+
// significant, so we read one digit more than MAX_PRECISION to compensate.
759+
int maxPrecision = value.charAt(0) == '0' ? MAX_PRECISION + 1 : MAX_PRECISION;
760+
maxPrecision = Math.min(maxPrecision, value.length());
761+
for (int i = 0; i < maxPrecision; i += 2) {
762+
d = (value.charAt(i) - '0') * 10;
763+
if (i + 1 < maxPrecision) {
764+
d += (value.charAt(i + 1) - '0');
765+
}
766+
dst.put((byte) (2 * d + 1));
767+
}
768+
}
769+
758770
/**
759771
* Encode a numerical value using the variable-length encoding.
760772
* @param dst The destination to which encoded digits are written.
@@ -795,6 +807,8 @@ public static int encodeNumeric(PositionedByteRange dst, double val, Order ord)
795807

796808
/**
797809
* Encode a numerical value using the variable-length encoding.
810+
* If the value has more significant digits than
811+
* {@link OrderedBytes#MAX_PRECISION}, the excess digits are lost.
798812
* @param dst The destination to which encoded digits are written.
799813
* @param val The value to encode.
800814
* @param ord The {@link Order} to respect while encoding {@code val}.

hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,14 @@ public class TestOrderedBytes {
7070
static final BigDecimal[] BD_VALS =
7171
{ null, BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE),
7272
BigDecimal.valueOf(Double.MAX_VALUE), BigDecimal.valueOf(Double.MIN_VALUE),
73-
BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)) };
73+
BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)),
74+
BigDecimal.valueOf(Long.MAX_VALUE).pow(64),
75+
BigDecimal.valueOf(Long.MAX_VALUE).pow(64).negate(),
76+
new BigDecimal("0." + String.join("", Collections.nCopies(500, "123"))),
77+
new BigDecimal("-0." + String.join("", Collections.nCopies(500, "123")))
78+
};
7479
static final int[] BD_LENGTHS =
75-
{ 1, 11, 11, 11, 4, 12 };
80+
{ 1, 11, 11, 11, 4, 12, 19, 19, 18, 18 };
7681

7782
/*
7883
* This is the smallest difference between two doubles in D_VALS
@@ -335,7 +340,11 @@ public void testNumericOther() {
335340
if (null == BD_VALS[i]) {
336341
assertEquals(BD_VALS[i], decoded);
337342
} else {
338-
assertEquals("Deserialization failed.", 0, BD_VALS[i].compareTo(decoded));
343+
// The value is rounded to a fixed precision during encoding, so values with
344+
// many digits lose precision. Normalize the expected value the same way before
345+
// comparing it with the decoded result.
346+
assertEquals("Deserialization failed.", 0,
347+
OrderedBytes.normalize(BD_VALS[i]).compareTo(decoded));
339348
}
340349
assertEquals("Did not consume enough bytes.", BD_LENGTHS[i], buf1.getPosition() - 1);
341350
}

0 commit comments

Comments
 (0)