Skip to content

Commit c508494

Browse files
committed
HBASE-27314 Make index block be customized and configured (apache#4763)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent b733485 commit c508494

18 files changed

Lines changed: 1032 additions & 80 deletions

File tree

hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.hbase.exceptions.HBaseException;
2727
import org.apache.hadoop.hbase.io.compress.Compression;
2828
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
29+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2930
import org.apache.hadoop.hbase.regionserver.BloomType;
3031
import org.apache.hadoop.hbase.util.Bytes;
3132
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
@@ -346,6 +347,11 @@ public DataBlockEncoding getDataBlockEncoding() {
346347
return delegatee.getDataBlockEncoding();
347348
}
348349

350+
@Override
351+
public IndexBlockEncoding getIndexBlockEncoding() {
352+
return delegatee.getIndexBlockEncoding();
353+
}
354+
349355
/**
350356
* Set data block encoding algorithm used in block cache.
351357
* @param value What kind of data block encoding will be used.

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptor.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
2525
import org.apache.hadoop.hbase.io.compress.Compression;
2626
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2728
import org.apache.hadoop.hbase.regionserver.BloomType;
2829
import org.apache.hadoop.hbase.util.Bytes;
2930
import org.apache.yetus.audience.InterfaceAudience;
@@ -104,6 +105,9 @@ public interface ColumnFamilyDescriptor {
104105
/** Returns the data block encoding algorithm used in block cache and optionally on disk */
105106
DataBlockEncoding getDataBlockEncoding();
106107

108+
/** Returns the index block encoding algorithm used in block cache and optionally on disk */
109+
IndexBlockEncoding getIndexBlockEncoding();
110+
107111
/** Returns Return the raw crypto key attribute for the family, or null if not set */
108112
byte[] getEncryptionKey();
109113

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ColumnFamilyDescriptorBuilder.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.hadoop.hbase.exceptions.HBaseException;
3232
import org.apache.hadoop.hbase.io.compress.Compression;
3333
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
34+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
3435
import org.apache.hadoop.hbase.regionserver.BloomType;
3536
import org.apache.hadoop.hbase.util.Bytes;
3637
import org.apache.hadoop.hbase.util.PrettyPrinter;
@@ -81,6 +82,10 @@ public class ColumnFamilyDescriptorBuilder {
8182
public static final String DATA_BLOCK_ENCODING = "DATA_BLOCK_ENCODING";
8283
private static final Bytes DATA_BLOCK_ENCODING_BYTES =
8384
new Bytes(Bytes.toBytes(DATA_BLOCK_ENCODING));
85+
@InterfaceAudience.Private
86+
public static final String INDEX_BLOCK_ENCODING = "INDEX_BLOCK_ENCODING";
87+
private static final Bytes INDEX_BLOCK_ENCODING_BYTES =
88+
new Bytes(Bytes.toBytes(INDEX_BLOCK_ENCODING));
8489
/**
8590
* Key for the BLOCKCACHE attribute. A more exact name would be CACHE_DATA_ON_READ because this
8691
* flag sets whether or not we cache DATA blocks. We always cache INDEX and BLOOM blocks; caching
@@ -196,6 +201,11 @@ public class ColumnFamilyDescriptorBuilder {
196201
*/
197202
public static final DataBlockEncoding DEFAULT_DATA_BLOCK_ENCODING = DataBlockEncoding.NONE;
198203

204+
/**
205+
* Default index block encoding algorithm.
206+
*/
207+
public static final IndexBlockEncoding DEFAULT_INDEX_BLOCK_ENCODING = IndexBlockEncoding.NONE;
208+
199209
/**
200210
* Default number of versions of a record to keep.
201211
*/
@@ -298,6 +308,7 @@ public static Map<String, String> getDefaultValues() {
298308
DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
299309
DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
300310
DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
311+
DEFAULT_VALUES.put(INDEX_BLOCK_ENCODING, String.valueOf(DEFAULT_INDEX_BLOCK_ENCODING));
301312
// Do NOT add this key/value by default. NEW_VERSION_BEHAVIOR is NOT defined in hbase1 so
302313
// it is not possible to make an hbase1 HCD the same as an hbase2 HCD and so the replication
303314
// compare of schemas will fail. It is OK not adding the below to the initial map because of
@@ -477,6 +488,11 @@ public ColumnFamilyDescriptorBuilder setDataBlockEncoding(DataBlockEncoding valu
477488
return this;
478489
}
479490

491+
public ColumnFamilyDescriptorBuilder setIndexBlockEncoding(IndexBlockEncoding value) {
492+
desc.setIndexBlockEncoding(value);
493+
return this;
494+
}
495+
480496
public ColumnFamilyDescriptorBuilder setEncryptionKey(final byte[] value) {
481497
desc.setEncryptionKey(value);
482498
return this;
@@ -814,6 +830,22 @@ public ModifyableColumnFamilyDescriptor setDataBlockEncoding(DataBlockEncoding t
814830
type == null ? DataBlockEncoding.NONE.name() : type.name());
815831
}
816832

833+
@Override
834+
public IndexBlockEncoding getIndexBlockEncoding() {
835+
return getStringOrDefault(INDEX_BLOCK_ENCODING_BYTES,
836+
n -> IndexBlockEncoding.valueOf(n.toUpperCase()), IndexBlockEncoding.NONE);
837+
}
838+
839+
/**
840+
* Set index block encoding algorithm used in block cache.
841+
* @param type What kind of index block encoding will be used.
842+
* @return this (for chained invocation)
843+
*/
844+
public ModifyableColumnFamilyDescriptor setIndexBlockEncoding(IndexBlockEncoding type) {
845+
return setValue(INDEX_BLOCK_ENCODING_BYTES,
846+
type == null ? IndexBlockEncoding.NONE.name() : type.name());
847+
}
848+
817849
/**
818850
* Set whether the tags should be compressed along with DataBlockEncoding. When no
819851
* DataBlockEncoding is being used, this has no effect. * @return this (for chained

hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestColumnFamilyDescriptorBuilder.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.hadoop.hbase.io.compress.Compression;
3232
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
3333
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
34+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
3435
import org.apache.hadoop.hbase.regionserver.BloomType;
3536
import org.apache.hadoop.hbase.testclassification.MiscTests;
3637
import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -196,7 +197,7 @@ public void testSetTimeToLive() throws HBaseException {
196197
@Test
197198
public void testDefaultBuilder() {
198199
final Map<String, String> defaultValueMap = ColumnFamilyDescriptorBuilder.getDefaultValues();
199-
assertEquals(defaultValueMap.size(), 11);
200+
assertEquals(defaultValueMap.size(), 12);
200201
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.BLOOMFILTER),
201202
BloomType.ROW.toString());
202203
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.REPLICATION_SCOPE), "0");
@@ -216,6 +217,8 @@ public void testDefaultBuilder() {
216217
KeepDeletedCells.FALSE.toString());
217218
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.DATA_BLOCK_ENCODING),
218219
DataBlockEncoding.NONE.toString());
220+
assertEquals(defaultValueMap.get(ColumnFamilyDescriptorBuilder.INDEX_BLOCK_ENCODING),
221+
IndexBlockEncoding.NONE.toString());
219222
}
220223

221224
@Test
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.io.encoding;
19+
20+
import java.io.IOException;
21+
import java.io.OutputStream;
22+
import org.apache.hadoop.hbase.util.Bytes;
23+
import org.apache.yetus.audience.InterfaceAudience;
24+
25+
/**
26+
* Provide access to all index block encoding algorithms. All of the algorithms are required to have
27+
* unique id which should <b>NEVER</b> be changed. If you want to add a new algorithm/version,
28+
* assign it a new id. Announce the new id in the HBase mailing list to prevent collisions.
29+
*/
30+
@InterfaceAudience.Public
31+
public enum IndexBlockEncoding {
32+
33+
/** Disable index block encoding. */
34+
NONE(0, null),
35+
// id 1 is reserved for the PREFIX_TREE algorithm to be added later
36+
PREFIX_TREE(1, null);
37+
38+
private final short id;
39+
private final byte[] idInBytes;
40+
private final String encoderCls;
41+
42+
public static final int ID_SIZE = Bytes.SIZEOF_SHORT;
43+
44+
/** Maps data block encoding ids to enum instances. */
45+
private static IndexBlockEncoding[] idArray = new IndexBlockEncoding[Byte.MAX_VALUE + 1];
46+
47+
static {
48+
for (IndexBlockEncoding algo : values()) {
49+
if (idArray[algo.id] != null) {
50+
throw new RuntimeException(
51+
String.format("Two data block encoder algorithms '%s' and '%s' have " + "the same id %d",
52+
idArray[algo.id].toString(), algo.toString(), (int) algo.id));
53+
}
54+
idArray[algo.id] = algo;
55+
}
56+
}
57+
58+
private IndexBlockEncoding(int id, String encoderClsName) {
59+
if (id < 0 || id > Byte.MAX_VALUE) {
60+
throw new AssertionError("Data block encoding algorithm id is out of range: " + id);
61+
}
62+
this.id = (short) id;
63+
this.idInBytes = Bytes.toBytes(this.id);
64+
if (idInBytes.length != ID_SIZE) {
65+
// White this may seem redundant, if we accidentally serialize
66+
// the id as e.g. an int instead of a short, all encoders will break.
67+
throw new RuntimeException("Unexpected length of encoder ID byte " + "representation: "
68+
+ Bytes.toStringBinary(idInBytes));
69+
}
70+
this.encoderCls = encoderClsName;
71+
}
72+
73+
/** Returns name converted to bytes. */
74+
public byte[] getNameInBytes() {
75+
return Bytes.toBytes(toString());
76+
}
77+
78+
/** Returns The id of a data block encoder. */
79+
public short getId() {
80+
return id;
81+
}
82+
83+
/**
84+
* Writes id in bytes.
85+
* @param stream where the id should be written.
86+
*/
87+
public void writeIdInBytes(OutputStream stream) throws IOException {
88+
stream.write(idInBytes);
89+
}
90+
91+
/**
92+
* Writes id bytes to the given array starting from offset.
93+
* @param dest output array
94+
* @param offset starting offset of the output array n
95+
*/
96+
public void writeIdInBytes(byte[] dest, int offset) throws IOException {
97+
System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE);
98+
}
99+
100+
/**
101+
* Find and return the name of data block encoder for the given id.
102+
* @param encoderId id of data block encoder
103+
* @return name, same as used in options in column family
104+
*/
105+
public static String getNameFromId(short encoderId) {
106+
return getEncodingById(encoderId).toString();
107+
}
108+
109+
public static IndexBlockEncoding getEncodingById(short indexBlockEncodingId) {
110+
IndexBlockEncoding algorithm = null;
111+
if (indexBlockEncodingId >= 0 && indexBlockEncodingId <= Byte.MAX_VALUE) {
112+
algorithm = idArray[indexBlockEncodingId];
113+
}
114+
if (algorithm == null) {
115+
throw new IllegalArgumentException(String
116+
.format("There is no index block encoder for given id '%d'", (int) indexBlockEncodingId));
117+
}
118+
return algorithm;
119+
}
120+
121+
}

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.hadoop.hbase.io.compress.Compression;
2525
import org.apache.hadoop.hbase.io.crypto.Encryption;
2626
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
27+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2728
import org.apache.hadoop.hbase.util.Bytes;
2829
import org.apache.hadoop.hbase.util.ChecksumType;
2930
import org.apache.hadoop.hbase.util.ClassSize;
@@ -58,6 +59,7 @@ public class HFileContext implements HeapSize, Cloneable {
5859
/** Number of uncompressed bytes we allow per block. */
5960
private int blocksize = HConstants.DEFAULT_BLOCKSIZE;
6061
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
62+
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
6163
/** Encryption algorithm and key used */
6264
private Encryption.Context cryptoContext = Encryption.Context.NONE;
6365
private long fileCreateTime;
@@ -89,13 +91,14 @@ public HFileContext(HFileContext context) {
8991
this.columnFamily = context.columnFamily;
9092
this.tableName = context.tableName;
9193
this.cellComparator = context.cellComparator;
94+
this.indexBlockEncoding = context.indexBlockEncoding;
9295
}
9396

9497
HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
9598
Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
9699
int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
97100
Encryption.Context cryptoContext, long fileCreateTime, String hfileName, byte[] columnFamily,
98-
byte[] tableName, CellComparator cellComparator) {
101+
byte[] tableName, CellComparator cellComparator, IndexBlockEncoding indexBlockEncoding) {
99102
this.usesHBaseChecksum = useHBaseChecksum;
100103
this.includesMvcc = includesMvcc;
101104
this.includesTags = includesTags;
@@ -107,6 +110,9 @@ public HFileContext(HFileContext context) {
107110
if (encoding != null) {
108111
this.encoding = encoding;
109112
}
113+
if (indexBlockEncoding != null) {
114+
this.indexBlockEncoding = indexBlockEncoding;
115+
}
110116
this.cryptoContext = cryptoContext;
111117
this.fileCreateTime = fileCreateTime;
112118
this.hfileName = hfileName;
@@ -186,6 +192,10 @@ public DataBlockEncoding getDataBlockEncoding() {
186192
return encoding;
187193
}
188194

195+
public IndexBlockEncoding getIndexBlockEncoding() {
196+
return indexBlockEncoding;
197+
}
198+
189199
public Encryption.Context getEncryptionContext() {
190200
return cryptoContext;
191201
}
@@ -253,6 +263,8 @@ public String toString() {
253263
sb.append(blocksize);
254264
sb.append(", encoding=");
255265
sb.append(encoding);
266+
sb.append(", indexBlockEncoding=");
267+
sb.append(indexBlockEncoding);
256268
sb.append(", includesMvcc=");
257269
sb.append(includesMvcc);
258270
sb.append(", includesTags=");

hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
2323
import org.apache.hadoop.hbase.io.crypto.Encryption;
2424
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
25+
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
2526
import org.apache.hadoop.hbase.util.ChecksumType;
2627
import org.apache.yetus.audience.InterfaceAudience;
2728

@@ -50,6 +51,8 @@ public class HFileContextBuilder {
5051
/** Number of uncompressed bytes we allow per block. */
5152
private int blocksize = HConstants.DEFAULT_BLOCKSIZE;
5253
private DataBlockEncoding encoding = DataBlockEncoding.NONE;
54+
/** the index block encoding type **/
55+
private IndexBlockEncoding indexBlockEncoding = IndexBlockEncoding.NONE;
5356
/** Crypto context */
5457
private Encryption.Context cryptoContext = Encryption.Context.NONE;
5558
private long fileCreateTime = 0;
@@ -128,6 +131,11 @@ public HFileContextBuilder withDataBlockEncoding(DataBlockEncoding encoding) {
128131
return this;
129132
}
130133

134+
public HFileContextBuilder withIndexBlockEncoding(IndexBlockEncoding indexBlockEncoding) {
135+
this.indexBlockEncoding = indexBlockEncoding;
136+
return this;
137+
}
138+
131139
public HFileContextBuilder withEncryptionContext(Encryption.Context cryptoContext) {
132140
this.cryptoContext = cryptoContext;
133141
return this;
@@ -161,6 +169,6 @@ public HFileContextBuilder withCellComparator(CellComparator cellComparator) {
161169
public HFileContext build() {
162170
return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
163171
compressTags, checksumType, bytesPerChecksum, blocksize, encoding, cryptoContext,
164-
fileCreateTime, hfileName, columnFamily, tableName, cellComparator);
172+
fileCreateTime, hfileName, columnFamily, tableName, cellComparator, indexBlockEncoding);
165173
}
166174
}

0 commit comments

Comments
 (0)