3232import org .apache .hadoop .hbase .HDFSBlocksDistribution ;
3333import org .apache .hadoop .hbase .HDFSBlocksDistribution .HostAndWeight ;
3434import org .apache .hadoop .hbase .HRegionInfo ;
35+ import org .apache .hadoop .hbase .HRegionLocation ;
3536import org .apache .hadoop .hbase .PrivateCellUtil ;
3637import org .apache .hadoop .hbase .client .ClientSideRegionScanner ;
38+ import org .apache .hadoop .hbase .client .Connection ;
39+ import org .apache .hadoop .hbase .client .ConnectionFactory ;
3740import org .apache .hadoop .hbase .client .IsolationLevel ;
41+ import org .apache .hadoop .hbase .client .RegionLocator ;
3842import org .apache .hadoop .hbase .client .Result ;
3943import org .apache .hadoop .hbase .client .Scan ;
4044import org .apache .hadoop .hbase .client .TableDescriptor ;
@@ -101,6 +105,15 @@ public class TableSnapshotInputFormatImpl {
101105 "hbase.TableSnapshotInputFormat.locality.enabled" ;
102106 public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT = true ;
103107
108+ /**
109+ * Whether to derive the locality of each snapshot split from the region's location recorded
110+ * in meta. This is much faster than computing HDFS block locations for every split.
111+ */
112+ public static final String SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION =
113+ "hbase.TableSnapshotInputFormat.locality.by.region.location" ;
114+
115+ public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT = false ;
116+
104117 /**
105118 * In some scenario, scan limited rows on each InputSplit for sampling data extraction
106119 */
@@ -392,17 +405,49 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
392405 SNAPSHOT_INPUTFORMAT_SCAN_METRICS_ENABLED_DEFAULT );
393406 scan .setScanMetricsEnabled (scanMetricsEnabled );
394407
408+ boolean useRegionLoc = conf .getBoolean (SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION ,
409+ SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT );
410+
411+ Connection connection = null ;
412+ RegionLocator regionLocator = null ;
413+ if (localityEnabled && useRegionLoc ) {
414+ Configuration newConf = new Configuration (conf );
415+ newConf .setInt ("hbase.hconnection.threads.max" , 1 );
416+ try {
417+ connection = ConnectionFactory .createConnection (newConf );
418+ regionLocator = connection .getRegionLocator (htd .getTableName ());
419+
420+ /* Get all locations for the table and cache it */
421+ regionLocator .getAllRegionLocations ();
422+ } finally {
423+ if (connection != null ) {
424+ connection .close ();
425+ }
426+ }
427+ }
428+
395429 List <InputSplit > splits = new ArrayList <>();
396430 for (HRegionInfo hri : regionManifests ) {
397431 // load region descriptor
432+ List <String > hosts = null ;
433+ if (localityEnabled ) {
434+ if (regionLocator != null ) {
435+ /* Get Location from the local cache */
436+ HRegionLocation
437+ location = regionLocator .getRegionLocation (hri .getStartKey (), false );
438+
439+ hosts = new ArrayList <>(1 );
440+ hosts .add (location .getHostname ());
441+ } else {
442+ hosts = calculateLocationsForInputSplit (conf , htd , hri , tableDir );
443+ }
444+ }
398445
399446 if (numSplits > 1 ) {
400447 byte [][] sp = sa .split (hri .getStartKey (), hri .getEndKey (), numSplits , true );
401448 for (int i = 0 ; i < sp .length - 1 ; i ++) {
402449 if (PrivateCellUtil .overlappingKeys (scan .getStartRow (), scan .getStopRow (), sp [i ],
403450 sp [i + 1 ])) {
404- List <String > hosts =
405- calculateLocationsForInputSplit (conf , htd , hri , tableDir , localityEnabled );
406451
407452 Scan boundedScan = new Scan (scan );
408453 if (scan .getStartRow ().length == 0 ) {
@@ -425,8 +470,7 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
425470 } else {
426471 if (PrivateCellUtil .overlappingKeys (scan .getStartRow (), scan .getStopRow (),
427472 hri .getStartKey (), hri .getEndKey ())) {
428- List <String > hosts =
429- calculateLocationsForInputSplit (conf , htd , hri , tableDir , localityEnabled );
473+
430474 splits .add (new InputSplit (htd , hri , hosts , scan , restoreDir ));
431475 }
432476 }
@@ -440,14 +484,9 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
440484 * only when localityEnabled is true.
441485 */
442486 private static List <String > calculateLocationsForInputSplit (Configuration conf ,
443- TableDescriptor htd , HRegionInfo hri , Path tableDir , boolean localityEnabled )
487+ TableDescriptor htd , HRegionInfo hri , Path tableDir )
444488 throws IOException {
445- if (localityEnabled ) { // care block locality
446- return getBestLocations (conf ,
447- HRegion .computeHDFSBlocksDistribution (conf , htd , hri , tableDir ));
448- } else { // do not care block locality
449- return null ;
450- }
489+ return getBestLocations (conf , HRegion .computeHDFSBlocksDistribution (conf , htd , hri , tableDir ));
451490 }
452491
453492 /**
0 commit comments