Skip to content

Commit 71f0354

Browse files
authored
HBASE-23304: RPCs needed for client meta information lookup (apache#904) (apache#1098)
* HBASE-23257: Track clusterID in stand by masters (apache#798) This patch implements a simple cache that all the masters can look up to serve cluster ID to clients. Active HMaster is still responsible for creating it but all the masters will read it from fs to serve clients. RPCs exposing it will come in a separate patch as a part of HBASE-18095. Signed-off-by: Andrew Purtell <apurtell@apache.org> Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org> Signed-off-by: Guangxu Cheng <guangxucheng@gmail.com> (cherry picked from commit c2e01f2) * HBASE-23275: Track active master's address in ActiveMasterManager (apache#812) Currently we just track whether an active master exists. It helps to also track the address of the active master in all the masters to help serve the client RPC requests to know which master is active. Signed-off-by: Nick Dimiduk <ndimiduk@apache.org> Signed-off-by: Andrew Purtell <apurtell@apache.org> (cherry picked from commit efebb84) * HBASE-23281: Track meta region locations in masters (apache#830) * HBASE-23281: Track meta region changes on masters This patch adds a simple cache that tracks the meta region replica locations. It keeps an eye on the region movements so that the cached locations are not stale. This information is used for servicing client RPCs for connections that use master based registry (HBASE-18095). The RPC endpoints will be added in a separate patch. Signed-off-by: Nick Dimiduk <ndimiduk@apache.org> (cherry picked from commit 8571d38) * HBASE-23304: RPCs needed for client meta information lookup (apache#904) * HBASE-23304: RPCs needed for client meta information lookup This patch implements the RPCs needed for the meta information lookup during connection init. New tests added to cover the RPC code paths. HBASE-23305 builds on this to implement the client side logic. Fixed a bunch of checkstyle nits around the places the patch touches. 
Signed-off-by: Andrew Purtell <apurtell@apache.org> (cherry picked from commit 4f8fbba)
1 parent 36cdcad commit 71f0354

29 files changed

Lines changed: 1180 additions & 118 deletions

hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -80,6 +80,7 @@
8080
import org.apache.hadoop.hbase.client.Put;
8181
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
8282
import org.apache.hadoop.hbase.client.RegionLoadStats;
83+
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
8384
import org.apache.hadoop.hbase.client.RegionStatesCount;
8485
import org.apache.hadoop.hbase.client.Result;
8586
import org.apache.hadoop.hbase.client.Scan;
@@ -93,6 +94,7 @@
9394
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
9495
import org.apache.hadoop.hbase.filter.Filter;
9596
import org.apache.hadoop.hbase.io.TimeRange;
97+
import org.apache.hadoop.hbase.master.RegionState;
9698
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
9799
import org.apache.hadoop.hbase.protobuf.ProtobufMessageConverter;
98100
import org.apache.hadoop.hbase.quotas.QuotaScope;
@@ -375,7 +377,9 @@ private static IOException makeIOExceptionOfException(Exception e) {
375377
* @see #toServerName(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName)
376378
*/
377379
public static HBaseProtos.ServerName toServerName(final ServerName serverName) {
378-
if (serverName == null) return null;
380+
if (serverName == null) {
381+
return null;
382+
}
379383
HBaseProtos.ServerName.Builder builder =
380384
HBaseProtos.ServerName.newBuilder();
381385
builder.setHostName(serverName.getHostname());
@@ -3071,6 +3075,44 @@ public static ProcedureDescription buildProcedureDescription(String signature, S
30713075
return builder.build();
30723076
}
30733077

3078+
/**
3079+
* Get the Meta region state from the passed data bytes. Can handle both old and new style
3080+
* server names.
3081+
* @param data protobuf serialized data with meta server name.
3082+
* @param replicaId replica ID for this region
3083+
* @return RegionState instance corresponding to the serialized data.
3084+
* @throws DeserializationException if the data is invalid.
3085+
*/
3086+
public static RegionState parseMetaRegionStateFrom(final byte[] data, int replicaId)
3087+
throws DeserializationException {
3088+
RegionState.State state = RegionState.State.OPEN;
3089+
ServerName serverName;
3090+
if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
3091+
try {
3092+
int prefixLen = ProtobufUtil.lengthOfPBMagic();
3093+
ZooKeeperProtos.MetaRegionServer rl =
3094+
ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen,
3095+
data.length - prefixLen);
3096+
if (rl.hasState()) {
3097+
state = RegionState.State.convert(rl.getState());
3098+
}
3099+
HBaseProtos.ServerName sn = rl.getServer();
3100+
serverName = ServerName.valueOf(
3101+
sn.getHostName(), sn.getPort(), sn.getStartCode());
3102+
} catch (InvalidProtocolBufferException e) {
3103+
throw new DeserializationException("Unable to parse meta region location");
3104+
}
3105+
} else {
3106+
// old style of meta region location?
3107+
serverName = parseServerNameFrom(data);
3108+
}
3109+
if (serverName == null) {
3110+
state = RegionState.State.OFFLINE;
3111+
}
3112+
return new RegionState(RegionReplicaUtil.getRegionInfoForReplica(
3113+
RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), state, serverName);
3114+
}
3115+
30743116
/**
30753117
* Get a ServerName from the passed in data bytes.
30763118
* @param data Data with a serialized server name in it; can handle the old style

hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ public class ZNodePaths {
4141
// TODO: Replace this with ZooKeeper constant when ZOOKEEPER-277 is resolved.
4242
public static final char ZNODE_PATH_SEPARATOR = '/';
4343

44-
private static final String META_ZNODE_PREFIX = "meta-region-server";
44+
public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver";
45+
public static final String META_ZNODE_PREFIX = "meta-region-server";
4546
private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup";
4647

4748
// base znode for this cluster
@@ -104,7 +105,7 @@ public class ZNodePaths {
104105
public ZNodePaths(Configuration conf) {
105106
baseZNode = conf.get(ZOOKEEPER_ZNODE_PARENT, DEFAULT_ZOOKEEPER_ZNODE_PARENT);
106107
ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
107-
metaZNodePrefix = conf.get("zookeeper.znode.metaserver", META_ZNODE_PREFIX);
108+
metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
108109
String defaultMetaReplicaZNode = ZNodePaths.joinZNode(baseZNode, metaZNodePrefix);
109110
builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode);
110111
int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM);
@@ -189,7 +190,19 @@ public String getZNodeForReplica(int replicaId) {
189190
}
190191

191192
/**
192-
* Parse the meta replicaId from the passed znode name.
193+
* Parses the meta replicaId from the passed path.
194+
* @param path the name of the full path which includes baseZNode.
195+
* @return replicaId
196+
*/
197+
public int getMetaReplicaIdFromPath(String path) {
198+
// Extract the znode from path. The prefix is of the following format.
199+
// baseZNode + PATH_SEPARATOR.
200+
int prefixLen = baseZNode.length() + 1;
201+
return getMetaReplicaIdFromZnode(path.substring(prefixLen));
202+
}
203+
204+
/**
205+
* Parses the meta replicaId from the passed znode.
193206
* @param znode the name of the znode, does not include baseZNode
194207
* @return replicaId
195208
*/

hbase-protocol-shaded/src/main/protobuf/Master.proto

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,3 +1196,47 @@ service HbckService {
11961196
rpc FixMeta(FixMetaRequest)
11971197
returns(FixMetaResponse);
11981198
}
1199+
1200+
/** Request and response to get the clusterID for this cluster */
1201+
message GetClusterIdRequest {
1202+
}
1203+
message GetClusterIdResponse {
1204+
/** Not set if cluster ID could not be determined. */
1205+
optional string cluster_id = 1;
1206+
}
1207+
1208+
/** Request and response to get the currently active master name for this cluster */
1209+
message GetActiveMasterRequest {
1210+
}
1211+
message GetActiveMasterResponse {
1212+
/** Not set if an active master could not be determined. */
1213+
optional ServerName server_name = 1;
1214+
}
1215+
1216+
/** Request and response to get the current list of meta region locations */
1217+
message GetMetaRegionLocationsRequest {
1218+
}
1219+
message GetMetaRegionLocationsResponse {
1220+
/** Empty if meta region locations could not be determined. */
1221+
repeated RegionLocation meta_locations = 1;
1222+
}
1223+
1224+
/**
1225+
* Implements all the RPCs needed by clients to look up cluster meta information needed for connection establishment.
1226+
*/
1227+
service ClientMetaService {
1228+
/**
1229+
* Get Cluster ID for this cluster.
1230+
*/
1231+
rpc GetClusterId(GetClusterIdRequest) returns(GetClusterIdResponse);
1232+
1233+
/**
1234+
* Get active master server name for this cluster.
1235+
*/
1236+
rpc GetActiveMaster(GetActiveMasterRequest) returns(GetActiveMasterResponse);
1237+
1238+
/**
1239+
* Get current meta replicas' region locations.
1240+
*/
1241+
rpc GetMetaRegionLocations(GetMetaRegionLocationsRequest) returns(GetMetaRegionLocationsResponse);
1242+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
*
33
* Licensed to the Apache Software Foundation (ASF) under one
44
* or more contributor license agreements. See the NOTICE file
@@ -17,25 +17,24 @@
1717
* limitations under the License.
1818
*/
1919
package org.apache.hadoop.hbase.master;
20-
2120
import java.io.IOException;
21+
import java.util.Optional;
2222
import java.util.concurrent.atomic.AtomicBoolean;
23-
24-
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
25-
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
26-
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
27-
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
28-
import org.apache.yetus.audience.InterfaceAudience;
2923
import org.apache.hadoop.hbase.Server;
3024
import org.apache.hadoop.hbase.ServerName;
3125
import org.apache.hadoop.hbase.ZNodeClearer;
3226
import org.apache.hadoop.hbase.exceptions.DeserializationException;
3327
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
34-
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
28+
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
3529
import org.apache.hadoop.hbase.zookeeper.ZKListener;
30+
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
31+
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
32+
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
33+
import org.apache.yetus.audience.InterfaceAudience;
3634
import org.apache.zookeeper.KeeperException;
3735
import org.slf4j.Logger;
3836
import org.slf4j.LoggerFactory;
37+
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
3938

4039
/**
4140
* Handles everything on master-side related to master election.
@@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZKListener {
5756
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
5857
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
5958

59+
// This server's information.
6060
private final ServerName sn;
6161
private int infoPort;
6262
private final Server master;
6363

64+
// Active master's server name. Invalidated anytime active master changes (based on ZK
65+
// notifications) and lazily fetched on-demand.
66+
// ServerName is immutable, so we don't need heavy synchronization around it.
67+
private volatile ServerName activeMasterServerName;
68+
6469
/**
65-
* @param watcher
70+
* @param watcher ZK watcher
6671
* @param sn ServerName
6772
* @param master In an instance of a Master.
6873
*/
@@ -106,6 +111,30 @@ void handle(final String path) {
106111
}
107112
}
108113

114+
/**
115+
* Fetches the active master's ServerName from zookeeper.
116+
*/
117+
private void fetchAndSetActiveMasterServerName() {
118+
LOG.debug("Attempting to fetch active master sn from zk");
119+
try {
120+
activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher);
121+
} catch (IOException | KeeperException e) {
122+
// Log and ignore for now and re-fetch later if needed.
123+
LOG.error("Error fetching active master information", e);
124+
}
125+
}
126+
127+
public Optional<ServerName> getActiveMasterServerName() {
128+
if (!clusterHasActiveMaster.get()) {
129+
return Optional.empty();
130+
}
131+
if (activeMasterServerName == null) {
132+
fetchAndSetActiveMasterServerName();
133+
}
134+
// It could still be null, but return whatever we have.
135+
return Optional.ofNullable(activeMasterServerName);
136+
}
137+
109138
/**
110139
* Handle a change in the master node. Doesn't matter whether this was called
111140
* from a nodeCreated or nodeDeleted event because there are no guarantees
@@ -134,6 +163,9 @@ private void handleMasterNodeChange() {
134163
// Notify any thread waiting to become the active master
135164
clusterHasActiveMaster.notifyAll();
136165
}
166+
// Reset the active master sn. Will be re-fetched later if needed.
167+
// We don't want to make a synchronous RPC under a monitor.
168+
activeMasterServerName = null;
137169
}
138170
} catch (KeeperException ke) {
139171
master.abort("Received an unexpected KeeperException, aborting", ke);
@@ -151,8 +183,8 @@ private void handleMasterNodeChange() {
151183
* @param checkInterval the interval to check if the master is stopped
152184
* @param startupStatus the monitor status to track the progress
153185
* @return True if no issue becoming active master else false if another
154-
* master was running or if some other problem (zookeeper, stop flag has been
155-
* set on this Master)
186+
* master was running or if some other problem (zookeeper, stop flag has been
187+
* set on this Master)
156188
*/
157189
boolean blockUntilBecomingActiveMaster(
158190
int checkInterval, MonitoredTask startupStatus) {
@@ -178,10 +210,14 @@ boolean blockUntilBecomingActiveMaster(
178210
// We are the master, return
179211
startupStatus.setStatus("Successfully registered as active master.");
180212
this.clusterHasActiveMaster.set(true);
213+
activeMasterServerName = sn;
181214
LOG.info("Registered as active master=" + this.sn);
182215
return true;
183216
}
184217

218+
// Invalidate the active master name so that subsequent requests do not get any stale
219+
// master information. Will be re-fetched if needed.
220+
activeMasterServerName = null;
185221
// There is another active master running elsewhere or this is a restart
186222
// and the master ephemeral node has not expired yet.
187223
this.clusterHasActiveMaster.set(true);
@@ -208,7 +244,8 @@ boolean blockUntilBecomingActiveMaster(
208244
ZKUtil.deleteNode(this.watcher, this.watcher.getZNodePaths().masterAddressZNode);
209245

210246
// We may have failed to delete the znode at the previous step, but
211-
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
247+
// we delete the file anyway: a second attempt to delete the znode is likely to fail
248+
// again.
212249
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
213250
} else {
214251
msg = "Another master is the active master, " + currentMaster +

0 commit comments

Comments
 (0)