Skip to content

Commit 5bb76bf

Browse files
committed
Revert "HBASE-24743 Reject to add a peer which replicate to itself earlier (#2071)"
This reverts commit 5db3ec2. TestReplicationAdmin and TestReplicationShell are broken on branch-2 and master, respectively.
1 parent 3c91c33 commit 5bb76bf

9 files changed

Lines changed: 62 additions & 68 deletions

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ protected void initializeZKBasedSystemTrackers()
801801
this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this);
802802
this.splitOrMergeTracker.start();
803803

804-
this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf, clusterId);
804+
this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
805805

806806
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
807807
this.drainingServerTracker.start();

hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ReplicationPeerManager.java

Lines changed: 11 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.commons.lang3.StringUtils;
3232
import org.apache.hadoop.conf.Configuration;
3333
import org.apache.hadoop.hbase.DoNotRetryIOException;
34-
import org.apache.hadoop.hbase.HBaseConfiguration;
3534
import org.apache.hadoop.hbase.ServerName;
3635
import org.apache.hadoop.hbase.TableName;
3736
import org.apache.hadoop.hbase.replication.BaseReplicationEndpoint;
@@ -46,11 +45,9 @@
4645
import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
4746
import org.apache.hadoop.hbase.replication.ReplicationUtils;
4847
import org.apache.hadoop.hbase.replication.regionserver.HBaseInterClusterReplicationEndpoint;
49-
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
5048
import org.apache.hadoop.hbase.zookeeper.ZKConfig;
5149
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
5250
import org.apache.yetus.audience.InterfaceAudience;
53-
import org.apache.zookeeper.KeeperException;
5451

5552
/**
5653
* Manages and performs all replication admin operations.
@@ -66,17 +63,11 @@ public class ReplicationPeerManager {
6663

6764
private final ConcurrentMap<String, ReplicationPeerDescription> peers;
6865

69-
private final String clusterId;
70-
71-
private final Configuration conf;
72-
7366
ReplicationPeerManager(ReplicationPeerStorage peerStorage, ReplicationQueueStorage queueStorage,
74-
ConcurrentMap<String, ReplicationPeerDescription> peers, Configuration conf, String clusterId) {
67+
ConcurrentMap<String, ReplicationPeerDescription> peers) {
7568
this.peerStorage = peerStorage;
7669
this.queueStorage = queueStorage;
7770
this.peers = peers;
78-
this.conf = conf;
79-
this.clusterId = clusterId;
8071
}
8172

8273
private void checkQueuesDeleted(String peerId)
@@ -254,26 +245,26 @@ void removeAllQueuesAndHFileRefs(String peerId) throws ReplicationException {
254245

255246
private void checkPeerConfig(ReplicationPeerConfig peerConfig) throws DoNotRetryIOException {
256247
String replicationEndpointImpl = peerConfig.getReplicationEndpointImpl();
257-
ReplicationEndpoint endpoint = null;
248+
boolean checkClusterKey = true;
258249
if (!StringUtils.isBlank(replicationEndpointImpl)) {
250+
// try creating an instance
251+
ReplicationEndpoint endpoint;
259252
try {
260-
// try creating an instance
261253
endpoint = Class.forName(replicationEndpointImpl)
262254
.asSubclass(ReplicationEndpoint.class).getDeclaredConstructor().newInstance();
263255
} catch (Throwable e) {
264256
throw new DoNotRetryIOException(
265257
"Can not instantiate configured replication endpoint class=" + replicationEndpointImpl,
266258
e);
267259
}
260+
// do not check cluster key if we are not HBaseInterClusterReplicationEndpoint
261+
if (!(endpoint instanceof HBaseInterClusterReplicationEndpoint)) {
262+
checkClusterKey = false;
263+
}
268264
}
269-
// Default is HBaseInterClusterReplicationEndpoint and only it needs to check the cluster key
270-
if (endpoint == null || endpoint instanceof HBaseInterClusterReplicationEndpoint) {
265+
if (checkClusterKey) {
271266
checkClusterKey(peerConfig.getClusterKey());
272267
}
273-
// Default is HBaseInterClusterReplicationEndpoint which cannot replicate to same cluster
274-
if (endpoint == null || !endpoint.canReplicateToSameCluster()) {
275-
checkClusterId(peerConfig.getClusterKey());
276-
}
277268

278269
if (peerConfig.replicateAllUserTables()) {
279270
// If replicate_all flag is true, it means all user tables will be replicated to peer cluster.
@@ -366,25 +357,6 @@ private void checkClusterKey(String clusterKey) throws DoNotRetryIOException {
366357
}
367358
}
368359

369-
private void checkClusterId(String clusterKey) throws DoNotRetryIOException {
370-
String peerClusterId = "";
371-
try {
372-
// Create the peer cluster config for get peer cluster id
373-
Configuration peerConf = HBaseConfiguration.createClusterConf(conf, clusterKey);
374-
try (ZKWatcher zkWatcher = new ZKWatcher(peerConf, this + "check-peer-cluster-id", null)) {
375-
peerClusterId = ZKClusterId.readClusterIdZNode(zkWatcher);
376-
}
377-
} catch (IOException | KeeperException e) {
378-
throw new DoNotRetryIOException("Can't get peerClusterId for clusterKey=" + clusterKey, e);
379-
}
380-
// In rare cases, the ZooKeeper settings may be messed up. That leads to an incorrect
381-
// peerClusterId value, which is the same as the source clusterId
382-
if (clusterId.equals(peerClusterId)) {
383-
throw new DoNotRetryIOException("Invalid cluster key: " + clusterKey
384-
+ ", should not replicate to itself for HBaseInterClusterReplicationEndpoint");
385-
}
386-
}
387-
388360
public List<String> getSerialPeerIdsBelongsTo(TableName tableName) {
389361
return peers.values().stream().filter(p -> p.getPeerConfig().isSerial())
390362
.filter(p -> p.getPeerConfig().needToReplicate(tableName)).map(p -> p.getPeerId())
@@ -395,7 +367,7 @@ public ReplicationQueueStorage getQueueStorage() {
395367
return queueStorage;
396368
}
397369

398-
public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf, String clusterId)
370+
public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf)
399371
throws ReplicationException {
400372
ReplicationPeerStorage peerStorage =
401373
ReplicationStorageFactory.getReplicationPeerStorage(zk, conf);
@@ -406,7 +378,7 @@ public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf, St
406378
peers.put(peerId, new ReplicationPeerDescription(peerId, enabled, peerConfig));
407379
}
408380
return new ReplicationPeerManager(peerStorage,
409-
ReplicationStorageFactory.getReplicationQueueStorage(zk, conf), peers, conf, clusterId);
381+
ReplicationStorageFactory.getReplicationQueueStorage(zk, conf), peers);
410382
}
411383

412384
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,16 @@ private void initialize() {
524524
if (!this.isSourceActive()) {
525525
return;
526526
}
527+
528+
// In rare cases, the ZooKeeper settings may be messed up. That leads to an incorrect
529+
// peerClusterId value, which is the same as the source clusterId
530+
if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) {
531+
this.terminate("ClusterId " + clusterId + " is replicating to itself: peerClusterId "
532+
+ peerClusterId + " which is not allowed by ReplicationEndpoint:"
533+
+ replicationEndpoint.getClass().getName(), null, false);
534+
this.manager.removeSource(this);
535+
return;
536+
}
527537
LOG.info("{} Source: {}, is now replicating from cluster: {}; to peer cluster: {};",
528538
logPeerId(), this.replicationQueueInfo.getQueueId(), clusterId, peerClusterId);
529539

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncReplicationAdminApi.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ public class TestAsyncReplicationAdminApi extends TestAsyncAdminBase {
7171
HBaseClassTestRule.forClass(TestAsyncReplicationAdminApi.class);
7272

7373
private final String ID_ONE = "1";
74-
private static String KEY_ONE;
74+
private final String KEY_ONE = "127.0.0.1:2181:/hbase";
7575
private final String ID_TWO = "2";
76-
private static String KEY_TWO;
76+
private final String KEY_TWO = "127.0.0.1:2181:/hbase2";
7777

7878
@BeforeClass
7979
public static void setUpBeforeClass() throws Exception {
@@ -82,8 +82,6 @@ public static void setUpBeforeClass() throws Exception {
8282
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
8383
TEST_UTIL.getConfiguration().setInt(START_LOG_ERRORS_AFTER_COUNT_KEY, 0);
8484
TEST_UTIL.startMiniCluster();
85-
KEY_ONE = TEST_UTIL.getClusterKey() + "-test1";
86-
KEY_TWO = TEST_UTIL.getClusterKey() + "-test2";
8785
ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
8886
}
8987

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/SerialReplicationTestBase.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,6 @@ protected void doStart() {
113113
protected void doStop() {
114114
notifyStopped();
115115
}
116-
117-
@Override
118-
public boolean canReplicateToSameCluster() {
119-
return true;
120-
}
121116
}
122117

123118
@BeforeClass

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestMasterReplication.java

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.Closeable;
2626
import java.io.IOException;
2727
import java.util.Arrays;
28+
import java.util.EnumSet;
2829
import java.util.List;
2930
import java.util.Optional;
3031
import java.util.Random;
@@ -33,14 +34,17 @@
3334
import org.apache.hadoop.fs.FileSystem;
3435
import org.apache.hadoop.fs.Path;
3536
import org.apache.hadoop.hbase.Cell;
36-
import org.apache.hadoop.hbase.DoNotRetryIOException;
37+
import org.apache.hadoop.hbase.ClusterMetrics;
3738
import org.apache.hadoop.hbase.HBaseClassTestRule;
3839
import org.apache.hadoop.hbase.HBaseConfiguration;
3940
import org.apache.hadoop.hbase.HBaseTestingUtility;
4041
import org.apache.hadoop.hbase.HConstants;
4142
import org.apache.hadoop.hbase.KeyValue;
4243
import org.apache.hadoop.hbase.MiniHBaseCluster;
44+
import org.apache.hadoop.hbase.ServerMetrics;
45+
import org.apache.hadoop.hbase.ServerName;
4346
import org.apache.hadoop.hbase.TableName;
47+
import org.apache.hadoop.hbase.Waiter;
4448
import org.apache.hadoop.hbase.client.Admin;
4549
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
4650
import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -68,7 +72,9 @@
6872
import org.apache.hadoop.hbase.util.HFileTestUtil;
6973
import org.apache.hadoop.hbase.wal.WALEdit;
7074
import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
75+
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
7176
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
77+
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
7278
import org.junit.After;
7379
import org.junit.Before;
7480
import org.junit.ClassRule;
@@ -170,16 +176,40 @@ public void testCyclicReplication1() throws Exception {
170176

171177
/**
172178
* Tests the replication scenario 0 -> 0. By default
173-
* {@link org.apache.hadoop.hbase.replication.regionserver.HBaseInterClusterReplicationEndpoint},
174-
* the replication peer should not be added.
179+
* {@link BaseReplicationEndpoint#canReplicateToSameCluster()} returns false, so the
180+
* ReplicationSource should terminate, and no further logs should get enqueued
175181
*/
176-
@Test(expected = DoNotRetryIOException.class)
177-
public void testLoopedReplication()
178-
throws Exception {
182+
@Test
183+
public void testLoopedReplication() throws Exception {
179184
LOG.info("testLoopedReplication");
180185
startMiniClusters(1);
181186
createTableOnClusters(table);
182187
addPeer("1", 0, 0);
188+
Thread.sleep(SLEEP_TIME);
189+
190+
// wait for source to terminate
191+
final ServerName rsName = utilities[0].getHBaseCluster().getRegionServer(0).getServerName();
192+
Waiter.waitFor(baseConfiguration, 10000, new Waiter.Predicate<Exception>() {
193+
@Override
194+
public boolean evaluate() throws Exception {
195+
ClusterMetrics clusterStatus = utilities[0].getAdmin()
196+
.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.LIVE_SERVERS));
197+
ServerMetrics serverLoad = clusterStatus.getLiveServerMetrics().get(rsName);
198+
List<ReplicationLoadSource> replicationLoadSourceList =
199+
serverLoad.getReplicationLoadSourceList();
200+
return replicationLoadSourceList.isEmpty();
201+
}
202+
});
203+
204+
Table[] htables = getHTablesOnClusters(tableName);
205+
putAndWait(row, famName, htables[0], htables[0]);
206+
rollWALAndWait(utilities[0], table.getTableName(), row);
207+
ZKWatcher zkw = utilities[0].getZooKeeperWatcher();
208+
String queuesZnode = ZNodePaths.joinZNode(zkw.getZNodePaths().baseZNode,
209+
ZNodePaths.joinZNode("replication", "rs"));
210+
List<String> listChildrenNoWatch =
211+
ZKUtil.listChildrenNoWatch(zkw, ZNodePaths.joinZNode(queuesZnode, rsName.toString()));
212+
assertEquals(0, listChildrenNoWatch.size());
183213
}
184214

185215
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationEndpoint.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,6 @@ protected void doStop() {
500500
stoppedCount.incrementAndGet();
501501
notifyStopped();
502502
}
503-
504-
@Override
505-
public boolean canReplicateToSameCluster() {
506-
return true;
507-
}
508503
}
509504

510505
public static class InterClusterReplicationEndpointForTest

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRaceWhenCreatingReplicationSource.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,6 @@ protected void doStart() {
127127
protected void doStop() {
128128
notifyStopped();
129129
}
130-
131-
@Override
132-
public boolean canReplicateToSameCluster() {
133-
return true;
134-
}
135130
}
136131

137132
@BeforeClass

hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckCleanReplicationBarriers.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,8 @@ public void testCleanReplicationBarrierWithExistTable() throws Exception {
174174
}
175175

176176
public static void createPeer() throws IOException {
177-
ReplicationPeerConfig rpc =
178-
ReplicationPeerConfig.newBuilder().setClusterKey(UTIL.getClusterKey() + "-test")
179-
.setSerial(true).build();
177+
ReplicationPeerConfig rpc = ReplicationPeerConfig.newBuilder()
178+
.setClusterKey(UTIL.getClusterKey()).setSerial(true).build();
180179
UTIL.getAdmin().addReplicationPeer(PEER_1, rpc);
181180
UTIL.getAdmin().addReplicationPeer(PEER_2, rpc);
182181
}

0 commit comments

Comments
 (0)