Skip to content

Commit 1690414

Browse files
committed
HBASE-23735 [Flakey Tests] TestClusterRestartFailover & TestClusterRestartFailoverSplitWithoutZk
1 parent 753cc99 commit 1690414

1 file changed

Lines changed: 21 additions & 4 deletions

File tree

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -40,8 +40,10 @@
4040
import org.apache.hadoop.hbase.master.assignment.ServerStateNode;
4141
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
4242
import org.apache.hadoop.hbase.procedure2.Procedure;
43+
import org.apache.hadoop.hbase.regionserver.HRegionServer;
4344
import org.apache.hadoop.hbase.testclassification.LargeTests;
4445
import org.apache.hadoop.hbase.testclassification.MasterTests;
46+
import org.apache.hadoop.hbase.util.JVMClusterUtil;
4547
import org.apache.zookeeper.KeeperException;
4648
import org.junit.ClassRule;
4749
import org.junit.Test;
@@ -58,7 +60,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
5860

5961
private static final Logger LOG = LoggerFactory.getLogger(TestClusterRestartFailover.class);
6062

61-
private static CountDownLatch SCP_LATCH;
63+
private volatile static CountDownLatch SCP_LATCH;
6264
private static ServerName SERVER_FOR_TEST;
6365

6466
@Override
@@ -79,7 +81,16 @@ public void test() throws Exception {
7981
setupCluster();
8082
setupTable();
8183

82-
SERVER_FOR_TEST = UTIL.getHBaseCluster().getRegionServer(0).getServerName();
84+
// Find server that does not have hbase:namespace on it. This test holds up SCPs. If it
85+
// holds up the server w/ hbase:namespace, the Master initialization will be held up
86+
// because this table is not online and test fails.
87+
for (JVMClusterUtil.RegionServerThread rst:
88+
UTIL.getHBaseCluster().getLiveRegionServerThreads()) {
89+
HRegionServer rs = rst.getRegionServer();
90+
if (rs.getRegions(TableName.NAMESPACE_TABLE_NAME).isEmpty()) {
91+
SERVER_FOR_TEST = rs.getServerName();
92+
}
93+
}
8394
UTIL.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST) != null);
8495
ServerStateNode serverNode = getServerStateNode(SERVER_FOR_TEST);
8596
assertNotNull(serverNode);
@@ -98,8 +109,9 @@ public void test() throws Exception {
98109
LOG.info("Restarting cluster");
99110
UTIL.restartHBaseCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class)
100111
.numMasters(1).numRegionServers(3).rsPorts(ports).build());
112+
LOG.info("Started cluster");
101113
UTIL.waitFor(60000, () -> UTIL.getHBaseCluster().getMaster().isInitialized());
102-
114+
LOG.info("Started cluster master, waiting for {}", SERVER_FOR_TEST);
103115
UTIL.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST) != null);
104116
serverNode = getServerStateNode(SERVER_FOR_TEST);
105117
assertFalse("serverNode should not be ONLINE during SCP processing",
@@ -113,6 +125,7 @@ public void test() throws Exception {
113125
Procedure.NO_PROC_ID);
114126

115127
// Wait for the SCP to finish
128+
LOG.info("Waiting on latch");
116129
SCP_LATCH.countDown();
117130
UTIL.waitFor(60000, () -> procedure.get().isFinished());
118131

@@ -126,13 +139,17 @@ public void test() throws Exception {
126139
}
127140

128141
private void setupCluster() throws Exception {
142+
LOG.info("Setup cluster");
129143
UTIL.startMiniCluster(
130144
StartMiniClusterOption.builder().masterClass(HMasterForTest.class).numMasters(1)
131145
.numRegionServers(3).build());
146+
LOG.info("Cluster is up");
132147
UTIL.waitFor(60000, () -> UTIL.getMiniHBaseCluster().getMaster().isInitialized());
148+
LOG.info("Master is up");
133149
// wait for all SCPs to finish
134150
UTIL.waitFor(60000, () -> UTIL.getHBaseCluster().getMaster().getProcedures().stream()
135151
.noneMatch(p -> p instanceof ServerCrashProcedure));
152+
LOG.info("No SCPs");
136153
}
137154

138155
private void setupTable() throws Exception {

0 commit comments

Comments
 (0)