1- /**
1+ /*
22 * Licensed to the Apache Software Foundation (ASF) under one
33 * or more contributor license agreements. See the NOTICE file
44 * distributed with this work for additional information
4040import org .apache .hadoop .hbase .master .assignment .ServerStateNode ;
4141import org .apache .hadoop .hbase .master .procedure .ServerCrashProcedure ;
4242import org .apache .hadoop .hbase .procedure2 .Procedure ;
43+ import org .apache .hadoop .hbase .regionserver .HRegionServer ;
4344import org .apache .hadoop .hbase .testclassification .LargeTests ;
4445import org .apache .hadoop .hbase .testclassification .MasterTests ;
46+ import org .apache .hadoop .hbase .util .JVMClusterUtil ;
4547import org .apache .zookeeper .KeeperException ;
4648import org .junit .ClassRule ;
4749import org .junit .Test ;
@@ -58,7 +60,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
5860
5961 private static final Logger LOG = LoggerFactory .getLogger (TestClusterRestartFailover .class );
6062
61- private static CountDownLatch SCP_LATCH ;
63+ private volatile static CountDownLatch SCP_LATCH ;
6264 private static ServerName SERVER_FOR_TEST ;
6365
6466 @ Override
@@ -79,7 +81,16 @@ public void test() throws Exception {
7981 setupCluster ();
8082 setupTable ();
8183
82- SERVER_FOR_TEST = UTIL .getHBaseCluster ().getRegionServer (0 ).getServerName ();
84+ // Find a server that does not have hbase:namespace on it. This test holds up SCPs. If it
85+ // holds up the server w/ hbase:namespace, the Master initialization will be held up
86+ // because this table is not online, and the test fails.
87+ for (JVMClusterUtil .RegionServerThread rst :
88+ UTIL .getHBaseCluster ().getLiveRegionServerThreads ()) {
89+ HRegionServer rs = rst .getRegionServer ();
90+ if (rs .getRegions (TableName .NAMESPACE_TABLE_NAME ).isEmpty ()) {
91+ SERVER_FOR_TEST = rs .getServerName ();
92+ }
93+ }
8394 UTIL .waitFor (60000 , () -> getServerStateNode (SERVER_FOR_TEST ) != null );
8495 ServerStateNode serverNode = getServerStateNode (SERVER_FOR_TEST );
8596 assertNotNull (serverNode );
@@ -98,8 +109,9 @@ public void test() throws Exception {
98109 LOG .info ("Restarting cluster" );
99110 UTIL .restartHBaseCluster (StartMiniClusterOption .builder ().masterClass (HMasterForTest .class )
100111 .numMasters (1 ).numRegionServers (3 ).rsPorts (ports ).build ());
112+ LOG .info ("Started cluster" );
101113 UTIL .waitFor (60000 , () -> UTIL .getHBaseCluster ().getMaster ().isInitialized ());
102-
114+ LOG . info ( "Started cluster master, waiting for {}" , SERVER_FOR_TEST );
103115 UTIL .waitFor (60000 , () -> getServerStateNode (SERVER_FOR_TEST ) != null );
104116 serverNode = getServerStateNode (SERVER_FOR_TEST );
105117 assertFalse ("serverNode should not be ONLINE during SCP processing" ,
@@ -113,6 +125,7 @@ public void test() throws Exception {
113125 Procedure .NO_PROC_ID );
114126
115127 // Wait for the SCP to finish
128+ LOG .info ("Waiting on latch" );
116129 SCP_LATCH .countDown ();
117130 UTIL .waitFor (60000 , () -> procedure .get ().isFinished ());
118131
@@ -126,13 +139,17 @@ public void test() throws Exception {
126139 }
127140
128141 private void setupCluster () throws Exception {
142+ LOG .info ("Setup cluster" );
129143 UTIL .startMiniCluster (
130144 StartMiniClusterOption .builder ().masterClass (HMasterForTest .class ).numMasters (1 )
131145 .numRegionServers (3 ).build ());
146+ LOG .info ("Cluster is up" );
132147 UTIL .waitFor (60000 , () -> UTIL .getMiniHBaseCluster ().getMaster ().isInitialized ());
148+ LOG .info ("Master is up" );
133149 // wait for all SCPs finished
134150 UTIL .waitFor (60000 , () -> UTIL .getHBaseCluster ().getMaster ().getProcedures ().stream ()
135151 .noneMatch (p -> p instanceof ServerCrashProcedure ));
152+ LOG .info ("No SCPs" );
136153 }
137154
138155 private void setupTable () throws Exception {
0 commit comments