Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TableSnapshotInputFormatTestBase, method setupCluster.
@Before
public void setupCluster() throws Exception {
  setupConf(UTIL.getConfiguration());
  StartTestingClusterOption option = StartTestingClusterOption.builder()
    .numRegionServers(NUM_REGION_SERVERS)
    .numDataNodes(NUM_REGION_SERVERS)
    .createRootDir(true)
    .build();
  UTIL.startMiniCluster(option);
  rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
  fs = rootDir.getFileSystem(UTIL.getConfiguration());
}
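A @Before like this is normally paired with an @After that tears the mini cluster back down. A minimal sketch, assuming the same UTIL test-utility field (the method name here is illustrative):

@After
public void tearDownCluster() throws Exception {
  // Shuts down the mini HBase cluster together with its backing DFS and ZK instances.
  UTIL.shutdownMiniCluster();
}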
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestReplicasClient, method beforeClass.
@BeforeClass
public static void beforeClass() throws Exception {
  // enable store file refreshing
  HTU.getConfiguration().setInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
    REFRESH_PERIOD);
  HTU.getConfiguration().setBoolean("hbase.client.log.scanner.activity", true);
  HTU.getConfiguration().setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);
  StartTestingClusterOption option = StartTestingClusterOption.builder()
    .numRegionServers(1)
    .numAlwaysStandByMasters(1)
    .numMasters(1)
    .build();
  HTU.startMiniCluster(option);
  // Create table then get the single region for our new table.
  TableDescriptorBuilder builder = HTU.createModifyableTableDescriptor(
    TableName.valueOf(TestReplicasClient.class.getSimpleName()),
    ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER,
    ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED);
  builder.setCoprocessor(SlowMeCopro.class.getName());
  TableDescriptor hdt = builder.build();
  HTU.createTable(hdt, new byte[][] { f }, null);
  TABLE_NAME = hdt.getTableName();
  try (RegionLocator locator = HTU.getConnection().getRegionLocator(hdt.getTableName())) {
    hriPrimary = locator.getRegionLocation(row, false).getRegion();
  }
  // mock a secondary region info to open
  hriSecondary = RegionReplicaUtil.getRegionInfoForReplica(hriPrimary, 1);
  // No master
  LOG.info("Master is going to be stopped");
  TestRegionServerNoMaster.stopMasterAndCacheMetaLocation(HTU);
  Configuration c = new Configuration(HTU.getConfiguration());
  c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
  LOG.info("Master has stopped");
}
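What this setup enables is client reads against the secondary replica created above. As an illustrative sketch (not part of the original snippet; it assumes a Table handle for TABLE_NAME, the row key row used above, and imports from org.apache.hadoop.hbase.client), a timeline-consistent read that a replica may serve looks like this:

Get get = new Get(row);
// Allow any replica, not just the primary, to serve the read.
get.setConsistency(Consistency.TIMELINE);
Result result = table.get(get);
// isStale() reports whether a (possibly lagging) secondary replica answered.
boolean servedByReplica = result.isStale();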
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestRollingRestart, method testBasicRollingRestart.
@Test
public void testBasicRollingRestart() throws Exception {
  // Start a cluster with 2 masters and 3 regionservers
  final int NUM_MASTERS = 2;
  final int NUM_RS = 3;
  final int NUM_REGIONS_TO_CREATE = 20;
  int expectedNumRS = 3;
  // Start the cluster
  log("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, splitWALCoordinatedByZK);
  TEST_UTIL = new HBaseTestingUtil(conf);
  StartTestingClusterOption option = StartTestingClusterOption.builder()
    .numMasters(NUM_MASTERS)
    .numRegionServers(NUM_RS)
    .numDataNodes(NUM_RS)
    .build();
  TEST_UTIL.startMiniCluster(option);
  SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  log("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();
  // Create a table with regions
  final TableName tableName =
    TableName.valueOf(name.getMethodName().replaceAll("[\\[|\\]]", "-"));
  byte[] family = Bytes.toBytes("family");
  log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
    numRegions = r.getStartKeys().length;
  }
  // plus one for the catalog (hbase:meta) region
  numRegions += 1;
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Disabling table\n");
TEST_UTIL.getAdmin().disableTable(tableName);
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
NavigableSet<String> regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
log("Verifying only catalog region is assigned\n");
if (regions.size() != 1) {
for (String oregion : regions) {
log("Region still online: " + oregion);
}
}
assertEquals(1, regions.size());
log("Enabling table\n");
TEST_UTIL.getAdmin().enableTable(tableName);
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster\n");
regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
assertRegionsAssigned(cluster, regions);
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
// Add a new regionserver
log("Adding a fourth RS");
RegionServerThread restarted = cluster.startRegionServer();
expectedNumRS++;
restarted.waitForServerOnline();
log("Additional RS is online");
log("Waiting for no more RIT");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster");
assertRegionsAssigned(cluster, regions);
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
  // Master Restarts
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  MasterThread activeMaster = null;
  MasterThread backupMaster = null;
  assertEquals(2, masterThreads.size());
  if (masterThreads.get(0).getMaster().isActiveMaster()) {
    activeMaster = masterThreads.get(0);
    backupMaster = masterThreads.get(1);
  } else {
    activeMaster = masterThreads.get(1);
    backupMaster = masterThreads.get(0);
  }
  // Bring down the backup master
  log("Stopping backup master\n\n");
  backupMaster.getMaster().stop("Stop of backup during rolling restart");
  cluster.hbaseCluster.waitOnMaster(backupMaster);
  // Bring down the primary master
  log("Stopping primary master\n\n");
  activeMaster.getMaster().stop("Stop of active during rolling restart");
  cluster.hbaseCluster.waitOnMaster(activeMaster);
  // Start primary master
  log("Restarting primary master\n\n");
  activeMaster = cluster.startMaster();
  cluster.waitForActiveAndReadyMaster();
  // Start backup master
  log("Restarting backup master\n\n");
  backupMaster = cluster.startMaster();
  assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
  // RegionServer Restarts
  // Bring them down, one at a time, waiting between each to complete
  List<RegionServerThread> regionServers = cluster.getLiveRegionServerThreads();
  int num = 1;
  int total = regionServers.size();
  for (RegionServerThread rst : regionServers) {
    ServerName serverName = rst.getRegionServer().getServerName();
    log("Stopping region server " + num + " of " + total + " [ " + serverName + "]");
    rst.getRegionServer().stop("Stopping RS during rolling restart");
    cluster.hbaseCluster.waitOnRegionServer(rst);
    log("Waiting for RS shutdown to be handled by master");
    waitForRSShutdownToStartAndFinish(activeMaster, serverName);
    log("RS shutdown done, waiting for no more RIT");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster");
    assertRegionsAssigned(cluster, regions);
    expectedNumRS--;
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    log("Restarting region server " + num + " of " + total);
    restarted = cluster.startRegionServer();
    restarted.waitForServerOnline();
    expectedNumRS++;
    log("Region server " + num + " is back online");
    log("Waiting for no more RIT");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster");
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    num++;
  }
  Thread.sleep(1000);
  assertRegionsAssigned(cluster, regions);
  // TODO: Bring random 3 of 4 RS down at the same time
  ht.close();
  // Stop the cluster
  TEST_UTIL.shutdownMiniCluster();
}
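The assertRegionsAssigned(...) helper called after every restart step is not shown on this page. A rough, hypothetical sketch of its shape (the project's actual helper may differ in details and logging): it re-reads the online regions and verifies the expected set survived the restart:

private void assertRegionsAssigned(SingleProcessHBaseCluster cluster,
    Set<String> expectedRegions) throws IOException {
  // Recount the regions currently online across all region servers and
  // check that nothing was lost during the restart step.
  int numFound = HBaseTestingUtil.getAllOnlineRegions(cluster).size();
  assertTrue("Expected " + expectedRegions.size() + " regions online, found " + numFound,
    numFound >= expectedRegions.size());
}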
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterShutdown, method testMasterShutdown.
/**
 * Simple test of shutdown.
 * <p>
 * Starts with three masters. Tells the active master to shut down the cluster.
 * Verifies that all masters are properly shut down.
 */
@Test
public void testMasterShutdown() throws Exception {
  // Create config to use for this cluster
  Configuration conf = HBaseConfiguration.create();
  // Start the cluster
  try {
    htu = new HBaseTestingUtil(conf);
    StartTestingClusterOption option = StartTestingClusterOption.builder()
      .numMasters(3)
      .numRegionServers(1)
      .numDataNodes(1)
      .build();
    final SingleProcessHBaseCluster cluster = htu.startMiniCluster(option);
    // wait for all master threads to spawn and start their run loops.
    final long thirtySeconds = TimeUnit.SECONDS.toMillis(30);
    final long oneSecond = TimeUnit.SECONDS.toMillis(1);
    assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond, () -> {
      final List<MasterThread> masterThreads = cluster.getMasterThreads();
      return masterThreads != null && masterThreads.size() >= 3
        && masterThreads.stream().allMatch(Thread::isAlive);
    }));
    // find the active master
    final HMaster active = cluster.getMaster();
    assertNotNull(active);
    // make sure the other two are backup masters
    ClusterMetrics status = active.getClusterMetrics();
    assertEquals(2, status.getBackupMasterNames().size());
    // tell the active master to shutdown the cluster
    active.shutdown();
    assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond,
      () -> CollectionUtils.isEmpty(cluster.getLiveMasterThreads())));
    assertNotEquals(-1, htu.waitFor(thirtySeconds, oneSecond,
      () -> CollectionUtils.isEmpty(cluster.getLiveRegionServerThreads())));
  } finally {
    if (htu != null) {
      htu.shutdownMiniCluster();
      htu = null;
    }
  }
}
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterShutdown, method testMasterShutdownBeforeStartingAnyRegionServer.
/**
 * This test is an intentional race between a thread that issues a shutdown RPC to the master
 * and the master itself, which is concurrently realizing it cannot initialize because there
 * are no region servers available to it. The expected behavior is that master initialization
 * is interruptible via said shutdown RPC.
 */
@Test
public void testMasterShutdownBeforeStartingAnyRegionServer() throws Exception {
  LocalHBaseCluster hbaseCluster = null;
  try {
    htu = new HBaseTestingUtil(createMasterShutdownBeforeStartingAnyRegionServerConfiguration());
    // configure a cluster with one master and no region servers
    final StartTestingClusterOption options = StartTestingClusterOption.builder()
      .numDataNodes(1)
      .numMasters(1)
      .numRegionServers(0)
      .masterClass(HMaster.class)
      .rsClass(SingleProcessHBaseCluster.MiniHBaseClusterRegionServer.class)
      .createRootDir(true)
      .build();
    // Can't simply `htu.startMiniCluster(options)` because that method waits for the master to
    // start completely. However, this test's premise is that a partially started master should
    // still respond to a shutdown RPC. So instead, we manage each component lifecycle
    // independently.
    // I think it's not worth refactoring HTU's helper methods just for this class.
    htu.startMiniDFSCluster(options.getNumDataNodes());
    htu.startMiniZKCluster(options.getNumZkServers());
    htu.createRootDir();
    hbaseCluster = new LocalHBaseCluster(htu.getConfiguration(), options.getNumMasters(),
      options.getNumRegionServers(), options.getMasterClass(), options.getRsClass());
    final MasterThread masterThread = hbaseCluster.getMasters().get(0);
    masterThread.start();
    // Switching to master registry exacerbated a race in the master bootstrap that can result
    // in a lost shutdown command (HBASE-8422, HBASE-23836). The race is essentially because
    // the server manager in HMaster is not initialized by the time shutdown() RPC (below) is
    // made to the master. The suspected reason as to why it was uncommon before HBASE-18095
    // is because the connection creation with ZK registry is so slow that by then the server
    // manager is usually init'ed in time for the RPC to be made. For now, adding an explicit
    // wait() in the test, waiting for the server manager to become available.
    final long timeout = TimeUnit.MINUTES.toMillis(10);
    assertNotEquals("timeout waiting for server manager to become available.", -1,
      htu.waitFor(timeout, () -> masterThread.getMaster().getServerManager() != null));
    // Master has come up far enough that we can terminate it without creating a zombie.
    try {
      // HBASE-24327 (resolve flaky connection issues):
      // The shutdown() RPC can hit flaky ZK connection issues, e.g.
      // ERROR [RpcServer.priority.RWQ.Fifo.read.handler=1,queue=1,port=53033]
      // master.HMaster(2878): ZooKeeper exception trying to set cluster as down in ZK
      // org.apache.zookeeper.KeeperException$SystemErrorException:
      // KeeperErrorCode = SystemError
      //
      // However, even when such flakes happen, the shutdown does complete even though the
      // RPC reports a failure, so subsequent retries can never succeed: the HMaster is
      // already shut down and the retry loop eventually fails. To handle this, we make a
      // single shutdown() call and ignore the resulting exception.
      htu.getConnection().getAdmin().shutdown();
    } catch (RetriesExhaustedException e) {
      if (e.getCause() instanceof ConnectionClosedException) {
        LOG.info("Connection is closed to the cluster. The cluster is already down.", e);
      } else {
        throw e;
      }
    }
LOG.info("Shutdown RPC sent.");
masterThread.join();
} finally {
if (hbaseCluster != null) {
hbaseCluster.shutdown();
}
if (htu != null) {
htu.shutdownMiniCluster();
htu = null;
}
}
}
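Taken together, the snippets above exercise most of the StartTestingClusterOption builder surface. A combined sketch with illustrative values (numZkServers is inferred from the options.getNumZkServers() accessor used above):

StartTestingClusterOption option = StartTestingClusterOption.builder()
  .numMasters(2)                // active master plus one backup
  .numAlwaysStandByMasters(1)   // masters that never take the active role
  .numRegionServers(3)
  .numDataNodes(3)              // typically matched to the region server count
  .numZkServers(1)
  .masterClass(HMaster.class)
  .rsClass(SingleProcessHBaseCluster.MiniHBaseClusterRegionServer.class)
  .createRootDir(true)          // create a fresh HBase root dir on the mini DFS
  .build();
TEST_UTIL.startMiniCluster(option);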