Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterFailover, method testSimpleMasterFailover.
/**
 * Simple test of master failover.
 * <p>
 * Starts with three masters. Kills a backup master. Then kills the active
 * master. Ensures the final master becomes active and we can still contact
 * the cluster.
*/
@Test
public void testSimpleMasterFailover() throws Exception {
  final int NUM_MASTERS = 3;
  final int NUM_RS = 3;
  // Start the cluster
  HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  try {
    StartTestingClusterOption option = StartTestingClusterOption.builder()
      .numMasters(NUM_MASTERS).numRegionServers(NUM_RS).numDataNodes(NUM_RS).build();
    TEST_UTIL.startMiniCluster(option);
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    // wait for each to come online
    for (MasterThread mt : masterThreads) {
      assertTrue(mt.isAlive());
    }
    // verify only one is the active master and we have the right number
    int numActive = 0;
    int activeIndex = -1;
    ServerName activeName = null;
    HMaster active = null;
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        numActive++;
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
        activeName = active.getServerName();
      }
    }
    assertEquals(1, numActive);
    assertEquals(NUM_MASTERS, masterThreads.size());
    LOG.info("Active master " + activeName);
    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    ClusterMetrics status = active.getClusterMetrics();
    assertEquals(activeName, status.getMasterName());
    assertEquals(2, status.getBackupMasterNames().size());
    // attempt to stop one of the inactive masters
    int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
    HMaster master = cluster.getMaster(backupIndex);
    LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
    cluster.stopMaster(backupIndex, false);
    cluster.waitOnMaster(backupIndex);
    // Verify there is still one active master and it's the same one
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
        activeIndex = i;
        active = masterThreads.get(activeIndex).getMaster();
      }
    }
    assertEquals(1, numActive);
    assertEquals(2, masterThreads.size());
    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterMetrics()
      .getLiveServerMetrics().size();
    LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " region servers");
    assertEquals(3, rsCount);
    // wait for the active master to acknowledge loss of the backup from ZK
    final HMaster activeFinal = active;
    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5),
      () -> activeFinal.getBackupMasters().size() == 1);
    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    status = active.getClusterMetrics();
    assertEquals(activeName, status.getMasterName());
    assertEquals(1, status.getBackupMasterNames().size());
    // kill the active master
    LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
    cluster.stopMaster(activeIndex, false);
    cluster.waitOnMaster(activeIndex);
    // wait for an active master to show up and be ready
    assertTrue(cluster.waitForActiveAndReadyMaster());
    LOG.debug("\n\nVerifying backup master is now active\n");
    // should only have one master now
    assertEquals(1, masterThreads.size());
    // and it should be active
    active = masterThreads.get(0).getMaster();
    assertNotNull(active);
    status = active.getClusterMetrics();
    ServerName masterName = status.getMasterName();
    assertNotNull(masterName);
    assertEquals(active.getServerName(), masterName);
    assertTrue(active.isActiveMaster());
    assertEquals(0, status.getBackupMasterNames().size());
    int rss = status.getLiveServerMetrics().size();
    LOG.info("Active master {} managing {} region servers", masterName.getServerName(), rss);
    assertEquals(3, rss);
  } finally {
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
}
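The builder calls used above can be distilled into a minimal, self-contained setup/teardown sketch. The class names and builder methods below are the ones already referenced in this listing; the counts and the method name runWithMiniCluster are illustrative, not part of the original test.

import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.StartTestingClusterOption;

// Illustrative sketch: start a multi-master mini cluster, then shut it down.
public static void runWithMiniCluster() throws Exception {
  HBaseTestingUtil util = new HBaseTestingUtil();
  StartTestingClusterOption option = StartTestingClusterOption.builder()
    .numMasters(3)        // one active master plus two backups
    .numRegionServers(3)  // region servers hosting user regions
    .numDataNodes(3)      // HDFS datanodes backing the mini DFS cluster
    .build();
  try {
    util.startMiniCluster(option);
    SingleProcessHBaseCluster cluster = util.getHBaseCluster();
    cluster.waitForActiveAndReadyMaster();
    // ... exercise the cluster here ...
  } finally {
    util.shutdownMiniCluster();
  }
}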
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterFailoverBalancerPersistence, method testMasterFailoverBalancerPersistence.
/**
 * Test that if the master fails, the load balancer maintains its
 * state (running or not) when the next master takes over.
 *
 * @throws Exception
*/
@Test
public void testMasterFailoverBalancerPersistence() throws Exception {
  // Start the cluster
  HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  StartTestingClusterOption option = StartTestingClusterOption.builder().numMasters(3).build();
  TEST_UTIL.startMiniCluster(option);
  SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  assertTrue(cluster.waitForActiveAndReadyMaster());
  HMaster active = cluster.getMaster();
  // check that the balancer is on by default for the active master
  ClusterMetrics clusterStatus = active.getClusterMetrics();
  assertTrue(clusterStatus.getBalancerOn());
  active = killActiveAndWaitForNewActive(cluster);
  // ensure the load balancer is still running on the new master
  clusterStatus = active.getClusterMetrics();
  assertTrue(clusterStatus.getBalancerOn());
  // turn off the load balancer
  active.balanceSwitch(false);
  // once more, kill the active master and wait for a new active master to show up
  active = killActiveAndWaitForNewActive(cluster);
  // ensure the load balancer is not running on the new master
  clusterStatus = active.getClusterMetrics();
  assertFalse(clusterStatus.getBalancerOn());
  // Stop the cluster
  TEST_UTIL.shutdownMiniCluster();
}
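The helper killActiveAndWaitForNewActive is called here but not included in this listing. Below is a hedged sketch of what such a helper could look like, assembled only from calls that already appear in these examples; the actual implementation in TestMasterFailoverBalancerPersistence may differ.

// Sketch only: find the active master, stop it, and wait for a backup to take over.
private static HMaster killActiveAndWaitForNewActive(SingleProcessHBaseCluster cluster)
    throws Exception {
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  int activeIndex = -1;
  for (int i = 0; i < masterThreads.size(); i++) {
    if (masterThreads.get(i).getMaster().isActiveMaster()) {
      activeIndex = i;
    }
  }
  // Stop the current active master and wait for its thread to exit.
  cluster.stopMaster(activeIndex, false);
  cluster.waitOnMaster(activeIndex);
  // Block until one of the backups has become active and is ready.
  assertTrue(cluster.waitForActiveAndReadyMaster());
  return cluster.getMaster();
}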
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterMetrics, method startCluster.
@BeforeClass
public static void startCluster() throws Exception {
LOG.info("Starting cluster");
// Set master class and use default values for other options.
StartTestingClusterOption option = StartTestingClusterOption.builder().masterClass(MyMaster.class).rsClass(MyRegionServer.class).build();
TEST_UTIL.startMiniCluster(option);
cluster = TEST_UTIL.getHBaseCluster();
LOG.info("Waiting for active/ready master");
cluster.waitForActiveAndReadyMaster();
master = cluster.getMaster();
}
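Only the @BeforeClass setup is shown in this listing; a matching teardown along the following lines is typically paired with it. This is a sketch that assumes the same static TEST_UTIL field; the actual TestMasterMetrics teardown is not shown here and may do more.

@AfterClass
public static void shutdownCluster() throws Exception {
  // Tear down the mini cluster started in startCluster().
  TEST_UTIL.shutdownMiniCluster();
}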
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterOperationsForRegionReplicas, method testCreateTableWithMultipleReplicas.
@Test
public void testCreateTableWithMultipleReplicas() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  final int numRegions = 3;
  final int numReplica = 2;
  try {
    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setRegionReplication(numReplica)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("family")).build();
    ADMIN.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    validateNumberOfRowsInMeta(tableName, numRegions, ADMIN.getConnection());
    List<RegionInfo> hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
    assertEquals(numRegions * numReplica, hris.size());
    assertRegionStateNotNull(hris, numRegions, numReplica);
    List<Result> metaRows = MetaTableAccessor.fullScanRegions(ADMIN.getConnection());
    int numRows = 0;
    for (Result result : metaRows) {
      RegionLocations locations = CatalogFamilyFormat.getRegionLocations(result);
      RegionInfo hri = locations.getRegionLocation().getRegion();
      if (!hri.getTable().equals(tableName)) {
        continue;
      }
      numRows += 1;
      HRegionLocation[] servers = locations.getRegionLocations();
      // there should be two locations for the replicas of a region, and the locations should differ
      assertEquals(2, servers.length);
      assertNotEquals(servers[1], servers[0]);
    }
    assertEquals(numRegions, numRows);
    // The same verification of the meta as above but with the SnapshotOfRegionAssignmentFromMeta
    // class
    validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
    // Now kill the master, restart it and see if the assignments are kept
    ServerName master = TEST_UTIL.getHBaseClusterInterface().getClusterMetrics().getMasterName();
    TEST_UTIL.getHBaseClusterInterface().stopMaster(master);
    TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000);
    TEST_UTIL.getHBaseClusterInterface().startMaster(master.getHostname(), master.getPort());
    TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    assertRegionStateNotNull(hris, numRegions, numReplica);
    validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
    // Now shut the whole cluster down, and verify the assignments are kept so that the
    // availability constraints are met. MiniHBaseCluster chooses arbitrary ports on each
    // restart. This messes with our being able to test that we retain locality. Therefore,
    // figure out the current cluster ports and pass them in on the next cluster start so the
    // new cluster comes up at the same coordinates -- and the assignment retention logic has
    // a chance to cut in.
    List<Integer> rsports = new ArrayList<>();
    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getHBaseCluster()
        .getLiveRegionServerThreads()) {
      rsports.add(rst.getRegionServer().getRpcServer().getListenerAddress().getPort());
    }
    TEST_UTIL.shutdownMiniHBaseCluster();
    StartTestingClusterOption option = StartTestingClusterOption.builder()
      .numRegionServers(numSlaves).rsPorts(rsports).build();
    TEST_UTIL.startMiniHBaseCluster(option);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    resetConnections();
    validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
    // Now shut the whole cluster down again, and verify regions are assigned even if there is
    // only one server running
    TEST_UTIL.shutdownMiniHBaseCluster();
    TEST_UTIL.startMiniHBaseCluster();
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    resetConnections();
    validateSingleRegionServerAssignment(ADMIN.getConnection(), numRegions, numReplica);
    for (int i = 1; i < numSlaves; i++) {
      // restore the cluster
      TEST_UTIL.getMiniHBaseCluster().startRegionServer();
    }
    // Check on alter table
    ADMIN.disableTable(tableName);
    assertTrue(ADMIN.isTableDisabled(tableName));
    // increase the replica count
    ADMIN.modifyTable(TableDescriptorBuilder.newBuilder(desc)
      .setRegionReplication(numReplica + 1).build());
    ADMIN.enableTable(tableName);
    LOG.info(ADMIN.getDescriptor(tableName).toString());
    assertTrue(ADMIN.isTableEnabled(tableName));
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    List<RegionInfo> regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
      .getRegionStates().getRegionsOfTable(tableName);
    assertTrue("regions.size=" + regions.size() + ", numRegions=" + numRegions + ", numReplica="
      + numReplica, regions.size() == numRegions * (numReplica + 1));
    // decrease the replica count (earlier, the table was modified to have a replica count of
    // numReplica + 1)
    ADMIN.disableTable(tableName);
    ADMIN.modifyTable(TableDescriptorBuilder.newBuilder(desc)
      .setRegionReplication(numReplica).build());
    ADMIN.enableTable(tableName);
    assertTrue(ADMIN.isTableEnabled(tableName));
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
      .getRegionStates().getRegionsOfTable(tableName);
    assertEquals(numRegions * numReplica, regions.size());
    // also make sure the meta table has the replica locations removed
    hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
    assertEquals(numRegions * numReplica, hris.size());
    // just check that the number of default replica regions in the meta table is the same
    // as the number of regions the table was created with, and that the count of the
    // replicas is numReplica for each region
    Map<RegionInfo, Integer> defaultReplicas = new HashMap<>();
    for (RegionInfo hri : hris) {
      RegionInfo regionReplica0 = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
      Integer i = defaultReplicas.get(regionReplica0);
      defaultReplicas.put(regionReplica0, i == null ? 1 : i + 1);
    }
    assertEquals(numRegions, defaultReplicas.size());
    Collection<Integer> counts = new HashSet<>(defaultReplicas.values());
    assertEquals(1, counts.size());
    assertTrue(counts.contains(numReplica));
  } finally {
    ADMIN.disableTable(tableName);
    ADMIN.deleteTable(tableName);
  }
}
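The port-preserving restart buried in the middle of the test above is the part that exercises rsPorts. Pulled out as an isolated sketch (the util and numRegionServers names are placeholders for the test's own members, not fields from the original class):

// Remember the RPC port each live region server is listening on.
List<Integer> rsPorts = new ArrayList<>();
for (JVMClusterUtil.RegionServerThread rst : util.getHBaseCluster().getLiveRegionServerThreads()) {
  rsPorts.add(rst.getRegionServer().getRpcServer().getListenerAddress().getPort());
}
util.shutdownMiniHBaseCluster();
// Restart the HBase mini cluster on the same ports so that region assignments
// recorded in meta still point at valid server coordinates.
StartTestingClusterOption option = StartTestingClusterOption.builder()
  .numRegionServers(numRegionServers)
  .rsPorts(rsPorts)
  .build();
util.startMiniHBaseCluster(option);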
Use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.
The class TestMasterRestartAfterDisablingTable, method testForCheckingIfEnableAndDisableWorksFineAfterSwitch.
@Test
public void testForCheckingIfEnableAndDisableWorksFineAfterSwitch() throws Exception {
  final int NUM_MASTERS = 2;
  final int NUM_REGIONS_TO_CREATE = 4;
  // Start the cluster
  log("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(conf);
  StartTestingClusterOption option = StartTestingClusterOption.builder()
    .numMasters(NUM_MASTERS).build();
  TEST_UTIL.startMiniCluster(option);
  SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  log("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();
  // Create a table with regions
  final TableName tableName = TableName.valueOf(name.getMethodName());
  byte[] family = Bytes.toBytes("family");
  log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
    numRegions = r.getStartKeys().length;
  }
  // plus the catalog region
  numRegions += 1;
  log("Waiting for no more RIT\n");
  TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  log("Disabling table\n");
  TEST_UTIL.getAdmin().disableTable(tableName);
  NavigableSet<String> regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
  assertEquals("The number of regions for the table tableRestart should be 0 and only "
    + "the catalog table should be present.", 1, regions.size());
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  MasterThread activeMaster = null;
  if (masterThreads.get(0).getMaster().isActiveMaster()) {
    activeMaster = masterThreads.get(0);
  } else {
    activeMaster = masterThreads.get(1);
  }
  activeMaster.getMaster()
    .stop("stopping the active master so that the backup can become active");
  cluster.hbaseCluster.waitOnMaster(activeMaster);
  cluster.waitForActiveAndReadyMaster();
  assertTrue("The table should not be in enabled state",
    cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()),
      TableState.State.DISABLED, TableState.State.DISABLING));
  log("Enabling table\n");
  // Need a new Admin, the previous one is on the old master
  Admin admin = TEST_UTIL.getAdmin();
  admin.enableTable(tableName);
  admin.close();
  log("Waiting for no more RIT\n");
  TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  log("Verifying there are " + numRegions + " assigned on cluster\n");
  regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
  assertEquals("The assigned regions were not onlined after master"
    + " switch except for the catalog table.", 5, regions.size());
  assertTrue("The table should be in enabled state",
    cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()),
      TableState.State.ENABLED));
  ht.close();
  TEST_UTIL.shutdownMiniCluster();
}
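The log(String) helper used throughout this test is not part of the listing either; it is presumably a thin wrapper around the class logger, along these lines (the exact log level is an assumption):

private static void log(String msg) {
  LOG.info(msg); // the actual level used by the test may differ
}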