use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestDistributedLogSplitting method after.
@After
public void after() throws Exception {
  try {
    if (TEST_UTIL.getHBaseCluster() != null) {
      for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
        mt.getMaster().abort("closing...", null);
      }
    }
    TEST_UTIL.shutdownMiniHBaseCluster();
  } finally {
    TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
    ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
  }
}
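MasterThread is the JVMClusterUtil wrapper around an HMaster running in-process, and the examples on this page reach the master through it. A short hedged sketch of the two accessors these examples rely on (assumes a running MiniHBaseCluster named cluster):

// Sketch, not HBase source: the two MasterThread accessors used on this page.
List<MasterThread> all = cluster.getMasterThreads();       // every master started in this JVM
List<MasterThread> live = cluster.getLiveMasterThreads();  // only masters whose threads are still alive
HMaster firstMaster = all.get(0).getMaster();              // unwrap the HMaster itself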
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestMasterFailover method testSimpleMasterFailover.
/**
 * Simple test of master failover.
 * <p>
 * Starts with three masters. Kills a backup master. Then kills the active
 * master. Ensures the final master becomes active and we can still contact
 * the cluster.
 * @throws Exception
 */
@Test(timeout = 240000)
public void testSimpleMasterFailover() throws Exception {
  final int NUM_MASTERS = 3;
  final int NUM_RS = 3;
  // Start the cluster
  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  // get all the master threads
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  // wait for each to come online
  for (MasterThread mt : masterThreads) {
    assertTrue(mt.isAlive());
  }
  // verify only one is the active master and we have the right number
  int numActive = 0;
  int activeIndex = -1;
  ServerName activeName = null;
  HMaster active = null;
  for (int i = 0; i < masterThreads.size(); i++) {
    if (masterThreads.get(i).getMaster().isActiveMaster()) {
      numActive++;
      activeIndex = i;
      active = masterThreads.get(activeIndex).getMaster();
      activeName = active.getServerName();
    }
  }
  assertEquals(1, numActive);
  assertEquals(NUM_MASTERS, masterThreads.size());
  LOG.info("Active master " + activeName);
  // Check that ClusterStatus reports the correct active and backup masters
  assertNotNull(active);
  ClusterStatus status = active.getClusterStatus();
  assertTrue(status.getMaster().equals(activeName));
  assertEquals(2, status.getBackupMastersSize());
  assertEquals(2, status.getBackupMasters().size());
  // attempt to stop one of the inactive masters
  int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
  HMaster master = cluster.getMaster(backupIndex);
  LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
  cluster.stopMaster(backupIndex, false);
  cluster.waitOnMaster(backupIndex);
  // Verify still one active master and it's the same one; recount so the
  // assertion below actually checks the post-stop state
  numActive = 0;
  for (int i = 0; i < masterThreads.size(); i++) {
    if (masterThreads.get(i).getMaster().isActiveMaster()) {
      assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
      numActive++;
      activeIndex = i;
      active = masterThreads.get(activeIndex).getMaster();
    }
  }
  assertEquals(1, numActive);
  assertEquals(2, masterThreads.size());
  int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
  LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " region servers");
  assertEquals(4, rsCount);
  // Check that ClusterStatus reports the correct active and backup masters
  assertNotNull(active);
  status = active.getClusterStatus();
  assertTrue(status.getMaster().equals(activeName));
  assertEquals(1, status.getBackupMastersSize());
  assertEquals(1, status.getBackupMasters().size());
  // kill the active master
  LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
  cluster.stopMaster(activeIndex, false);
  cluster.waitOnMaster(activeIndex);
  // wait for an active master to show up and be ready
  assertTrue(cluster.waitForActiveAndReadyMaster());
  LOG.debug("\n\nVerifying backup master is now active\n");
  // should only have one master now
  assertEquals(1, masterThreads.size());
  // and it should be active
  active = masterThreads.get(0).getMaster();
  assertNotNull(active);
  status = active.getClusterStatus();
  ServerName mastername = status.getMaster();
  assertTrue(mastername.equals(active.getServerName()));
  assertTrue(active.isActiveMaster());
  assertEquals(0, status.getBackupMastersSize());
  assertEquals(0, status.getBackupMasters().size());
  int rss = status.getServersSize();
  LOG.info("Active master " + mastername.getServerName() + " managing " + rss + " region servers");
  assertEquals(4, rss);
  // Stop the cluster
  TEST_UTIL.shutdownMiniCluster();
}
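The "scan the master threads for the single active master" loop appears several times in this test. As a hedged sketch (a hypothetical helper, not part of the HBase source), it can be extracted using only the APIs shown above:

// Hypothetical helper, not in the HBase source: returns the lone active master
// among the given threads, failing the test if zero or more than one is active.
private static HMaster findActiveMaster(List<MasterThread> masterThreads) {
  HMaster active = null;
  for (MasterThread mt : masterThreads) {
    if (mt.getMaster().isActiveMaster()) {
      assertNull("expected a single active master", active);
      active = mt.getMaster();
    }
  }
  assertNotNull("no active master found", active);
  return active;
}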
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestMasterFailover method testPendingOpenOrCloseWhenMasterFailover.
/**
 * Test regions in pending_open/pending_close states when the master fails over.
 */
@Test(timeout = 180000)
public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
  final int NUM_MASTERS = 1;
  final int NUM_RS = 1;
  // Create config to use for this cluster
  Configuration conf = HBaseConfiguration.create();
  // Start the cluster
  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  log("Cluster started");
  // get all the master threads
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  assertEquals(1, masterThreads.size());
  // only one master thread, let's wait for it to be initialized
  assertTrue(cluster.waitForActiveAndReadyMaster());
  HMaster master = masterThreads.get(0).getMaster();
  assertTrue(master.isActiveMaster());
  assertTrue(master.isInitialized());
  // Create a table with a region online
  Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
  onlineTable.close();
  // Create a table in META, so it has a region offline
  HTableDescriptor offlineTable = new HTableDescriptor(TableName.valueOf(Bytes.toBytes("offlineTable")));
  offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
  FileSystem filesystem = FileSystem.get(conf);
  Path rootdir = FSUtils.getRootDir(conf);
  FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
  fstd.createTableDescriptor(offlineTable);
  HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
  createRegion(hriOffline, rootdir, conf, offlineTable);
  MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
  log("Regions in hbase:meta and namespace have been created");
  // at this point we only expect 3 regions to be assigned out
  // (catalogs and namespace, + 1 online region)
  assertEquals(3, cluster.countServedRegions());
  HRegionInfo hriOnline = null;
  try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
    hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
  }
  RegionStates regionStates = master.getAssignmentManager().getRegionStates();
  RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
  // Put the online region in pending_close. It is actually already opened.
  // This is to simulate that the region close RPC is not sent out before failover
  RegionState oldState = regionStates.getRegionState(hriOnline);
  RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, oldState.getServerName());
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  // Put the offline region in pending_open. It is actually not opened yet.
  // This is to simulate that the region open RPC is not sent out before failover
  oldState = new RegionState(hriOffline, State.OFFLINE);
  newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
  createRegion(failedClose, rootdir, conf, offlineTable);
  MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
  oldState = new RegionState(failedClose, State.PENDING_CLOSE);
  newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
  createRegion(failedOpen, rootdir, conf, offlineTable);
  MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
  // Simulate a region transitioning to failed open when the region server reports the
  // transition as FAILED_OPEN
  oldState = new RegionState(failedOpen, State.PENDING_OPEN);
  newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
  LOG.info("Failed open null server " + failedOpenNullServer.getEncodedName());
  createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
  MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
  // Simulate a region transitioning to failed open when the master couldn't find a plan for
  // the region
  oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
  newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  // Stop the master
  log("Aborting master");
  cluster.abortMaster(0);
  cluster.waitOnMaster(0);
  log("Master has aborted");
  // Start up a new master
  log("Starting up a new master");
  master = cluster.startMaster().getMaster();
  log("Waiting for master to be ready");
  cluster.waitForActiveAndReadyMaster();
  log("Master is ready");
  // Wait till no region in transition any more
  TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  // Get new region states since master restarted
  regionStates = master.getAssignmentManager().getRegionStates();
  // All the regions forced into transitional states above should be back online
  assertTrue(regionStates.isRegionOnline(hriOffline));
  assertTrue(regionStates.isRegionOnline(hriOnline));
  assertTrue(regionStates.isRegionOnline(failedClose));
  assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
  assertTrue(regionStates.isRegionOnline(failedOpen));
  log("Done with verification, shutting down cluster");
  // Done, shutdown the cluster
  TEST_UTIL.shutdownMiniCluster();
}
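The test above repeatedly forces a region into a chosen state via RegionStateStore.updateRegionState. A hedged sketch condensing that pattern into a hypothetical helper (not in the test; built only from the RegionState and RegionStateStore calls shown above):

// Hypothetical helper condensing the repeated transition-forcing pattern above.
private static RegionState forceTransition(RegionStateStore stateStore, HRegionInfo hri,
    RegionState.State from, RegionState.State to, ServerName server) {
  RegionState oldState = new RegionState(hri, from);
  RegionState newState = new RegionState(hri, to, server);
  stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
  return newState;
}

For example, forceTransition(stateStore, failedClose, State.PENDING_CLOSE, State.FAILED_CLOSE, serverName) would replace three lines of the test body.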
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestMasterRestartAfterDisablingTable method testForCheckingIfEnableAndDisableWorksFineAfterSwitch.
@Test
public void testForCheckingIfEnableAndDisableWorksFineAfterSwitch() throws Exception {
  final int NUM_MASTERS = 2;
  final int NUM_RS = 1;
  final int NUM_REGIONS_TO_CREATE = 4;
  // Start the cluster
  log("Starting cluster");
  Configuration conf = HBaseConfiguration.create();
  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  log("Waiting for active/ready master");
  cluster.waitForActiveAndReadyMaster();
  // Create a table with regions
  final TableName tableName = TableName.valueOf(name.getMethodName());
  byte[] family = Bytes.toBytes("family");
  log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
  Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
  int numRegions = -1;
  try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
    numRegions = r.getStartKeys().length;
  }
  // catalogs
  numRegions += 1;
  log("Waiting for no more RIT\n");
  TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  log("Disabling table\n");
  TEST_UTIL.getAdmin().disableTable(tableName);
  NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
  assertEquals("The number of regions for the table tableRestart should be 0 and only "
      + "the catalog and namespace tables should be present.", 2, regions.size());
  List<MasterThread> masterThreads = cluster.getMasterThreads();
  MasterThread activeMaster = null;
  if (masterThreads.get(0).getMaster().isActiveMaster()) {
    activeMaster = masterThreads.get(0);
  } else {
    activeMaster = masterThreads.get(1);
  }
  activeMaster.getMaster().stop("stopping the active master so that the backup can become active");
  cluster.hbaseCluster.waitOnMaster(activeMaster);
  cluster.waitForActiveAndReadyMaster();
  assertTrue("The table should not be in enabled state",
      cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()),
          TableState.State.DISABLED, TableState.State.DISABLING));
  log("Enabling table\n");
  // Need a new Admin, the previous one is on the old master
  Admin admin = TEST_UTIL.getAdmin();
  admin.enableTable(tableName);
  admin.close();
  log("Waiting for no more RIT\n");
  TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  log("Verifying there are " + numRegions + " assigned on cluster\n");
  regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
  assertEquals("The assigned regions were not onlined after master switch,"
      + " except for the catalog and namespace tables.", 6, regions.size());
  assertTrue("The table should be in enabled state",
      cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()),
          TableState.State.ENABLED));
  ht.close();
  TEST_UTIL.shutdownMiniCluster();
}
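Stopping whichever master is currently active and waiting for the backup to take over is the core failover step in this test. A hedged sketch of that step as a standalone helper (hypothetical, assembled from the MiniHBaseCluster calls used above):

// Hypothetical helper, not in the HBase source: stop the active master and
// wait for a backup to become active and ready.
private static void failOverToBackupMaster(MiniHBaseCluster cluster) throws Exception {
  for (MasterThread mt : cluster.getMasterThreads()) {
    if (mt.getMaster().isActiveMaster()) {
      mt.getMaster().stop("stopping active master so a backup can take over");
      cluster.hbaseCluster.waitOnMaster(mt);
      break;
    }
  }
  cluster.waitForActiveAndReadyMaster();
}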
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestRSKilledWhenInitializing method testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode.
/**
 * Verifies that a region server is removed from the master's online-servers list
 * if it goes down after registering with the master. The test will TIMEOUT if an
 * error occurs.
 * @throws Exception
 */
@Test
public void testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode() throws Exception {
  // Create config to use for this cluster
  Configuration conf = HBaseConfiguration.create();
  conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
  // Start the cluster
  final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniDFSCluster(3);
  TEST_UTIL.startMiniZKCluster();
  TEST_UTIL.createRootDir();
  final LocalHBaseCluster cluster =
      new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class, RegisterAndDieRegionServer.class);
  final MasterThread master = startMaster(cluster.getMasters().get(0));
  try {
    // Master is up waiting on RegionServers to check in. Now start RegionServers.
    for (int i = 0; i < NUM_RS; i++) {
      cluster.getRegionServers().get(i).start();
    }
    // Now wait on master to see NUM_RS + 1 servers as being online, that's NUM_RS plus
    // the Master itself (because the Master hosts hbase:meta and checks in as though it were a RS).
    List<ServerName> onlineServersList = null;
    do {
      onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
    } while (onlineServersList.size() < (NUM_RS + 1));
    // Wait until killedRS is set. Means RegionServer is starting to go down.
    while (killedRS.get() == null) {
      Threads.sleep(1);
    }
    // Wait on the RegionServer to fully die.
    while (cluster.getLiveRegionServers().size() > NUM_RS) {
      Threads.sleep(1);
    }
    // Make sure the Master is fully initialized before progressing; regions may still be
    // being reassigned.
    while (!master.getMaster().isInitialized()) {
      Threads.sleep(1);
    }
    // Now in steady state. How many regions open? Master should have too many regionservers
    // showing still. The downed RegionServer should still be showing as registered.
    assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get()));
    // Find non-meta region (namespace?) and assign to the killed server. That'll trigger cleanup.
    Map<HRegionInfo, ServerName> assignments = null;
    do {
      assignments = master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments();
    } while (assignments == null || assignments.size() < 2);
    HRegionInfo hri = null;
    for (Map.Entry<HRegionInfo, ServerName> e : assignments.entrySet()) {
      if (e.getKey().isMetaRegion()) continue;
      hri = e.getKey();
      break;
    }
    // Try moving region to the killed server. It will fail. As by-product, we will
    // remove the RS from the Master online list because it has no corresponding znode.
    assertEquals(NUM_RS + 1, master.getMaster().getServerManager().getOnlineServersList().size());
    LOG.info("Move " + hri.getEncodedName() + " to " + killedRS.get());
    master.getMaster().move(hri.getEncodedNameAsBytes(), Bytes.toBytes(killedRS.get().toString()));
    // Wait until the RS no longer shows as registered in Master.
    while (onlineServersList.size() > (NUM_RS + 1)) {
      Thread.sleep(100);
      onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
    }
  } finally {
    // Shutdown is messy with complaints about fs being closed. Why? TODO.
    cluster.shutdown();
    cluster.join();
    TEST_UTIL.shutdownMiniDFSCluster();
    TEST_UTIL.shutdownMiniZKCluster();
    TEST_UTIL.cleanupTestDir();
  }
}
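The busy-wait loops in this test (wait for servers to check in, for the RS to die, for the master to initialize) all share the same poll-until-true shape. A hedged sketch of that shape as a generic helper (hypothetical; Threads.sleep is the HBase utility the test itself uses):

// Hypothetical polling helper capturing the wait loops used in the test above.
private static void waitUntil(java.util.function.BooleanSupplier condition) {
  while (!condition.getAsBoolean()) {
    Threads.sleep(1);
  }
}
// e.g. waitUntil(() -> master.getMaster().isInitialized());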