use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestRollingRestart method testBasicRollingRestart.
@Test(timeout = 500000)
public void testBasicRollingRestart() throws Exception {
// Start a cluster with 2 masters and 4 regionservers
final int NUM_MASTERS = 2;
final int NUM_RS = 3;
final int NUM_REGIONS_TO_CREATE = 20;
int expectedNumRS = 3;
// Start the cluster
log("Starting cluster");
Configuration conf = HBaseConfiguration.create();
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
log("Waiting for active/ready master");
cluster.waitForActiveAndReadyMaster();
// Create a table with regions
final TableName tableName = TableName.valueOf(name.getMethodName());
byte[] family = Bytes.toBytes("family");
log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
int numRegions = -1;
try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
numRegions = r.getStartKeys().length;
}
// catalogs
numRegions += 1;
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Disabling table\n");
TEST_UTIL.getAdmin().disableTable(tableName);
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
log("Verifying only catalog and namespace regions are assigned\n");
if (regions.size() != 2) {
for (String oregion : regions) log("Region still online: " + oregion);
}
assertEquals(2, regions.size());
log("Enabling table\n");
TEST_UTIL.getAdmin().enableTable(tableName);
log("Waiting for no more RIT\n");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster\n");
regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
assertRegionsAssigned(cluster, regions);
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
// Add a new regionserver
log("Adding a fourth RS");
RegionServerThread restarted = cluster.startRegionServer();
expectedNumRS++;
restarted.waitForServerOnline();
log("Additional RS is online");
log("Waiting for no more RIT");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster");
assertRegionsAssigned(cluster, regions);
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
// Master Restarts
List<MasterThread> masterThreads = cluster.getMasterThreads();
MasterThread activeMaster = null;
MasterThread backupMaster = null;
assertEquals(2, masterThreads.size());
if (masterThreads.get(0).getMaster().isActiveMaster()) {
activeMaster = masterThreads.get(0);
backupMaster = masterThreads.get(1);
} else {
activeMaster = masterThreads.get(1);
backupMaster = masterThreads.get(0);
}
// Bring down the backup master
log("Stopping backup master\n\n");
backupMaster.getMaster().stop("Stop of backup during rolling restart");
cluster.hbaseCluster.waitOnMaster(backupMaster);
// Bring down the primary master
log("Stopping primary master\n\n");
activeMaster.getMaster().stop("Stop of active during rolling restart");
cluster.hbaseCluster.waitOnMaster(activeMaster);
// Start primary master
log("Restarting primary master\n\n");
activeMaster = cluster.startMaster();
cluster.waitForActiveAndReadyMaster();
// Start backup master
log("Restarting backup master\n\n");
backupMaster = cluster.startMaster();
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
// RegionServer Restarts
// Bring them down, one at a time, waiting between each to complete
List<RegionServerThread> regionServers = cluster.getLiveRegionServerThreads();
int num = 1;
int total = regionServers.size();
for (RegionServerThread rst : regionServers) {
ServerName serverName = rst.getRegionServer().getServerName();
log("Stopping region server " + num + " of " + total + " [ " + serverName + "]");
rst.getRegionServer().stop("Stopping RS during rolling restart");
cluster.hbaseCluster.waitOnRegionServer(rst);
log("Waiting for RS shutdown to be handled by master");
waitForRSShutdownToStartAndFinish(activeMaster, serverName);
log("RS shutdown done, waiting for no more RIT");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster");
assertRegionsAssigned(cluster, regions);
expectedNumRS--;
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
log("Restarting region server " + num + " of " + total);
restarted = cluster.startRegionServer();
restarted.waitForServerOnline();
expectedNumRS++;
log("Region server " + num + " is back online");
log("Waiting for no more RIT");
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
log("Verifying there are " + numRegions + " assigned on cluster");
assertRegionsAssigned(cluster, regions);
assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
num++;
}
Thread.sleep(1000);
assertRegionsAssigned(cluster, regions);
// TODO: Bring random 3 of 4 RS down at the same time
ht.close();
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();
}
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestMasterShutdown method testMasterShutdown.
/**
* Simple test of shutdown.
* <p>
* Starts with three masters. Tells the active master to shutdown the cluster.
* Verifies that all masters are properly shutdown.
* @throws Exception
*/
@Test(timeout = 120000)
public void testMasterShutdown() throws Exception {
final int NUM_MASTERS = 3;
final int NUM_RS = 3;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
// Start the cluster
HBaseTestingUtility htu = new HBaseTestingUtility(conf);
htu.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = htu.getHBaseCluster();
// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();
// wait for each to come online
for (MasterThread mt : masterThreads) {
assertTrue(mt.isAlive());
}
// find the active master
HMaster active = null;
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
active = masterThreads.get(i).getMaster();
break;
}
}
assertNotNull(active);
// make sure the other two are backup masters
ClusterStatus status = active.getClusterStatus();
assertEquals(2, status.getBackupMastersSize());
assertEquals(2, status.getBackupMasters().size());
// tell the active master to shutdown the cluster
active.shutdown();
for (int i = NUM_MASTERS - 1; i >= 0; --i) {
cluster.waitOnMaster(i);
}
// make sure all the masters properly shutdown
assertEquals(0, masterThreads.size());
htu.shutdownMiniCluster();
}
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestMasterShutdown method testMasterShutdownBeforeStartingAnyRegionServer.
@Test(timeout = 60000)
public void testMasterShutdownBeforeStartingAnyRegionServer() throws Exception {
final int NUM_MASTERS = 1;
final int NUM_RS = 0;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
conf.setInt("hbase.ipc.client.failed.servers.expiry", 200);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
// Start the cluster
final HBaseTestingUtility util = new HBaseTestingUtility(conf);
util.startMiniDFSCluster(3);
util.startMiniZKCluster();
util.createRootDir();
final LocalHBaseCluster cluster = new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class, MiniHBaseCluster.MiniHBaseClusterRegionServer.class);
final int MASTER_INDEX = 0;
final MasterThread master = cluster.getMasters().get(MASTER_INDEX);
master.start();
LOG.info("Called master start on " + master.getName());
Thread shutdownThread = new Thread("Shutdown-Thread") {
public void run() {
LOG.info("Before call to shutdown master");
try {
try (Connection connection = ConnectionFactory.createConnection(util.getConfiguration())) {
try (Admin admin = connection.getAdmin()) {
admin.shutdown();
}
}
LOG.info("After call to shutdown master");
cluster.waitOnMaster(MASTER_INDEX);
} catch (Exception e) {
}
}
;
};
shutdownThread.start();
LOG.info("Called master join on " + master.getName());
master.join();
shutdownThread.join();
List<MasterThread> masterThreads = cluster.getMasters();
// make sure all the masters properly shutdown
assertEquals(0, masterThreads.size());
util.shutdownMiniZKCluster();
util.shutdownMiniDFSCluster();
util.cleanupTestDir();
}
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestRSKilledWhenInitializing method testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode.
/**
* Test verifies whether a region server is removing from online servers list in master if it went
* down after registering with master. Test will TIMEOUT if an error!!!!
* @throws Exception
*/
@Test
public void testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode() throws Exception {
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
// Start the cluster
final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniDFSCluster(3);
TEST_UTIL.startMiniZKCluster();
TEST_UTIL.createRootDir();
final LocalHBaseCluster cluster = new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class, RegisterAndDieRegionServer.class);
final MasterThread master = startMaster(cluster.getMasters().get(0));
try {
// Master is up waiting on RegionServers to check in. Now start RegionServers.
for (int i = 0; i < NUM_RS; i++) {
cluster.getRegionServers().get(i).start();
}
// Now wait on master to see NUM_RS + 1 servers as being online, thats NUM_RS plus
// the Master itself (because Master hosts hbase:meta and checks in as though it a RS).
List<ServerName> onlineServersList = null;
do {
onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
} while (onlineServersList.size() < (NUM_RS + 1));
// Wait until killedRS is set. Means RegionServer is starting to go down.
while (killedRS.get() == null) {
Threads.sleep(1);
}
// Wait on the RegionServer to fully die.
while (cluster.getLiveRegionServers().size() > NUM_RS) {
Threads.sleep(1);
}
// being reassigned.
while (!master.getMaster().isInitialized()) {
Threads.sleep(1);
}
// Now in steady state. How many regions open? Master should have too many regionservers
// showing still. The downed RegionServer should still be showing as registered.
assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get()));
// Find non-meta region (namespace?) and assign to the killed server. That'll trigger cleanup.
Map<HRegionInfo, ServerName> assignments = null;
do {
assignments = master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments();
} while (assignments == null || assignments.size() < 2);
HRegionInfo hri = null;
for (Map.Entry<HRegionInfo, ServerName> e : assignments.entrySet()) {
if (e.getKey().isMetaRegion())
continue;
hri = e.getKey();
break;
}
// Try moving region to the killed server. It will fail. As by-product, we will
// remove the RS from Master online list because no corresponding znode.
assertEquals(NUM_RS + 1, master.getMaster().getServerManager().getOnlineServersList().size());
LOG.info("Move " + hri.getEncodedName() + " to " + killedRS.get());
master.getMaster().move(hri.getEncodedNameAsBytes(), Bytes.toBytes(killedRS.get().toString()));
// Wait until the RS no longer shows as registered in Master.
while (onlineServersList.size() > (NUM_RS + 1)) {
Thread.sleep(100);
onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
}
} finally {
// Shutdown is messy with complaints about fs being closed. Why? TODO.
cluster.shutdown();
cluster.join();
TEST_UTIL.shutdownMiniDFSCluster();
TEST_UTIL.shutdownMiniZKCluster();
TEST_UTIL.cleanupTestDir();
}
}
use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.
the class TestDistributedLogSplitting method populateDataInTable.
void populateDataInTable(int nrows, String fname) throws Exception {
byte[] family = Bytes.toBytes(fname);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
assertEquals(NUM_RS, rsts.size());
for (RegionServerThread rst : rsts) {
HRegionServer hrs = rst.getRegionServer();
List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
for (HRegionInfo hri : hris) {
if (hri.getTable().isSystemTable()) {
continue;
}
LOG.debug("adding data to rs = " + rst.getName() + " region = " + hri.getRegionNameAsString());
Region region = hrs.getOnlineRegion(hri.getRegionName());
assertTrue(region != null);
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
}
}
for (MasterThread mt : cluster.getLiveMasterThreads()) {
HRegionServer hrs = mt.getMaster();
List<HRegionInfo> hris;
try {
hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
} catch (ServerNotRunningYetException e) {
// It's ok: this master may be a backup. Ignored.
continue;
}
for (HRegionInfo hri : hris) {
if (hri.getTable().isSystemTable()) {
continue;
}
LOG.debug("adding data to rs = " + mt.getName() + " region = " + hri.getRegionNameAsString());
Region region = hrs.getOnlineRegion(hri.getRegionName());
assertTrue(region != null);
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
}
}
}
Aggregations