Search in sources :

Example 6 with MasterThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.

the class TestDistributedLogSplitting method after.

@After
public void after() throws Exception {
    try {
        if (TEST_UTIL.getHBaseCluster() != null) {
            for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
                mt.getMaster().abort("closing...", null);
            }
        }
        TEST_UTIL.shutdownMiniHBaseCluster();
    } finally {
        TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
        ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
    }
}
Also used : MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) After(org.junit.After)

Example 7 with MasterThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.

the class TestMasterFailover method testSimpleMasterFailover.

/**
   * Simple test of master failover.
   * <p>
   * Starts with three masters.  Kills a backup master.  Then kills the active
   * master.  Ensures the final master becomes active and we can still contact
   * the cluster.
   * @throws Exception
   */
@Test(timeout = 240000)
public void testSimpleMasterFailover() throws Exception {
    final int NUM_MASTERS = 3;
    final int NUM_RS = 3;
    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    // wait for each to come online
    for (MasterThread mt : masterThreads) {
        assertTrue(mt.isAlive());
    }
    // verify only one is the active master and we have right number
    int numActive = 0;
    int activeIndex = -1;
    ServerName activeName = null;
    HMaster active = null;
    for (int i = 0; i < masterThreads.size(); i++) {
        if (masterThreads.get(i).getMaster().isActiveMaster()) {
            numActive++;
            activeIndex = i;
            active = masterThreads.get(activeIndex).getMaster();
            activeName = active.getServerName();
        }
    }
    assertEquals(1, numActive);
    assertEquals(NUM_MASTERS, masterThreads.size());
    LOG.info("Active master " + activeName);
    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    ClusterStatus status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(2, status.getBackupMastersSize());
    assertEquals(2, status.getBackupMasters().size());
    // attempt to stop one of the inactive masters
    int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
    HMaster master = cluster.getMaster(backupIndex);
    LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
    cluster.stopMaster(backupIndex, false);
    cluster.waitOnMaster(backupIndex);
    // Verify still one active master and it's the same
    for (int i = 0; i < masterThreads.size(); i++) {
        if (masterThreads.get(i).getMaster().isActiveMaster()) {
            assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
            activeIndex = i;
            active = masterThreads.get(activeIndex).getMaster();
        }
    }
    assertEquals(1, numActive);
    assertEquals(2, masterThreads.size());
    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
    LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
    assertEquals(4, rsCount);
    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
    status = active.getClusterStatus();
    assertTrue(status.getMaster().equals(activeName));
    assertEquals(1, status.getBackupMastersSize());
    assertEquals(1, status.getBackupMasters().size());
    // kill the active master
    LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
    cluster.stopMaster(activeIndex, false);
    cluster.waitOnMaster(activeIndex);
    // wait for an active master to show up and be ready
    assertTrue(cluster.waitForActiveAndReadyMaster());
    LOG.debug("\n\nVerifying backup master is now active\n");
    // should only have one master now
    assertEquals(1, masterThreads.size());
    // and he should be active
    active = masterThreads.get(0).getMaster();
    assertNotNull(active);
    status = active.getClusterStatus();
    ServerName mastername = status.getMaster();
    assertTrue(mastername.equals(active.getServerName()));
    assertTrue(active.isActiveMaster());
    assertEquals(0, status.getBackupMastersSize());
    assertEquals(0, status.getBackupMasters().size());
    int rss = status.getServersSize();
    LOG.info("Active master " + mastername.getServerName() + " managing " + rss + " region servers");
    assertEquals(4, rss);
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
}
Also used : HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ServerName(org.apache.hadoop.hbase.ServerName) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) ClusterStatus(org.apache.hadoop.hbase.ClusterStatus) Test(org.junit.Test)

Example 8 with MasterThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.

the class TestMasterFailover method testPendingOpenOrCloseWhenMasterFailover.

/**
   * Test region in pending_open/close when master failover
   */
@Test(timeout = 180000)
public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
    final int NUM_MASTERS = 1;
    final int NUM_RS = 1;
    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();
    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");
    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());
    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());
    // Create a table with a region online
    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
    onlineTable.close();
    // Create a table in META, so it has a region offline
    HTableDescriptor offlineTable = new HTableDescriptor(TableName.valueOf(Bytes.toBytes("offlineTable")));
    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = FSUtils.getRootDir(conf);
    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
    fstd.createTableDescriptor(offlineTable);
    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(hriOffline, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
    log("Regions in hbase:meta and namespace have been created");
    // at this point we only expect 3 regions to be assigned out
    // (catalogs and namespace, + 1 online region)
    assertEquals(3, cluster.countServedRegions());
    HRegionInfo hriOnline = null;
    try (RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
        hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
    }
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
    // Put the online region in pending_close. It is actually already opened.
    // This is to simulate that the region close RPC is not sent out before failover
    RegionState oldState = regionStates.getRegionState(hriOnline);
    RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, oldState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
    // Put the offline region in pending_open. It is actually not opened yet.
    // This is to simulate that the region open RPC is not sent out before failover
    oldState = new RegionState(hriOffline, State.OFFLINE);
    newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedClose, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
    createRegion(failedOpen, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
    // Simulate a region transitioning to failed open when the region server reports the
    // transition as FAILED_OPEN
    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
    LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
    // Simulate a region transitioning to failed open when the master couldn't find a plan for
    // the region
    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
    // Stop the master
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");
    // Start up a new master
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");
    // Wait till no region in transition any more
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();
    // Both pending_open (RPC sent/not yet) regions should be online
    assertTrue(regionStates.isRegionOnline(hriOffline));
    assertTrue(regionStates.isRegionOnline(hriOnline));
    assertTrue(regionStates.isRegionOnline(failedClose));
    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
    assertTrue(regionStates.isRegionOnline(failedOpen));
    log("Done with verification, shutting down cluster");
    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) FileSystem(org.apache.hadoop.fs.FileSystem) FSTableDescriptors(org.apache.hadoop.hbase.util.FSTableDescriptors) Test(org.junit.Test)

Example 9 with MasterThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.

the class TestMasterRestartAfterDisablingTable method testForCheckingIfEnableAndDisableWorksFineAfterSwitch.

@Test
public void testForCheckingIfEnableAndDisableWorksFineAfterSwitch() throws Exception {
    final int NUM_MASTERS = 2;
    final int NUM_RS = 1;
    final int NUM_REGIONS_TO_CREATE = 4;
    // Start the cluster
    log("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    // Create a table with regions
    final TableName tableName = TableName.valueOf(name.getMethodName());
    byte[] family = Bytes.toBytes("family");
    log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
    Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        numRegions = r.getStartKeys().length;
    }
    // catalogs
    numRegions += 1;
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Disabling table\n");
    TEST_UTIL.getAdmin().disableTable(tableName);
    NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    assertEquals("The number of regions for the table tableRestart should be 0 and only" + "the catalog and namespace tables should be present.", 2, regions.size());
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    MasterThread activeMaster = null;
    if (masterThreads.get(0).getMaster().isActiveMaster()) {
        activeMaster = masterThreads.get(0);
    } else {
        activeMaster = masterThreads.get(1);
    }
    activeMaster.getMaster().stop("stopping the active master so that the backup can become active");
    cluster.hbaseCluster.waitOnMaster(activeMaster);
    cluster.waitForActiveAndReadyMaster();
    assertTrue("The table should not be in enabled state", cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()), TableState.State.DISABLED, TableState.State.DISABLING));
    log("Enabling table\n");
    // Need a new Admin, the previous one is on the old master
    Admin admin = TEST_UTIL.getAdmin();
    admin.enableTable(tableName);
    admin.close();
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster\n");
    regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    assertEquals("The assigned regions were not onlined after master" + " switch except for the catalog and namespace tables.", 6, regions.size());
    assertTrue("The table should be in enabled state", cluster.getMaster().getTableStateManager().isTableState(TableName.valueOf(name.getMethodName()), TableState.State.ENABLED));
    ht.close();
    TEST_UTIL.shutdownMiniCluster();
}
Also used : RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Configuration(org.apache.hadoop.conf.Configuration) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) Admin(org.apache.hadoop.hbase.client.Admin) TableName(org.apache.hadoop.hbase.TableName) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) Test(org.junit.Test)

Example 10 with MasterThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread in project hbase by apache.

the class TestRSKilledWhenInitializing method testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode.

/**
   * Test verifies whether a region server is removing from online servers list in master if it went
   * down after registering with master. Test will TIMEOUT if an error!!!!
   * @throws Exception
   */
@Test
public void testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode() throws Exception {
    // Create config to use for this cluster
    Configuration conf = HBaseConfiguration.create();
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
    // Start the cluster
    final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniDFSCluster(3);
    TEST_UTIL.startMiniZKCluster();
    TEST_UTIL.createRootDir();
    final LocalHBaseCluster cluster = new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class, RegisterAndDieRegionServer.class);
    final MasterThread master = startMaster(cluster.getMasters().get(0));
    try {
        // Master is up waiting on RegionServers to check in. Now start RegionServers.
        for (int i = 0; i < NUM_RS; i++) {
            cluster.getRegionServers().get(i).start();
        }
        // Now wait on master to see NUM_RS + 1 servers as being online, thats NUM_RS plus
        // the Master itself (because Master hosts hbase:meta and checks in as though it a RS).
        List<ServerName> onlineServersList = null;
        do {
            onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
        } while (onlineServersList.size() < (NUM_RS + 1));
        // Wait until killedRS is set. Means RegionServer is starting to go down.
        while (killedRS.get() == null) {
            Threads.sleep(1);
        }
        // Wait on the RegionServer to fully die.
        while (cluster.getLiveRegionServers().size() > NUM_RS) {
            Threads.sleep(1);
        }
        // being reassigned.
        while (!master.getMaster().isInitialized()) {
            Threads.sleep(1);
        }
        // Now in steady state. How many regions open? Master should have too many regionservers
        // showing still. The downed RegionServer should still be showing as registered.
        assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get()));
        // Find non-meta region (namespace?) and assign to the killed server. That'll trigger cleanup.
        Map<HRegionInfo, ServerName> assignments = null;
        do {
            assignments = master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments();
        } while (assignments == null || assignments.size() < 2);
        HRegionInfo hri = null;
        for (Map.Entry<HRegionInfo, ServerName> e : assignments.entrySet()) {
            if (e.getKey().isMetaRegion())
                continue;
            hri = e.getKey();
            break;
        }
        // Try moving region to the killed server. It will fail. As by-product, we will
        // remove the RS from Master online list because no corresponding znode.
        assertEquals(NUM_RS + 1, master.getMaster().getServerManager().getOnlineServersList().size());
        LOG.info("Move " + hri.getEncodedName() + " to " + killedRS.get());
        master.getMaster().move(hri.getEncodedNameAsBytes(), Bytes.toBytes(killedRS.get().toString()));
        // Wait until the RS no longer shows as registered in Master.
        while (onlineServersList.size() > (NUM_RS + 1)) {
            Thread.sleep(100);
            onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
        }
    } finally {
        // Shutdown is messy with complaints about fs being closed. Why? TODO.
        cluster.shutdown();
        cluster.join();
        TEST_UTIL.shutdownMiniDFSCluster();
        TEST_UTIL.shutdownMiniZKCluster();
        TEST_UTIL.cleanupTestDir();
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ServerName(org.apache.hadoop.hbase.ServerName) LocalHBaseCluster(org.apache.hadoop.hbase.LocalHBaseCluster) Map(java.util.Map) Test(org.junit.Test)

Aggregations

MasterThread (org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread)10 HBaseTestingUtility (org.apache.hadoop.hbase.HBaseTestingUtility)7 Test (org.junit.Test)7 Configuration (org.apache.hadoop.conf.Configuration)6 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)6 MiniHBaseCluster (org.apache.hadoop.hbase.MiniHBaseCluster)6 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)3 ServerName (org.apache.hadoop.hbase.ServerName)3 RegionLocator (org.apache.hadoop.hbase.client.RegionLocator)3 Table (org.apache.hadoop.hbase.client.Table)3 ClusterStatus (org.apache.hadoop.hbase.ClusterStatus)2 LocalHBaseCluster (org.apache.hadoop.hbase.LocalHBaseCluster)2 TableName (org.apache.hadoop.hbase.TableName)2 Admin (org.apache.hadoop.hbase.client.Admin)2 ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException)2 RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)2 IOException (java.io.IOException)1 Map (java.util.Map)1 TimeoutException (java.util.concurrent.TimeoutException)1 FileSystem (org.apache.hadoop.fs.FileSystem)1