Search in sources :

Example 31 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterFailover method testSimpleMasterFailover.

/**
 * Simple test of master failover.
 * <p>
 * Starts with three masters.  Kills a backup master.  Then kills the active
 * master.  Ensures the final master becomes active and we can still contact
 * the cluster.
 *
 * @throws Exception if the cluster cannot be started or stopped, or any cluster call fails
 */
@Test
public void testSimpleMasterFailover() throws Exception {
    final int NUM_MASTERS = 3;
    final int NUM_RS = 3;
    // Start the cluster
    HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
    try {
        StartTestingClusterOption option = StartTestingClusterOption.builder().numMasters(NUM_MASTERS).numRegionServers(NUM_RS).numDataNodes(NUM_RS).build();
        TEST_UTIL.startMiniCluster(option);
        SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
        // get all the master threads
        List<MasterThread> masterThreads = cluster.getMasterThreads();
        // every master thread must be running
        for (MasterThread mt : masterThreads) {
            assertTrue(mt.isAlive());
        }
        // verify only one is the active master and we have right number
        int numActive = 0;
        int activeIndex = -1;
        ServerName activeName = null;
        HMaster active = null;
        for (int i = 0; i < masterThreads.size(); i++) {
            if (masterThreads.get(i).getMaster().isActiveMaster()) {
                numActive++;
                activeIndex = i;
                active = masterThreads.get(activeIndex).getMaster();
                activeName = active.getServerName();
            }
        }
        assertEquals(1, numActive);
        assertEquals(NUM_MASTERS, masterThreads.size());
        LOG.info("Active master {}", activeName);
        // Check that ClusterStatus reports the correct active and backup masters
        assertNotNull(active);
        ClusterMetrics status = active.getClusterMetrics();
        assertEquals(activeName, status.getMasterName());
        assertEquals(NUM_MASTERS - 1, status.getBackupMasterNames().size());
        // attempt to stop one of the inactive masters
        int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
        HMaster master = cluster.getMaster(backupIndex);
        LOG.debug("\n\nStopping a backup master: {}\n", master.getServerName());
        cluster.stopMaster(backupIndex, false);
        cluster.waitOnMaster(backupIndex);
        // Verify still one active master and it's the same. The counter is reset and
        // recomputed here; otherwise the assertion below would only re-check the value
        // obtained before the backup master was stopped.
        numActive = 0;
        for (int i = 0; i < masterThreads.size(); i++) {
            if (masterThreads.get(i).getMaster().isActiveMaster()) {
                numActive++;
                assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
                // the index of the active master may have changed now that a thread is gone
                activeIndex = i;
                active = masterThreads.get(activeIndex).getMaster();
            }
        }
        assertEquals(1, numActive);
        assertEquals(NUM_MASTERS - 1, masterThreads.size());
        int rsCount = masterThreads.get(activeIndex).getMaster().getClusterMetrics().getLiveServerMetrics().size();
        LOG.info("Active master {} managing {} regions servers", active.getServerName(), rsCount);
        assertEquals(NUM_RS, rsCount);
        // wait for the active master to acknowledge loss of the backup from ZK
        final HMaster activeFinal = active;
        TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), () -> activeFinal.getBackupMasters().size() == 1);
        // Check that ClusterStatus reports the correct active and backup masters
        assertNotNull(active);
        status = active.getClusterMetrics();
        assertEquals(activeName, status.getMasterName());
        assertEquals(1, status.getBackupMasterNames().size());
        // kill the active master
        LOG.debug("\n\nStopping the active master {}\n", active.getServerName());
        cluster.stopMaster(activeIndex, false);
        cluster.waitOnMaster(activeIndex);
        // wait for an active master to show up and be ready
        assertTrue(cluster.waitForActiveAndReadyMaster());
        LOG.debug("\n\nVerifying backup master is now active\n");
        // should only have one master now
        assertEquals(1, masterThreads.size());
        // and it should be active
        active = masterThreads.get(0).getMaster();
        assertNotNull(active);
        status = active.getClusterMetrics();
        ServerName masterName = status.getMasterName();
        assertNotNull(masterName);
        assertEquals(active.getServerName(), masterName);
        assertTrue(active.isActiveMaster());
        assertEquals(0, status.getBackupMasterNames().size());
        int rss = status.getLiveServerMetrics().size();
        LOG.info("Active master {} managing {} region servers", masterName.getServerName(), rss);
        assertEquals(NUM_RS, rss);
    } finally {
        // Stop the cluster
        TEST_UTIL.shutdownMiniCluster();
    }
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ServerName(org.apache.hadoop.hbase.ServerName) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Test(org.junit.Test)

Example 32 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterFailoverBalancerPersistence method testMasterFailoverBalancerPersistence.

/**
 * Test that if the master fails, the load balancer maintains its
 * state (running or not) when the next master takes over
 *
 * @throws Exception if the cluster cannot be started or stopped, or any cluster call fails
 */
@Test
public void testMasterFailoverBalancerPersistence() throws Exception {
    // Start the cluster
    HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
    try {
        StartTestingClusterOption option = StartTestingClusterOption.builder().numMasters(3).build();
        TEST_UTIL.startMiniCluster(option);
        SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
        assertTrue(cluster.waitForActiveAndReadyMaster());
        HMaster active = cluster.getMaster();
        // check that the balancer is on by default for the active master
        ClusterMetrics clusterStatus = active.getClusterMetrics();
        assertTrue(clusterStatus.getBalancerOn());
        active = killActiveAndWaitForNewActive(cluster);
        // ensure the load balancer is still running on new master
        clusterStatus = active.getClusterMetrics();
        assertTrue(clusterStatus.getBalancerOn());
        // turn off the load balancer
        active.balanceSwitch(false);
        // once more, kill active master and wait for new active master to show up
        active = killActiveAndWaitForNewActive(cluster);
        // ensure the load balancer is not running on the new master
        clusterStatus = active.getClusterMetrics();
        assertFalse(clusterStatus.getBalancerOn());
    } finally {
        // Stop the cluster even when an assertion above fails, so the mini cluster
        // is not left running.
        TEST_UTIL.shutdownMiniCluster();
    }
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Test(org.junit.Test)

Example 33 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterMetrics method startCluster.

@BeforeClass
public static void startCluster() throws Exception {
    LOG.info("Starting cluster");
    // Only the master and region server classes are overridden; all other options
    // are left at whatever the builder provides by default.
    TEST_UTIL.startMiniCluster(
        StartTestingClusterOption.builder()
            .masterClass(MyMaster.class)
            .rsClass(MyRegionServer.class)
            .build());
    cluster = TEST_UTIL.getHBaseCluster();

    // Remember the master only once the cluster reports one as active and ready.
    LOG.info("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    master = cluster.getMaster();
}
Also used : StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) BeforeClass(org.junit.BeforeClass)

Example 34 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterOperationsForRegionReplicas method testCreateTableWithMultipleReplicas.

/**
 * Creates a table with region replicas and verifies that the replica assignments recorded
 * in meta stay valid across a master restart, two full HBase cluster restarts, and table
 * alterations that raise and then lower the replica count.
 *
 * @throws Exception if any admin, meta or cluster operation fails
 */
@Test
public void testCreateTableWithMultipleReplicas() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final int numRegions = 3;
    final int numReplica = 2;
    try {
        TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName).setRegionReplication(numReplica).setColumnFamily(ColumnFamilyDescriptorBuilder.of("family")).build();
        ADMIN.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        validateNumberOfRowsInMeta(tableName, numRegions, ADMIN.getConnection());
        List<RegionInfo> hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
        assertEquals(numRegions * numReplica, hris.size());
        assertRegionStateNotNull(hris, numRegions, numReplica);
        List<Result> metaRows = MetaTableAccessor.fullScanRegions(ADMIN.getConnection());
        int numRows = 0;
        for (Result result : metaRows) {
            RegionLocations locations = CatalogFamilyFormat.getRegionLocations(result);
            RegionInfo hri = locations.getRegionLocation().getRegion();
            // the scan covers every table in meta; only rows of this table are counted
            if (!hri.getTable().equals(tableName)) {
                continue;
            }
            numRows += 1;
            HRegionLocation[] servers = locations.getRegionLocations();
            // have one location per replica of a region, and the locations should be different
            assertEquals(numReplica, servers.length);
            assertNotEquals(servers[1], servers[0]);
        }
        assertEquals(numRegions, numRows);
        // The same verification of the meta as above but with the SnapshotOfRegionAssignmentFromMeta
        // class
        validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
        // Now kill the master, restart it and see if the assignments are kept
        ServerName master = TEST_UTIL.getHBaseClusterInterface().getClusterMetrics().getMasterName();
        TEST_UTIL.getHBaseClusterInterface().stopMaster(master);
        TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000);
        TEST_UTIL.getHBaseClusterInterface().startMaster(master.getHostname(), master.getPort());
        TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        assertRegionStateNotNull(hris, numRegions, numReplica);
        validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
        // Now shut the whole cluster down, and verify the assignments are kept so that the
        // availability constraints are met. MiniHBaseCluster chooses arbitrary ports on each
        // restart. This messes with our being able to test that we retain locality. Therefore,
        // figure current cluster ports and pass them in on next cluster start so new cluster comes
        // up at same coordinates -- and the assignment retention logic has a chance to cut in.
        List<Integer> rsports = new ArrayList<>();
        for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads()) {
            rsports.add(rst.getRegionServer().getRpcServer().getListenerAddress().getPort());
        }
        TEST_UTIL.shutdownMiniHBaseCluster();
        StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(numSlaves).rsPorts(rsports).build();
        TEST_UTIL.startMiniHBaseCluster(option);
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        resetConnections();
        validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
        // Now shut the whole cluster down, and verify regions are assigned even if there is only
        // one server running
        TEST_UTIL.shutdownMiniHBaseCluster();
        TEST_UTIL.startMiniHBaseCluster();
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        resetConnections();
        validateSingleRegionServerAssignment(ADMIN.getConnection(), numRegions, numReplica);
        for (int i = 1; i < numSlaves; i++) {
            // restore the cluster
            TEST_UTIL.getMiniHBaseCluster().startRegionServer();
        }
        // Check on alter table
        ADMIN.disableTable(tableName);
        assertTrue(ADMIN.isTableDisabled(tableName));
        // increase the replica
        ADMIN.modifyTable(TableDescriptorBuilder.newBuilder(desc).setRegionReplication(numReplica + 1).build());
        ADMIN.enableTable(tableName);
        LOG.info(ADMIN.getDescriptor(tableName).toString());
        assertTrue(ADMIN.isTableEnabled(tableName));
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        List<RegionInfo> regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionsOfTable(tableName);
        assertEquals("regions.size=" + regions.size() + ", numRegions=" + numRegions + ", numReplica=" + numReplica, numRegions * (numReplica + 1), regions.size());
        // decrease the replica(earlier, table was modified to have a replica count of numReplica + 1)
        ADMIN.disableTable(tableName);
        ADMIN.modifyTable(TableDescriptorBuilder.newBuilder(desc).setRegionReplication(numReplica).build());
        ADMIN.enableTable(tableName);
        assertTrue(ADMIN.isTableEnabled(tableName));
        TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
        TEST_UTIL.waitUntilNoRegionsInTransition();
        regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionsOfTable(tableName);
        assertEquals(numRegions * numReplica, regions.size());
        // also make sure the meta table has the replica locations removed
        hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
        assertEquals(numRegions * numReplica, hris.size());
        // just check that the number of default replica regions in the meta table are the same
        // as the number of regions the table was created with, and the count of the
        // replicas is numReplica for each region
        Map<RegionInfo, Integer> defaultReplicas = new HashMap<>();
        for (RegionInfo hri : hris) {
            RegionInfo regionReplica0 = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
            defaultReplicas.merge(regionReplica0, 1, Integer::sum);
        }
        assertEquals(numRegions, defaultReplicas.size());
        Collection<Integer> counts = new HashSet<>(defaultReplicas.values());
        assertEquals(1, counts.size());
        assertTrue(counts.contains(numReplica));
    } finally {
        // Only clean up what is actually there: the test can fail before the table exists or
        // while it is disabled for an alter, and an exception thrown from this cleanup would
        // replace the original failure.
        if (ADMIN.tableExists(tableName)) {
            if (ADMIN.isTableEnabled(tableName)) {
                ADMIN.disableTable(tableName);
            }
            ADMIN.deleteTable(tableName);
        }
    }
}
Also used : RegionLocations(org.apache.hadoop.hbase.RegionLocations) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) Result(org.apache.hadoop.hbase.client.Result) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TableName(org.apache.hadoop.hbase.TableName) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) JVMClusterUtil(org.apache.hadoop.hbase.util.JVMClusterUtil) ServerName(org.apache.hadoop.hbase.ServerName) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 35 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterRestartAfterDisablingTable method testForCheckingIfEnableAndDisableWorksFineAfterSwitch.

/**
 * Disables a multi-region table, stops the active master so the backup takes over, and
 * verifies that the table is still seen as disabled and can then be enabled again with all
 * of its regions brought back online.
 *
 * @throws Exception if the cluster cannot be started or stopped, or any cluster call fails
 */
@Test
public void testForCheckingIfEnableAndDisableWorksFineAfterSwitch() throws Exception {
    final int NUM_MASTERS = 2;
    final int NUM_REGIONS_TO_CREATE = 4;
    // Start the cluster
    log("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(conf);
    try {
        StartTestingClusterOption option = StartTestingClusterOption.builder().numMasters(NUM_MASTERS).build();
        TEST_UTIL.startMiniCluster(option);
        SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
        log("Waiting for active/ready master");
        cluster.waitForActiveAndReadyMaster();
        // Create a table with regions
        final TableName tableName = TableName.valueOf(name.getMethodName());
        byte[] family = Bytes.toBytes("family");
        log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
        Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
        try {
            int numRegions = -1;
            try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
                numRegions = r.getStartKeys().length;
            }
            // catalogs
            numRegions += 1;
            log("Waiting for no more RIT\n");
            TEST_UTIL.waitUntilNoRegionsInTransition(60000);
            log("Disabling table\n");
            TEST_UTIL.getAdmin().disableTable(tableName);
            NavigableSet<String> regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
            assertEquals("The number of regions for the table tableRestart should be 0 and only " + "the catalog table should be present.", 1, regions.size());
            // with two masters, the active one is either the first thread or the second
            List<MasterThread> masterThreads = cluster.getMasterThreads();
            MasterThread activeMaster = null;
            if (masterThreads.get(0).getMaster().isActiveMaster()) {
                activeMaster = masterThreads.get(0);
            } else {
                activeMaster = masterThreads.get(1);
            }
            activeMaster.getMaster().stop("stopping the active master so that the backup can become active");
            cluster.hbaseCluster.waitOnMaster(activeMaster);
            cluster.waitForActiveAndReadyMaster();
            assertTrue("The table should not be in enabled state", cluster.getMaster().getTableStateManager().isTableState(tableName, TableState.State.DISABLED, TableState.State.DISABLING));
            log("Enabling table\n");
            // Need a new Admin, the previous one is on the old master
            Admin admin = TEST_UTIL.getAdmin();
            admin.enableTable(tableName);
            admin.close();
            log("Waiting for no more RIT\n");
            TEST_UTIL.waitUntilNoRegionsInTransition(60000);
            log("Verifying there are " + numRegions + " assigned on cluster\n");
            regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
            // expect the table's regions plus the catalog region counted above
            assertEquals("The assigned regions were not onlined after master" + " switch except for the catalog table.", numRegions, regions.size());
            assertTrue("The table should be in enabled state", cluster.getMaster().getTableStateManager().isTableState(tableName, TableState.State.ENABLED));
        } finally {
            ht.close();
        }
    } finally {
        // Stop the cluster even when an assertion above fails, so the mini cluster
        // is not left running.
        TEST_UTIL.shutdownMiniCluster();
    }
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) Admin(org.apache.hadoop.hbase.client.Admin) TableName(org.apache.hadoop.hbase.TableName) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Test(org.junit.Test)

Aggregations

StartTestingClusterOption (org.apache.hadoop.hbase.StartTestingClusterOption)42 BeforeClass (org.junit.BeforeClass)21 HBaseTestingUtil (org.apache.hadoop.hbase.HBaseTestingUtil)13 Test (org.junit.Test)13 Configuration (org.apache.hadoop.conf.Configuration)10 SingleProcessHBaseCluster (org.apache.hadoop.hbase.SingleProcessHBaseCluster)8 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 TableName (org.apache.hadoop.hbase.TableName)5 Table (org.apache.hadoop.hbase.client.Table)5 MasterThread (org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread)5 ServerName (org.apache.hadoop.hbase.ServerName)4 RegionLocator (org.apache.hadoop.hbase.client.RegionLocator)4 Path (org.apache.hadoop.fs.Path)3 ClusterMetrics (org.apache.hadoop.hbase.ClusterMetrics)3 TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor)3 Before (org.junit.Before)3 InetAddress (java.net.InetAddress)2 NetworkInterface (java.net.NetworkInterface)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2