Search in sources :

Example 1 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TableSnapshotInputFormatTestBase method setupCluster.

/**
 * Per-test setup: applies the test configuration, starts a mini cluster and records the
 * master's root directory together with the file system that backs it.
 */
@Before
public void setupCluster() throws Exception {
    // Configuration must be in place before the cluster is started.
    setupConf(UTIL.getConfiguration());

    // Same count is used for region servers and data nodes; a root dir is created as well.
    UTIL.startMiniCluster(
        StartTestingClusterOption.builder()
            .numRegionServers(NUM_REGION_SERVERS)
            .numDataNodes(NUM_REGION_SERVERS)
            .createRootDir(true)
            .build());

    // Remember where the master keeps its data and resolve the matching file system.
    rootDir = UTIL.getHBaseCluster()
        .getMaster()
        .getMasterFileSystem()
        .getRootDir();
    fs = rootDir.getFileSystem(UTIL.getConfiguration());
}
Also used : StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Before(org.junit.Before)

Example 2 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestReplicasClient method beforeClass.

/**
 * One-time setup for the class.
 * <p>
 * Configures store file refreshing, scanner activity logging and client-side metrics, starts a
 * mini cluster built with {@code numRegionServers(1)}, {@code numAlwaysStandByMasters(1)} and
 * {@code numMasters(1)}, creates the test table with the {@code SlowMeCopro} coprocessor
 * attached, records the primary region plus a derived replica-1 region info, and finally calls
 * {@code TestRegionServerNoMaster.stopMasterAndCacheMetaLocation}.
 *
 * @throws Exception if cluster startup, table creation or region lookup fails
 */
@BeforeClass
public static void beforeClass() throws Exception {
    // enable store file refreshing
    HTU.getConfiguration().setInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, REFRESH_PERIOD);
    HTU.getConfiguration().setBoolean("hbase.client.log.scanner.activity", true);
    HTU.getConfiguration().setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);
    StartTestingClusterOption option = StartTestingClusterOption.builder().numRegionServers(1).numAlwaysStandByMasters(1).numMasters(1).build();
    HTU.startMiniCluster(option);
    // Create table then get the single region for our new table.
    TableDescriptorBuilder builder = HTU.createModifyableTableDescriptor(TableName.valueOf(TestReplicasClient.class.getSimpleName()), ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER, ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED);
    builder.setCoprocessor(SlowMeCopro.class.getName());
    TableDescriptor hdt = builder.build();
    HTU.createTable(hdt, new byte[][] { f }, null);
    TABLE_NAME = hdt.getTableName();
    // The locator is only needed for this one lookup, so close it right away.
    try (RegionLocator locator = HTU.getConnection().getRegionLocator(hdt.getTableName())) {
        hriPrimary = locator.getRegionLocation(row, false).getRegion();
    }
    // mock a secondary region info to open
    hriSecondary = RegionReplicaUtil.getRegionInfoForReplica(hriPrimary, 1);
    // No master
    LOG.info("Master is going to be stopped");
    TestRegionServerNoMaster.stopMasterAndCacheMetaLocation(HTU);
    // A copied Configuration with HBASE_CLIENT_RETRIES_NUMBER=1 used to be built here but was
    // never read by anything; that dead local has been removed.
    LOG.info("Master has stopped");
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) BeforeClass(org.junit.BeforeClass)

Example 3 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestRollingRestart method testBasicRollingRestart.

/**
 * Exercises a basic rolling restart of a mini cluster.
 * <p>
 * The test starts a cluster, creates a multi-region table, disables and re-enables it, adds one
 * more region server, stops and restarts both masters, and then stops and restarts every live
 * region server one at a time. Region assignment and the number of region server threads are
 * verified after each step.
 */
@Test
public void testBasicRollingRestart() throws Exception {
    // Start a cluster with 2 masters and 3 regionservers (a fourth RS is added further down)
    final int NUM_MASTERS = 2;
    final int NUM_RS = 3;
    final int NUM_REGIONS_TO_CREATE = 20;
    // Running count of region server threads expected in the cluster.
    // NOTE(review): starts at the same literal value as NUM_RS; keep the two in sync.
    int expectedNumRS = 3;
    // Start the cluster
    log("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, splitWALCoordinatedByZK);
    TEST_UTIL = new HBaseTestingUtil(conf);
    StartTestingClusterOption option = StartTestingClusterOption.builder().numMasters(NUM_MASTERS).numRegionServers(NUM_RS).numDataNodes(NUM_RS).build();
    TEST_UTIL.startMiniCluster(option);
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    // Create a table with regions
    // The method name may contain '[', '|' or ']'; each is replaced with '-' for the table name.
    final TableName tableName = TableName.valueOf(name.getMethodName().replaceAll("[\\[|\\]]", "-"));
    byte[] family = Bytes.toBytes("family");
    log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
    // NOTE(review): ht is only closed at the very end; an earlier assertion failure skips the
    // close() and the shutdownMiniCluster() call below.
    Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        numRegions = r.getStartKeys().length;
    }
    // catalogs: count one extra region on top of the table's own regions
    numRegions += 1;
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Disabling table\n");
    TEST_UTIL.getAdmin().disableTable(tableName);
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    NavigableSet<String> regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
    log("Verifying only catalog region is assigned\n");
    // Log the unexpected regions first so the assertion failure below is easier to diagnose.
    if (regions.size() != 1) {
        for (String oregion : regions) {
            log("Region still online: " + oregion);
        }
    }
    assertEquals(1, regions.size());
    log("Enabling table\n");
    TEST_UTIL.getAdmin().enableTable(tableName);
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster\n");
    // Snapshot of online regions after re-enabling; reused for every later assignment check.
    regions = HBaseTestingUtil.getAllOnlineRegions(cluster);
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // Add a new regionserver
    log("Adding a fourth RS");
    RegionServerThread restarted = cluster.startRegionServer();
    expectedNumRS++;
    restarted.waitForServerOnline();
    log("Additional RS is online");
    log("Waiting for no more RIT");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster");
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // Master Restarts
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    MasterThread activeMaster = null;
    MasterThread backupMaster = null;
    assertEquals(2, masterThreads.size());
    // Exactly two masters exist, so whichever one is not active is treated as the backup.
    if (masterThreads.get(0).getMaster().isActiveMaster()) {
        activeMaster = masterThreads.get(0);
        backupMaster = masterThreads.get(1);
    } else {
        activeMaster = masterThreads.get(1);
        backupMaster = masterThreads.get(0);
    }
    // Bring down the backup master
    log("Stopping backup master\n\n");
    backupMaster.getMaster().stop("Stop of backup during rolling restart");
    cluster.hbaseCluster.waitOnMaster(backupMaster);
    // Bring down the primary master
    log("Stopping primary master\n\n");
    activeMaster.getMaster().stop("Stop of active during rolling restart");
    cluster.hbaseCluster.waitOnMaster(activeMaster);
    // Start primary master
    log("Restarting primary master\n\n");
    activeMaster = cluster.startMaster();
    cluster.waitForActiveAndReadyMaster();
    // Start backup master
    log("Restarting backup master\n\n");
    backupMaster = cluster.startMaster();
    // Restarting the masters must not have changed the number of region server threads.
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // RegionServer Restarts
    // Bring them down, one at a time, waiting between each to complete
    List<RegionServerThread> regionServers = cluster.getLiveRegionServerThreads();
    // 1-based position of the server being cycled, used only in log messages.
    int num = 1;
    int total = regionServers.size();
    for (RegionServerThread rst : regionServers) {
        ServerName serverName = rst.getRegionServer().getServerName();
        log("Stopping region server " + num + " of " + total + " [ " + serverName + "]");
        rst.getRegionServer().stop("Stopping RS during rolling restart");
        cluster.hbaseCluster.waitOnRegionServer(rst);
        log("Waiting for RS shutdown to be handled by master");
        waitForRSShutdownToStartAndFinish(activeMaster, serverName);
        log("RS shutdown done, waiting for no more RIT");
        TEST_UTIL.waitUntilNoRegionsInTransition(60000);
        log("Verifying there are " + numRegions + " assigned on cluster");
        assertRegionsAssigned(cluster, regions);
        // One server is down at this point.
        expectedNumRS--;
        assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
        log("Restarting region server " + num + " of " + total);
        restarted = cluster.startRegionServer();
        restarted.waitForServerOnline();
        // The replacement is online, so the expected count is back to its previous value.
        expectedNumRS++;
        log("Region server " + num + " is back online");
        log("Waiting for no more RIT");
        TEST_UTIL.waitUntilNoRegionsInTransition(60000);
        log("Verifying there are " + numRegions + " assigned on cluster");
        assertRegionsAssigned(cluster, regions);
        assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
        num++;
    }
    // Short pause before the final assignment check.
    Thread.sleep(1000);
    assertRegionsAssigned(cluster, regions);
    // TODO: Bring random 3 of 4 RS down at the same time
    ht.close();
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) TableName(org.apache.hadoop.hbase.TableName) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Test(org.junit.Test)

Example 4 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterShutdown method testMasterShutdown.

/**
 * Basic cluster shutdown check.
 * <p>
 * Brings up a cluster with three masters, asks the active master to shut the cluster down, and
 * then waits until neither master threads nor region server threads are reported as live.
 */
@Test
public void testMasterShutdown() throws Exception {
    // Configuration backing this test's cluster
    Configuration conf = HBaseConfiguration.create();
    try {
        htu = new HBaseTestingUtil(conf);
        // Three masters, one region server, one data node.
        final SingleProcessHBaseCluster cluster = htu.startMiniCluster(
            StartTestingClusterOption.builder()
                .numMasters(3)
                .numRegionServers(1)
                .numDataNodes(1)
                .build());

        // Polling parameters shared by every wait below.
        final long timeoutMs = TimeUnit.SECONDS.toMillis(30);
        final long pollIntervalMs = TimeUnit.SECONDS.toMillis(1);

        // Give all master threads time to spawn and enter their run loop.
        assertNotEquals(-1, htu.waitFor(timeoutMs, pollIntervalMs, () -> {
            final List<MasterThread> threads = cluster.getMasterThreads();
            if (threads == null || threads.size() < 3) {
                return false;
            }
            return threads.stream().allMatch(Thread::isAlive);
        }));

        // Locate the active master; the remaining two must be registered as backups.
        final HMaster activeMaster = cluster.getMaster();
        assertNotNull(activeMaster);
        assertEquals(2, activeMaster.getClusterMetrics().getBackupMasterNames().size());

        // Ask the active master to take the whole cluster down, then wait for it to happen.
        activeMaster.shutdown();
        assertNotEquals(-1, htu.waitFor(timeoutMs, pollIntervalMs,
            () -> CollectionUtils.isEmpty(cluster.getLiveMasterThreads())));
        assertNotEquals(-1, htu.waitFor(timeoutMs, pollIntervalMs,
            () -> CollectionUtils.isEmpty(cluster.getLiveRegionServerThreads())));
    } finally {
        // Always release the mini cluster, even when an assertion above failed.
        if (htu != null) {
            htu.shutdownMiniCluster();
            htu = null;
        }
    }
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) List(java.util.List) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) Test(org.junit.Test)

Example 5 with StartTestingClusterOption

use of org.apache.hadoop.hbase.StartTestingClusterOption in project hbase by apache.

the class TestMasterShutdown method testMasterShutdownBeforeStartingAnyRegionServer.

/**
 * This test appears to be an intentional race between a thread that issues a shutdown RPC to the
 * master, while the master is concurrently realizing it cannot initialize because there are no
 * region servers available to it. The expected behavior is that master initialization is
 * interruptable via said shutdown RPC.
 */
@Test
public void testMasterShutdownBeforeStartingAnyRegionServer() throws Exception {
    // Managed by hand in this test; shut down in the finally block if it was created.
    LocalHBaseCluster hbaseCluster = null;
    try {
        htu = new HBaseTestingUtil(createMasterShutdownBeforeStartingAnyRegionServerConfiguration());
        // configure a cluster with one data node, one master and no region servers
        final StartTestingClusterOption options = StartTestingClusterOption.builder().numDataNodes(1).numMasters(1).numRegionServers(0).masterClass(HMaster.class).rsClass(SingleProcessHBaseCluster.MiniHBaseClusterRegionServer.class).createRootDir(true).build();
        // Can't simply `htu.startMiniCluster(options)` because that method waits for the master to
        // start completely. However, this test's premise is that a partially started master should
        // still respond to a shutdown RPC. So instead, we manage each component lifecycle
        // independently.
        // I think it's not worth refactoring HTU's helper methods just for this class.
        htu.startMiniDFSCluster(options.getNumDataNodes());
        htu.startMiniZKCluster(options.getNumZkServers());
        htu.createRootDir();
        hbaseCluster = new LocalHBaseCluster(htu.getConfiguration(), options.getNumMasters(), options.getNumRegionServers(), options.getMasterClass(), options.getRsClass());
        // Only the master thread is started; no region server is ever started in this test.
        final MasterThread masterThread = hbaseCluster.getMasters().get(0);
        masterThread.start();
        // Switching to master registry exacerbated a race in the master bootstrap that can result
        // in a lost shutdown command (HBASE-8422, HBASE-23836). The race is essentially because
        // the server manager in HMaster is not initialized by the time shutdown() RPC (below) is
        // made to the master. The suspected reason as to why it was uncommon before HBASE-18095
        // is because the connection creation with ZK registry is so slow that by then the server
        // manager is usually init'ed in time for the RPC to be made. For now, adding an explicit
        // wait() in the test, waiting for the server manager to become available.
        final long timeout = TimeUnit.MINUTES.toMillis(10);
        assertNotEquals("timeout waiting for server manager to become available.", -1, htu.waitFor(timeout, () -> masterThread.getMaster().getServerManager() != null));
        // Master has come up far enough that we can terminate it without creating a zombie.
        try {
            // HBASE-24327 : (Resolve Flaky connection issues)
            // shutdown() RPC can have flaky ZK connection issues.
            // e.g
            // ERROR [RpcServer.priority.RWQ.Fifo.read.handler=1,queue=1,port=53033]
            // master.HMaster(2878): ZooKeeper exception trying to set cluster as down in ZK
            // org.apache.zookeeper.KeeperException$SystemErrorException:
            // KeeperErrorCode = SystemError
            // 
            // However, even when above flakes happen, shutdown call does get completed even if
            // RPC call has failure. Hence, subsequent retries will never succeed as HMaster is
            // already shutdown. Hence, it can fail. To resolve it, after making one shutdown()
            // call, we tolerate a RetriesExhaustedException whose cause is a
            // ConnectionClosedException; any other failure is rethrown.
            htu.getConnection().getAdmin().shutdown();
        } catch (RetriesExhaustedException e) {
            if (e.getCause() instanceof ConnectionClosedException) {
                LOG.info("Connection is Closed to the cluster. The cluster is already down.", e);
            } else {
                throw e;
            }
        }
        LOG.info("Shutdown RPC sent.");
        // Block until the master thread has exited.
        masterThread.join();
    } finally {
        // Tear down the hand-built cluster first, then the remaining HTU-managed components.
        if (hbaseCluster != null) {
            hbaseCluster.shutdown();
        }
        if (htu != null) {
            htu.shutdownMiniCluster();
            htu = null;
        }
    }
}
Also used : SingleProcessHBaseCluster(org.apache.hadoop.hbase.SingleProcessHBaseCluster) RetriesExhaustedException(org.apache.hadoop.hbase.client.RetriesExhaustedException) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ConnectionClosedException(org.apache.hadoop.hbase.exceptions.ConnectionClosedException) LocalHBaseCluster(org.apache.hadoop.hbase.LocalHBaseCluster) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) StartTestingClusterOption(org.apache.hadoop.hbase.StartTestingClusterOption) Test(org.junit.Test)

Aggregations

StartTestingClusterOption (org.apache.hadoop.hbase.StartTestingClusterOption)42 BeforeClass (org.junit.BeforeClass)21 HBaseTestingUtil (org.apache.hadoop.hbase.HBaseTestingUtil)13 Test (org.junit.Test)13 Configuration (org.apache.hadoop.conf.Configuration)10 SingleProcessHBaseCluster (org.apache.hadoop.hbase.SingleProcessHBaseCluster)8 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 TableName (org.apache.hadoop.hbase.TableName)5 Table (org.apache.hadoop.hbase.client.Table)5 MasterThread (org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread)5 ServerName (org.apache.hadoop.hbase.ServerName)4 RegionLocator (org.apache.hadoop.hbase.client.RegionLocator)4 Path (org.apache.hadoop.fs.Path)3 ClusterMetrics (org.apache.hadoop.hbase.ClusterMetrics)3 TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor)3 Before (org.junit.Before)3 InetAddress (java.net.InetAddress)2 NetworkInterface (java.net.NetworkInterface)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2