Search in sources :

Example 1 with RegionServerThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

the class TestRollingRestart method getDoubleAssignedRegions.

private NavigableSet<String> getDoubleAssignedRegions(MiniHBaseCluster cluster) throws IOException {
    NavigableSet<String> online = new TreeSet<>();
    NavigableSet<String> doubled = new TreeSet<>();
    for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
        for (HRegionInfo region : ProtobufUtil.getOnlineRegions(rst.getRegionServer().getRSRpcServices())) {
            if (!online.add(region.getRegionNameAsString())) {
                doubled.add(region.getRegionNameAsString());
            }
        }
    }
    return doubled;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TreeSet(java.util.TreeSet) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)

Example 2 with RegionServerThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

the class TestRollingRestart method testBasicRollingRestart.

@Test(timeout = 500000)
public void testBasicRollingRestart() throws Exception {
    // Start a cluster with 2 masters and 4 regionservers
    final int NUM_MASTERS = 2;
    final int NUM_RS = 3;
    final int NUM_REGIONS_TO_CREATE = 20;
    int expectedNumRS = 3;
    // Start the cluster
    log("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    // Create a table with regions
    final TableName tableName = TableName.valueOf(name.getMethodName());
    byte[] family = Bytes.toBytes("family");
    log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
    Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        numRegions = r.getStartKeys().length;
    }
    // catalogs
    numRegions += 1;
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Disabling table\n");
    TEST_UTIL.getAdmin().disableTable(tableName);
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    log("Verifying only catalog and namespace regions are assigned\n");
    if (regions.size() != 2) {
        for (String oregion : regions) log("Region still online: " + oregion);
    }
    assertEquals(2, regions.size());
    log("Enabling table\n");
    TEST_UTIL.getAdmin().enableTable(tableName);
    log("Waiting for no more RIT\n");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster\n");
    regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // Add a new regionserver
    log("Adding a fourth RS");
    RegionServerThread restarted = cluster.startRegionServer();
    expectedNumRS++;
    restarted.waitForServerOnline();
    log("Additional RS is online");
    log("Waiting for no more RIT");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    log("Verifying there are " + numRegions + " assigned on cluster");
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // Master Restarts
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    MasterThread activeMaster = null;
    MasterThread backupMaster = null;
    assertEquals(2, masterThreads.size());
    if (masterThreads.get(0).getMaster().isActiveMaster()) {
        activeMaster = masterThreads.get(0);
        backupMaster = masterThreads.get(1);
    } else {
        activeMaster = masterThreads.get(1);
        backupMaster = masterThreads.get(0);
    }
    // Bring down the backup master
    log("Stopping backup master\n\n");
    backupMaster.getMaster().stop("Stop of backup during rolling restart");
    cluster.hbaseCluster.waitOnMaster(backupMaster);
    // Bring down the primary master
    log("Stopping primary master\n\n");
    activeMaster.getMaster().stop("Stop of active during rolling restart");
    cluster.hbaseCluster.waitOnMaster(activeMaster);
    // Start primary master
    log("Restarting primary master\n\n");
    activeMaster = cluster.startMaster();
    cluster.waitForActiveAndReadyMaster();
    // Start backup master
    log("Restarting backup master\n\n");
    backupMaster = cluster.startMaster();
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
    // RegionServer Restarts
    // Bring them down, one at a time, waiting between each to complete
    List<RegionServerThread> regionServers = cluster.getLiveRegionServerThreads();
    int num = 1;
    int total = regionServers.size();
    for (RegionServerThread rst : regionServers) {
        ServerName serverName = rst.getRegionServer().getServerName();
        log("Stopping region server " + num + " of " + total + " [ " + serverName + "]");
        rst.getRegionServer().stop("Stopping RS during rolling restart");
        cluster.hbaseCluster.waitOnRegionServer(rst);
        log("Waiting for RS shutdown to be handled by master");
        waitForRSShutdownToStartAndFinish(activeMaster, serverName);
        log("RS shutdown done, waiting for no more RIT");
        TEST_UTIL.waitUntilNoRegionsInTransition(60000);
        log("Verifying there are " + numRegions + " assigned on cluster");
        assertRegionsAssigned(cluster, regions);
        expectedNumRS--;
        assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
        log("Restarting region server " + num + " of " + total);
        restarted = cluster.startRegionServer();
        restarted.waitForServerOnline();
        expectedNumRS++;
        log("Region server " + num + " is back online");
        log("Waiting for no more RIT");
        TEST_UTIL.waitUntilNoRegionsInTransition(60000);
        log("Verifying there are " + numRegions + " assigned on cluster");
        assertRegionsAssigned(cluster, regions);
        assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
        num++;
    }
    Thread.sleep(1000);
    assertRegionsAssigned(cluster, regions);
    // TODO: Bring random 3 of 4 RS down at the same time
    ht.close();
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
}
Also used : RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) TableName(org.apache.hadoop.hbase.TableName) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Test(org.junit.Test)

Example 3 with RegionServerThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

the class TestReplicationEndpoint method setup.

@Before
public void setup() throws Exception {
    ReplicationEndpointForTest.contructedCount.set(0);
    ReplicationEndpointForTest.startedCount.set(0);
    ReplicationEndpointForTest.replicateCount.set(0);
    ReplicationEndpointReturningFalse.replicated.set(false);
    ReplicationEndpointForTest.lastEntries = null;
    final List<RegionServerThread> rsThreads = utility1.getMiniHBaseCluster().getRegionServerThreads();
    for (RegionServerThread rs : rsThreads) {
        utility1.getAdmin().rollWALWriter(rs.getRegionServer().getServerName());
    }
    // Wait for  all log roll to finish
    utility1.waitFor(3000, new Waiter.ExplainingPredicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            for (RegionServerThread rs : rsThreads) {
                if (!rs.getRegionServer().walRollRequestFinished()) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public String explainFailure() throws Exception {
            List<String> logRollInProgressRsList = new ArrayList<>();
            for (RegionServerThread rs : rsThreads) {
                if (!rs.getRegionServer().walRollRequestFinished()) {
                    logRollInProgressRsList.add(rs.getRegionServer().toString());
                }
            }
            return "Still waiting for log roll on regionservers: " + logRollInProgressRsList;
        }
    });
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) IOException(java.io.IOException) Before(org.junit.Before)

Example 4 with RegionServerThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

the class TestDistributedLogSplitting method testLogReplayForDisablingTable.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
    try {
        // turn off load balancing to prevent regions from moving around otherwise
        // they will consume recovered.edits
        master.balanceSwitch(false);
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        boolean hasRegionsForBothTables = false;
        String tableName = null;
        for (int i = 0; i < NUM_RS; i++) {
            tableName = null;
            hasRegionsForBothTables = false;
            boolean isCarryingSystem = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().isSystemTable()) {
                    isCarryingSystem = true;
                    break;
                }
                if (tableName != null && !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
                    // make sure that we find a RS has online regions for both "table" and "disableTable"
                    hasRegionsForBothTables = true;
                    break;
                } else if (tableName == null) {
                    tableName = region.getTable().getNameAsString();
                }
            }
            if (isCarryingSystem) {
                continue;
            }
            if (hasRegionsForBothTables) {
                break;
            }
        }
        // make sure we found a good RS
        Assert.assertTrue(hasRegionsForBothTables);
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable()) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        LOG.info("Disabling table\n");
        TEST_UTIL.getAdmin().disableTable(TableName.valueOf(name.getMethodName()));
        TEST_UTIL.waitTableDisabled(TableName.valueOf(name.getMethodName()).getName());
        // abort RS
        LOG.info("Aborting region server: " + hrs.getServerName());
        hrs.abort("testing");
        // wait for abort completes
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        // wait for regions come online
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        // wait for all regions are fully recovered
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
                ServerManager serverManager = master.getServerManager();
                return (!serverManager.areDeadServersInProgress() && recoveringRegions != null && recoveringRegions.isEmpty());
            }
        });
        int count = 0;
        FileSystem fs = master.getMasterFileSystem().getFileSystem();
        Path rootdir = FSUtils.getRootDir(conf);
        Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf(name.getMethodName()));
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            if (!fs.exists(editsdir))
                continue;
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    if (WALSplitter.isSequenceIdFile(p)) {
                        return false;
                    }
                    return true;
                }
            });
            if (files != null) {
                for (FileStatus file : files) {
                    int c = countWAL(file.getPath(), fs, conf);
                    count += c;
                    LOG.info(c + " edits in " + file.getPath());
                }
            }
        }
        LOG.info("Verify edits in recovered.edits files");
        assertEquals(NUM_LOG_LINES, count);
        LOG.info("Verify replayed edits");
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
        // clean up
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            fs.delete(editsdir, true);
        }
        disablingHT.close();
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 5 with RegionServerThread

use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

the class TestDistributedLogSplitting method testSameVersionUpdatesRecoveryWithCompaction.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
    LOG.info("testSameVersionUpdatesRecoveryWithWrites");
    conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
    conf.setInt("hbase.hstore.compactionThreshold", 3);
    startCluster(NUM_RS);
    final AtomicLong sequenceId = new AtomicLong(100);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 2000;
    // turn off load balancing to prevent regions from moving around otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        for (int i = 0; i < NUM_RS; i++) {
            boolean isCarryingMeta = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.isMetaRegion()) {
                    isCarryingMeta = true;
                    break;
                }
            }
            if (isCarryingMeta) {
                continue;
            }
            break;
        }
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable() || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
                it.remove();
            }
        }
        if (regions.isEmpty())
            return;
        HRegionInfo curRegionInfo = regions.get(0);
        byte[] startRow = curRegionInfo.getStartKey();
        if (startRow == null || startRow.length == 0) {
            startRow = new byte[] { 0, 0, 0, 0, 1 };
        }
        byte[] row = Bytes.incrementBytes(startRow, 1);
        // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
        row = Arrays.copyOfRange(row, 3, 8);
        long value = 0;
        final TableName tableName = TableName.valueOf(name.getMethodName());
        byte[] family = Bytes.toBytes("family");
        byte[] qualifier = Bytes.toBytes("c1");
        long timeStamp = System.currentTimeMillis();
        HTableDescriptor htd = new HTableDescriptor(tableName);
        htd.addFamily(new HColumnDescriptor(family));
        final WAL wal = hrs.getWAL(curRegionInfo);
        for (int i = 0; i < NUM_LOG_LINES; i += 1) {
            WALEdit e = new WALEdit();
            value++;
            e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
            wal.append(curRegionInfo, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()), e, true);
        }
        wal.sync();
        wal.shutdown();
        // wait for abort completes
        this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
        // verify we got the last value
        LOG.info("Verification Starts...");
        Get g = new Get(row);
        Result r = ht.get(g);
        long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
        // after flush & compaction
        LOG.info("Verification after flush...");
        TEST_UTIL.getAdmin().flush(tableName);
        TEST_UTIL.getAdmin().compact(tableName);
        // wait for compaction completes
        TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (TEST_UTIL.getAdmin().getCompactionState(tableName) == CompactionState.NONE);
            }
        });
        r = ht.get(g);
        theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) KeyValue(org.apache.hadoop.hbase.KeyValue) Result(org.apache.hadoop.hbase.client.Result) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) Table(org.apache.hadoop.hbase.client.Table) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) TableName(org.apache.hadoop.hbase.TableName) AtomicLong(java.util.concurrent.atomic.AtomicLong) Get(org.apache.hadoop.hbase.client.Get) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)34 Test (org.junit.Test)24 Table (org.apache.hadoop.hbase.client.Table)22 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)15 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)14 IOException (java.io.IOException)13 ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher)12 Ignore (org.junit.Ignore)11 TableName (org.apache.hadoop.hbase.TableName)9 Waiter (org.apache.hadoop.hbase.Waiter)8 Result (org.apache.hadoop.hbase.client.Result)8 OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException)8 ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException)8 ArrayList (java.util.ArrayList)7 TimeoutException (java.util.concurrent.TimeoutException)7 RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException)7 RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException)7 Path (org.apache.hadoop.fs.Path)6 FileSystem (org.apache.hadoop.fs.FileSystem)5 ServerName (org.apache.hadoop.hbase.ServerName)5