Example 26 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

From class TestDistributedLogSplitting, method populateDataInTable.

void populateDataInTable(int nrows, String fname) throws Exception {
    byte[] family = Bytes.toBytes(fname);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    assertEquals(NUM_RS, rsts.size());
    for (RegionServerThread rst : rsts) {
        HRegionServer hrs = rst.getRegionServer();
        List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo hri : hris) {
            if (hri.getTable().isSystemTable()) {
                continue;
            }
            LOG.debug("adding data to rs = " + rst.getName() + " region = " + hri.getRegionNameAsString());
            Region region = hrs.getOnlineRegion(hri.getRegionName());
            assertNotNull(region);
            putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
        }
    }
    for (MasterThread mt : cluster.getLiveMasterThreads()) {
        HRegionServer hrs = mt.getMaster();
        List<HRegionInfo> hris;
        try {
            hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        } catch (ServerNotRunningYetException e) {
            // It's OK: this master may be a backup master; skip it.
            continue;
        }
        for (HRegionInfo hri : hris) {
            if (hri.getTable().isSystemTable()) {
                continue;
            }
            LOG.debug("adding data to rs = " + mt.getName() + " region = " + hri.getRegionNameAsString());
            Region region = hrs.getOnlineRegion(hri.getRegionName());
            assertNotNull(region);
            putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
        }
    }
}
Also used: HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), MasterThread (org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread), Region (org.apache.hadoop.hbase.regionserver.Region), HRegion (org.apache.hadoop.hbase.regionserver.HRegion), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)
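
The putData helper called above is not shown in this listing. A minimal sketch of how such a helper could look against the Region API follows; the row-key scheme (region start key plus a counter) and the cell values are illustrative assumptions, not necessarily the test's actual implementation. It uses Put (org.apache.hadoop.hbase.client.Put) alongside the Region and Bytes types already listed.

void putData(Region region, byte[] startRow, int numRows, byte[] qualifier, byte[]... families) throws Exception {
    // write numRows rows directly into the region, one cell per family
    for (int i = 0; i < numRows; i++) {
        // assumed row-key scheme: the region start key with a counter appended
        Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
        for (byte[] family : families) {
            put.addColumn(family, qualifier, Bytes.toBytes(i));
        }
        region.put(put);
    }
}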

Example 27 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

From class TestDistributedLogSplitting, method testLogReplayTwoSequentialRSDown.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayTwoSequentialRSDown() throws Exception {
    LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume the recovered.edits
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
        List<HRegionInfo> regions = null;
        HRegionServer hrs1 = findRSToKill(false, "table");
        regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());
        makeWAL(hrs1, regions, "table", "family", NUM_LOG_LINES, 100);
        // abort RS1
        LOG.info("Aborting region server: " + hrs1.getServerName());
        hrs1.abort("testing");
        // wait for the abort to complete
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        // wait for regions to come back online (the +1 in the predicate below accounts for hbase:meta)
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        // sleep briefly so the next abort interrupts recovery in the middle
        Thread.sleep(300);
        // abort second region server
        rsts = cluster.getLiveRegionServerThreads();
        HRegionServer hrs2 = rsts.get(0).getRegionServer();
        LOG.info("Aborting one more region server: " + hrs2.getServerName());
        hrs2.abort("testing");
        // wait for the abort to complete
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
            }
        });
        // wait for regions to come back online
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        // wait until all regions are fully recovered
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
                return (recoveringRegions != null && recoveringRegions.isEmpty());
            }
        });
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException), RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException), IOException (java.io.IOException), TimeoutException (java.util.concurrent.TimeoutException), RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException), ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher), ArrayList (java.util.ArrayList), List (java.util.List), LinkedList (java.util.LinkedList), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Waiter (org.apache.hadoop.hbase.Waiter), Ignore (org.junit.Ignore), Test (org.junit.Test)
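
Waiter.Predicate has a single evaluate() method, so on Java 8 each of the anonymous classes above collapses to a lambda. A sketch of the first two wait conditions rewritten this way, assuming the same TEST_UTIL, cluster, and constants are in scope:

// the same wait conditions as above, written as lambdas (Java 8+)
TEST_UTIL.waitFor(120000, 200, (Waiter.Predicate<Exception>) () ->
    cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
TEST_UTIL.waitFor(180000, 200, (Waiter.Predicate<Exception>) () ->
    HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));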

Example 28 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

From class TestDistributedLogSplitting, method testMarkRegionsRecoveringInZK.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMarkRegionsRecoveringInZK() throws Exception {
    LOG.info("testMarkRegionsRecoveringInZK");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = master.getZooKeeper();
    Table ht = installTable(zkw, "table", "family", 40);
    try {
        final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
        Set<HRegionInfo> regionSet = new HashSet<>();
        HRegionInfo region = null;
        HRegionServer hrs = null;
        ServerName firstFailedServer = null;
        ServerName secondFailedServer = null;
        for (int i = 0; i < NUM_RS; i++) {
            hrs = rsts.get(i).getRegionServer();
            List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            if (regions.isEmpty())
                continue;
            region = regions.get(0);
            regionSet.add(region);
            firstFailedServer = hrs.getServerName();
            secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
            break;
        }
        slm.markRegionsRecovering(firstFailedServer, regionSet);
        slm.markRegionsRecovering(secondFailedServer, regionSet);
        List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw, ZKUtil.joinZNode(zkw.znodePaths.recoveringRegionsZNode, region.getEncodedName()));
        assertEquals(2, recoveringRegions.size());
        // wait for the split log worker to clean them up, because there are no WAL files recorded in ZK
        final HRegionServer tmphrs = hrs;
        TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (tmphrs.getRecoveringRegions().isEmpty());
            }
        });
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException), RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException), IOException (java.io.IOException), TimeoutException (java.util.concurrent.TimeoutException), RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException), ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher), ServerName (org.apache.hadoop.hbase.ServerName), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Waiter (org.apache.hadoop.hbase.Waiter), HashSet (java.util.HashSet), Ignore (org.junit.Ignore), Test (org.junit.Test)
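
The recovering-regions state that the assertion inspects lives under one znode per region, with one child per failed server that has claimed it. A small sketch of querying that layout directly, reusing the zkw and region variables from the test above:

// list which failed servers have marked this region as recovering; each child of
// .../recovering-regions/<encoded-region-name> is one server name
String regionPath = ZKUtil.joinZNode(zkw.znodePaths.recoveringRegionsZNode, region.getEncodedName());
List<String> failedServers = ZKUtil.listChildrenNoWatch(zkw, regionPath);
if (failedServers != null) {
    for (String server : failedServers) {
        LOG.debug("region " + region.getEncodedName() + " is recovering from " + server);
    }
}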

Example 29 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

From class TestDistributedLogSplitting, method testSameVersionUpdatesRecovery.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testSameVersionUpdatesRecovery() throws Exception {
    LOG.info("testSameVersionUpdatesRecovery");
    conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final AtomicLong sequenceId = new AtomicLong(100);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume the recovered.edits
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, name.getMethodName(), "family", NUM_REGIONS_TO_CREATE);
    try {
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        for (int i = 0; i < NUM_RS; i++) {
            boolean isCarryingMeta = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.isMetaRegion()) {
                    isCarryingMeta = true;
                    break;
                }
            }
            if (isCarryingMeta) {
                continue;
            }
            break;
        }
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable() || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
                it.remove();
            }
        }
        if (regions.isEmpty())
            return;
        HRegionInfo curRegionInfo = regions.get(0);
        byte[] startRow = curRegionInfo.getStartKey();
        if (startRow == null || startRow.length == 0) {
            startRow = new byte[] { 0, 0, 0, 0, 1 };
        }
        byte[] row = Bytes.incrementBytes(startRow, 1);
        // use the last 5 bytes because HBaseTestingUtility.createMultiRegions uses 5-byte keys
        row = Arrays.copyOfRange(row, 3, 8);
        long value = 0;
        TableName tableName = TableName.valueOf(name.getMethodName());
        byte[] family = Bytes.toBytes("family");
        byte[] qualifier = Bytes.toBytes("c1");
        long timeStamp = System.currentTimeMillis();
        HTableDescriptor htd = new HTableDescriptor(tableName);
        htd.addFamily(new HColumnDescriptor(family));
        final WAL wal = hrs.getWAL(curRegionInfo);
        for (int i = 0; i < NUM_LOG_LINES; i += 1) {
            WALEdit e = new WALEdit();
            value++;
            e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
            wal.append(curRegionInfo, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()), e, true);
        }
        wal.sync();
        wal.shutdown();
        // abort the RS and wait for log replay recovery to complete
        this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
        // verify we got the last value
        LOG.info("Verification Starts...");
        Get g = new Get(row);
        Result r = ht.get(g);
        long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
        // verify again after a flush
        LOG.info("Verification after flush...");
        TEST_UTIL.getAdmin().flush(tableName);
        r = ht.get(g);
        theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), WAL (org.apache.hadoop.hbase.wal.WAL), KeyValue (org.apache.hadoop.hbase.KeyValue), HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), Result (org.apache.hadoop.hbase.client.Result), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), WALKey (org.apache.hadoop.hbase.wal.WALKey), TableName (org.apache.hadoop.hbase.TableName), AtomicLong (java.util.concurrent.atomic.AtomicLong), WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit), ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher), Get (org.apache.hadoop.hbase.client.Get), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Ignore (org.junit.Ignore), Test (org.junit.Test)
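
The direct WAL writes above (many appends followed by a single sync) are what leave edits behind for log replay to recover. Here is the same pattern factored into a helper for clarity; the method name and parameters are illustrative, while the WAL, WALKey, and WALEdit calls are exactly the ones the test uses:

// append count edits for one region to its WAL, then make them durable with one sync
void appendEdits(WAL wal, HRegionInfo regionInfo, TableName tableName, byte[] row,
        byte[] family, byte[] qualifier, long count) throws Exception {
    long timeStamp = System.currentTimeMillis();
    for (long i = 1; i <= count; i++) {
        WALEdit e = new WALEdit();
        e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(i)));
        wal.append(regionInfo, new WALKey(regionInfo.getEncodedNameAsBytes(), tableName,
            System.currentTimeMillis()), e, true);
    }
    // one sync covers all of the buffered appends
    wal.sync();
}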

Example 30 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.

From class TestSnapshotFromMaster, method testSnapshotHFileArchiving.

/**
   * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots
   * should be retained, while those that are not in a snapshot should be deleted.
   * @throws Exception on failure
   */
@Test(timeout = 300000)
public void testSnapshotHFileArchiving() throws Exception {
    Admin admin = UTIL.getAdmin();
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // recreate the test table with compactions disabled; otherwise a compaction may happen before
    // the snapshot, the compaction call after the snapshot will be a no-op, and the checks will fail
    UTIL.deleteTable(TABLE_NAME);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    htd.setCompactionEnabled(false);
    UTIL.createTable(htd, new byte[][] { TEST_FAM }, null);
    // load the table
    for (int i = 0; i < blockingStoreFiles / 2; i++) {
        UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM);
        UTIL.flush(TABLE_NAME);
    }
    // disable the table so we can take a snapshot
    admin.disableTable(TABLE_NAME);
    htd.setCompactionEnabled(true);
    // take a snapshot of the table
    String snapshotName = "snapshot";
    byte[] snapshotNameBytes = Bytes.toBytes(snapshotName);
    admin.snapshot(snapshotNameBytes, TABLE_NAME);
    LOG.info("After snapshot File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);
    // ensure we only have one snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME);
    // enable compactions now
    admin.modifyTable(TABLE_NAME, htd);
    // re-enable the table so we can compact the regions
    admin.enableTable(TABLE_NAME);
    // compact the files so we get some archived files for the table we just snapshotted
    List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
    for (HRegion region : regions) {
        // enabling the table can trigger a compaction; wait for it.
        region.waitForFlushesAndCompactions();
        // the minimum compaction file count is 2, so this will compact and archive
        region.compactStores();
    }
    List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster().getRegionServerThreads();
    HRegionServer hrs = null;
    for (RegionServerThread rs : regionServerThreads) {
        if (!rs.getRegionServer().getOnlineRegions(TABLE_NAME).isEmpty()) {
            hrs = rs.getRegionServer();
            break;
        }
    }
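    // the discharger archives store files that compaction has already replaced on this RS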
    CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false);
    cleaner.chore();
    LOG.info("After compaction File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);
    // make sure the cleaner has run
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After cleaners File-System state: " + rootDir);
    FSUtils.logFileSystemState(fs, rootDir, LOG);
    // get the snapshot files for the table
    Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames(UTIL.getConfiguration(), fs, snapshotTable);
    // check that the files in the archive contain the ones that we need for the snapshot
    LOG.debug("Have snapshot hfiles:");
    for (String fileName : snapshotHFiles) {
        LOG.debug(fileName);
    }
    // get the archived files for the table
    Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME);
    // get the hfiles for the table
    Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME);
    // and make sure every snapshot hfile is present in one of them
    for (String fileName : snapshotHFiles) {
        boolean exist = archives.contains(fileName) || hfiles.contains(fileName);
        assertTrue("Archived hfiles " + archives + " and table hfiles " + hfiles + " is missing snapshot file:" + fileName, exist);
    }
    // delete the existing snapshot
    admin.deleteSnapshot(snapshotNameBytes);
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // make sure that we don't keep around the hfiles that aren't in a snapshot
    // make sure we wait long enough to refresh the snapshot hfile cache
    List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().cleanersChain;
    for (BaseHFileCleanerDelegate delegate : delegates) {
        if (delegate instanceof SnapshotHFileCleaner) {
            ((SnapshotHFileCleaner) delegate).getFileCacheForTesting().triggerCacheRefreshForTesting();
        }
    }
    // run the cleaner again
    LOG.debug("Running hfile cleaners");
    ensureHFileCleanersRun();
    LOG.info("After delete snapshot cleaners run File-System state");
    FSUtils.logFileSystemState(fs, rootDir, LOG);
    archives = getHFiles(archiveDir, fs, TABLE_NAME);
    assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0, archives.size());
}
Also used: Path (org.apache.hadoop.fs.Path), SnapshotHFileCleaner (org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner), Admin (org.apache.hadoop.hbase.client.Admin), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HRegion (org.apache.hadoop.hbase.regionserver.HRegion), CompactedHFilesDischarger (org.apache.hadoop.hbase.regionserver.CompactedHFilesDischarger), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Test (org.junit.Test)
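
Stripped of the archiving checks, the snapshot lifecycle this test drives is a handful of Admin calls. A minimal sketch, assuming the UTIL and TABLE_NAME fields of the surrounding test class:

// minimal snapshot lifecycle via the Admin API, as exercised by the test above
Admin admin = UTIL.getAdmin();
admin.disableTable(TABLE_NAME); // snapshot a quiesced table, as the test does
byte[] snapshotName = Bytes.toBytes("snapshot");
admin.snapshot(snapshotName, TABLE_NAME);
admin.enableTable(TABLE_NAME); // resume serving
// ... compact, run the hfile cleaners, assert on the archive ...
admin.deleteSnapshot(snapshotName);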

Aggregations

RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread): 34 uses
Test (org.junit.Test): 24 uses
Table (org.apache.hadoop.hbase.client.Table): 22 uses
HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer): 15 uses
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 14 uses
IOException (java.io.IOException): 13 uses
ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher): 12 uses
Ignore (org.junit.Ignore): 11 uses
TableName (org.apache.hadoop.hbase.TableName): 9 uses
Waiter (org.apache.hadoop.hbase.Waiter): 8 uses
Result (org.apache.hadoop.hbase.client.Result): 8 uses
OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException): 8 uses
ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException): 8 uses
ArrayList (java.util.ArrayList): 7 uses
TimeoutException (java.util.concurrent.TimeoutException): 7 uses
RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException): 7 uses
RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException): 7 uses
Path (org.apache.hadoop.fs.Path): 6 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 5 uses
ServerName (org.apache.hadoop.hbase.ServerName): 5 uses