Search in sources:

Example 6 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in the Apache HBase project.

From class TestDistributedLogSplitting, method testRecoveredEdits.

/**
 * Splits the WAL directory of one region server with distributed log replay
 * switched off, then checks the recovered-edits files produced per region.
 *
 * <p>The test asserts that every remaining region's recovered-edits directory
 * holds more than one file (sequence-id files excluded), that the edits
 * counted across those files total {@code NUM_LOG_LINES}, and that the
 * server's WAL directory no longer exists after the split.
 *
 * @throws Exception if cluster start-up, WAL creation, splitting or file-system access fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testRecoveredEdits() throws Exception {
    LOG.info("testRecoveredEdits");
    // create more than one wal
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    startCluster(NUM_RS);
    final int NUM_LOG_LINES = 1000;
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    // turn off load balancing to prevent regions from moving around otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    Path rootdir = FSUtils.getRootDir(conf);
    // Keep a reference to the watcher so it can be closed when the test ends;
    // creating it inline in the installTable call would leak it.
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table t = null;
    try {
        // Inside the try so the watcher is still released if table creation fails.
        t = installTable(zkw, "table", "family", 40);
        TableName table = t.getName();
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        // Stop at the first region server that has a region of "table" online.
        // If none matches, hrs/regions are left pointing at the last server examined.
        for (int i = 0; i < NUM_RS; i++) {
            boolean foundRs = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
                    foundRs = true;
                    break;
                }
            }
            if (foundRs) {
                break;
            }
        }
        final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
        LOG.info("#regions = " + regions.size());
        // Drop regions that live in the system namespace before generating edits.
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.getTable().getNamespaceAsString().equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        slm.splitLogDistributed(logDir);
        int count = 0;
        // The table directory is the same for every region, so resolve it once.
        final Path tdir = FSUtils.getTableDir(rootdir, table);
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    // Sequence-id marker files are not recovered-edits files.
                    return !WALSplitter.isSequenceIdFile(p);
                }
            });
            assertTrue("edits dir should have more than a single file in it. instead has " + files.length, files.length > 1);
            for (int i = 0; i < files.length; i++) {
                count += countWAL(files[i].getPath(), fs, conf);
            }
            // count is a running total across all regions visited so far.
            LOG.info(count + " edits in " + files.length + " recovered edits files.");
        }
        // check that the log file is moved
        assertFalse(fs.exists(logDir));
        assertEquals(NUM_LOG_LINES, count);
    } finally {
        try {
            if (t != null) {
                t.close();
            }
        } finally {
            // Close the watcher even if closing the table throws.
            zkw.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableName(org.apache.hadoop.hbase.TableName) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 7 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in the Apache HBase project.

From class TestDistributedLogSplitting, method testWorkerAbort.

/**
 * The original intention of this test was to force an abort of a region
 * server and to make sure that the failure path in the region servers is
 * properly evaluated. But it is difficult to ensure that the region server
 * doesn't finish the log splitting before it aborts. Also, there is now a
 * code path where the master will preempt the region server when the master
 * detects that the region server has aborted.
 *
 * <p>As written, the test enqueues a single split task, aborts the first live
 * region server from a background thread, and passes as soon as any one of the
 * worker counters (resigned, err, final-transition-failed, done, preempt)
 * becomes non-zero within the wait window.
 *
 * @throws Exception if cluster start-up, WAL creation or file-system access fails
 */
@Ignore("Disabled because flakey")
@Test(timeout = 300000)
public void testWorkerAbort() throws Exception {
    LOG.info("testWorkerAbort");
    startCluster(3);
    final int NUM_LOG_LINES = 10000;
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    HRegionServer hrs = findRSToKill(false, "table");
    Path rootdir = FSUtils.getRootDir(conf);
    final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
    // Keep a reference to the watcher so it can be closed when the test ends;
    // creating it inline in the installTable call would leak it.
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table t = null;
    try {
        // Inside the try so the watcher is still released if table creation fails.
        t = installTable(zkw, "table", "family", 40);
        makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), "table", "family", NUM_LOG_LINES, 100);
        new Thread() {

            @Override
            public void run() {
                waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
                // Only the first live region server is aborted.
                if (!rsts.isEmpty()) {
                    rsts.get(0).getRegionServer().abort("testing");
                }
            }
        }.start();
        // slm.splitLogDistributed(logDir);
        FileStatus[] logfiles = fs.listStatus(logDir);
        TaskBatch batch = new TaskBatch();
        slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
        // waitForCounter, but satisfied by any one of several counters
        long curt = System.currentTimeMillis();
        long waitTime = 80000;
        long endt = curt + waitTime;
        while (curt < endt) {
            if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() + tot_wkr_preempt_task.get()) == 0) {
                Thread.yield();
                curt = System.currentTimeMillis();
            } else {
                assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() + tot_wkr_preempt_task.get()));
                // The finally block below still runs on this early return.
                return;
            }
        }
        fail("none of the following counters went up in " + waitTime + " milliseconds - " + "tot_wkr_task_resigned, tot_wkr_task_err, " + "tot_wkr_final_transition_failed, tot_wkr_task_done, " + "tot_wkr_preempt_task");
    } finally {
        try {
            if (t != null) {
                t.close();
            }
        } finally {
            // Close the watcher even if closing the table throws.
            zkw.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 8 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in the Apache HBase project.

From class TestDistributedLogSplitting, method testNonceRecovery.

/**
 * Sends one increment to every region of the test table, aborts a region
 * server and waits for recovery, then re-sends the same increments and
 * expects each of them to be rejected with an
 * {@link OperationConflictException}.
 *
 * <p>NOTE(review): {@code NonceGeneratorWithDups.startDups()} presumably makes
 * the injected generator hand out previously used nonces - confirm against
 * its definition.
 *
 * @throws Exception if cluster start-up, table creation or recovery fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testNonceRecovery() throws Exception {
    LOG.info("testNonceRecovery");
    final String tableName = "table";
    final String familyName = "family";
    final int regionCount = 40;
    conf.setLong("hbase.regionserver.hlog.blocksize", 100 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    master.balanceSwitch(false);
    final ZooKeeperWatcher watcher = new ZooKeeperWatcher(conf, "table-creation", null);
    Table table = installTable(watcher, tableName, familyName, regionCount);
    NonceGeneratorWithDups dupNonces = new NonceGeneratorWithDups();
    // Swap in the test generator; the previous one is restored in the finally block.
    NonceGenerator previousNonces = ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection) TEST_UTIL.getConnection(), dupNonces);
    try {
        List<Increment> sent = new ArrayList<>();
        for (RegionServerThread serverThread : cluster.getLiveRegionServerThreads()) {
            HRegionServer server = serverThread.getRegionServer();
            for (HRegionInfo info : ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
                if (!tableName.equalsIgnoreCase(info.getTable().getNameAsString())) {
                    // Not a region of the test table.
                    continue;
                }
                byte[] rowKey = info.getStartKey();
                if (rowKey == null || rowKey.length == 0) {
                    // No usable start key: derive a row from a copy of the end key
                    // with its last byte decremented.
                    rowKey = Bytes.copy(info.getEndKey());
                    rowKey[rowKey.length - 1]--;
                }
                Increment increment = new Increment(rowKey);
                increment.addColumn(Bytes.toBytes(familyName), Bytes.toBytes("q"), 1);
                table.increment(increment);
                sent.add(increment);
            }
        }
        HRegionServer victim = findRSToKill(false, "table");
        abortRSAndWaitForRecovery(victim, watcher, regionCount);
        dupNonces.startDups();
        // Every increment that was already applied must now be refused.
        for (Increment replayed : sent) {
            try {
                table.increment(replayed);
                fail("should have thrown");
            } catch (OperationConflictException expected) {
                LOG.debug("Caught as expected: " + expected.getMessage());
            }
        }
    } finally {
        ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection) TEST_UTIL.getConnection(), previousNonces);
        if (table != null) {
            table.close();
        }
        if (watcher != null) {
            watcher.close();
        }
    }
}
Also used : Table(org.apache.hadoop.hbase.client.Table) ArrayList(java.util.ArrayList) NonceGenerator(org.apache.hadoop.hbase.client.NonceGenerator) PerClientRandomNonceGenerator(org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) Increment(org.apache.hadoop.hbase.client.Increment) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 9 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in the Apache HBase project.

From class TestDistributedLogSplitting, method testReplayCmd.

/**
 * Loads data into the test table, aborts a region server with distributed log
 * replay enabled, and checks that the table's row checksum is the same after
 * recovery as it was before the abort.
 *
 * @throws Exception if cluster start-up, table creation or recovery fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testReplayCmd() throws Exception {
    LOG.info("testReplayCmd");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int regionCount = 40;
    // Keep the balancer off: regions that move would consume recovered.edits.
    master.balanceSwitch(false);
    List<RegionServerThread> serverThreads = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher watcher = new ZooKeeperWatcher(conf, "table-creation", null);
    Table table = installTable(watcher, "table", "family", regionCount);
    try {
        HRegionServer victim = null;
        // Choose the first server that has regions online and does not carry meta.
        // If no server qualifies, victim is left as the last server examined.
        for (int idx = 0; idx < NUM_RS; idx++) {
            victim = serverThreads.get(idx).getRegionServer();
            List<HRegionInfo> online = ProtobufUtil.getOnlineRegions(victim.getRSRpcServices());
            boolean hostsMeta = false;
            for (HRegionInfo candidate : online) {
                if (candidate.isMetaRegion()) {
                    hostsMeta = true;
                    break;
                }
            }
            if (!hostsMeta && !online.isEmpty()) {
                break;
            }
        }
        prepareData(table, Bytes.toBytes("family"), Bytes.toBytes("c1"));
        String checksumBefore = TEST_UTIL.checksumRows(table);
        // abort the chosen region server and trigger replay
        abortRSAndWaitForRecovery(victim, watcher, regionCount);
        String checksumAfter = TEST_UTIL.checksumRows(table);
        assertEquals("Data should remain after reopening of regions", checksumBefore, checksumAfter);
    } finally {
        if (table != null) {
            table.close();
        }
        if (watcher != null) {
            watcher.close();
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Table(org.apache.hadoop.hbase.client.Table) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 10 with RegionServerThread

Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in the Apache HBase project.

From class TestDistributedLogSplitting, method testDisallowWritesInRecovering.

/**
 * Marks one region as recovering, moves it to another region server, and then
 * attempts a put against it with {@code DISALLOW_WRITES_IN_RECOVERING} enabled.
 *
 * <p>If the put fails with an {@link IOException}, the test requires it to be a
 * {@link RetriesExhaustedWithDetailsException} whose causes include a
 * {@link RegionInRecoveryException}.
 *
 * <p>NOTE(review): the test also passes when the put succeeds, because there is
 * no {@code fail(...)} after {@code ht.put(put)}. Confirm whether that is
 * intended before tightening it.
 *
 * @throws Exception if cluster start-up, table creation or the region move fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testDisallowWritesInRecovering() throws Exception {
    LOG.info("testDisallowWritesInRecovering");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
    conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    // turn off load balancing to prevent regions from moving around otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
        final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
        Set<HRegionInfo> regionSet = new HashSet<>();
        HRegionInfo region = null;
        HRegionServer hrs = null;
        HRegionServer dstRS = null;
        // Take the first online region of the first server that has any, and use
        // the next server (wrapping around) as the destination of the move.
        for (int i = 0; i < NUM_RS; i++) {
            hrs = rsts.get(i).getRegionServer();
            List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            if (regions.isEmpty()) {
                continue;
            }
            region = regions.get(0);
            regionSet.add(region);
            dstRS = rsts.get((i + 1) % NUM_RS).getRegionServer();
            break;
        }
        // Fail with a clear message instead of a later NullPointerException when
        // no region server has any region online.
        Assert.assertNotNull("no region server is hosting any region", region);
        slm.markRegionsRecovering(hrs.getServerName(), regionSet);
        // move the region so that it is opened in recovering state
        final HRegionInfo hri = region;
        final HRegionServer tmpRS = dstRS;
        TEST_UTIL.getAdmin().move(region.getEncodedNameAsBytes(), Bytes.toBytes(dstRS.getServerName().getServerName()));
        // wait for the region move to complete
        final RegionStates regionStates = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
        TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                ServerName sn = regionStates.getRegionServerOfRegion(hri);
                return (sn != null && sn.equals(tmpRS.getServerName()));
            }
        });
        try {
            byte[] key = region.getStartKey();
            if (key == null || key.length == 0) {
                // No usable start key: fall back to a fixed row key.
                key = new byte[] { 0, 0, 0, 0, 1 };
            }
            Put put = new Put(key);
            put.addColumn(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[] { 'b' });
            ht.put(put);
        } catch (IOException ioe) {
            Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
            RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
            boolean foundRegionInRecoveryException = false;
            for (Throwable t : re.getCauses()) {
                if (t instanceof RegionInRecoveryException) {
                    foundRegionInRecoveryException = true;
                    break;
                }
            }
            Assert.assertTrue("No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(), foundRegionInRecoveryException);
        }
    } finally {
        if (ht != null) {
            ht.close();
        }
        // Guard on the watcher itself; the original checked ht here by mistake.
        if (zkw != null) {
            zkw.close();
        }
    }
}
Also used : Table(org.apache.hadoop.hbase.client.Table) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) Put(org.apache.hadoop.hbase.client.Put) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) HashSet(java.util.HashSet) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)34 Test (org.junit.Test)24 Table (org.apache.hadoop.hbase.client.Table)22 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)15 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)14 IOException (java.io.IOException)13 ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher)12 Ignore (org.junit.Ignore)11 TableName (org.apache.hadoop.hbase.TableName)9 Waiter (org.apache.hadoop.hbase.Waiter)8 Result (org.apache.hadoop.hbase.client.Result)8 OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException)8 ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException)8 ArrayList (java.util.ArrayList)7 TimeoutException (java.util.concurrent.TimeoutException)7 RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException)7 RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException)7 Path (org.apache.hadoop.fs.Path)6 FileSystem (org.apache.hadoop.fs.FileSystem)5 ServerName (org.apache.hadoop.hbase.ServerName)5