Search in sources :

Example 71 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testMasterStartsUpWithLogReplayWork.

/**
 * With distributed log replay enabled, writes WAL entries for a region server's
 * regions, aborts the master and then that region server, and waits until the
 * recovering-regions znode has no children. The test passes when the table
 * contains exactly {@code NUM_LOG_LINES} rows afterwards.
 *
 * @throws Exception if cluster setup, either wait, or the row count fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMasterStartsUpWithLogReplayWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogReplayWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = null;
    try {
        // Create the table inside the try so the watcher is still closed if
        // table creation fails.
        ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
        HRegionServer hrs = findRSToKill(false, "table");
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        // abort master
        abortMaster(cluster);
        // abort RS
        LOG.info("Aborting region server: " + hrs.getServerName());
        hrs.abort("testing");
        // wait for the RS to die
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        Thread.sleep(2000);
        LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
        // wait until all regions are fully recovered, i.e. no child remains
        // under the recovering-regions znode
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
                boolean done = recoveringRegions != null && recoveringRegions.isEmpty();
                if (!done) {
                    LOG.info("Recovering regions: " + recoveringRegions);
                }
                return done;
            }
        });
        LOG.info("Current Open Regions After Master Node Starts Up:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
        // Close the watcher even when closing the table throws.
        try {
            if (ht != null) {
                ht.close();
            }
        } finally {
            zkw.close();
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Table(org.apache.hadoop.hbase.client.Table) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) Waiter(org.apache.hadoop.hbase.Waiter) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 72 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testLogReplayWithMetaRSDown.

/**
 * With distributed log replay enabled, writes WAL entries for the regions of a
 * region server chosen via {@code findRSToKill(true, "table")}, then delegates
 * the abort and the recovery checks to {@code abortRSAndVerifyRecovery}.
 *
 * @throws Exception if cluster setup or the delegated verification fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayWithMetaRSDown() throws Exception {
    // Log the method's own name, as the sibling tests do.
    LOG.info("testLogReplayWithMetaRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = null;
    try {
        // Create the table inside the try so the watcher is still closed if
        // table creation fails.
        ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
        HRegionServer hrs = findRSToKill(true, "table");
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    } finally {
        // Close the watcher even when closing the table throws.
        try {
            if (ht != null) {
                ht.close();
            }
        } finally {
            zkw.close();
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Table(org.apache.hadoop.hbase.client.Table) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 73 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testMarkRegionsRecoveringInZK.

/**
 * Marks the first online region found on any live region server as recovering
 * for two different server names, asserts that the region's recovering znode
 * then has exactly two children, and waits until the hosting region server
 * reports no recovering regions.
 *
 * @throws Exception if cluster setup, the ZooKeeper lookup, or the wait fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMarkRegionsRecoveringInZK() throws Exception {
    LOG.info("testMarkRegionsRecoveringInZK");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = master.getZooKeeper();
    Table ht = installTable(zkw, "table", "family", 40);
    try {
        final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
        Set<HRegionInfo> regionSet = new HashSet<>();
        HRegionInfo region = null;
        HRegionServer hrs = null;
        ServerName firstFailedServer = null;
        ServerName secondFailedServer = null;
        // Pick the first region server that hosts at least one region; the
        // next server (wrapping around) plays the second "failed" server.
        for (int i = 0; i < NUM_RS; i++) {
            hrs = rsts.get(i).getRegionServer();
            List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            if (regions.isEmpty()) {
                continue;
            }
            region = regions.get(0);
            regionSet.add(region);
            firstFailedServer = hrs.getServerName();
            secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
            break;
        }
        // Fail with a clear message instead of an NPE further down when no
        // server hosts a region.
        if (region == null) {
            throw new AssertionError("No live region server hosts any online region");
        }
        slm.markRegionsRecovering(firstFailedServer, regionSet);
        slm.markRegionsRecovering(secondFailedServer, regionSet);
        List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw, ZKUtil.joinZNode(zkw.znodePaths.recoveringRegionsZNode, region.getEncodedName()));
        // JUnit's argument order is (expected, actual).
        assertEquals(2, recoveringRegions.size());
        // wait for splitLogWorker to mark them up because there are no WAL files recorded in ZK
        final HRegionServer tmphrs = hrs;
        TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (tmphrs.getRecoveringRegions().isEmpty());
            }
        });
    } finally {
        if (ht != null) {
            ht.close();
        }
        // NOTE(review): zkw comes from master.getZooKeeper(), so this closes a
        // watcher this test did not create — confirm that is intended.
        if (zkw != null) {
            zkw.close();
        }
    }
}
Also used : Table(org.apache.hadoop.hbase.client.Table) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) HashSet(java.util.HashSet) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 74 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testMetaRecoveryInZK.

/**
 * Exercises only the ZooKeeper bookkeeping for meta recovery: after
 * {@code prepareLogReplay} the first meta region's encoded name must be listed
 * under the recovering-regions znode, and after {@code splitMetaLog} it must
 * no longer be listed.
 *
 * @throws Exception if cluster setup or any ZooKeeper/master call fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMetaRecoveryInZK() throws Exception {
    LOG.info("testMetaRecoveryInZK");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    // The try/finally closes the watcher even when an assertion fails.
    try {
        // only testing meta recovery in ZK operation
        HRegionServer hrs = findRSToKill(true, null);
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        LOG.info("#regions = " + regions.size());
        Set<HRegionInfo> tmpRegions = new HashSet<>();
        tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
        master.getMasterWalManager().prepareLogReplay(hrs.getServerName(), tmpRegions);
        Set<HRegionInfo> userRegionSet = new HashSet<>(regions);
        master.getMasterWalManager().prepareLogReplay(hrs.getServerName(), userRegionSet);
        final String metaEncodedName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedName();
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
        // meta region should be marked as recovering
        assertTrue(recoveringRegions.contains(metaEncodedName));
        master.getMasterWalManager().splitMetaLog(hrs.getServerName());
        recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
        // meta region should be recovered
        assertFalse(recoveringRegions.contains(metaEncodedName));
    } finally {
        zkw.close();
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HashSet(java.util.HashSet) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 75 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testMasterStartsUpWithLogSplittingWork.

/**
 * With distributed log replay disabled, writes WAL entries for a region
 * server's regions, aborts the master and then that region server, and waits
 * until at least {@code NUM_REGIONS_TO_CREATE + 1} regions are online again.
 * The test passes when the table contains exactly {@code NUM_LOG_LINES} rows
 * afterwards.
 *
 * @throws Exception if cluster setup, either wait, or the row count fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMasterStartsUpWithLogSplittingWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogSplittingWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = null;
    try {
        // Create the table inside the try so the watcher is still closed if
        // table creation fails.
        ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
        HRegionServer hrs = findRSToKill(false, "table");
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        // abort master
        abortMaster(cluster);
        // abort RS
        LOG.info("Aborting region server: " + hrs.getServerName());
        hrs.abort("testing");
        // wait for the abort to complete
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        Thread.sleep(2000);
        LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
        // wait for the regions to come back online
        // (the "+ 1" presumably accounts for the meta region — TODO confirm)
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        LOG.info("Current Open Regions After Master Node Starts Up:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
        // Close the watcher even when closing the table throws.
        try {
            if (ht != null) {
                ht.close();
            }
        } finally {
            zkw.close();
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Table(org.apache.hadoop.hbase.client.Table) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) Waiter(org.apache.hadoop.hbase.Waiter) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher): 105, Test (org.junit.Test): 46, Configuration (org.apache.hadoop.conf.Configuration): 33, HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 21, Table (org.apache.hadoop.hbase.client.Table): 20, IOException (java.io.IOException): 19, ServerName (org.apache.hadoop.hbase.ServerName): 16, HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer): 15, Ignore (org.junit.Ignore): 15, ArrayList (java.util.ArrayList): 14, RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread): 13, HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 12, BeforeClass (org.junit.BeforeClass): 12, HBaseTestingUtility (org.apache.hadoop.hbase.HBaseTestingUtility): 11, List (java.util.List): 10, KeeperException (org.apache.zookeeper.KeeperException): 10, TimeoutException (java.util.concurrent.TimeoutException): 9, HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 9, HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 9, Waiter (org.apache.hadoop.hbase.Waiter): 9