Example use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in the Apache HBase project.
From the class TestDistributedLogSplitting, method testMasterStartsUpWithLogReplayWork.
/**
 * Checks that rows written to a region server's WAL are all readable again after both the
 * master and that region server are aborted while distributed log replay is enabled.
 *
 * <p>Flow: start a cluster with {@code DISTRIBUTED_LOG_REPLAY_KEY} set to {@code true}, create
 * a table, generate WAL entries on a region server chosen by {@code findRSToKill}, abort the
 * master and then that region server, wait until the recovering-regions znode has no children,
 * and finally assert that the table holds exactly {@code NUM_LOG_LINES} rows.
 *
 * <p>Currently disabled via {@code @Ignore}.
 *
 * @throws Exception if cluster setup, any wait, or the final verification fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMasterStartsUpWithLogReplayWork() throws Exception {
  LOG.info("testMasterStartsUpWithLogReplayWork");
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
  conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
  startCluster(NUM_RS);
  final int NUM_REGIONS_TO_CREATE = 40;
  final int NUM_LOG_LINES = 1000;
  // turn off load balancing to prevent regions from moving around otherwise
  // they will consume recovered.edits
  master.balanceSwitch(false);
  final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
  // The watcher is owned by this test: guard it from the moment it exists so that a failure
  // in installTable() or in ht.close() cannot leak it.
  try {
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      HRegionServer hrs = findRSToKill(false, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      // NOTE(review): the trailing 100 is passed straight to makeWAL; its meaning is not
      // visible from this method.
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
      // abort master
      abortMaster(cluster);
      // abort RS
      LOG.info("Aborting region server: " + hrs.getServerName());
      hrs.abort("testing");
      // wait until the aborted RS is no longer counted as live
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });
      Thread.sleep(2000);
      LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
      // wait until no region is listed under the recovering-regions znode any more
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          List<String> recoveringRegions = zkw.getRecoverableZooKeeper()
              .getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
          boolean done = recoveringRegions != null && recoveringRegions.isEmpty();
          if (!done) {
            LOG.info("Recovering regions: " + recoveringRegions);
          }
          return done;
        }
      });
      LOG.info("Current Open Regions After Master Node Starts Up:"
          + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
      if (ht != null) {
        ht.close();
      }
    }
  } finally {
    zkw.close();
  }
}
Example use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in the Apache HBase project.
From the class TestDistributedLogSplitting, method testLogReplayWithMetaRSDown.
/**
 * Exercises distributed log replay for a region server selected with
 * {@code findRSToKill(true, "table")}.
 *
 * <p>Flow: start a cluster with {@code DISTRIBUTED_LOG_REPLAY_KEY} set to {@code true}, create
 * a table, generate WAL entries on the selected region server, then delegate the abort and the
 * verification to {@code abortRSAndVerifyRecovery}.
 *
 * <p>Currently disabled via {@code @Ignore}.
 *
 * @throws Exception if cluster setup or the delegated abort/verification step fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayWithMetaRSDown() throws Exception {
  // Log the real method name so the test can be found in the output.
  LOG.info("testLogReplayWithMetaRSDown");
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
  startCluster(NUM_RS);
  final int NUM_REGIONS_TO_CREATE = 40;
  final int NUM_LOG_LINES = 1000;
  // turn off load balancing to prevent regions from moving around otherwise
  // they will consume recovered.edits
  master.balanceSwitch(false);
  final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
  // The watcher is owned by this test: guard it from the moment it exists so that a failure
  // in installTable() or in ht.close() cannot leak it.
  try {
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      // NOTE(review): the 'true' flag presumably selects the server hosting meta, judging by
      // the test name - confirm against findRSToKill.
      HRegionServer hrs = findRSToKill(true, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
      this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    } finally {
      if (ht != null) {
        ht.close();
      }
    }
  } finally {
    zkw.close();
  }
}
Example use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in the Apache HBase project.
From the class TestDistributedLogSplitting, method testMarkRegionsRecoveringInZK.
/**
 * Checks that marking the same region as recovering for two different failed servers creates
 * two child znodes under that region's recovering znode, and that the hosting region server
 * eventually reports no recovering regions.
 *
 * <p>Flow: pick the first live region server that hosts at least one region, take its first
 * region, call {@code markRegionsRecovering} once for that server and once for the next server
 * (wrapping around), assert that two children are listed in ZooKeeper, then wait for the
 * region server's recovering-region collection to become empty.
 *
 * <p>Currently disabled via {@code @Ignore}.
 *
 * @throws Exception if cluster setup, a ZooKeeper call, or the wait fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMarkRegionsRecoveringInZK() throws Exception {
  LOG.info("testMarkRegionsRecoveringInZK");
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
  startCluster(NUM_RS);
  master.balanceSwitch(false);
  List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
  final ZooKeeperWatcher zkw = master.getZooKeeper();
  Table ht = installTable(zkw, "table", "family", 40);
  try {
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    Set<HRegionInfo> regionSet = new HashSet<>();
    HRegionInfo region = null;
    HRegionServer hrs = null;
    ServerName firstFailedServer = null;
    ServerName secondFailedServer = null;
    // Find the first server that actually hosts a region; it becomes the "first failed"
    // server and its neighbour (wrapping around) the "second failed" one.
    for (int i = 0; i < NUM_RS; i++) {
      hrs = rsts.get(i).getRegionServer();
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      if (regions.isEmpty()) {
        continue;
      }
      region = regions.get(0);
      regionSet.add(region);
      firstFailedServer = hrs.getServerName();
      secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
      break;
    }
    // Fail with a readable message instead of a NullPointerException further down when no
    // server hosts a region.
    assertTrue("No region server is hosting any region", region != null);
    slm.markRegionsRecovering(firstFailedServer, regionSet);
    slm.markRegionsRecovering(secondFailedServer, regionSet);
    List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
        ZKUtil.joinZNode(zkw.znodePaths.recoveringRegionsZNode, region.getEncodedName()));
    // expected value first, so a failure message reads correctly
    assertEquals(2, recoveringRegions.size());
    // wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
    final HRegionServer tmphrs = hrs;
    TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (tmphrs.getRecoveringRegions().isEmpty());
      }
    });
  } finally {
    try {
      if (ht != null) {
        ht.close();
      }
    } finally {
      // NOTE(review): this watcher was obtained from the master rather than created here;
      // closing it is kept to preserve existing behaviour - confirm that is intended.
      if (zkw != null) {
        zkw.close();
      }
    }
  }
}
Example use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in the Apache HBase project.
From the class TestDistributedLogSplitting, method testMetaRecoveryInZK.
/**
 * Checks the ZooKeeper bookkeeping for meta-region recovery: after {@code prepareLogReplay}
 * the first meta region must be listed under the recovering-regions znode, and after
 * {@code splitMetaLog} it must no longer be listed.
 *
 * <p>No region server is aborted here; only the master-side calls and the resulting znode
 * children are examined.
 *
 * <p>Currently disabled via {@code @Ignore}.
 *
 * @throws Exception if cluster setup, a master WAL-manager call, or a ZooKeeper read fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMetaRecoveryInZK() throws Exception {
  LOG.info("testMetaRecoveryInZK");
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
  startCluster(NUM_RS);
  // turn off load balancing to prevent regions from moving around otherwise
  // they will consume recovered.edits
  master.balanceSwitch(false);
  final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
  // Close the watcher on every exit path, including assertion failures.
  try {
    // only testing meta recovery in ZK operation
    HRegionServer hrs = findRSToKill(true, null);
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    LOG.info("#regions = " + regions.size());
    Set<HRegionInfo> tmpRegions = new HashSet<>();
    tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
    master.getMasterWalManager().prepareLogReplay(hrs.getServerName(), tmpRegions);
    Set<HRegionInfo> userRegionSet = new HashSet<>();
    userRegionSet.addAll(regions);
    master.getMasterWalManager().prepareLogReplay(hrs.getServerName(), userRegionSet);

    final String metaEncodedName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedName();
    List<String> recoveringRegions = zkw.getRecoverableZooKeeper()
        .getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
    assertTrue("meta region should be marked as recovering",
        recoveringRegions.contains(metaEncodedName));

    master.getMasterWalManager().splitMetaLog(hrs.getServerName());

    recoveringRegions = zkw.getRecoverableZooKeeper()
        .getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
    // meta region should be recovered
    assertFalse("meta region should no longer be marked as recovering",
        recoveringRegions.contains(metaEncodedName));
  } finally {
    zkw.close();
  }
}
Example use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in the Apache HBase project.
From the class TestDistributedLogSplitting, method testMasterStartsUpWithLogSplittingWork.
/**
 * Checks that rows written to a region server's WAL are all readable again after both the
 * master and that region server are aborted while distributed log replay is disabled.
 *
 * <p>Flow: start a cluster with {@code DISTRIBUTED_LOG_REPLAY_KEY} set to {@code false},
 * create a table, generate WAL entries on a region server chosen by {@code findRSToKill},
 * abort the master and then that region server, wait until at least
 * {@code NUM_REGIONS_TO_CREATE + 1} regions are online, and finally assert that the table
 * holds exactly {@code NUM_LOG_LINES} rows.
 *
 * <p>Currently disabled via {@code @Ignore}.
 *
 * @throws Exception if cluster setup, any wait, or the final verification fails
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMasterStartsUpWithLogSplittingWork() throws Exception {
  LOG.info("testMasterStartsUpWithLogSplittingWork");
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
  conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
  startCluster(NUM_RS);
  final int NUM_REGIONS_TO_CREATE = 40;
  final int NUM_LOG_LINES = 1000;
  // turn off load balancing to prevent regions from moving around otherwise
  // they will consume recovered.edits
  master.balanceSwitch(false);
  final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
  // The watcher is owned by this test: guard it from the moment it exists so that a failure
  // in installTable() or in ht.close() cannot leak it.
  try {
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      HRegionServer hrs = findRSToKill(false, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
      // abort master
      abortMaster(cluster);
      // abort RS
      LOG.info("Aborting region server: " + hrs.getServerName());
      hrs.abort("testing");
      // wait for abort completes
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });
      Thread.sleep(2000);
      LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
      // wait until the expected number of regions is online again
      // NOTE(review): the "+ 1" presumably accounts for the meta region - confirm.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
              >= (NUM_REGIONS_TO_CREATE + 1));
        }
      });
      LOG.info("Current Open Regions After Master Node Starts Up:"
          + HBaseTestingUtility.getAllOnlineRegions(cluster).size());
      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
      if (ht != null) {
        ht.close();
      }
    }
  } finally {
    zkw.close();
  }
}
Aggregations