Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.
The class TestDistributedLogSplitting, method populateDataInTable.
void populateDataInTable(int nrows, String fname) throws Exception {
  byte[] family = Bytes.toBytes(fname);
  List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
  assertEquals(NUM_RS, rsts.size());
  // load rows into every user region hosted on a live region server
  for (RegionServerThread rst : rsts) {
    HRegionServer hrs = rst.getRegionServer();
    List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    for (HRegionInfo hri : hris) {
      if (hri.getTable().isSystemTable()) {
        continue;
      }
      LOG.debug("adding data to rs = " + rst.getName() + " region = " + hri.getRegionNameAsString());
      Region region = hrs.getOnlineRegion(hri.getRegionName());
      assertTrue(region != null);
      putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
    }
  }
  // the masters may also host regions; load those as well
  for (MasterThread mt : cluster.getLiveMasterThreads()) {
    HRegionServer hrs = mt.getMaster();
    List<HRegionInfo> hris;
    try {
      hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    } catch (ServerNotRunningYetException e) {
      // it's ok: this master may be a backup master; ignore it
      continue;
    }
    for (HRegionInfo hri : hris) {
      if (hri.getTable().isSystemTable()) {
        continue;
      }
      LOG.debug("adding data to rs = " + mt.getName() + " region = " + hri.getRegionNameAsString());
      Region region = hrs.getOnlineRegion(hri.getRegionName());
      assertTrue(region != null);
      putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
    }
  }
}
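The putData helper called above is defined elsewhere in TestDistributedLogSplitting and is not reproduced on this page. A minimal sketch of what such a helper could look like, assuming each row key is derived from the region's start key (the real test's row layout may differ):

// Hypothetical sketch, not the actual TestDistributedLogSplitting code: write numRows
// Puts into the given Region, deriving each row key from the region's start key.
void putData(Region region, byte[] startRow, int numRows, byte[] qf, byte[]... families) throws IOException {
  for (int i = 0; i < numRows; i++) {
    Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
    for (byte[] family : families) {
      put.addColumn(family, qf, Bytes.toBytes(i));
    }
    region.put(put);
  }
}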
Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.
The class TestDistributedLogSplitting, method testLogReplayTwoSequentialRSDown.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayTwoSequentialRSDown() throws Exception {
LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
startCluster(NUM_RS);
final int NUM_REGIONS_TO_CREATE = 40;
final int NUM_LOG_LINES = 1000;
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs1 = findRSToKill(false, "table");
regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());
makeWAL(hrs1, regions, "table", "family", NUM_LOG_LINES, 100);
// abort RS1
LOG.info("Aborting region server: " + hrs1.getServerName());
hrs1.abort("testing");
// wait for abort completes
TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
}
});
// wait for regions come online
TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
}
});
// sleep a little bit in order to interrupt recovering in the middle
Thread.sleep(300);
// abort second region server
rsts = cluster.getLiveRegionServerThreads();
HRegionServer hrs2 = rsts.get(0).getRegionServer();
LOG.info("Aborting one more region server: " + hrs2.getServerName());
hrs2.abort("testing");
// wait for abort completes
TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
}
});
// wait for regions come online
TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
}
});
// wait for all regions are fully recovered
TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
return (recoveringRegions != null && recoveringRegions.isEmpty());
}
});
assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
} finally {
if (ht != null)
ht.close();
if (zkw != null)
zkw.close();
}
}
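findRSToKill, used above to choose hrs1, is another helper from the same test class that is not shown here. A hedged sketch of its likely shape, assuming it scans the live region servers for one hosting a region of the given table while matching the requested hbase:meta-hosting state:

// Hypothetical sketch, not the actual helper: pick a live region server carrying at
// least one region of the given table, and carrying hbase:meta only if hasMetaRegion
// is true. The real findRSToKill may differ in details.
HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
  for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
    HRegionServer hrs = rst.getRegionServer();
    List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    boolean isCarryingMeta = false;
    boolean foundTableRegion = false;
    for (HRegionInfo region : regions) {
      if (region.isMetaRegion()) {
        isCarryingMeta = true;
      } else if (region.getTable().getNameAsString().equals(tableName)) {
        foundTableRegion = true;
      }
    }
    if (foundTableRegion && isCarryingMeta == hasMetaRegion) {
      return hrs;
    }
  }
  return null;
}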
Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.
The class TestDistributedLogSplitting, method testMarkRegionsRecoveringInZK.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testMarkRegionsRecoveringInZK() throws Exception {
LOG.info("testMarkRegionsRecoveringInZK");
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
startCluster(NUM_RS);
master.balanceSwitch(false);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = master.getZooKeeper();
Table ht = installTable(zkw, "table", "family", 40);
try {
final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
Set<HRegionInfo> regionSet = new HashSet<>();
HRegionInfo region = null;
HRegionServer hrs = null;
ServerName firstFailedServer = null;
ServerName secondFailedServer = null;
for (int i = 0; i < NUM_RS; i++) {
hrs = rsts.get(i).getRegionServer();
List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
if (regions.isEmpty())
continue;
region = regions.get(0);
regionSet.add(region);
firstFailedServer = hrs.getServerName();
secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
break;
}
slm.markRegionsRecovering(firstFailedServer, regionSet);
slm.markRegionsRecovering(secondFailedServer, regionSet);
List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw, ZKUtil.joinZNode(zkw.znodePaths.recoveringRegionsZNode, region.getEncodedName()));
assertEquals(recoveringRegions.size(), 2);
// wait for splitLogWorker to mark them up because there is no WAL files recorded in ZK
final HRegionServer tmphrs = hrs;
TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (tmphrs.getRecoveringRegions().isEmpty());
}
});
} finally {
if (ht != null)
ht.close();
if (zkw != null)
zkw.close();
}
}
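installTable, used by each of these tests, creates the pre-split test table before any WAL edits are generated. A rough sketch under the assumption that it simply creates a pre-split table through HBaseTestingUtility and returns a Table handle; the real helper also performs ZooKeeper-related setup via zkw that is omitted here:

// Hypothetical sketch, not the actual helper: create a table with nRegions regions and
// wait until all of them are assigned. The zkw parameter mirrors the real signature;
// the real helper uses it for additional verification.
Table installTable(ZooKeeperWatcher zkw, String tname, String fname, int nRegions) throws Exception {
  TableName tableName = TableName.valueOf(tname);
  HTableDescriptor htd = new HTableDescriptor(tableName);
  htd.addFamily(new HColumnDescriptor(fname));
  // pre-split into nRegions regions over a one-byte key space
  byte[][] splits = new byte[nRegions - 1][];
  for (int i = 1; i < nRegions; i++) {
    splits[i - 1] = new byte[] { (byte) i };
  }
  TEST_UTIL.getAdmin().createTable(htd, splits);
  TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
  return TEST_UTIL.getConnection().getTable(tableName);
}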
Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.
The class TestDistributedLogSplitting, method testSameVersionUpdatesRecovery.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testSameVersionUpdatesRecovery() throws Exception {
LOG.info("testSameVersionUpdatesRecovery");
conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
startCluster(NUM_RS);
final AtomicLong sequenceId = new AtomicLong(100);
final int NUM_REGIONS_TO_CREATE = 40;
final int NUM_LOG_LINES = 1000;
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, name.getMethodName(), "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
boolean isCarryingMeta = false;
hrs = rsts.get(i).getRegionServer();
regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
for (HRegionInfo region : regions) {
if (region.isMetaRegion()) {
isCarryingMeta = true;
break;
}
}
if (isCarryingMeta) {
continue;
}
break;
}
LOG.info("#regions = " + regions.size());
Iterator<HRegionInfo> it = regions.iterator();
while (it.hasNext()) {
HRegionInfo region = it.next();
if (region.isMetaTable() || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
it.remove();
}
}
if (regions.isEmpty())
return;
HRegionInfo curRegionInfo = regions.get(0);
byte[] startRow = curRegionInfo.getStartKey();
if (startRow == null || startRow.length == 0) {
startRow = new byte[] { 0, 0, 0, 0, 1 };
}
byte[] row = Bytes.incrementBytes(startRow, 1);
// use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
row = Arrays.copyOfRange(row, 3, 8);
long value = 0;
TableName tableName = TableName.valueOf(name.getMethodName());
byte[] family = Bytes.toBytes("family");
byte[] qualifier = Bytes.toBytes("c1");
long timeStamp = System.currentTimeMillis();
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(family));
final WAL wal = hrs.getWAL(curRegionInfo);
for (int i = 0; i < NUM_LOG_LINES; i += 1) {
WALEdit e = new WALEdit();
value++;
e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
wal.append(curRegionInfo, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()), e, true);
}
wal.sync();
wal.shutdown();
// wait for abort completes
this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
// verify we got the last value
LOG.info("Verification Starts...");
Get g = new Get(row);
Result r = ht.get(g);
long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
// after flush
LOG.info("Verification after flush...");
TEST_UTIL.getAdmin().flush(tableName);
r = ht.get(g);
theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
} finally {
if (ht != null)
ht.close();
if (zkw != null)
zkw.close();
}
}
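abortRSAndWaitForRecovery, called near the end of the method above, packages the abort-and-wait pattern that testLogReplayTwoSequentialRSDown spells out inline. A condensed sketch, assuming it blocks until the regions are back online and the recovering-regions znode is empty:

// Hypothetical sketch, not the actual helper: abort a region server, then block until
// all regions are back online and no regions are marked as recovering in ZK.
void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw, final int numRegions) throws Exception {
  hrs.abort("testing");
  TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
    @Override
    public boolean evaluate() throws Exception {
      List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
      // numRegions + 1 accounts for hbase:meta alongside the user regions
      return HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (numRegions + 1)
          && recoveringRegions != null && recoveringRegions.isEmpty();
    }
  });
}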
Use of org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread in project hbase by apache.
The class TestSnapshotFromMaster, method testSnapshotHFileArchiving.
/**
* Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots
* should be retained, while those that are not in a snapshot should be deleted.
* @throws Exception on failure
*/
@Test(timeout = 300000)
public void testSnapshotHFileArchiving() throws Exception {
  Admin admin = UTIL.getAdmin();
  // make sure we don't fail on listing snapshots
  SnapshotTestingUtils.assertNoSnapshots(admin);
  // recreate the test table with compactions disabled; otherwise a compaction may happen
  // before the snapshot, the compaction after the snapshot will be a no-op, and the checks will fail
  UTIL.deleteTable(TABLE_NAME);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  htd.setCompactionEnabled(false);
  UTIL.createTable(htd, new byte[][] { TEST_FAM }, null);
  // load the table
  for (int i = 0; i < blockingStoreFiles / 2; i++) {
    UTIL.loadTable(UTIL.getConnection().getTable(TABLE_NAME), TEST_FAM);
    UTIL.flush(TABLE_NAME);
  }
  // disable the table so we can take a snapshot
  admin.disableTable(TABLE_NAME);
  htd.setCompactionEnabled(true);
  // take a snapshot of the table
  String snapshotName = "snapshot";
  byte[] snapshotNameBytes = Bytes.toBytes(snapshotName);
  admin.snapshot(snapshotNameBytes, TABLE_NAME);
  LOG.info("After snapshot File-System state");
  FSUtils.logFileSystemState(fs, rootDir, LOG);
  // ensure we only have one snapshot
  SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME);
  // enable compactions now
  admin.modifyTable(TABLE_NAME, htd);
  // re-enable the table so we can compact the regions
  admin.enableTable(TABLE_NAME);
  // compact the files so we get some archived files for the table we just snapshotted
  List<HRegion> regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME);
  for (HRegion region : regions) {
    // enable can trigger a compaction; wait for it
    region.waitForFlushesAndCompactions();
    // min is 2, so this will compact and archive
    region.compactStores();
  }
  // find a region server hosting the table so we can discharge its compacted files
  List<RegionServerThread> regionServerThreads = UTIL.getMiniHBaseCluster().getRegionServerThreads();
  HRegionServer hrs = null;
  for (RegionServerThread rs : regionServerThreads) {
    if (!rs.getRegionServer().getOnlineRegions(TABLE_NAME).isEmpty()) {
      hrs = rs.getRegionServer();
      break;
    }
  }
  CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null, hrs, false);
  cleaner.chore();
  LOG.info("After compaction File-System state");
  FSUtils.logFileSystemState(fs, rootDir, LOG);
  // make sure the cleaner has run
  LOG.debug("Running hfile cleaners");
  ensureHFileCleanersRun();
  LOG.info("After cleaners File-System state: " + rootDir);
  FSUtils.logFileSystemState(fs, rootDir, LOG);
  // get the snapshot files for the table
  Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
  Set<String> snapshotHFiles = SnapshotReferenceUtil.getHFileNames(UTIL.getConfiguration(), fs, snapshotTable);
  // check that the files in the archive contain the ones that we need for the snapshot
  LOG.debug("Have snapshot hfiles:");
  for (String fileName : snapshotHFiles) {
    LOG.debug(fileName);
  }
  // get the archived files for the table
  Collection<String> archives = getHFiles(archiveDir, fs, TABLE_NAME);
  // get the hfiles for the table
  Collection<String> hfiles = getHFiles(rootDir, fs, TABLE_NAME);
  // make sure every snapshot hfile is present in either the archive or the live table
  for (String fileName : snapshotHFiles) {
    boolean exist = archives.contains(fileName) || hfiles.contains(fileName);
    assertTrue("Archived hfiles " + archives + " and table hfiles " + hfiles + " are missing snapshot file: " + fileName, exist);
  }
  // delete the existing snapshot
  admin.deleteSnapshot(snapshotNameBytes);
  SnapshotTestingUtils.assertNoSnapshots(admin);
  // make sure that we don't keep around the hfiles that aren't in a snapshot:
  // force the snapshot hfile cache to refresh so the cleaner sees the deletion
  List<BaseHFileCleanerDelegate> delegates = UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().cleanersChain;
  for (BaseHFileCleanerDelegate delegate : delegates) {
    if (delegate instanceof SnapshotHFileCleaner) {
      ((SnapshotHFileCleaner) delegate).getFileCacheForTesting().triggerCacheRefreshForTesting();
    }
  }
  // run the cleaner again
  LOG.debug("Running hfile cleaners");
  ensureHFileCleanersRun();
  LOG.info("After delete snapshot cleaners run File-System state");
  FSUtils.logFileSystemState(fs, rootDir, LOG);
  archives = getHFiles(archiveDir, fs, TABLE_NAME);
  assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0, archives.size());
}
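getHFiles, used twice above, collects the hfile names for a table under a given root directory. A plausible sketch built from utilities that appear elsewhere in the HBase test code; the real implementation in TestSnapshotFromMaster may walk the directory tree differently:

// Hypothetical sketch: list the names of all hfiles for a table under the given root,
// assuming SnapshotTestingUtils.listHFileNames walks the region/family directories.
private Collection<String> getHFiles(Path dir, FileSystem fs, TableName tableName) throws IOException {
  Path tableDir = FSUtils.getTableDir(dir, tableName);
  return SnapshotTestingUtils.listHFileNames(fs, tableDir);
}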