use of org.apache.hadoop.hbase.regionserver.HRegionServer in project hbase by apache.
the class TestAssignmentManagerOnCluster method testOpenCloseRacing.
/**
* This tests region close racing with open
*/
@Test(timeout = 60000)
public void testOpenCloseRacing() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
try {
HTableDescriptor desc = new HTableDescriptor(tableName);
desc.addFamily(new HColumnDescriptor(FAMILY));
admin.createTable(desc);
Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
HRegionInfo hri = new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
MetaTableAccessor.addRegionToMeta(meta, hri);
meta.close();
MyRegionObserver.postOpenEnabled.set(true);
MyRegionObserver.postOpenCalled = false;
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
AssignmentManager am = master.getAssignmentManager();
// Region will be opened, but it won't complete
am.assign(hri);
long end = EnvironmentEdgeManager.currentTime() + 20000;
// Wait till postOpen is called
while (!MyRegionObserver.postOpenCalled) {
assertFalse("Timed out waiting for postOpen to be called", EnvironmentEdgeManager.currentTime() > end);
Thread.sleep(300);
}
// Now let's unassign it, it should do nothing
am.unassign(hri);
RegionState state = am.getRegionStates().getRegionState(hri);
ServerName oldServerName = state.getServerName();
assertTrue(state.isOpening() && oldServerName != null);
// Now the region is stuck in opening
// Let's forcefully re-assign it to trigger closing/opening
// racing. This test is to make sure this scenario
// is handled properly.
MyRegionObserver.postOpenEnabled.set(false);
ServerName destServerName = null;
int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
for (int i = 0; i < numRS; i++) {
HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
if (!destServer.getServerName().equals(oldServerName)) {
destServerName = destServer.getServerName();
break;
}
}
assertNotNull(destServerName);
assertFalse("Region should be assigned on a new region server", oldServerName.equals(destServerName));
List<HRegionInfo> regions = new ArrayList<>();
regions.add(hri);
am.assign(destServerName, regions);
// let's check if it's assigned after it's out of transition
am.waitOnRegionToClearRegionsInTransition(hri);
assertTrue(am.waitForAssignment(hri));
ServerName serverName = master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri);
TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
} finally {
MyRegionObserver.postOpenEnabled.set(false);
TEST_UTIL.deleteTable(tableName);
}
}
use of org.apache.hadoop.hbase.regionserver.HRegionServer in project hbase by apache.
the class TestAssignmentManagerOnCluster method testAssignDisabledRegionBySSH.
/**
* Test disabled region is ignored by SSH
*/
@Test(timeout = 60000)
public void testAssignDisabledRegionBySSH() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
MyMaster master;
try {
HTableDescriptor desc = new HTableDescriptor(tableName);
desc.addFamily(new HColumnDescriptor(FAMILY));
admin.createTable(desc);
Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
HRegionInfo hri = new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
MetaTableAccessor.addRegionToMeta(meta, hri);
// Assign the region
master = (MyMaster) cluster.getMaster();
AssignmentManager am = master.getAssignmentManager();
am.assign(hri);
RegionStates regionStates = am.getRegionStates();
ServerName metaServer = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
ServerName oldServerName = null;
while (true) {
assertTrue(am.waitForAssignment(hri));
RegionState state = regionStates.getRegionState(hri);
oldServerName = state.getServerName();
if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
// Mark the hosting server aborted, but don't actually kill it.
// It doesn't have meta on it.
MyRegionServer.abortedServer = oldServerName;
break;
}
int i = cluster.getServerWithMeta();
HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
oldServerName = rs.getServerName();
master.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(oldServerName.getServerName()));
}
// Make sure the region is assigned on the dead server
assertTrue(regionStates.isRegionOnline(hri));
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Disable the table now.
master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE);
// Kill the hosting server, which doesn't have meta on it.
cluster.killRegionServer(oldServerName);
cluster.waitForRegionServerToStop(oldServerName, -1);
ServerManager serverManager = master.getServerManager();
while (!serverManager.isServerDead(oldServerName) || serverManager.getDeadServers().areDeadServersInProgress()) {
Thread.sleep(100);
}
// Wait till no more RIT, the region should be offline.
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
assertTrue(regionStates.isRegionOffline(hri));
} finally {
MyRegionServer.abortedServer = null;
TEST_UTIL.deleteTable(tableName);
cluster.startRegionServer();
}
}
use of org.apache.hadoop.hbase.regionserver.HRegionServer in project hbase by apache.
the class TestDistributedLogSplitting method testLogReplayForDisablingTable.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayForDisablingTable() throws Exception {
LOG.info("testLogReplayForDisablingTable");
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
startCluster(NUM_RS);
final int NUM_REGIONS_TO_CREATE = 40;
final int NUM_LOG_LINES = 1000;
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
try {
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
boolean hasRegionsForBothTables = false;
String tableName = null;
for (int i = 0; i < NUM_RS; i++) {
tableName = null;
hasRegionsForBothTables = false;
boolean isCarryingSystem = false;
hrs = rsts.get(i).getRegionServer();
regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
for (HRegionInfo region : regions) {
if (region.getTable().isSystemTable()) {
isCarryingSystem = true;
break;
}
if (tableName != null && !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
// make sure that we find a RS has online regions for both "table" and "disableTable"
hasRegionsForBothTables = true;
break;
} else if (tableName == null) {
tableName = region.getTable().getNameAsString();
}
}
if (isCarryingSystem) {
continue;
}
if (hasRegionsForBothTables) {
break;
}
}
// make sure we found a good RS
Assert.assertTrue(hasRegionsForBothTables);
LOG.info("#regions = " + regions.size());
Iterator<HRegionInfo> it = regions.iterator();
while (it.hasNext()) {
HRegionInfo region = it.next();
if (region.isMetaTable()) {
it.remove();
}
}
makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
LOG.info("Disabling table\n");
TEST_UTIL.getAdmin().disableTable(TableName.valueOf(name.getMethodName()));
TEST_UTIL.waitTableDisabled(TableName.valueOf(name.getMethodName()).getName());
// abort RS
LOG.info("Aborting region server: " + hrs.getServerName());
hrs.abort("testing");
// wait for abort completes
TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
}
});
// wait for regions come online
TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
}
});
// wait for all regions are fully recovered
TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
ServerManager serverManager = master.getServerManager();
return (!serverManager.areDeadServersInProgress() && recoveringRegions != null && recoveringRegions.isEmpty());
}
});
int count = 0;
FileSystem fs = master.getMasterFileSystem().getFileSystem();
Path rootdir = FSUtils.getRootDir(conf);
Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf(name.getMethodName()));
for (HRegionInfo hri : regions) {
Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
LOG.debug("checking edits dir " + editsdir);
if (!fs.exists(editsdir))
continue;
FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
@Override
public boolean accept(Path p) {
if (WALSplitter.isSequenceIdFile(p)) {
return false;
}
return true;
}
});
if (files != null) {
for (FileStatus file : files) {
int c = countWAL(file.getPath(), fs, conf);
count += c;
LOG.info(c + " edits in " + file.getPath());
}
}
}
LOG.info("Verify edits in recovered.edits files");
assertEquals(NUM_LOG_LINES, count);
LOG.info("Verify replayed edits");
assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
// clean up
for (HRegionInfo hri : regions) {
Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
fs.delete(editsdir, true);
}
disablingHT.close();
} finally {
if (ht != null)
ht.close();
if (zkw != null)
zkw.close();
}
}
use of org.apache.hadoop.hbase.regionserver.HRegionServer in project hbase by apache.
the class TestDistributedLogSplitting method testSameVersionUpdatesRecoveryWithCompaction.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
LOG.info("testSameVersionUpdatesRecoveryWithWrites");
conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
conf.setInt("hbase.hstore.compactionThreshold", 3);
startCluster(NUM_RS);
final AtomicLong sequenceId = new AtomicLong(100);
final int NUM_REGIONS_TO_CREATE = 40;
final int NUM_LOG_LINES = 2000;
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
boolean isCarryingMeta = false;
hrs = rsts.get(i).getRegionServer();
regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
for (HRegionInfo region : regions) {
if (region.isMetaRegion()) {
isCarryingMeta = true;
break;
}
}
if (isCarryingMeta) {
continue;
}
break;
}
LOG.info("#regions = " + regions.size());
Iterator<HRegionInfo> it = regions.iterator();
while (it.hasNext()) {
HRegionInfo region = it.next();
if (region.isMetaTable() || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
it.remove();
}
}
if (regions.isEmpty())
return;
HRegionInfo curRegionInfo = regions.get(0);
byte[] startRow = curRegionInfo.getStartKey();
if (startRow == null || startRow.length == 0) {
startRow = new byte[] { 0, 0, 0, 0, 1 };
}
byte[] row = Bytes.incrementBytes(startRow, 1);
// use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
row = Arrays.copyOfRange(row, 3, 8);
long value = 0;
final TableName tableName = TableName.valueOf(name.getMethodName());
byte[] family = Bytes.toBytes("family");
byte[] qualifier = Bytes.toBytes("c1");
long timeStamp = System.currentTimeMillis();
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(family));
final WAL wal = hrs.getWAL(curRegionInfo);
for (int i = 0; i < NUM_LOG_LINES; i += 1) {
WALEdit e = new WALEdit();
value++;
e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
wal.append(curRegionInfo, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()), e, true);
}
wal.sync();
wal.shutdown();
// wait for abort completes
this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
// verify we got the last value
LOG.info("Verification Starts...");
Get g = new Get(row);
Result r = ht.get(g);
long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
// after flush & compaction
LOG.info("Verification after flush...");
TEST_UTIL.getAdmin().flush(tableName);
TEST_UTIL.getAdmin().compact(tableName);
// wait for compaction completes
TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return (TEST_UTIL.getAdmin().getCompactionState(tableName) == CompactionState.NONE);
}
});
r = ht.get(g);
theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
} finally {
if (ht != null)
ht.close();
if (zkw != null)
zkw.close();
}
}
use of org.apache.hadoop.hbase.regionserver.HRegionServer in project hbase by apache.
the class TestDistributedLogSplitting method testRecoveredEdits.
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testRecoveredEdits() throws Exception {
LOG.info("testRecoveredEdits");
// create more than one wal
conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
startCluster(NUM_RS);
final int NUM_LOG_LINES = 1000;
final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
FileSystem fs = master.getMasterFileSystem().getFileSystem();
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
Path rootdir = FSUtils.getRootDir(conf);
Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40);
try {
TableName table = t.getName();
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
boolean foundRs = false;
hrs = rsts.get(i).getRegionServer();
regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
for (HRegionInfo region : regions) {
if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
foundRs = true;
break;
}
}
if (foundRs)
break;
}
final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
LOG.info("#regions = " + regions.size());
Iterator<HRegionInfo> it = regions.iterator();
while (it.hasNext()) {
HRegionInfo region = it.next();
if (region.getTable().getNamespaceAsString().equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
it.remove();
}
}
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
slm.splitLogDistributed(logDir);
int count = 0;
for (HRegionInfo hri : regions) {
Path tdir = FSUtils.getTableDir(rootdir, table);
Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
LOG.debug("checking edits dir " + editsdir);
FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
@Override
public boolean accept(Path p) {
if (WALSplitter.isSequenceIdFile(p)) {
return false;
}
return true;
}
});
assertTrue("edits dir should have more than a single file in it. instead has " + files.length, files.length > 1);
for (int i = 0; i < files.length; i++) {
int c = countWAL(files[i].getPath(), fs, conf);
count += c;
}
LOG.info(count + " edits in " + files.length + " recovered edits files.");
}
// check that the log file is moved
assertFalse(fs.exists(logDir));
assertEquals(NUM_LOG_LINES, count);
} finally {
if (t != null)
t.close();
}
}
Aggregations