use of org.apache.hadoop.hbase.RegionLocations in project hbase by apache.
the class ScannerCallableWithReplicas method call.
@Override
public Result[] call(int timeout) throws IOException {
// the closed flag. Then an RPC is required to actually close the scanner.
if (currentScannerCallable != null && currentScannerCallable.closed) {
// the case of normal reads)
if (LOG.isTraceEnabled()) {
LOG.trace("Closing scanner id=" + currentScannerCallable.scannerId);
}
Result[] r = currentScannerCallable.call(timeout);
currentScannerCallable = null;
return r;
}
// We need to do the following:
//1. When a scan goes out to a certain replica (default or not), we need to
// continue to hit that until there is a failure. So store the last successfully invoked
// replica
//2. We should close the "losing" scanners (scanners other than the ones we hear back
// from first)
//
RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(true, RegionReplicaUtil.DEFAULT_REPLICA_ID, cConnection, tableName, currentScannerCallable.getRow());
// allocate a boundedcompletion pool of some multiple of number of replicas.
// We want to accomodate some RPCs for redundant replica scans (but are still in progress)
ResultBoundedCompletionService<Pair<Result[], ScannerCallable>> cs = new ResultBoundedCompletionService<>(RpcRetryingCallerFactory.instantiate(ScannerCallableWithReplicas.this.conf), pool, rl.size() * 5);
AtomicBoolean done = new AtomicBoolean(false);
replicaSwitched.set(false);
// submit call for the primary replica.
addCallsForCurrentReplica(cs, rl);
int startIndex = 0;
try {
// wait for the timeout to see whether the primary responds back
Future<Pair<Result[], ScannerCallable>> f = cs.poll(timeBeforeReplicas, // Yes, microseconds
TimeUnit.MICROSECONDS);
if (f != null) {
// After poll, if f is not null, there must be a completed task
Pair<Result[], ScannerCallable> r = f.get();
if (r != null && r.getSecond() != null) {
updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
}
//great we got a response
return r == null ? null : r.getFirst();
}
} catch (ExecutionException e) {
// We ignore the ExecutionException and continue with the replicas
if (LOG.isDebugEnabled()) {
LOG.debug("Scan with primary region returns " + e.getCause());
}
// out the exception from the primary replica
if ((rl.size() == 1) || (scan.getConsistency() == Consistency.STRONG)) {
// Rethrow the first exception
RpcRetryingCallerWithReadReplicas.throwEnrichedException(e, retries);
}
startIndex = 1;
} catch (CancellationException e) {
throw new InterruptedIOException(e.getMessage());
} catch (InterruptedException e) {
throw new InterruptedIOException(e.getMessage());
}
// submit call for the all of the secondaries at once
int endIndex = rl.size();
if (scan.getConsistency() == Consistency.STRONG) {
// When scan's consistency is strong, do not send to the secondaries
endIndex = 1;
} else {
// TODO: this may be an overkill for large region replication
addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1);
}
try {
Future<Pair<Result[], ScannerCallable>> f = cs.pollForFirstSuccessfullyCompletedTask(timeout, TimeUnit.MILLISECONDS, startIndex, endIndex);
if (f == null) {
throw new IOException("Failed to get result within timeout, timeout=" + timeout + "ms");
}
Pair<Result[], ScannerCallable> r = f.get();
if (r != null && r.getSecond() != null) {
updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
}
// great we got an answer
return r == null ? null : r.getFirst();
} catch (ExecutionException e) {
RpcRetryingCallerWithReadReplicas.throwEnrichedException(e, retries);
} catch (CancellationException e) {
throw new InterruptedIOException(e.getMessage());
} catch (InterruptedException e) {
throw new InterruptedIOException(e.getMessage());
} finally {
// We get there because we were interrupted or because one or more of the
// calls succeeded or failed. In all case, we stop all our tasks.
cs.cancelAll();
}
// unreachable
LOG.error("Imposible? Arrive at an unreachable line...");
throw new IOException("Imposible? Arrive at an unreachable line...");
}
use of org.apache.hadoop.hbase.RegionLocations in project hbase by apache.
the class ReversedScannerCallable method prepare.
/**
* @param reload force reload of server location
* @throws IOException
*/
@Override
public void prepare(boolean reload) throws IOException {
if (Thread.interrupted()) {
throw new InterruptedIOException();
}
if (!instantiated || reload) {
// 2. the start row is empty which means we need to locate to the last region.
if (scan.includeStartRow() && !isEmptyStartRow(getRow())) {
// Just locate the region with the row
RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(reload, id, getConnection(), getTableName(), getRow());
this.location = id < rl.size() ? rl.getRegionLocation(id) : null;
if (location == null || location.getServerName() == null) {
throw new IOException("Failed to find location, tableName=" + getTableName() + ", row=" + Bytes.toStringBinary(getRow()) + ", reload=" + reload);
}
} else {
// Need to locate the regions with the range, and the target location is
// the last one which is the previous region of last region scanner
byte[] locateStartRow = createCloseRowBefore(getRow());
List<HRegionLocation> locatedRegions = locateRegionsInRange(locateStartRow, getRow(), reload);
if (locatedRegions.isEmpty()) {
throw new DoNotRetryIOException("Does hbase:meta exist hole? Couldn't get regions for the range from " + Bytes.toStringBinary(locateStartRow) + " to " + Bytes.toStringBinary(getRow()));
}
this.location = locatedRegions.get(locatedRegions.size() - 1);
}
setStub(getConnection().getClient(getLocation().getServerName()));
checkIfRegionServerIsRemote();
instantiated = true;
}
// check how often we retry.
if (reload && this.scanMetrics != null) {
this.scanMetrics.countOfRPCRetries.incrementAndGet();
if (isRegionServerRemote) {
this.scanMetrics.countOfRemoteRPCRetries.incrementAndGet();
}
}
}
use of org.apache.hadoop.hbase.RegionLocations in project hbase by apache.
the class ReversedScannerCallable method locateRegionsInRange.
/**
* Get the corresponding regions for an arbitrary range of keys.
* @param startKey Starting row in range, inclusive
* @param endKey Ending row in range, exclusive
* @param reload force reload of server location
* @return A list of HRegionLocation corresponding to the regions that contain
* the specified range
* @throws IOException
*/
private List<HRegionLocation> locateRegionsInRange(byte[] startKey, byte[] endKey, boolean reload) throws IOException {
final boolean endKeyIsEndOfTable = Bytes.equals(endKey, HConstants.EMPTY_END_ROW);
if ((Bytes.compareTo(startKey, endKey) > 0) && !endKeyIsEndOfTable) {
throw new IllegalArgumentException("Invalid range: " + Bytes.toStringBinary(startKey) + " > " + Bytes.toStringBinary(endKey));
}
List<HRegionLocation> regionList = new ArrayList<>();
byte[] currentKey = startKey;
do {
RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(reload, id, getConnection(), getTableName(), currentKey);
HRegionLocation regionLocation = id < rl.size() ? rl.getRegionLocation(id) : null;
if (regionLocation != null && regionLocation.getRegionInfo().containsRow(currentKey)) {
regionList.add(regionLocation);
} else {
throw new DoNotRetryIOException("Does hbase:meta exist hole? Locating row " + Bytes.toStringBinary(currentKey) + " returns incorrect region " + (regionLocation == null ? null : regionLocation.getRegionInfo()));
}
currentKey = regionLocation.getRegionInfo().getEndKey();
} while (!Bytes.equals(currentKey, HConstants.EMPTY_END_ROW) && (endKeyIsEndOfTable || Bytes.compareTo(currentKey, endKey) < 0));
return regionList;
}
use of org.apache.hadoop.hbase.RegionLocations in project hbase by apache.
the class TestMasterOperationsForRegionReplicas method testCreateTableWithMultipleReplicas.
@Test
public void testCreateTableWithMultipleReplicas() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
final int numRegions = 3;
final int numReplica = 2;
try {
HTableDescriptor desc = new HTableDescriptor(tableName);
desc.setRegionReplication(numReplica);
desc.addFamily(new HColumnDescriptor("family"));
ADMIN.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
TEST_UTIL.waitTableEnabled(tableName);
validateNumberOfRowsInMeta(tableName, numRegions, ADMIN.getConnection());
List<HRegionInfo> hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
assert (hris.size() == numRegions * numReplica);
// check that the master created expected number of RegionState objects
for (int i = 0; i < numRegions; i++) {
for (int j = 0; j < numReplica; j++) {
HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionState(replica);
assert (state != null);
}
}
List<Result> metaRows = MetaTableAccessor.fullScanRegions(ADMIN.getConnection());
int numRows = 0;
for (Result result : metaRows) {
RegionLocations locations = MetaTableAccessor.getRegionLocations(result);
HRegionInfo hri = locations.getRegionLocation().getRegionInfo();
if (!hri.getTable().equals(tableName))
continue;
numRows += 1;
HRegionLocation[] servers = locations.getRegionLocations();
// have two locations for the replicas of a region, and the locations should be different
assert (servers.length == 2);
assert (!servers[0].equals(servers[1]));
}
assert (numRows == numRegions);
// The same verification of the meta as above but with the SnapshotOfRegionAssignmentFromMeta
// class
validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
// Now kill the master, restart it and see if the assignments are kept
ServerName master = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
TEST_UTIL.getHBaseClusterInterface().stopMaster(master);
TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000);
TEST_UTIL.getHBaseClusterInterface().startMaster(master.getHostname(), master.getPort());
TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
for (int i = 0; i < numRegions; i++) {
for (int j = 0; j < numReplica; j++) {
HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionState(replica);
assert (state != null);
}
}
validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
// Now shut the whole cluster down, and verify the assignments are kept so that the
// availability constraints are met.
TEST_UTIL.getConfiguration().setBoolean("hbase.master.startup.retainassign", true);
TEST_UTIL.shutdownMiniHBaseCluster();
TEST_UTIL.startMiniHBaseCluster(1, numSlaves);
TEST_UTIL.waitTableEnabled(tableName);
validateFromSnapshotFromMeta(TEST_UTIL, tableName, numRegions, numReplica, ADMIN.getConnection());
// Now shut the whole cluster down, and verify regions are assigned even if there is only
// one server running
TEST_UTIL.shutdownMiniHBaseCluster();
TEST_UTIL.startMiniHBaseCluster(1, 1);
TEST_UTIL.waitTableEnabled(tableName);
validateSingleRegionServerAssignment(ADMIN.getConnection(), numRegions, numReplica);
for (int i = 1; i < numSlaves; i++) {
//restore the cluster
TEST_UTIL.getMiniHBaseCluster().startRegionServer();
}
//check on alter table
ADMIN.disableTable(tableName);
assert (ADMIN.isTableDisabled(tableName));
//increase the replica
desc.setRegionReplication(numReplica + 1);
ADMIN.modifyTable(tableName, desc);
ADMIN.enableTable(tableName);
assert (ADMIN.isTableEnabled(tableName));
List<HRegionInfo> regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionsOfTable(tableName);
assert (regions.size() == numRegions * (numReplica + 1));
//decrease the replica(earlier, table was modified to have a replica count of numReplica + 1)
ADMIN.disableTable(tableName);
desc.setRegionReplication(numReplica);
ADMIN.modifyTable(tableName, desc);
ADMIN.enableTable(tableName);
assert (ADMIN.isTableEnabled(tableName));
regions = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates().getRegionsOfTable(tableName);
assert (regions.size() == numRegions * numReplica);
//also make sure the meta table has the replica locations removed
hris = MetaTableAccessor.getTableRegions(ADMIN.getConnection(), tableName);
assert (hris.size() == numRegions * numReplica);
//just check that the number of default replica regions in the meta table are the same
//as the number of regions the table was created with, and the count of the
//replicas is numReplica for each region
Map<HRegionInfo, Integer> defaultReplicas = new HashMap<>();
for (HRegionInfo hri : hris) {
Integer i;
HRegionInfo regionReplica0 = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
defaultReplicas.put(regionReplica0, (i = defaultReplicas.get(regionReplica0)) == null ? 1 : i + 1);
}
assert (defaultReplicas.size() == numRegions);
Collection<Integer> counts = new HashSet<>(defaultReplicas.values());
assert (counts.size() == 1 && counts.contains(new Integer(numReplica)));
} finally {
ADMIN.disableTable(tableName);
ADMIN.deleteTable(tableName);
}
}
use of org.apache.hadoop.hbase.RegionLocations in project hbase by apache.
the class SnapshotOfRegionAssignmentFromMeta method initialize.
/**
* Initialize the region assignment snapshot by scanning the hbase:meta table
* @throws IOException
*/
public void initialize() throws IOException {
LOG.info("Start to scan the hbase:meta for the current region assignment " + "snappshot");
// TODO: at some point this code could live in the MetaTableAccessor
Visitor v = new Visitor() {
@Override
public boolean visit(Result result) throws IOException {
try {
if (result == null || result.isEmpty())
return true;
RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
if (rl == null)
return true;
HRegionInfo hri = rl.getRegionLocation(0).getRegionInfo();
if (hri == null)
return true;
if (hri.getTable() == null)
return true;
if (disabledTables.contains(hri.getTable())) {
return true;
}
// Are we to include split parents in the list?
if (excludeOfflinedSplitParents && hri.isSplit())
return true;
HRegionLocation[] hrls = rl.getRegionLocations();
// Add the current assignment to the snapshot for all replicas
for (int i = 0; i < hrls.length; i++) {
if (hrls[i] == null)
continue;
hri = hrls[i].getRegionInfo();
if (hri == null)
continue;
addAssignment(hri, hrls[i].getServerName());
addRegion(hri);
}
hri = rl.getRegionLocation(0).getRegionInfo();
// the code below is to handle favored nodes
byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY, FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
if (favoredNodes == null)
return true;
// Add the favored nodes into assignment plan
ServerName[] favoredServerList = FavoredNodeAssignmentHelper.getFavoredNodesList(favoredNodes);
// Add the favored nodes into assignment plan
existingAssignmentPlan.updateFavoredNodesMap(hri, Arrays.asList(favoredServerList));
/*
* Typically there should be FAVORED_NODES_NUM favored nodes for a region in meta. If
* there is less than FAVORED_NODES_NUM, lets use as much as we can but log a warning.
*/
if (favoredServerList.length != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
LOG.warn("Insufficient favored nodes for region " + hri + " fn: " + Arrays.toString(favoredServerList));
}
for (int i = 0; i < favoredServerList.length; i++) {
if (i == PRIMARY.ordinal())
addPrimaryAssignment(hri, favoredServerList[i]);
if (i == SECONDARY.ordinal())
addSecondaryAssignment(hri, favoredServerList[i]);
if (i == TERTIARY.ordinal())
addTeritiaryAssignment(hri, favoredServerList[i]);
}
return true;
} catch (RuntimeException e) {
LOG.error("Catche remote exception " + e.getMessage() + " when processing" + result);
throw e;
}
}
};
// Scan hbase:meta to pick up user regions
MetaTableAccessor.fullScanRegions(connection, v);
//regionToRegionServerMap = regions;
LOG.info("Finished to scan the hbase:meta for the current region assignment" + "snapshot");
}
Aggregations