Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
The class FavoredNodesManager, method updateFavoredNodes.
public synchronized void updateFavoredNodes(Map<HRegionInfo, List<ServerName>> regionFNMap)
    throws IOException {
  Map<HRegionInfo, List<ServerName>> regionToFavoredNodes = new HashMap<>();
  for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionFNMap.entrySet()) {
    HRegionInfo regionInfo = entry.getKey();
    List<ServerName> servers = entry.getValue();
    /*
     * None of the following error conditions should happen. If one does, there is an issue with
     * favored nodes generation or the regions it's called on.
     */
    if (servers.size() != Sets.newHashSet(servers).size()) {
      throw new IOException("Duplicates found: " + servers);
    }
    if (!isFavoredNodeApplicable(regionInfo)) {
      throw new IOException("Can't update FN for an un-applicable region: "
          + regionInfo.getRegionNameAsString() + " with " + servers);
    }
    if (servers.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
      throw new IOException("At least " + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM
          + " favored nodes should be present for region: " + regionInfo.getEncodedName()
          + " current FN servers: " + servers);
    }
    List<ServerName> serversWithNoStartCodes = Lists.newArrayList();
    for (ServerName sn : servers) {
      if (sn.getStartcode() == ServerName.NON_STARTCODE) {
        serversWithNoStartCodes.add(sn);
      } else {
        serversWithNoStartCodes.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
            ServerName.NON_STARTCODE));
      }
    }
    regionToFavoredNodes.put(regionInfo, serversWithNoStartCodes);
  }
  // Let's do a bulk update to meta since that reduces the RPCs
  FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes,
      masterServices.getConnection());
  deleteFavoredNodesForRegions(regionToFavoredNodes.keySet());
  for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
    HRegionInfo regionInfo = entry.getKey();
    List<ServerName> serversWithNoStartCodes = entry.getValue();
    globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(regionInfo, serversWithNoStartCodes);
    addToReplicaLoad(regionInfo, serversWithNoStartCodes);
  }
}
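The inner loop normalizes every favored node to a start-code-free ServerName before persisting, so the mapping stays valid across region server restarts. A minimal standalone sketch of that normalization (the host name, port, and start code are made-up values):

import org.apache.hadoop.hbase.ServerName;

ServerName live = ServerName.valueOf("rs1.example.com", 16020, 1469419333158L);
// Drop the start code: favored nodes are keyed by host/port, not by process instance.
ServerName persisted =
    ServerName.valueOf(live.getHostname(), live.getPort(), ServerName.NON_STARTCODE);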
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
The class TakeSnapshotHandler, method process.
/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
 * call should get implemented for each snapshot flavor.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION",
    justification = "Intentional")
public void process() {
  String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
      + eventType + " on table " + snapshotTable;
  LOG.info(msg);
  ReentrantLock lock = snapshotManager.getLocks().acquireLock(snapshot.getName());
  status.setStatus(msg);
  try {
    // If regions move after this meta scan, the region-specific snapshot should fail, triggering
    // an external exception that gets captured here.

    // write down the snapshot info in the working directory
    SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
    snapshotManifest.addTableDescriptor(this.htd);
    monitor.rethrowException();
    List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
    if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
      regionsAndLocations =
          new MetaTableLocator().getMetaRegionsAndLocations(server.getZooKeeper());
    } else {
      regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(server.getConnection(),
          snapshotTable, false);
    }
    // run the snapshot
    snapshotRegions(regionsAndLocations);
    monitor.rethrowException();
    // extract each pair to separate lists
    Set<String> serverNames = new HashSet<>();
    for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
      if (p != null && p.getFirst() != null && p.getSecond() != null) {
        HRegionInfo hri = p.getFirst();
        // skip offline split parents; their daughters carry the data
        if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
        serverNames.add(p.getSecond().toString());
      }
    }
    // flush the in-memory state, and write the single manifest
    status.setStatus("Consolidate snapshot: " + snapshot.getName());
    snapshotManifest.consolidate();
    // verify the snapshot is valid
    status.setStatus("Verifying snapshot: " + snapshot.getName());
    verifier.verifySnapshot(this.workingDir, serverNames);
    // complete the snapshot, atomically moving from tmp to .snapshot dir.
    completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
    msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
    status.markComplete(msg);
    LOG.info(msg);
    metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
  } catch (Exception e) {
    // FindBugs: REC_CATCH_EXCEPTION
    status.abort("Failed to complete snapshot " + snapshot.getName() + " on table "
        + snapshotTable + " because " + e.getMessage());
    String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " due to exception: " + e.getMessage();
    LOG.error(reason, e);
    ForeignException ee = new ForeignException(reason, e);
    monitor.receive(ee);
    // need to mark this completed to close off and allow cleanup to happen.
    cancel(reason);
  } finally {
    LOG.debug("Launching cleanup of working dir: " + workingDir);
    try {
      // if the working dir is still present, the snapshot did not complete; delete it.
      if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
        LOG.error("Couldn't delete snapshot working directory: " + workingDir);
      }
    } catch (IOException e) {
      LOG.error("Couldn't delete snapshot working directory: " + workingDir);
    }
    lock.unlock();
    tableLock.release();
  }
}
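This handler runs inside the master; clients normally reach it through the Admin snapshot API rather than calling process() directly. A minimal client-side sketch, assuming an already-open Connection named connection (the snapshot and table names are illustrative):

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;

try (Admin admin = connection.getAdmin()) {
  // Blocks until the master-side handler completes or aborts the snapshot.
  admin.snapshot("mySnapshot", TableName.valueOf("myTable"));
}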
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
The class DisabledTableSnapshotHandler, method snapshotRegions.
// TODO consider parallelizing these operations since they are independent. Right now it's just
// easier to keep them serial though
@Override
public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
    throws IOException, KeeperException {
  try {
    // 1. get all the regions hosting this table.
    // extract each pair to separate lists
    Set<HRegionInfo> regions = new HashSet<>();
    for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
      // Don't include non-default regions
      HRegionInfo hri = p.getFirst();
      if (RegionReplicaUtil.isDefaultReplica(hri)) {
        regions.add(hri);
      }
    }
    // handle the mob files if any.
    boolean mobEnabled = MobUtils.hasMobColumns(htd);
    if (mobEnabled) {
      // snapshot the mob files as an offline region.
      HRegionInfo mobRegionInfo = MobUtils.getMobRegionInfo(htd.getTableName());
      regions.add(mobRegionInfo);
    }
    // 2. for each region, write all the info to disk
    String msg = "Starting to write region info and WALs for regions for offline snapshot: "
        + ClientSnapshotDescriptionUtils.toString(snapshot);
    LOG.info(msg);
    status.setStatus(msg);
    ThreadPoolExecutor exec = SnapshotManifest.createExecutor(conf, "DisabledTableSnapshot");
    try {
      ModifyRegionUtils.editRegions(exec, regions, new ModifyRegionUtils.RegionEditTask() {
        @Override
        public void editRegion(final HRegionInfo regionInfo) throws IOException {
          snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
        }
      });
    } finally {
      exec.shutdown();
    }
  } catch (Exception e) {
    // make sure we capture the exception to propagate back to the client later
    String reason = "Failed snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " due to exception: " + e.getMessage();
    ForeignException ee = new ForeignException(reason, e);
    monitor.receive(ee);
    status.abort("Snapshot of table: " + snapshotTable + " failed because " + e.getMessage());
  } finally {
    LOG.debug("Marking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " as finished.");
  }
}
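Only the default replica (replica id 0) owns the store files, which is why step 1 filters with RegionReplicaUtil.isDefaultReplica. The same filter in isolation, assuming a hypothetical allRegions list:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;

List<HRegionInfo> defaults = new ArrayList<>();
for (HRegionInfo hri : allRegions) {
  if (RegionReplicaUtil.isDefaultReplica(hri)) { // keeps only replica id == 0
    defaults.add(hri);
  }
}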
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
The class ServerCrashProcedure, method calcRegionsToAssign.
/**
 * Figure out what we need to assign. Should be idempotent.
 * @param env the master procedure environment
 * @return List of calculated regions to assign; may be empty.
 * @throws IOException
 */
private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env) throws IOException {
  AssignmentManager am = env.getMasterServices().getAssignmentManager();
  List<HRegionInfo> regionsToAssignAggregator = new ArrayList<>();
  int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM,
      HConstants.DEFAULT_META_REPLICA_NUM);
  for (int i = 1; i < replicaCount; i++) {
    HRegionInfo metaHri =
        RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
    if (am.isCarryingMetaReplica(this.serverName, metaHri)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Reassigning meta replica " + metaHri + " that was on " + this.serverName);
      }
      regionsToAssignAggregator.add(metaHri);
    }
  }
  // Clean out anything in regions in transition.
  List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) + " region(s) that "
        + (serverName == null ? "null" : serverName) + " was carrying (and "
        + regionsInTransition.size() + " region(s) that were opening on this server)");
  }
  regionsToAssignAggregator.addAll(regionsInTransition);
  // Iterate regions that were on this server and figure out which of these we need to reassign
  if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
    RegionStates regionStates = am.getRegionStates();
    for (HRegionInfo hri : this.regionsOnCrashedServer) {
      if (regionsInTransition.contains(hri)) continue;
      String encodedName = hri.getEncodedName();
      Lock lock = am.acquireRegionLock(encodedName);
      try {
        RegionState rit = regionStates.getRegionTransitionState(hri);
        if (processDeadRegion(hri, am)) {
          ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
          if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
            // If this region is in transition on the dead server, it must be
            // opening or pending_open, which should have been covered by
            // AM#cleanOutCrashedServerReferences
            LOG.info("Skip assigning " + hri.getRegionNameAsString() + " because opened on "
                + addressFromAM.getServerName());
            continue;
          }
          if (rit != null) {
            if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
              // Skip regions that are in transition on another server
              LOG.info("Skip assigning region in transition on another server " + rit);
              continue;
            }
            LOG.info("Reassigning region " + rit + " and clearing zknode if exists");
            regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
          } else if (regionStates.isRegionInState(hri, RegionState.State.SPLITTING_NEW,
              RegionState.State.MERGING_NEW)) {
            regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
          }
          regionsToAssignAggregator.add(hri);
          // TODO: The below else if is different in branch-1 from master branch.
        } else if (rit != null) {
          if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline())
              && am.getTableStateManager().isTableState(hri.getTable(),
                  TableState.State.DISABLED, TableState.State.DISABLING)
              || am.getReplicasToClose().contains(hri)) {
            // If the table was partially disabled and the RS went down, we should clear the
            // RIT and remove the node for the region.
            // The rit that we use may be stale in case the table was in DISABLING state
            // but though we did assign we will not be clearing the znode in CLOSING state.
            // Doing this will have no harm. See HBASE-5927
            regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
            am.offlineDisabledRegion(hri);
          } else {
            LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition " + rit
                + " not to be assigned by SSH of server " + serverName);
          }
        }
      } finally {
        lock.unlock();
      }
    }
  }
  return regionsToAssignAggregator;
}
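One subtlety in the addressFromAM check: ServerName.equals compares the start code as well as the host and port, so a restarted region server on the same address is a different ServerName. A small illustration (values are made up; isSameHostnameAndPort was renamed isSameAddress in later HBase versions):

import org.apache.hadoop.hbase.ServerName;

ServerName before = ServerName.valueOf("rs1.example.com", 16020, 1469419333158L);
ServerName after = ServerName.valueOf("rs1.example.com", 16020, 1469419400000L);
boolean sameProcess = before.equals(after);                            // false: start codes differ
boolean sameAddress = ServerName.isSameHostnameAndPort(before, after); // true: same host and port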
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
The class Canary, method sniff.
/*
 * Loops over the regions that belong to this table, and outputs some information about their
 * state.
 */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink,
    HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType,
    boolean rawScanEnabled) throws Exception {
  if (LOG.isDebugEnabled()) {
    LOG.debug(String.format("reading list of regions for table %s", tableDesc.getTableName()));
  }
  // Open the table only to verify it still exists; bail out with an empty task list if not.
  Table table = null;
  try {
    table = admin.getConnection().getTable(tableDesc.getTableName());
  } catch (TableNotFoundException e) {
    return new ArrayList<>();
  } finally {
    if (table != null) {
      table.close();
    }
  }
  List<RegionTask> tasks = new ArrayList<>();
  RegionLocator regionLocator = null;
  try {
    regionLocator = admin.getConnection().getRegionLocator(tableDesc.getTableName());
    for (HRegionLocation location : regionLocator.getAllRegionLocations()) {
      ServerName rs = location.getServerName();
      HRegionInfo region = location.getRegionInfo();
      tasks.add(new RegionTask(admin.getConnection(), region, rs, sink, taskType, rawScanEnabled));
    }
  } finally {
    if (regionLocator != null) {
      regionLocator.close();
    }
  }
  return executor.invokeAll(tasks);
}
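Because ExecutorService.invokeAll blocks until every submitted RegionTask has finished, the futures that sniff returns are already complete; a caller inside Canary drains them mainly to surface task failures. A hedged sketch (variable names assumed from the surrounding class):

import java.util.List;
import java.util.concurrent.Future;

List<Future<Void>> futures = sniff(admin, sink, tableDesc, executor, taskType, rawScanEnabled);
for (Future<Void> f : futures) {
  f.get(); // rethrows any exception the region task hit, wrapped in ExecutionException
}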