Use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by Apache.
The class TakeSnapshotHandler, method process().
/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
 * call should get implemented for each snapshot flavor.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
public void process() {
  String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
    + eventType + " on table " + snapshotTable;
  LOG.info(msg);
  MasterLock tableLockToRelease = this.tableLock;
  status.setStatus(msg);
  try {
    if (downgradeToSharedTableLock()) {
      // release the exclusive lock and hold the shared lock instead
      tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable, LockType.SHARED,
        this.getClass().getName() + ": take snapshot " + snapshot.getName());
      tableLock.release();
      tableLockToRelease.acquire();
    }
    // If regions move after this meta scan, the region-specific snapshot should fail, triggering
    // an external exception that gets captured here.

    // write down the snapshot info in the working directory
    SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
    snapshotManifest.addTableDescriptor(this.htd);
    monitor.rethrowException();

    List<Pair<RegionInfo, ServerName>> regionsAndLocations =
      master.getAssignmentManager().getTableRegionsAndLocations(snapshotTable, false);

    // run the snapshot
    snapshotRegions(regionsAndLocations);
    monitor.rethrowException();

    // extract each pair to separate lists
    Set<String> serverNames = new HashSet<>();
    for (Pair<RegionInfo, ServerName> p : regionsAndLocations) {
      if (p != null && p.getFirst() != null && p.getSecond() != null) {
        RegionInfo hri = p.getFirst();
        if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
          continue;
        }
        serverNames.add(p.getSecond().toString());
      }
    }

    // flush the in-memory state, and write the single manifest
    status.setStatus("Consolidate snapshot: " + snapshot.getName());
    snapshotManifest.consolidate();

    // verify the snapshot is valid
    status.setStatus("Verifying snapshot: " + snapshot.getName());
    verifier.verifySnapshot(this.workingDir, serverNames);

    // complete the snapshot, atomically moving from tmp to .snapshot dir.
    SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs,
      this.workingDirFs, this.conf);
    finished = true;
    msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
    status.markComplete(msg);
    LOG.info(msg);
    metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
    if (master.getMasterCoprocessorHost() != null) {
      master.getMasterCoprocessorHost()
        .postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd);
    }
  } catch (Exception e) {
    // FindBugs: REC_CATCH_EXCEPTION
    status.abort("Failed to complete snapshot " + snapshot.getName() + " on table "
      + snapshotTable + " because " + e.getMessage());
    String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
      + " due to exception:" + e.getMessage();
    LOG.error(reason, e);
    ForeignException ee = new ForeignException(reason, e);
    monitor.receive(ee);
    // need to mark this completed to close off and allow cleanup to happen.
    cancel(reason);
  } finally {
    LOG.debug("Launching cleanup of working dir:" + workingDir);
    try {
      // if the working dir is still present, the snapshot has failed; delete it
      if (!workingDirFs.delete(workingDir, true)) {
        LOG.error("Couldn't delete snapshot working directory:" + workingDir);
      }
    } catch (IOException e) {
      LOG.error("Couldn't delete snapshot working directory:" + workingDir);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
    }
    tableLockToRelease.release();
  }
}
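To make the error-handling pattern in process() easier to see in isolation, here is a minimal standalone sketch. It relies only on the ForeignExceptionDispatcher constructor and the receive()/rethrowException() methods already used above; the class name, the "demo-snapshot" label, and the doStep() helper are hypothetical.

import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;

public class SnapshotErrorPropagationSketch {
  public static void main(String[] args) {
    // One dispatcher ("monitor") is shared by every participant in the task.
    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher("demo-snapshot");
    try {
      doStep("write snapshot info");
      // Between steps, fail fast if any participant has already reported an error.
      monitor.rethrowException();
      doStep("snapshot regions");
      monitor.rethrowException();
      System.out.println("snapshot completed");
    } catch (Exception e) {
      // Mirror of the catch block in process(): wrap the failure and hand it to the
      // monitor so every other listener observes the same ForeignException.
      ForeignException ee = new ForeignException("demo-snapshot", e);
      monitor.receive(ee);
      System.err.println("snapshot failed: " + ee.getMessage());
    }
  }

  // Hypothetical unit of work; a real handler would write, flush, and verify regions here.
  private static void doStep(String name) {
    System.out.println("running step: " + name);
  }
}

Calling rethrowException() between steps serves the same purpose as in process(): the long-running handler bails out early if another participant has already failed, instead of discovering the error only at the end.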
Use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by Apache.
The class ProcedureCoordinator, method rpcConnectionFailure().
/**
 * The connection to the rest of the procedure group (members and coordinator) has been
 * broken/lost/failed. This should fail any interested procedures, but not attempt to notify other
 * members since we cannot reach them anymore.
 * @param message description of the error
 * @param cause the actual cause of the failure
 */
void rpcConnectionFailure(final String message, final IOException cause) {
  Collection<Procedure> toNotify = procedures.values();
  boolean isTraceEnabled = LOG.isTraceEnabled();
  LOG.debug("received connection failure: " + message, cause);
  for (Procedure proc : toNotify) {
    if (proc == null) {
      continue;
    }
    // notify the elements, if they aren't null
    if (isTraceEnabled) {
      LOG.trace("connection failure - notify procedure: " + proc.getName());
    }
    proc.receive(new ForeignException(proc.getName(), cause));
  }
}
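As a rough, self-contained illustration of the fan-out above, assuming ForeignExceptionListener is the single-method listener interface in the same errorhandling package; the procedure name and the cause message are made up.

import java.io.IOException;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;

public class ConnectionFailureFanOutSketch {
  public static void main(String[] args) {
    // Stand-ins for the live procedures tracked by the coordinator.
    ForeignExceptionListener procA = e -> System.err.println("procA aborting: " + e.getCause());
    ForeignExceptionListener procB = e -> System.err.println("procB aborting: " + e.getCause());

    IOException cause = new IOException("lost connection to procedure members");
    // As in rpcConnectionFailure(): wrap the local IOException once per procedure,
    // naming that procedure as the source of the ForeignException.
    for (ForeignExceptionListener proc : new ForeignExceptionListener[] { procA, procB }) {
      proc.receive(new ForeignException("demo-procedure", cause));
    }
  }
}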
Use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by Apache.
The class RestoreSnapshotProcedure, method updateMETA().
/**
 * Apply changes to hbase:meta.
 */
private void updateMETA(final MasterProcedureEnv env) throws IOException {
  try {
    Connection conn = env.getMasterServices().getConnection();
    RegionStateStore regionStateStore = env.getAssignmentManager().getRegionStateStore();
    int regionReplication = modifiedTableDescriptor.getRegionReplication();

    // 1. Prepare to restore
    getMonitorStatus().setStatus("Preparing to restore each region");

    // 2. Remove from META (and from the in-memory states) the current set of regions,
    // which are not correct after the restore.
    if (regionsToRemove != null) {
      regionStateStore.deleteRegions(regionsToRemove);
      deleteRegionsFromInMemoryStates(regionsToRemove, env, regionReplication);
    }

    // 3. Add to META the set of regions coming from the snapshot; their metadata is read
    // from the .regioninfo files in the snapshot folder.
    if (regionsToAdd != null) {
      MetaTableAccessor.addRegionsToMeta(conn, regionsToAdd, regionReplication);
      addRegionsToInMemoryStates(regionsToAdd, env, regionReplication);
    }

    if (regionsToRestore != null) {
      regionStateStore.overwriteRegions(regionsToRestore, regionReplication);
      deleteRegionsFromInMemoryStates(regionsToRestore, env, regionReplication);
      addRegionsToInMemoryStates(regionsToRestore, env, regionReplication);
    }

    RestoreSnapshotHelper.RestoreMetaChanges metaChanges =
      new RestoreSnapshotHelper.RestoreMetaChanges(modifiedTableDescriptor,
        parentsToChildrenPairMap);
    metaChanges.updateMetaParentRegions(conn, regionsToAdd);

    // At this point the restore is complete.
    LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
      + " on table=" + getTableName() + " completed!");
  } catch (IOException e) {
    final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher();
    String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
      + " failed in meta update. Try re-running the restore command.";
    LOG.error(msg, e);
    monitorException.receive(
      new ForeignException(env.getMasterServices().getServerName().toString(), e));
    throw new IOException(msg, e);
  }

  monitorStatus.markComplete("Restore snapshot '" + snapshot.getName() + "'!");
  MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
  metricsSnapshot.addSnapshotRestore(
    monitorStatus.getCompletionTimestamp() - monitorStatus.getStartTime());
}
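Note that updateMETA() builds a fresh ForeignExceptionDispatcher with no listeners attached, so the rethrown IOException is what actually aborts the procedure; the receive() call only records the error. Below is a minimal sketch of what a dispatcher with a listener does, assuming the addListener() and hasException() methods of ForeignExceptionDispatcher are available in this HBase version; the names and messages are hypothetical.

import java.io.IOException;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;

public class MetaUpdateErrorSketch {
  public static void main(String[] args) {
    ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher("restore-demo");
    // Anyone registered here is told about the failure as soon as receive() is called.
    monitorException.addListener(e -> System.err.println("listener saw: " + e.getMessage()));

    try {
      throw new IOException("simulated meta update failure");
    } catch (IOException e) {
      monitorException.receive(new ForeignException("demo-master", e));
      System.err.println("dispatcher has error: " + monitorException.hasException());
    }
  }
}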
Use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by Apache.
The class RestoreSnapshotProcedure, method restoreSnapshot().
/**
 * Execute the on-disk restore.
 * @param env MasterProcedureEnv
 * @throws IOException if restoring the HDFS regions fails
 */
private void restoreSnapshot(final MasterProcedureEnv env) throws IOException {
  MasterFileSystem fileSystemManager = env.getMasterServices().getMasterFileSystem();
  FileSystem fs = fileSystemManager.getFileSystem();
  Path rootDir = fileSystemManager.getRootDir();
  final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher();
  final Configuration conf = new Configuration(env.getMasterConfiguration());

  LOG.info("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot));
  try {
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshot);
    RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(conf, fs, manifest,
      modifiedTableDescriptor, rootDir, monitorException, getMonitorStatus());
    RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
    regionsToRestore = metaChanges.getRegionsToRestore();
    regionsToRemove = metaChanges.getRegionsToRemove();
    regionsToAdd = metaChanges.getRegionsToAdd();
    parentsToChildrenPairMap = metaChanges.getParentToChildrenPairMap();
  } catch (IOException e) {
    String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
      + " failed in on-disk restore. Try re-running the restore command.";
    LOG.error(msg, e);
    monitorException.receive(
      new ForeignException(env.getMasterServices().getServerName().toString(), e));
    throw new IOException(msg, e);
  }
}
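For completeness, a small sketch of what the wrapped exception built here carries: the reporting server as the source string and the original IOException as the cause. It assumes getSource() is available on ForeignException in this version, and the server name string is made up.

import java.io.IOException;
import org.apache.hadoop.hbase.errorhandling.ForeignException;

public class ForeignExceptionWrappingSketch {
  public static void main(String[] args) {
    IOException cause = new IOException("on-disk restore failed");
    // Same construction as in restoreSnapshot(): the source names who reported the failure.
    ForeignException fe = new ForeignException("demo-master,16000,1700000000000", cause);
    System.out.println("source = " + fe.getSource()); // who reported the error
    System.out.println("cause  = " + fe.getCause());  // the original IOException
  }
}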
Use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by Apache.
The class SimpleMasterProcedureManager, method execProcedureWithRet().
@Override
public byte[] execProcedureWithRet(ProcedureDescription desc) throws IOException {
  this.done = false;
  // start the process on the RS
  ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getInstance());
  List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
  List<String> servers = new ArrayList<>();
  for (ServerName sn : serverNames) {
    servers.add(sn.toString());
  }
  Procedure proc = coordinator.startProcedure(monitor, desc.getInstance(), new byte[0], servers);
  if (proc == null) {
    String msg = "Failed to submit distributed procedure for '" + getProcedureSignature() + "'";
    LOG.error(msg);
    throw new IOException(msg);
  }

  HashMap<String, byte[]> returnData = null;
  try {
    // wait for the procedure to complete. A timer thread is kicked off that should cancel this
    // if it takes too long.
    returnData = proc.waitForCompletedWithRet();
    LOG.info("Done waiting - exec procedure for " + desc.getInstance());
    this.done = true;
  } catch (InterruptedException e) {
    ForeignException ee =
      new ForeignException("Interrupted while waiting for procedure to finish", e);
    monitor.receive(ee);
    Thread.currentThread().interrupt();
  } catch (ForeignException e) {
    monitor.receive(e);
  }
  // return the first value for testing
  return returnData.values().iterator().next();
}
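One caveat in the snippet above: if the wait is interrupted, returnData stays null and the final line would throw a NullPointerException. Below is a caller-side sketch that surfaces any error the monitor has received before touching the result; the class and helper names are hypothetical, and it relies only on rethrowException(), which the other examples here already use.

import java.util.HashMap;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;

public class ProcedureResultSketch {
  // Hypothetical helper: rethrow any error the monitor recorded, then return the first result.
  static byte[] firstResultOrFail(HashMap<String, byte[]> returnData,
      ForeignExceptionDispatcher monitor) throws ForeignException {
    // If the wait was interrupted or a member failed, the monitor holds the ForeignException;
    // rethrow it instead of dereferencing a possibly-null result map.
    monitor.rethrowException();
    if (returnData == null || returnData.isEmpty()) {
      throw new ForeignException("procedure-demo",
        new IllegalStateException("procedure finished without a result"));
    }
    return returnData.values().iterator().next();
  }
}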