Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.
Class FSNamesystem, method loadFSImage:
private void loadFSImage(StartupOption startOpt) throws IOException {
  final FSImage fsImage = getFSImage();
  // Format before starting up if requested.
  if (startOpt == StartupOption.FORMAT) {
    // Reuse the current cluster id.
    fsImage.format(this, fsImage.getStorage().determineClusterId());
    startOpt = StartupOption.REGULAR;
  }
  boolean success = false;
  writeLock();
  try {
    // We shouldn't be calling saveNamespace if we've come up in standby state.
    MetaRecoveryContext recovery = startOpt.createRecoveryContext();
    final boolean staleImage =
        fsImage.recoverTransitionRead(startOpt, this, recovery);
    if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
      rollingUpgradeInfo = null;
    }
    final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
    LOG.info("Need to save fs image? " + needToSave
        + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
        + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
    if (needToSave) {
      fsImage.saveNamespace(this);
    } else {
      // No need to save, so mark the phase done.
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAVING_CHECKPOINT);
      prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
    // We shouldn't open the edit log for write when coming up in standby state.
    if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)
        || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) {
      fsImage.openEditLogForWrite(getEffectiveLayoutVersion());
    }
    success = true;
  } finally {
    if (!success) {
      fsImage.close();
    }
    writeUnlock("loadFSImage");
  }
  imageLoadComplete();
}
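The else branch above shows the skip-a-phase idiom: a phase whose work is unnecessary is still begun and immediately ended, so that the startup progress report shows it as complete rather than pending. A minimal standalone sketch of that idiom (the SkipPhaseSketch driver class is hypothetical, not part of Hadoop, and it assumes StartupProgress's public constructor and createView()/getStatus() behave as in the Hadoop source):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressView;

public class SkipPhaseSketch {
  public static void main(String[] args) {
    StartupProgress prog = new StartupProgress();
    // Begin and immediately end the phase, as loadFSImage does when no
    // image save is needed.
    prog.beginPhase(Phase.SAVING_CHECKPOINT);
    prog.endPhase(Phase.SAVING_CHECKPOINT);
    // A view is an immutable snapshot of progress; the phase now reads
    // as complete rather than pending.
    StartupProgressView view = prog.createView();
    System.out.println(view.getStatus(Phase.SAVING_CHECKPOINT)); // COMPLETE
  }
}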
Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.
Class FSNamesystem, method startCommonServices:
/**
 * Start services common to both active and standby states.
 */
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
  // Register the MBean for the FSNamesystemState.
  this.registerMBean();
  writeLock();
  this.haContext = haContext;
  try {
    nnResourceChecker = new NameNodeResourceChecker(conf);
    checkAvailableResources();
    assert !blockManager.isPopulatingReplQueues();
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAFEMODE);
    long completeBlocksTotal = getCompleteBlocksTotal();
    prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
        completeBlocksTotal);
    blockManager.activate(conf, completeBlocksTotal);
  } finally {
    writeUnlock("startCommonServices");
  }
  registerMXBean();
  DefaultMetricsSystem.instance().register(this);
  if (inodeAttributeProvider != null) {
    inodeAttributeProvider.start();
    dir.setINodeAttributeProvider(inodeAttributeProvider);
  }
  snapshotManager.registerMXBean();
  InetSocketAddress serviceAddress = NameNode.getServiceAddress(conf, true);
  this.nameNodeHostName = (serviceAddress != null)
      ? serviceAddress.getHostName() : "";
}
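Note that the SAFEMODE phase is begun and given a total (the number of complete blocks) for the block-report step, but it is not ended here: the counter is advanced elsewhere as DataNodes report blocks, and the phase ends when safe mode exits. A hedged sketch of that producer side (SafeModeProgressSketch and the block total are hypothetical; STEP_AWAITING_REPORTED_BLOCKS in FSNamesystem is a Step built from StepType.AWAITING_REPORTED_BLOCKS):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;

public class SafeModeProgressSketch {
  public static void main(String[] args) {
    StartupProgress prog = new StartupProgress();
    Step step = new Step(StepType.AWAITING_REPORTED_BLOCKS);

    prog.beginPhase(Phase.SAFEMODE);
    long completeBlocksTotal = 1000L; // stand-in for getCompleteBlocksTotal()
    prog.setTotal(Phase.SAFEMODE, step, completeBlocksTotal);

    // As block reports arrive, the step's counter is incremented toward
    // the total; endPhase is called once safe mode is exited.
    StartupProgress.Counter counter = prog.getCounter(Phase.SAFEMODE, step);
    counter.increment(); // one more complete block accounted for
  }
}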
Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.
Class FSImage, method saveFSImageInAllDirs:
private synchronized void saveFSImageInAllDirs(FSNamesystem source,
    NameNodeFile nnf, long txid, Canceler canceler) throws IOException {
  StartupProgress prog = NameNode.getStartupProgress();
  prog.beginPhase(Phase.SAVING_CHECKPOINT);
  if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
    throw new IOException("No image directories available!");
  }
  if (canceler == null) {
    canceler = new Canceler();
  }
  SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid, canceler);
  try {
    List<Thread> saveThreads = new ArrayList<Thread>();
    // Save an image into each IMAGE storage directory, one thread per directory.
    for (Iterator<StorageDirectory> it =
        storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
      StorageDirectory sd = it.next();
      FSImageSaver saver = new FSImageSaver(ctx, sd, nnf);
      Thread saveThread = new Thread(saver, saver.toString());
      saveThreads.add(saveThread);
      saveThread.start();
    }
    waitForThreads(saveThreads);
    saveThreads.clear();
    storage.reportErrorsOnDirectories(ctx.getErrorSDs());
    if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
      throw new IOException("Failed to save in any storage directories while saving namespace.");
    }
    if (canceler.isCancelled()) {
      deleteCancelledCheckpoint(txid);
      // checkCancelled() throws, so the assert below should be unreachable.
      ctx.checkCancelled();
      assert false : "should have thrown above!";
    }
    renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false);
    // Since we now have a new checkpoint, we can clean up some
    // old edit logs and checkpoints.
    purgeOldStorage(nnf);
    archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
  } finally {
    // Notify any threads waiting on the checkpoint to be canceled
    // that it is complete.
    ctx.markComplete();
    ctx = null;
  }
  prog.endPhase(Phase.SAVING_CHECKPOINT);
}
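Here the entire save is bracketed by beginPhase/endPhase on SAVING_CHECKPOINT, and the work is fanned out as one thread per image directory, then joined before errors are reported. A minimal sketch of that fan-out/join shape (ParallelSaveSketch, the directory paths, and the saver body are all hypothetical; waitForThreads in FSImage is essentially the join loop shown here):

import java.util.ArrayList;
import java.util.List;

public class ParallelSaveSketch {
  // Hypothetical stand-in for FSImageSaver: write one copy of the image.
  static Runnable saverFor(String dir) {
    return () -> System.out.println("saving image under " + dir);
  }

  public static void main(String[] args) throws InterruptedException {
    List<Thread> saveThreads = new ArrayList<>();
    // One saver thread per configured image directory.
    for (String dir : new String[] {"/data/1/name", "/data/2/name"}) {
      Thread t = new Thread(saverFor(dir), "FSImageSaver for " + dir);
      saveThreads.add(t);
      t.start();
    }
    // Join all savers before checking for per-directory errors.
    for (Thread t : saveThreads) {
      t.join();
    }
  }
}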
Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.
Class FSImage, method loadEdits:
private long loadEdits(Iterable<EditLogInputStream> editStreams,
    FSNamesystem target, StartupOption startOpt, MetaRecoveryContext recovery)
    throws IOException {
  LOG.debug("About to load edits:\n " + Joiner.on("\n ").join(editStreams));
  StartupProgress prog = NameNode.getStartupProgress();
  prog.beginPhase(Phase.LOADING_EDITS);
  long prevLastAppliedTxId = lastAppliedTxId;
  try {
    FSEditLogLoader loader = new FSEditLogLoader(target, lastAppliedTxId);
    // Load the latest edits.
    for (EditLogInputStream editIn : editStreams) {
      LOG.info("Reading " + editIn + " expecting start txid #"
          + (lastAppliedTxId + 1));
      try {
        loader.loadFSEdits(editIn, lastAppliedTxId + 1, startOpt, recovery);
      } finally {
        // Update lastAppliedTxId even in case of error, since some ops may
        // have been successfully applied before the error.
        lastAppliedTxId = loader.getLastAppliedTxId();
      }
      // If we are in recovery mode, we may have skipped over some txids.
      if (editIn.getLastTxId() != HdfsServerConstants.INVALID_TXID) {
        lastAppliedTxId = editIn.getLastTxId();
      }
    }
  } finally {
    FSEditLog.closeAllStreams(editStreams);
  }
  prog.endPhase(Phase.LOADING_EDITS);
  return lastAppliedTxId - prevLastAppliedTxId;
}
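loadEdits brackets edit replay in the LOADING_EDITS phase and returns the number of transactions applied (the delta of lastAppliedTxId). The per-file step and counter seen in loadEditRecords below feed this same phase; a hedged sketch of what that looks like end to end (LoadEditsProgressSketch, the edit-file path, and the transaction count are hypothetical; it assumes Step's String-file constructor as in the Hadoop source):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;

public class LoadEditsProgressSketch {
  public static void main(String[] args) {
    StartupProgress prog = new StartupProgress();
    prog.beginPhase(Phase.LOADING_EDITS);
    // One step per edit log file; createStartupProgressStep(in) in
    // FSEditLogLoader derives the step from the stream's name.
    Step step = new Step("/name/current/edits_0000001-0000100"); // hypothetical
    prog.setTotal(Phase.LOADING_EDITS, step, 100L); // expected txn count
    StartupProgress.Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    for (int i = 0; i < 100; i++) {
      counter.increment(); // one increment per applied edit op
    }
    prog.endPhase(Phase.LOADING_EDITS);
  }
}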
Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.
Class FSEditLogLoader, method loadEditRecords:
long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
    long expectedStartingTxId, StartupOption startOpt,
    MetaRecoveryContext recovery) throws IOException {
  FSDirectory fsDir = fsNamesys.dir;
  EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
  if (LOG.isTraceEnabled()) {
    LOG.trace("Acquiring write lock to replay edit log");
  }
  fsNamesys.writeLock();
  fsDir.writeLock();
  long[] recentOpcodeOffsets = new long[4];
  Arrays.fill(recentOpcodeOffsets, -1);
  long expectedTxId = expectedStartingTxId;
  long numEdits = 0;
  long lastTxId = in.getLastTxId();
  long numTxns = (lastTxId - expectedStartingTxId) + 1;
  StartupProgress prog = NameNode.getStartupProgress();
  Step step = createStartupProgressStep(in);
  prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
  Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
  long lastLogTime = monotonicNow();
  long lastInodeId = fsNamesys.dir.getLastInodeId();
  try {
    while (true) {
      try {
        FSEditLogOp op;
        try {
          op = in.readOp();
          if (op == null) {
            break;
          }
        } catch (Throwable e) {
          // Handle a problem with our input.
          check203UpgradeFailure(in.getVersion(true), e);
          String errorMessage =
              formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
          FSImage.LOG.error(errorMessage, e);
          if (recovery == null) {
            // Only try to skip over problematic opcodes when in recovery mode.
            throw new EditLogInputException(errorMessage, e, numEdits);
          }
          MetaRecoveryContext.editLogLoaderPrompt("We failed to read txId "
              + expectedTxId, recovery, "skipping the bad section in the log");
          in.resync();
          continue;
        }
        recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] =
            in.getPosition();
        if (op.hasTransactionId()) {
          if (op.getTransactionId() > expectedTxId) {
            MetaRecoveryContext.editLogLoaderPrompt("There appears "
                + "to be a gap in the edit log. We expected txid "
                + expectedTxId + ", but got txid " + op.getTransactionId()
                + ".", recovery, "ignoring missing transaction IDs");
          } else if (op.getTransactionId() < expectedTxId) {
            MetaRecoveryContext.editLogLoaderPrompt("There appears "
                + "to be an out-of-order edit in the edit log. We "
                + "expected txid " + expectedTxId + ", but got txid "
                + op.getTransactionId() + ".", recovery,
                "skipping the out-of-order edit");
            continue;
          }
        }
        try {
          if (LOG.isTraceEnabled()) {
            LOG.trace("op=" + op + ", startOpt=" + startOpt + ", numEdits="
                + numEdits + ", totalEdits=" + totalEdits);
          }
          long inodeId = applyEditLogOp(op, fsDir, startOpt,
              in.getVersion(true), lastInodeId);
          if (lastInodeId < inodeId) {
            lastInodeId = inodeId;
          }
        } catch (RollingUpgradeOp.RollbackException e) {
          throw e;
        } catch (Throwable e) {
          LOG.error("Encountered exception on operation " + op, e);
          if (recovery == null) {
            throw e instanceof IOException ? (IOException) e : new IOException(e);
          }
          MetaRecoveryContext.editLogLoaderPrompt("Failed to "
              + "apply edit log operation " + op + ": error "
              + e.getMessage(), recovery, "applying edits");
        }
        // Now that the operation has been successfully decoded and
        // applied, update our bookkeeping.
        incrOpCount(op.opCode, opCounts, step, counter);
        if (op.hasTransactionId()) {
          lastAppliedTxId = op.getTransactionId();
          expectedTxId = lastAppliedTxId + 1;
        } else {
          expectedTxId = lastAppliedTxId = expectedStartingTxId;
        }
        // Log replay progress periodically.
        if (op.hasTransactionId()) {
          long now = monotonicNow();
          if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
            long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
            int percent = Math.round((float) deltaTxId / numTxns * 100);
            LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
                + " transactions completed. (" + percent + "%)");
            lastLogTime = now;
          }
        }
        numEdits++;
        totalEdits++;
      } catch (RollingUpgradeOp.RollbackException e) {
        LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
        break;
      } catch (MetaRecoveryContext.RequestStopException e) {
        MetaRecoveryContext.LOG.warn("Stopped reading edit log at "
            + in.getPosition() + "/" + in.length());
        break;
      }
    }
  } finally {
    fsNamesys.dir.resetLastInodeId(lastInodeId);
    if (closeOnExit) {
      in.close();
    }
    fsDir.writeUnlock();
    fsNamesys.writeUnlock("loadEditRecords");
    if (LOG.isTraceEnabled()) {
      LOG.trace("replaying edit log finished");
    }
    if (FSImage.LOG.isDebugEnabled()) {
      dumpOpCounts(opCounts);
    }
  }
  return numEdits;
}
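incrOpCount drives the LOADING_EDITS progress above: each applied op bumps both a per-opcode tally and the step's StartupProgress counter. A self-contained sketch of that pattern, using hypothetical opcodes and a simplified Holder rather than Hadoop's FSEditLogOpCodes and org.apache.hadoop.hdfs.util.Holder:

import java.util.EnumMap;

public class OpCountSketch {
  enum OpCode { OP_ADD, OP_DELETE, OP_MKDIR }

  // Simplified stand-in for org.apache.hadoop.hdfs.util.Holder<Integer>.
  static class Holder {
    int held;
    Holder(int v) { held = v; }
  }

  // Mirrors the shape of incrOpCount: bump the per-opcode tally, then
  // advance the startup-progress counter for the current step.
  static void incrOpCount(OpCode opCode, EnumMap<OpCode, Holder> opCounts,
      Runnable progressCounter) {
    Holder holder = opCounts.get(opCode);
    if (holder == null) {
      opCounts.put(opCode, new Holder(1));
    } else {
      holder.held++;
    }
    progressCounter.run(); // stands in for StartupProgress.Counter.increment()
  }

  public static void main(String[] args) {
    EnumMap<OpCode, Holder> counts = new EnumMap<>(OpCode.class);
    incrOpCount(OpCode.OP_ADD, counts, () -> { });
    incrOpCount(OpCode.OP_ADD, counts, () -> { });
    System.out.println(counts.get(OpCode.OP_ADD).held); // prints 2
  }
}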