Example 6 with StartupProgress

Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.

From the class FSNamesystem, method loadFSImage. When the loaded image is not stale, or the NameNode is HA-enabled or in a rolling upgrade, no namespace save is needed, so the SAVING_CHECKPOINT phase is opened and immediately closed to mark it complete:

private void loadFSImage(StartupOption startOpt) throws IOException {
    final FSImage fsImage = getFSImage();
    // format before starting up if requested
    if (startOpt == StartupOption.FORMAT) {
        // reuse current id
        fsImage.format(this, fsImage.getStorage().determineClusterId());
        startOpt = StartupOption.REGULAR;
    }
    boolean success = false;
    writeLock();
    try {
        // We shouldn't be calling saveNamespace if we've come up in standby state.
        MetaRecoveryContext recovery = startOpt.createRecoveryContext();
        final boolean staleImage = fsImage.recoverTransitionRead(startOpt, this, recovery);
        if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
            rollingUpgradeInfo = null;
        }
        final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
        LOG.info("Need to save fs image? " + needToSave + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
        if (needToSave) {
            fsImage.saveNamespace(this);
        } else {
            // No need to save, so mark the phase done.
            StartupProgress prog = NameNode.getStartupProgress();
            prog.beginPhase(Phase.SAVING_CHECKPOINT);
            prog.endPhase(Phase.SAVING_CHECKPOINT);
        }
        // we shouldn't do it when coming up in standby state
        if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE) || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) {
            fsImage.openEditLogForWrite(getEffectiveLayoutVersion());
        }
        success = true;
    } finally {
        if (!success) {
            fsImage.close();
        }
        writeUnlock("loadFSImage");
    }
    imageLoadComplete();
}
Also used: StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
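
The begin/end bracketing above is the core StartupProgress contract: every phase must be opened and closed, even when it has no work to do, or aggregate progress never reaches 100%. A minimal sketch of that pattern, assuming direct access to the NameNode-internal StartupProgress class (it is not a public API, and the class name PhaseBracketSketch is invented for illustration):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;

public class PhaseBracketSketch {
    public static void main(String[] args) {
        // A fresh tracker, as the NameNode creates at startup.
        StartupProgress prog = new StartupProgress();
        // A phase with nothing to do is still opened and closed so that
        // overall progress reporting sees it as complete.
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
}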

Example 7 with StartupProgress

Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.

From the class FSNamesystem, method startCommonServices. It opens the SAFEMODE phase and sets the total for the awaiting-reported-blocks step to the number of complete blocks, so progress can be tracked as block reports arrive:

/** 
   * Start services common to both active and standby states
   */
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
    // register the MBean for the FSNamesystemState
    this.registerMBean();
    writeLock();
    this.haContext = haContext;
    try {
        nnResourceChecker = new NameNodeResourceChecker(conf);
        checkAvailableResources();
        assert !blockManager.isPopulatingReplQueues();
        StartupProgress prog = NameNode.getStartupProgress();
        prog.beginPhase(Phase.SAFEMODE);
        long completeBlocksTotal = getCompleteBlocksTotal();
        prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS, completeBlocksTotal);
        blockManager.activate(conf, completeBlocksTotal);
    } finally {
        writeUnlock("startCommonServices");
    }
    registerMXBean();
    DefaultMetricsSystem.instance().register(this);
    if (inodeAttributeProvider != null) {
        inodeAttributeProvider.start();
        dir.setINodeAttributeProvider(inodeAttributeProvider);
    }
    snapshotManager.registerMXBean();
    InetSocketAddress serviceAddress = NameNode.getServiceAddress(conf, true);
    this.nameNodeHostName = (serviceAddress != null) ? serviceAddress.getHostName() : "";
}
Also used: InetSocketAddress (java.net.InetSocketAddress), StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
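
The snippet seeds the SAFEMODE phase with a total before any counting starts; the block manager later advances the matching counter as block reports arrive. A hedged sketch of that setTotal/getCounter pairing, assuming the same internal API (STEP_AWAITING_REPORTED_BLOCKS in the snippet wraps StepType.AWAITING_REPORTED_BLOCKS; the block total of 1000 and the class name are invented):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;

public class SafeModeProgressSketch {
    public static void main(String[] args) {
        StartupProgress prog = new StartupProgress();
        Step step = new Step(StepType.AWAITING_REPORTED_BLOCKS);
        prog.beginPhase(Phase.SAFEMODE);
        // Declare how many complete blocks we expect before safe mode
        // can exit (1000 is an invented stand-in).
        prog.setTotal(Phase.SAFEMODE, step, 1000L);
        // Elsewhere, each arriving block report increments the counter,
        // which advances the step toward its declared total.
        Counter counter = prog.getCounter(Phase.SAFEMODE, step);
        counter.increment();
    }
}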

Example 8 with StartupProgress

Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.

From the class FSImage, method saveFSImageInAllDirs. The whole checkpoint save, including the parallel per-directory saver threads, runs between beginPhase and endPhase for SAVING_CHECKPOINT:

private synchronized void saveFSImageInAllDirs(FSNamesystem source, NameNodeFile nnf, long txid, Canceler canceler) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAVING_CHECKPOINT);
    if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
        throw new IOException("No image directories available!");
    }
    if (canceler == null) {
        canceler = new Canceler();
    }
    SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid, canceler);
    try {
        List<Thread> saveThreads = new ArrayList<Thread>();
        // save images into current
        for (Iterator<StorageDirectory> it = storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext(); ) {
            StorageDirectory sd = it.next();
            FSImageSaver saver = new FSImageSaver(ctx, sd, nnf);
            Thread saveThread = new Thread(saver, saver.toString());
            saveThreads.add(saveThread);
            saveThread.start();
        }
        waitForThreads(saveThreads);
        saveThreads.clear();
        storage.reportErrorsOnDirectories(ctx.getErrorSDs());
        if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
            throw new IOException("Failed to save in any storage directories while saving namespace.");
        }
        if (canceler.isCancelled()) {
            deleteCancelledCheckpoint(txid);
            // checkCancelled() throws SaveNamespaceCancelledException here
            ctx.checkCancelled();
            assert false : "should have thrown above!";
        }
        renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false);
        // Since we now have a new checkpoint, we can clean up some
        // old edit logs and checkpoints.
        purgeOldStorage(nnf);
        archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
    } finally {
        // Notify any threads waiting on the checkpoint to be canceled
        // that it is complete.
        ctx.markComplete();
        ctx = null;
    }
    prog.endPhase(Phase.SAVING_CHECKPOINT);
}
Also used: Canceler (org.apache.hadoop.hdfs.util.Canceler), ArrayList (java.util.ArrayList), IOException (java.io.IOException), StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory), StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
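
Note that endPhase above sits after the finally block, so the phase is closed only when the save completes without throwing; a cancelled checkpoint surfaces as an exception and leaves the phase open. One possible defensive variant (not what FSImage does) always closes the phase; a sketch under the same internal-API assumption, with the thread body standing in for the FSImageSaver workers and an invented class name:

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;

public class CheckpointPhaseSketch {
    public static void main(String[] args) throws InterruptedException {
        StartupProgress prog = new StartupProgress();
        prog.beginPhase(Phase.SAVING_CHECKPOINT);
        try {
            // Stand-in for the per-directory FSImageSaver threads above.
            Thread saver = new Thread(() -> { /* write one image file */ });
            saver.start();
            saver.join();
        } finally {
            // Unlike the snippet above, this closes the phase even if
            // saving fails or is cancelled.
            prog.endPhase(Phase.SAVING_CHECKPOINT);
        }
    }
}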

Example 9 with StartupProgress

Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.

From the class FSImage, method loadEdits. The LOADING_EDITS phase brackets replay of every edit log stream, and the method returns how many transactions were applied:

private long loadEdits(Iterable<EditLogInputStream> editStreams, FSNamesystem target, StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    LOG.debug("About to load edits:\n  " + Joiner.on("\n  ").join(editStreams));
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.LOADING_EDITS);
    long prevLastAppliedTxId = lastAppliedTxId;
    try {
        FSEditLogLoader loader = new FSEditLogLoader(target, lastAppliedTxId);
        // Load latest edits
        for (EditLogInputStream editIn : editStreams) {
            LOG.info("Reading " + editIn + " expecting start txid #" + (lastAppliedTxId + 1));
            try {
                loader.loadFSEdits(editIn, lastAppliedTxId + 1, startOpt, recovery);
            } finally {
                // Update lastAppliedTxId even in case of error, since some ops may
                // have been successfully applied before the error.
                lastAppliedTxId = loader.getLastAppliedTxId();
            }
            // If we are in recovery mode, we may have skipped over some txids.
            if (editIn.getLastTxId() != HdfsServerConstants.INVALID_TXID) {
                lastAppliedTxId = editIn.getLastTxId();
            }
        }
    } finally {
        FSEditLog.closeAllStreams(editStreams);
    }
    prog.endPhase(Phase.LOADING_EDITS);
    return lastAppliedTxId - prevLastAppliedTxId;
}
Also used: StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
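
Phases bracketed this way are what consumers observe: the NameNode's startup progress servlet (another of the examples aggregated on this page, judging by the servlet classes in the Aggregations list) reads the tracker through an immutable snapshot view. A minimal sketch, assuming the internal createView()/getPercentComplete() API and an invented class name:

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressView;

public class ProgressViewSketch {
    public static void main(String[] args) {
        StartupProgress prog = new StartupProgress();
        prog.beginPhase(Phase.LOADING_EDITS);
        // A view is an immutable snapshot; it does not change as the
        // underlying StartupProgress advances.
        StartupProgressView view = prog.createView();
        System.out.println("status: " + view.getStatus(Phase.LOADING_EDITS));
        System.out.println("overall: " + view.getPercentComplete());
    }
}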

Example 10 with StartupProgress

Use of org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress in project hadoop by apache.

From the class FSEditLogLoader, method loadEditRecords. This is where step-level progress is actually driven: a Step is derived from the input stream, its total is set to the expected transaction count, and the step's Counter is incremented once per applied operation:

long loadEditRecords(EditLogInputStream in, boolean closeOnExit, long expectedStartingTxId, StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;
    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Acquiring write lock to replay edit log");
    }
    fsNamesys.writeLock();
    fsDir.writeLock();
    long[] recentOpcodeOffsets = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);
    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = monotonicNow();
    long lastInodeId = fsNamesys.dir.getLastInodeId();
    try {
        while (true) {
            try {
                FSEditLogOp op;
                try {
                    op = in.readOp();
                    if (op == null) {
                        break;
                    }
                } catch (Throwable e) {
                    // Handle a problem with our input
                    check203UpgradeFailure(in.getVersion(true), e);
                    String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
                    FSImage.LOG.error(errorMessage, e);
                    if (recovery == null) {
                        // We only try to skip over problematic opcodes when in
                        // recovery mode.
                        throw new EditLogInputException(errorMessage, e, numEdits);
                    }
                    MetaRecoveryContext.editLogLoaderPrompt("We failed to read txId " + expectedTxId, recovery, "skipping the bad section in the log");
                    in.resync();
                    continue;
                }
                recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] = in.getPosition();
                if (op.hasTransactionId()) {
                    if (op.getTransactionId() > expectedTxId) {
                        MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be a gap in the edit log.  We expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "ignoring missing transaction IDs");
                    } else if (op.getTransactionId() < expectedTxId) {
                        MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be an out-of-order edit in the edit log.  We " + "expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "skipping the out-of-order edit");
                        continue;
                    }
                }
                try {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("op=" + op + ", startOpt=" + startOpt + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
                    }
                    long inodeId = applyEditLogOp(op, fsDir, startOpt, in.getVersion(true), lastInodeId);
                    if (lastInodeId < inodeId) {
                        lastInodeId = inodeId;
                    }
                } catch (RollingUpgradeOp.RollbackException e) {
                    throw e;
                } catch (Throwable e) {
                    LOG.error("Encountered exception on operation " + op, e);
                    if (recovery == null) {
                        throw e instanceof IOException ? (IOException) e : new IOException(e);
                    }
                    MetaRecoveryContext.editLogLoaderPrompt("Failed to " + "apply edit log operation " + op + ": error " + e.getMessage(), recovery, "applying edits");
                }
                // Now that the operation has been successfully decoded and
                // applied, update our bookkeeping.
                incrOpCount(op.opCode, opCounts, step, counter);
                if (op.hasTransactionId()) {
                    lastAppliedTxId = op.getTransactionId();
                    expectedTxId = lastAppliedTxId + 1;
                } else {
                    expectedTxId = lastAppliedTxId = expectedStartingTxId;
                }
                // log progress
                if (op.hasTransactionId()) {
                    long now = monotonicNow();
                    if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
                        long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
                        int percent = Math.round((float) deltaTxId / numTxns * 100);
                        LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns + " transactions completed. (" + percent + "%)");
                        lastLogTime = now;
                    }
                }
                numEdits++;
                totalEdits++;
            } catch (RollingUpgradeOp.RollbackException e) {
                LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
                break;
            } catch (MetaRecoveryContext.RequestStopException e) {
                MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + in.getPosition() + "/" + in.length());
                break;
            }
        }
    } finally {
        fsNamesys.dir.resetLastInodeId(lastInodeId);
        if (closeOnExit) {
            in.close();
        }
        fsDir.writeUnlock();
        fsNamesys.writeUnlock("loadEditRecords");
        if (LOG.isTraceEnabled()) {
            LOG.trace("replaying edit log finished");
        }
        if (FSImage.LOG.isDebugEnabled()) {
            dumpOpCounts(opCounts);
        }
    }
    return numEdits;
}
Also used: Holder (org.apache.hadoop.hdfs.util.Holder), Step (org.apache.hadoop.hdfs.server.namenode.startupprogress.Step), IOException (java.io.IOException), Counter (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter), RollingUpgradeOp (org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp), EnumMap (java.util.EnumMap), StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
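
This method shows the full per-step accounting loop: createStartupProgressStep derives a Step from the input stream, setTotal declares the expected transaction count, and incrOpCount bumps the step's Counter once per applied operation. A compressed sketch of the same loop under the internal-API assumption (the edits file name, transaction count, and class name are invented):

import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;

public class EditReplayProgressSketch {
    public static void main(String[] args) {
        StartupProgress prog = new StartupProgress();
        prog.beginPhase(Phase.LOADING_EDITS);
        // In loadEditRecords the Step comes from the edit log stream;
        // here an invented file name stands in for it.
        Step step = new Step("edits_0000001-0000100");
        long numTxns = 100;
        prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
        Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
        for (long txid = 1; txid <= numTxns; txid++) {
            // applyEditLogOp(...) would run here; on success the
            // counter advances, mirroring incrOpCount above.
            counter.increment();
        }
        prog.endPhase(Phase.LOADING_EDITS);
    }
}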

Aggregations

StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress): 11
IOException (java.io.IOException): 3
Step (org.apache.hadoop.hdfs.server.namenode.startupprogress.Step): 3
JsonFactory (com.fasterxml.jackson.core.JsonFactory): 1
JsonGenerator (com.fasterxml.jackson.core.JsonGenerator): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
File (java.io.File): 1
FileNotFoundException (java.io.FileNotFoundException): 1
PrintWriter (java.io.PrintWriter): 1
InetSocketAddress (java.net.InetSocketAddress): 1
ArrayList (java.util.ArrayList): 1
EnumMap (java.util.EnumMap): 1
ServletContext (javax.servlet.ServletContext): 1
HttpServletRequest (javax.servlet.http.HttpServletRequest): 1
HttpServletResponse (javax.servlet.http.HttpServletResponse): 1
InconsistentFSStateException (org.apache.hadoop.hdfs.server.common.InconsistentFSStateException): 1
StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory): 1
RollingUpgradeOp (org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp): 1
FSImageFile (org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile): 1
NameNodeFile (org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile): 1