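org.apache.hadoop.hdfs.util.Canceler is a small cooperative-cancellation token used by the HDFS image-saving code: one thread calls cancel(reason), and long-running savers poll isCancelled() at safe points and abort cleanly. A minimal sketch of the contract, for orientation only (consult the Hadoop source for the authoritative class):

public class Canceler {
    // Written once by the cancelling thread, read by saver threads.
    private volatile String cancelReason = null;

    /** Request cancellation, recording a human-readable reason. */
    public void cancel(String reason) {
        this.cancelReason = reason;
    }

    /** Savers poll this at safe points during a long-running operation. */
    public boolean isCancelled() {
        return cancelReason != null;
    }

    public String getCancellationReason() {
        return cancelReason;
    }
}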

Example 1 with Canceler

Use of org.apache.hadoop.hdfs.util.Canceler in project hadoop by apache.

The class FSImage, method saveFSImageInAllDirs.

private synchronized void saveFSImageInAllDirs(FSNamesystem source, NameNodeFile nnf, long txid, Canceler canceler) throws IOException {
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAVING_CHECKPOINT);
    if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
        throw new IOException("No image directories available!");
    }
    if (canceler == null) {
        canceler = new Canceler();
    }
    SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid, canceler);
    try {
        List<Thread> saveThreads = new ArrayList<Thread>();
        // save images into current
        for (Iterator<StorageDirectory> it = storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext(); ) {
            StorageDirectory sd = it.next();
            FSImageSaver saver = new FSImageSaver(ctx, sd, nnf);
            Thread saveThread = new Thread(saver, saver.toString());
            saveThreads.add(saveThread);
            saveThread.start();
        }
        waitForThreads(saveThreads);
        saveThreads.clear();
        storage.reportErrorsOnDirectories(ctx.getErrorSDs());
        if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
            throw new IOException("Failed to save in any storage directories while saving namespace.");
        }
        if (canceler.isCancelled()) {
            deleteCancelledCheckpoint(txid);
            // throws
            ctx.checkCancelled();
            assert false : "should have thrown above!";
        }
        renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false);
        // Since we now have a new checkpoint, we can clean up some
        // old edit logs and checkpoints.
        purgeOldStorage(nnf);
        archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
    } finally {
        // Notify any threads waiting on the checkpoint to be canceled
        // that it is complete.
        ctx.markComplete();
        ctx = null;
    }
    prog.endPhase(Phase.SAVING_CHECKPOINT);
}
Also used : Canceler (org.apache.hadoop.hdfs.util.Canceler), ArrayList (java.util.ArrayList), IOException (java.io.IOException), StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory), StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
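The method above fans the save out to one FSImageSaver thread per image directory, joins them all, and only then honors a pending cancellation. A stripped-down sketch of that spawn/join/cancel-check pattern, with plain Runnable tasks standing in for FSImageSaver (names here are illustrative, not Hadoop APIs):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hdfs.util.Canceler;

// Hypothetical stand-in for the pattern in saveFSImageInAllDirs.
class FanOutSave {
    static void saveAll(List<Runnable> saveTasks, Canceler canceler)
            throws IOException, InterruptedException {
        List<Thread> workers = new ArrayList<>();
        for (Runnable task : saveTasks) {
            Thread t = new Thread(task); // one saver thread per directory
            workers.add(t);
            t.start();
        }
        for (Thread t : workers) {
            t.join(); // every save attempt finishes, cancelled or not
        }
        // Cancellation is honored only after all workers have stopped,
        // mirroring the deleteCancelledCheckpoint/checkCancelled sequence.
        if (canceler.isCancelled()) {
            throw new IOException(
                "Save cancelled: " + canceler.getCancellationReason());
        }
    }
}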

Example 2 with Canceler

Use of org.apache.hadoop.hdfs.util.Canceler in project hadoop by apache.

The class SecondaryNameNode, method doCheckpoint.

/**
   * Create a new checkpoint.
   * @return whether the image was fetched from the primary NameNode
   */
@VisibleForTesting
@SuppressWarnings("deprecation")
public boolean doCheckpoint() throws IOException {
    checkpointImage.ensureCurrentDirExists();
    NNStorage dstStorage = checkpointImage.getStorage();
    // Tell the namenode to start logging transactions in a new edit file
    // Returns a token that would be used to upload the merged image.
    CheckpointSignature sig = namenode.rollEditLog();
    boolean loadImage = false;
    boolean isFreshCheckpointer = (checkpointImage.getNamespaceID() == 0);
    boolean isSameCluster = (dstStorage.versionSupportsFederation(NameNodeLayoutVersion.FEATURES) && sig.isSameCluster(checkpointImage)) || (!dstStorage.versionSupportsFederation(NameNodeLayoutVersion.FEATURES) && sig.namespaceIdMatches(checkpointImage));
    if (isFreshCheckpointer || (isSameCluster && !sig.storageVersionMatches(checkpointImage.getStorage()))) {
        // if we're a fresh 2NN, or if we're on the same cluster and our storage
        // needs an upgrade, just take the storage info from the server.
        dstStorage.setStorageInfo(sig);
        dstStorage.setClusterID(sig.getClusterID());
        dstStorage.setBlockPoolID(sig.getBlockpoolID());
        loadImage = true;
    }
    sig.validateStorageInfo(checkpointImage);
    // error simulation code for junit test
    CheckpointFaultInjector.getInstance().afterSecondaryCallsRollEditLog();
    RemoteEditLogManifest manifest = namenode.getEditLogManifest(sig.mostRecentCheckpointTxId + 1);
    // Fetch fsimage and edits. Reload the image if previous merge failed.
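    // Non-short-circuiting '|': hasMergeError() is evaluated even when the
    // download alone already forces a reload.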
    loadImage |= downloadCheckpointFiles(fsName, checkpointImage, sig, manifest) | checkpointImage.hasMergeError();
    try {
        doMerge(sig, manifest, loadImage, checkpointImage, namesystem);
    } catch (IOException ioe) {
        // A merge error occurred. The in-memory file system state may be
        // inconsistent, so the image and edits need to be reloaded.
        checkpointImage.setMergeError();
        throw ioe;
    }
    // Clear any error since merge was successful.
    checkpointImage.clearMergeError();
    //
    // Upload the new image into the NameNode. Then tell the Namenode
    // to make this new uploaded image as the most current image.
    //
    long txid = checkpointImage.getLastAppliedTxId();
    TransferFsImage.uploadImageFromStorage(fsName, conf, dstStorage, NameNodeFile.IMAGE, txid);
    // error simulation code for junit test
    CheckpointFaultInjector.getInstance().afterSecondaryUploadsNewImage();
    LOG.warn("Checkpoint done. New Image Size: " + dstStorage.getFsImageName(txid).length());
    if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
        try {
            checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir, new Canceler());
        } catch (IOException e) {
            LOG.warn("Failed to write legacy OIV image: ", e);
        }
    }
    return loadImage;
}
Also used : RemoteEditLogManifest (org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest), Canceler (org.apache.hadoop.hdfs.util.Canceler), IOException (java.io.IOException), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
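Note how doCheckpoint passes a brand-new Canceler to saveLegacyOIVImage: a freshly constructed token that no other thread holds can never be cancelled, so it acts as a no-op. A hedged sketch of that calling convention (the ImageWriter interface and names are assumptions, not Hadoop APIs):

import org.apache.hadoop.hdfs.util.Canceler;

// Hypothetical caller illustrating the convention above.
class OneShotSave {
    // Stand-in for a cancellation-aware save method such as
    // saveLegacyOIVImage.
    interface ImageWriter {
        void writeImage(Canceler canceler) throws Exception;
    }

    static void saveToCompletion(ImageWriter writer) throws Exception {
        writer.writeImage(new Canceler()); // nobody can cancel this save
    }
}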

Example 3 with Canceler

Use of org.apache.hadoop.hdfs.util.Canceler in project hadoop by apache.

The class TestFSImageWithSnapshot, method saveFSImageToTempFile.

/** Save the fsimage to a temp file */
private File saveFSImageToTempFile() throws IOException {
    SaveNamespaceContext context = new SaveNamespaceContext(fsn, txid, new Canceler());
    FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context);
    FSImageCompression compression = FSImageCompression.createCompression(conf);
    File imageFile = getImageFile(testDir, txid);
    fsn.readLock();
    try {
        saver.save(imageFile, compression);
    } finally {
        fsn.readUnlock();
    }
    return imageFile;
}
Also used : Canceler (org.apache.hadoop.hdfs.util.Canceler), File (java.io.File), NameNodeFile (org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile)
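Because the save runs under fsn.readLock(), the resulting file reflects a single consistent namespace state. A hypothetical companion test inside the same class might exercise the helper like this (the assertions are assumptions, not part of TestFSImageWithSnapshot):

// Hypothetical JUnit 4 test relying on the class's existing imports.
@Test
public void testSavedImageIsNonEmpty() throws Exception {
    File imageFile = saveFSImageToTempFile();
    org.junit.Assert.assertTrue("expected fsimage on disk", imageFile.exists());
    org.junit.Assert.assertTrue("expected non-empty fsimage", imageFile.length() > 0);
}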

Example 4 with Canceler

Use of org.apache.hadoop.hdfs.util.Canceler in project hadoop by apache.

The class TestSaveNamespace, method testCancelSaveNamespace.

@Test(timeout = 20000)
public void testCancelSaveNamespace() throws Exception {
    Configuration conf = getConf();
    NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
    DFSTestUtil.formatNameNode(conf);
    FSNamesystem fsn = FSNamesystem.loadFromDisk(conf);
    // Replace the FSImage with a spy
    final FSImage image = fsn.getFSImage();
    NNStorage storage = image.getStorage();
    // unlock any directories that
    // FSNamesystem's initialization may have locked
    storage.close();
    storage.setStorageDirectories(FSNamesystem.getNamespaceDirs(conf), FSNamesystem.getNamespaceEditsDirs(conf));
    FSNamesystem spyFsn = spy(fsn);
    final FSNamesystem finalFsn = spyFsn;
    DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG);
    BlockIdManager bid = spy(spyFsn.getBlockManager().getBlockIdManager());
    Whitebox.setInternalState(finalFsn.getBlockManager(), "blockIdManager", bid);
    doAnswer(delayer).when(bid).getGenerationStamp();
    ExecutorService pool = Executors.newFixedThreadPool(2);
    try {
        doAnEdit(fsn, 1);
        final Canceler canceler = new Canceler();
        // Save namespace
        fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
        try {
            Future<Void> saverFuture = pool.submit(new Callable<Void>() {

                @Override
                public Void call() throws Exception {
                    image.saveNamespace(finalFsn, NameNodeFile.IMAGE, canceler);
                    return null;
                }
            });
            // Wait until saveNamespace calls getGenerationStamp
            delayer.waitForCall();
            // then cancel the saveNamespace
            Future<Void> cancelFuture = pool.submit(new Callable<Void>() {

                @Override
                public Void call() throws Exception {
                    canceler.cancel("cancelled");
                    return null;
                }
            });
            // give the cancel call time to run
            Thread.sleep(500);
            // allow saveNamespace to proceed - it should check the cancel flag
            // after this point and throw an exception
            delayer.proceed();
            cancelFuture.get();
            saverFuture.get();
            fail("saveNamespace did not fail even though cancelled!");
        } catch (Throwable t) {
            GenericTestUtils.assertExceptionContains("SaveNamespaceCancelledException", t);
        }
        LOG.info("Successfully cancelled a saveNamespace");
        // Check that we have only the original image and not any
        // cruft left over from half-finished images
        FSImageTestUtil.logStorageContents(LOG, storage);
        for (StorageDirectory sd : storage.dirIterable(null)) {
            File curDir = sd.getCurrentDir();
            GenericTestUtils.assertGlobEquals(curDir, "fsimage_.*", NNStorage.getImageFileName(0), NNStorage.getImageFileName(0) + MD5FileUtils.MD5_SUFFIX);
        }
    } finally {
        fsn.close();
    }
}
Also used : Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), Canceler (org.apache.hadoop.hdfs.util.Canceler), DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer), StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory), IOException (java.io.IOException), ExecutorService (java.util.concurrent.ExecutorService), BlockIdManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager), File (java.io.File), NameNodeFile (org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile), Test (org.junit.Test)
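The test's synchronization hinges on GenericTestUtils.DelayAnswer: the stubbed call parks inside the spy until the test releases it, giving the test a deterministic window in which to cancel. A minimal sketch of that waitForCall()/proceed() handshake (the Clock class is hypothetical, and the logger type the DelayAnswer constructor takes varies across Hadoop versions):

import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;

// Hypothetical illustration of the DelayAnswer handshake used above.
class DelayAnswerSketch {
    private static final Log LOG = LogFactory.getLog(DelayAnswerSketch.class);

    static class Clock {
        long now() { return System.currentTimeMillis(); }
    }

    void demo() throws Exception {
        DelayAnswer delayer = new DelayAnswer(LOG);
        Clock clock = spy(new Clock());
        doAnswer(delayer).when(clock).now();

        Thread worker = new Thread(() -> clock.now()); // parks inside now()
        worker.start();

        delayer.waitForCall(); // returns once the worker is inside the stub
        // ... act while the worker is parked, e.g. call canceler.cancel() ...
        delayer.proceed();     // release the worker
        worker.join();
    }
}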

Aggregations

Canceler (org.apache.hadoop.hdfs.util.Canceler): 4
IOException (java.io.IOException): 3
File (java.io.File): 2
StorageDirectory (org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory): 2
NameNodeFile (org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile): 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
ArrayList (java.util.ArrayList): 1
ExecutorService (java.util.concurrent.ExecutorService): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 1
BlockIdManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager): 1
StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress): 1
RemoteEditLogManifest (org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest): 1
DelayAnswer (org.apache.hadoop.test.GenericTestUtils.DelayAnswer): 1
Test (org.junit.Test): 1