Search in sources :

Example 21 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project hadoop by apache.

the class TestDistributedFileSystem method testConcurrentStatistics.

/**
 * Runs several concurrent {@code mkdirs} calls against one mini cluster and
 * then verifies the recorded statistics.
 *
 * <p>Each worker signals that it is ready and then blocks on a shared start
 * latch, so all workers are released together. The first Throwable raised by
 * any worker is kept and fails the test once all workers have finished.
 *
 * @throws IOException if a cluster or file system operation fails
 * @throws InterruptedException if the main thread is interrupted while
 *         waiting on one of the latches
 */
@SuppressWarnings("ThrowableResultOfMethodCallIgnored")
@Test(timeout = 180000)
public void testConcurrentStatistics() throws IOException, InterruptedException {
    FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME, DistributedFileSystem.class).reset();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()).build();
    cluster.waitActive();
    final FileSystem parentFs = cluster.getFileSystem();
    final int numThreads = 5;
    final ExecutorService threadPool = HadoopExecutors.newFixedThreadPool(numThreads);
    try {
        final CountDownLatch workersReady = new CountDownLatch(numThreads);
        final CountDownLatch startSignal = new CountDownLatch(1);
        final CountDownLatch workersFinished = new CountDownLatch(numThreads);
        final AtomicReference<Throwable> firstChildFailure = new AtomicReference<>();
        // The task only captures final locals, so one instance can be
        // submitted once per worker.
        final Runnable mkdirTask = new Runnable() {

            @Override
            public void run() {
                workersReady.countDown();
                try {
                    startSignal.await();
                    final FileSystem childFs = cluster.getFileSystem();
                    childFs.mkdirs(new Path("/testStatisticsParallelChild"));
                } catch (Throwable failure) {
                    LOG.error("Child failed when calling mkdir", failure);
                    // keep only the first failure
                    firstChildFailure.compareAndSet(null, failure);
                } finally {
                    workersFinished.countDown();
                }
            }
        };
        int submitted = 0;
        while (submitted < numThreads) {
            threadPool.submit(mkdirTask);
            submitted++;
        }
        // Baseline is read before the workers are released.
        final long mkdirsBefore = getOpStatistics(OpType.MKDIRS);
        // block until every worker has checked in
        workersReady.await();
        // release all workers at once
        startSignal.countDown();
        // block until every worker has finished
        workersFinished.await();
        final Throwable childFailure = firstChildFailure.get();
        assertNull("Child failed with exception " + childFailure, childFailure);
        checkStatistics(parentFs, 0, numThreads, 0);
        // the single-operation counter must have grown by one per worker
        final long expectedMkdirs = numThreads + mkdirsBefore;
        checkOpStatistics(OpType.MKDIRS, expectedMkdirs);
        // walk every operation counter, checking MKDIRS and logging all of them
        final Iterator<LongStatistic> opCounts = FileSystem.getGlobalStorageStatistics().get(DFSOpsCountStatistics.NAME).getLongStatistics();
        while (opCounts.hasNext()) {
            final LongStatistic current = opCounts.next();
            if (OpType.MKDIRS.getSymbol().equals(current.getName())) {
                assertEquals("Unexpected op count from iterator!", expectedMkdirs, current.getValue());
            }
            LOG.info(current.getName() + "\t" + current.getValue());
        }
    } finally {
        threadPool.shutdownNow();
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) LongStatistic(org.apache.hadoop.fs.StorageStatistics.LongStatistic) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Test(org.junit.Test)

Example 22 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project hadoop by apache.

the class TestBlockRecovery method testStopWorker.

/**
 * Test that an FsDatasetImpl operation does not hold the lock for an
 * unreasonable amount of time if a writer is taking a long time to stop.
 *
 * <p>A "slow writer" thread creates an rbw replica for the recovering block
 * and then parks on a semaphore. A second thread runs the operation supplied
 * by {@code tswr}. Once the slow writer reports an interruption, the main
 * thread calls {@code getReplicaString} and then lets both threads finish.
 *
 * @param tswr the operation to run against the recovering block; its
 *        {@code opName()} is used in log and failure messages
 * @throws Exception if setup fails or the main thread is interrupted; worker
 *         thread failures are reported through {@code Assert.fail}
 */
private void testStopWorker(final TestStopWorkerRunnable tswr) throws Exception {
    LOG.debug("Running " + currentTestName.getMethodName());
    // We need a long value for the data xceiver stop timeout.
    // Otherwise the timeout will trigger, and we will not have tested that
    // thread join was done locklessly.
    Assert.assertEquals(TEST_STOP_WORKER_XCEIVER_STOP_TIMEOUT_MILLIS, dn.getDnConf().getXceiverStopTimeout());
    // progressParent: released by the slow writer once its rbw exists.
    // terminateSlowWriter: released by the main thread to let the writer exit.
    final TestStopWorkerSemaphore progressParent = new TestStopWorkerSemaphore();
    final TestStopWorkerSemaphore terminateSlowWriter = new TestStopWorkerSemaphore();
    // Holds the first failure message reported by either worker thread.
    final AtomicReference<String> failure = new AtomicReference<String>(null);
    Collection<RecoveringBlock> recoveringBlocks = initRecoveringBlocks();
    // Only the first recovering block is used by this test.
    final RecoveringBlock recoveringBlock = Iterators.get(recoveringBlocks.iterator(), 0);
    final ExtendedBlock block = recoveringBlock.getBlock();
    Thread slowWriterThread = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                // Register this thread as the writer for the recoveringBlock.
                LOG.debug("slowWriter creating rbw");
                ReplicaHandler replicaHandler = spyDN.data.createRbw(StorageType.DISK, block, false);
                replicaHandler.close();
                LOG.debug("slowWriter created rbw");
                // Tell the parent thread to start progressing.
                progressParent.sem.release();
                // NOTE(review): presumably waits up to 60000 ms and records
                // interruptions in gotInterruption instead of aborting --
                // confirm against TestStopWorkerSemaphore.
                terminateSlowWriter.uninterruptiblyAcquire(60000);
                LOG.debug("slowWriter exiting");
            } catch (Throwable t) {
                LOG.error("slowWriter got exception", t);
                // Only the first failure is kept.
                failure.compareAndSet(null, "slowWriter got exception " + t.getMessage());
            }
        }
    });
    // Start the slow worker thread and wait for it to take ownership of the
    // ReplicaInPipeline
    slowWriterThread.start();
    progressParent.uninterruptiblyAcquire(60000);
    // Start a worker thread which will attempt to stop the writer.
    Thread stopWriterThread = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                LOG.debug("initiating " + tswr.opName());
                tswr.run(recoveringBlock);
                LOG.debug("finished " + tswr.opName());
            } catch (Throwable t) {
                LOG.error("stopWriterThread got unexpected exception for " + tswr.opName(), t);
                // Only the first failure is kept.
                failure.compareAndSet(null, "stopWriterThread got unexpected " + "exception for " + tswr.opName() + ": " + t.getMessage());
            }
        }
    });
    stopWriterThread.start();
    // NOTE(review): this poll loop has no bound of its own. If the stop thread
    // fails before interrupting the writer, it only ends through an external
    // test timeout -- confirm that callers declare one.
    while (!terminateSlowWriter.gotInterruption.get()) {
        // Wait until stopWriterThread attempts to stop our slow writer by sending
        // it an InterruptedException.
        Thread.sleep(1);
    }
    // We know that stopWriterThread is in the process of joining our slow
    // writer.  It must not hold the lock during this operation.
    // In order to test that it does not, we attempt to do an operation that
    // requires the lock-- getReplicaString.
    spyDN.getFSDataset().getReplicaString(recoveringBlock.getBlock().getBlockPoolId(), recoveringBlock.getBlock().getBlockId());
    // Tell the slow writer to exit, and then wait for all threads to join.
    terminateSlowWriter.sem.release();
    slowWriterThread.join();
    stopWriterThread.join();
    // Check that our worker threads exited cleanly.  This is not checked by the
    // unit test framework, so we have to do it manually here.
    String failureReason = failure.get();
    if (failureReason != null) {
        Assert.fail("Thread failure: " + failureReason);
    }
}
Also used : RecoveringBlock(org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) AtomicReference(java.util.concurrent.atomic.AtomicReference)

Example 23 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project hadoop by apache.

the class TestEditLogRace method testSaveImageWhileSyncInProgress.

/**
 * The logSync() method in FSEditLog is unsynchronized while syncing
 * so that other threads can concurrently enqueue edits while the prior
 * sync is ongoing. This test checks that the log is saved correctly
 * if the saveImage occurs while the syncing thread is in the unsynchronized
 * middle section.
 *
 * This replicates the following manual test proposed by Konstantin:
 *   I start the name-node in debugger.
 *   I do -mkdir and stop the debugger in logSync() just before it does flush.
 *   Then I enter safe mode with another client
 *   I start saveNamespace and stop the debugger in
 *     FSImage.saveFSImage() -> FSEditLog.createEditLogFile()
 *     -> EditLogFileOutputStream.create() ->
 *     after truncating the file but before writing LAYOUT_VERSION into it.
 *   Then I let logSync() run.
 *   Then I terminate the name-node.
 *   After that the name-node won't start, since the edits file is broken.
 *
 * @throws Exception if namesystem setup, the edit, or the verification fails
 */
@Test
public void testSaveImageWhileSyncInProgress() throws Exception {
    Configuration conf = getConf();
    NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
    DFSTestUtil.formatNameNode(conf);
    final FSNamesystem namesystem = FSNamesystem.loadFromDisk(conf);
    try {
        FSImage fsimage = namesystem.getFSImage();
        FSEditLog editLog = fsimage.getEditLog();
        // Replace the current edit stream with a spy so flush() can be intercepted.
        JournalAndStream jas = editLog.getJournals().get(0);
        EditLogFileOutputStream spyElos = spy((EditLogFileOutputStream) jas.getCurrentStream());
        jas.setCurrentStreamForTests(spyElos);
        final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
        final CountDownLatch waitToEnterFlush = new CountDownLatch(1);
        final Thread doAnEditThread = new Thread() {

            @Override
            public void run() {
                try {
                    LOG.info("Starting mkdirs");
                    namesystem.mkdirs("/test", new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
                    LOG.info("mkdirs complete");
                } catch (Throwable ioe) {
                    LOG.fatal("Got exception", ioe);
                    deferredException.set(ioe);
                    // Unblock the main thread so it can report the failure
                    // instead of waiting forever for a flush that never comes.
                    waitToEnterFlush.countDown();
                }
            }
        };
        Answer<Void> blockingFlush = new Answer<Void>() {

            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                LOG.info("Flush called");
                if (useAsyncEditLog || Thread.currentThread() == doAnEditThread) {
                    LOG.info("edit thread: Telling main thread we made it to flush section...");
                    // Signal to main thread that the edit thread is in the racy section
                    waitToEnterFlush.countDown();
                    LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
                    Thread.sleep(BLOCK_TIME * 1000);
                    LOG.info("Going through to flush. This will allow the main thread to continue.");
                }
                invocation.callRealMethod();
                LOG.info("Flush complete");
                return null;
            }
        };
        doAnswer(blockingFlush).when(spyElos).flush();
        doAnEditThread.start();
        // Wait for the edit thread to get to the logsync unsynchronized section
        LOG.info("Main thread: waiting to enter flush...");
        waitToEnterFlush.await();
        assertNull(deferredException.get());
        LOG.info("Main thread: detected that logSync is in unsynchronized section.");
        LOG.info("Trying to enter safe mode.");
        LOG.info("This should block for " + BLOCK_TIME + "sec, since flush will sleep that long");
        // Use the monotonic clock: a wall-clock adjustment during the blocked
        // call must not distort the measured duration.
        long st = Time.monotonicNow();
        namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
        long et = Time.monotonicNow();
        LOG.info("Entered safe mode");
        // Make sure we really waited for the flush to complete!
        assertTrue("Entering safe mode took only " + (et - st) + "ms; expected it to wait for the blocked flush", et - st > (BLOCK_TIME - 1) * 1000);
        // Once we're in safe mode, save namespace.
        namesystem.saveNamespace(0, 0);
        LOG.info("Joining on edit thread...");
        doAnEditThread.join();
        assertNull(deferredException.get());
        // We did 3 edits: begin, txn, and end
        assertEquals(3, verifyEditLogs(namesystem, fsimage, NNStorage.getFinalizedEditsFileName(1, 3), 1));
        // after the save, just the one "begin"
        assertEquals(1, verifyEditLogs(namesystem, fsimage, NNStorage.getInProgressEditsFileName(4), 4));
    } finally {
        LOG.info("Closing nn");
        // namesystem is final and assigned before the try, so it is never null here.
        namesystem.close();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) JournalAndStream(org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) Answer(org.mockito.stubbing.Answer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) FsPermission(org.apache.hadoop.fs.permission.FsPermission) PermissionStatus(org.apache.hadoop.fs.permission.PermissionStatus) Test(org.junit.Test)

Example 24 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project hadoop by apache.

the class TestEditLogRace method testEditLogRolling.

/**
 * Tests rolling edit logs while transactions are ongoing.
 *
 * <p>Transaction workers run in the background while the edit log is rolled
 * up to {@code NUM_ROLLS} times. After each roll the finalized segment is
 * verified and the next in-progress file is expected to exist. The loop stops
 * early once a worker has reported an error through {@code caughtErr}.
 *
 * @throws Exception if cluster setup, a roll, or verification fails; a worker
 *         error is rethrown wrapped in a {@link RuntimeException} after all
 *         resources have been released
 */
@Test
public void testEditLogRolling() throws Exception {
    // start a cluster
    Configuration conf = getConf();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    FileSystem fileSys = null;
    AtomicReference<Throwable> caughtErr = new AtomicReference<Throwable>();
    try {
        cluster.waitActive();
        fileSys = cluster.getFileSystem();
        final NamenodeProtocols nn = cluster.getNameNode().getRpcServer();
        FSImage fsimage = cluster.getNamesystem().getFSImage();
        StorageDirectory sd = fsimage.getStorage().getStorageDir(0);
        startTransactionWorkers(cluster, caughtErr);
        long previousLogTxId = 1;
        for (int i = 0; i < NUM_ROLLS && caughtErr.get() == null; i++) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException ignored) {
                // Deliberately ignored: the sleep only spaces out the rolls,
                // so a shortened pause does not affect correctness.
            }
            LOG.info("Starting roll " + i + ".");
            CheckpointSignature sig = nn.rollEditLog();
            long nextLog = sig.curSegmentTxId;
            String logFileName = NNStorage.getFinalizedEditsFileName(previousLogTxId, nextLog - 1);
            // verifyEditLogs returns a count that advances the expected start txid
            previousLogTxId += verifyEditLogs(cluster.getNamesystem(), fsimage, logFileName, previousLogTxId);
            assertEquals(previousLogTxId, nextLog);
            File expectedLog = NNStorage.getInProgressEditsFile(sd, previousLogTxId);
            assertTrue("Expect " + expectedLog + " to exist", expectedLog.exists());
        }
    } finally {
        // Release every resource before reporting a worker error; throwing
        // first would leave the file system open and the mini cluster running.
        try {
            stopTransactionWorkers();
        } finally {
            try {
                if (fileSys != null) {
                    fileSys.close();
                }
            } finally {
                // cluster is final and assigned before the try, so never null here
                cluster.shutdown();
            }
        }
        final Throwable workerError = caughtErr.get();
        if (workerError != null) {
            throw new RuntimeException(workerError);
        }
    }
}
Also used : NamenodeProtocols(org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) AtomicReference(java.util.concurrent.atomic.AtomicReference) StorageDirectory(org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory) FileSystem(org.apache.hadoop.fs.FileSystem) File(java.io.File) Test(org.junit.Test)

Example 25 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project hadoop by apache.

the class TestEditLogRace method testSaveNamespace.

/**
 * Tests saving fs image while transactions are ongoing.
 *
 * <p>Transaction workers run in the background while the namespace is saved
 * up to {@code NUM_SAVE_IMAGE} times. Each iteration enters safe mode,
 * verifies the in-progress edit log, saves the namespace, verifies the
 * finalized edit log, and leaves safe mode. The loop stops early once a
 * worker has reported an error through {@code caughtErr}.
 *
 * @throws Exception if cluster setup, a save, or verification fails; a worker
 *         error is rethrown wrapped in a {@link RuntimeException} after all
 *         resources have been released
 */
@Test
public void testSaveNamespace() throws Exception {
    // start a cluster
    Configuration conf = getConf();
    MiniDFSCluster cluster = null;
    FileSystem fileSys = null;
    AtomicReference<Throwable> caughtErr = new AtomicReference<Throwable>();
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
        cluster.waitActive();
        fileSys = cluster.getFileSystem();
        final FSNamesystem namesystem = cluster.getNamesystem();
        FSImage fsimage = namesystem.getFSImage();
        FSEditLog editLog = fsimage.getEditLog();
        startTransactionWorkers(cluster, caughtErr);
        for (int i = 0; i < NUM_SAVE_IMAGE && caughtErr.get() == null; i++) {
            try {
                Thread.sleep(20);
            } catch (InterruptedException ignored) {
                // Deliberately ignored: the sleep only spaces out the saves,
                // so a shortened pause does not affect correctness.
            }
            LOG.info("Save " + i + ": entering safe mode");
            namesystem.enterSafeMode(false);
            // Verify edit logs before the save
            // They should start with the first edit after the checkpoint
            long logStartTxId = fsimage.getStorage().getMostRecentCheckpointTxId() + 1;
            verifyEditLogs(namesystem, fsimage, NNStorage.getInProgressEditsFileName(logStartTxId), logStartTxId);
            LOG.info("Save " + i + ": saving namespace");
            namesystem.saveNamespace(0, 0);
            LOG.info("Save " + i + ": leaving safemode");
            long savedImageTxId = fsimage.getStorage().getMostRecentCheckpointTxId();
            // Verify that edit logs post save got finalized and aren't corrupt
            verifyEditLogs(namesystem, fsimage, NNStorage.getFinalizedEditsFileName(logStartTxId, savedImageTxId), logStartTxId);
            // The checkpoint id should be 1 less than the last written ID, since
            // the log roll writes the "BEGIN" transaction to the new log.
            assertEquals(fsimage.getStorage().getMostRecentCheckpointTxId(), editLog.getLastWrittenTxId() - 1);
            namesystem.leaveSafeMode(false);
            LOG.info("Save " + i + ": complete");
        }
    } finally {
        // Release every resource before reporting a worker error; throwing
        // first would leave the file system open and the mini cluster running.
        try {
            stopTransactionWorkers();
        } finally {
            try {
                if (fileSys != null) {
                    fileSys.close();
                }
            } finally {
                // cluster is null if the builder itself failed
                if (cluster != null) {
                    cluster.shutdown();
                }
            }
        }
        final Throwable workerError = caughtErr.get();
        if (workerError != null) {
            throw new RuntimeException(workerError);
        }
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) AtomicReference(java.util.concurrent.atomic.AtomicReference) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.Test)

Aggregations

AtomicReference (java.util.concurrent.atomic.AtomicReference)1331 Test (org.junit.Test)668 CountDownLatch (java.util.concurrent.CountDownLatch)437 IOException (java.io.IOException)263 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)205 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)159 ArrayList (java.util.ArrayList)108 HashMap (java.util.HashMap)105 List (java.util.List)95 Map (java.util.Map)77 Test (org.testng.annotations.Test)76 File (java.io.File)64 ExecutionException (java.util.concurrent.ExecutionException)60 HashSet (java.util.HashSet)54 URI (java.net.URI)48 TimeoutException (java.util.concurrent.TimeoutException)48 HttpServletRequest (javax.servlet.http.HttpServletRequest)48 HttpServletResponse (javax.servlet.http.HttpServletResponse)46 MockResponse (okhttp3.mockwebserver.MockResponse)46 ByteBuffer (java.nio.ByteBuffer)44