use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestDistributedFileSystem method testConcurrentStatistics.
@SuppressWarnings("ThrowableResultOfMethodCallIgnored")
@Test(timeout = 180000)
public void testConcurrentStatistics() throws IOException, InterruptedException {
FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME, DistributedFileSystem.class).reset();
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()).build();
cluster.waitActive();
final FileSystem fs = cluster.getFileSystem();
final int numThreads = 5;
final ExecutorService threadPool = HadoopExecutors.newFixedThreadPool(numThreads);
try {
final CountDownLatch allExecutorThreadsReady = new CountDownLatch(numThreads);
final CountDownLatch startBlocker = new CountDownLatch(1);
final CountDownLatch allDone = new CountDownLatch(numThreads);
final AtomicReference<Throwable> childError = new AtomicReference<>();
for (int i = 0; i < numThreads; i++) {
threadPool.submit(new Runnable() {
@Override
public void run() {
allExecutorThreadsReady.countDown();
try {
startBlocker.await();
final FileSystem fs = cluster.getFileSystem();
fs.mkdirs(new Path("/testStatisticsParallelChild"));
} catch (Throwable t) {
LOG.error("Child failed when calling mkdir", t);
childError.compareAndSet(null, t);
} finally {
allDone.countDown();
}
}
});
}
final long oldMkdirOpCount = getOpStatistics(OpType.MKDIRS);
// wait until all threads are ready
allExecutorThreadsReady.await();
// all threads start making directories
startBlocker.countDown();
// wait until all threads are done
allDone.await();
assertNull("Child failed with exception " + childError.get(), childError.get());
checkStatistics(fs, 0, numThreads, 0);
// check the single operation count stat
checkOpStatistics(OpType.MKDIRS, numThreads + oldMkdirOpCount);
// iterate all the operation counts
for (Iterator<LongStatistic> opCountIter = FileSystem.getGlobalStorageStatistics().get(DFSOpsCountStatistics.NAME).getLongStatistics(); opCountIter.hasNext(); ) {
final LongStatistic opCount = opCountIter.next();
if (OpType.MKDIRS.getSymbol().equals(opCount.getName())) {
assertEquals("Unexpected op count from iterator!", numThreads + oldMkdirOpCount, opCount.getValue());
}
LOG.info(opCount.getName() + "\t" + opCount.getValue());
}
} finally {
threadPool.shutdownNow();
cluster.shutdown();
}
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestEditLogRace method testSaveImageWhileSyncInProgress.
/**
* The logSync() method in FSEditLog is unsynchronized whiel syncing
* so that other threads can concurrently enqueue edits while the prior
* sync is ongoing. This test checks that the log is saved correctly
* if the saveImage occurs while the syncing thread is in the unsynchronized middle section.
*
* This replicates the following manual test proposed by Konstantin:
* I start the name-node in debugger.
* I do -mkdir and stop the debugger in logSync() just before it does flush.
* Then I enter safe mode with another client
* I start saveNamepsace and stop the debugger in
* FSImage.saveFSImage() -> FSEditLog.createEditLogFile()
* -> EditLogFileOutputStream.create() ->
* after truncating the file but before writing LAYOUT_VERSION into it.
* Then I let logSync() run.
* Then I terminate the name-node.
* After that the name-node wont start, since the edits file is broken.
*/
@Test
public void testSaveImageWhileSyncInProgress() throws Exception {
Configuration conf = getConf();
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
DFSTestUtil.formatNameNode(conf);
final FSNamesystem namesystem = FSNamesystem.loadFromDisk(conf);
try {
FSImage fsimage = namesystem.getFSImage();
FSEditLog editLog = fsimage.getEditLog();
JournalAndStream jas = editLog.getJournals().get(0);
EditLogFileOutputStream spyElos = spy((EditLogFileOutputStream) jas.getCurrentStream());
jas.setCurrentStreamForTests(spyElos);
final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
final CountDownLatch waitToEnterFlush = new CountDownLatch(1);
final Thread doAnEditThread = new Thread() {
@Override
public void run() {
try {
LOG.info("Starting mkdirs");
namesystem.mkdirs("/test", new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
LOG.info("mkdirs complete");
} catch (Throwable ioe) {
LOG.fatal("Got exception", ioe);
deferredException.set(ioe);
waitToEnterFlush.countDown();
}
}
};
Answer<Void> blockingFlush = new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
LOG.info("Flush called");
if (useAsyncEditLog || Thread.currentThread() == doAnEditThread) {
LOG.info("edit thread: Telling main thread we made it to flush section...");
// Signal to main thread that the edit thread is in the racy section
waitToEnterFlush.countDown();
LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
Thread.sleep(BLOCK_TIME * 1000);
LOG.info("Going through to flush. This will allow the main thread to continue.");
}
invocation.callRealMethod();
LOG.info("Flush complete");
return null;
}
};
doAnswer(blockingFlush).when(spyElos).flush();
doAnEditThread.start();
// Wait for the edit thread to get to the logsync unsynchronized section
LOG.info("Main thread: waiting to enter flush...");
waitToEnterFlush.await();
assertNull(deferredException.get());
LOG.info("Main thread: detected that logSync is in unsynchronized section.");
LOG.info("Trying to enter safe mode.");
LOG.info("This should block for " + BLOCK_TIME + "sec, since flush will sleep that long");
long st = Time.now();
namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
long et = Time.now();
LOG.info("Entered safe mode");
// Make sure we really waited for the flush to complete!
assertTrue(et - st > (BLOCK_TIME - 1) * 1000);
// Once we're in safe mode, save namespace.
namesystem.saveNamespace(0, 0);
LOG.info("Joining on edit thread...");
doAnEditThread.join();
assertNull(deferredException.get());
// We did 3 edits: begin, txn, and end
assertEquals(3, verifyEditLogs(namesystem, fsimage, NNStorage.getFinalizedEditsFileName(1, 3), 1));
// after the save, just the one "begin"
assertEquals(1, verifyEditLogs(namesystem, fsimage, NNStorage.getInProgressEditsFileName(4), 4));
} finally {
LOG.info("Closing nn");
if (namesystem != null)
namesystem.close();
}
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestEditLogRace method testSaveRightBeforeSync.
/**
* Most of the FSNamesystem methods have a synchronized section where they
* update the name system itself and write to the edit log, and then
* unsynchronized, they call logSync. This test verifies that, if an
* operation has written to the edit log but not yet synced it,
* we wait for that sync before entering safe mode.
*/
@Test
public void testSaveRightBeforeSync() throws Exception {
Configuration conf = getConf();
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
DFSTestUtil.formatNameNode(conf);
final FSNamesystem namesystem = FSNamesystem.loadFromDisk(conf);
try {
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
final CountDownLatch sleepingBeforeSync = new CountDownLatch(1);
final Thread doAnEditThread = new Thread() {
@Override
public void run() {
try {
LOG.info("Starting setOwner");
namesystem.writeLock();
try {
editLog.logSetOwner("/", "test", "test");
} finally {
namesystem.writeUnlock();
}
sleepingBeforeSync.countDown();
LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
Thread.sleep(BLOCK_TIME * 1000);
editLog.logSync();
LOG.info("edit thread: logSync complete");
} catch (Throwable ioe) {
LOG.fatal("Got exception", ioe);
deferredException.set(ioe);
sleepingBeforeSync.countDown();
}
}
};
doAnEditThread.setDaemon(true);
doAnEditThread.start();
LOG.info("Main thread: waiting to just before logSync...");
sleepingBeforeSync.await(200, TimeUnit.MILLISECONDS);
assertNull(deferredException.get());
LOG.info("Main thread: detected that logSync about to be called.");
LOG.info("Trying to enter safe mode.");
long st = Time.now();
namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
long et = Time.now();
LOG.info("Entered safe mode after " + (et - st) + "ms");
// Make sure we didn't wait for the thread that did a logEdit but
// not logSync. Going into safemode does a logSyncAll that will flush
// its edit.
assertTrue(et - st < (BLOCK_TIME / 2) * 1000);
// Once we're in safe mode, save namespace.
namesystem.saveNamespace(0, 0);
LOG.info("Joining on edit thread...");
doAnEditThread.join();
assertNull(deferredException.get());
// We did 3 edits: begin, txn, and end
assertEquals(3, verifyEditLogs(namesystem, fsimage, NNStorage.getFinalizedEditsFileName(1, 3), 1));
// after the save, just the one "begin"
assertEquals(1, verifyEditLogs(namesystem, fsimage, NNStorage.getInProgressEditsFileName(4), 4));
} finally {
LOG.info("Closing nn");
if (namesystem != null)
namesystem.close();
}
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestLdapGroupsMapping method testLdapConnectionTimeout.
/**
* Test that if the {@link LdapGroupsMapping#CONNECTION_TIMEOUT} is set in the
* configuration, the LdapGroupsMapping connection will timeout by this value
* if it does not get a LDAP response from the server.
* @throws IOException
* @throws InterruptedException
*/
@Test(timeout = 30000)
public void testLdapConnectionTimeout() throws IOException, InterruptedException {
// 3s
final int connectionTimeoutMs = 3 * 1000;
try (ServerSocket serverSock = new ServerSocket(0)) {
final CountDownLatch finLatch = new CountDownLatch(1);
// Below we create a LDAP server which will accept a client request;
// but it will never reply to the bind (connect) request.
// Client of this LDAP server is expected to get a connection timeout.
final Thread ldapServer = new Thread(new Runnable() {
@Override
public void run() {
try {
try (Socket ignored = serverSock.accept()) {
finLatch.await();
}
} catch (Exception e) {
e.printStackTrace();
}
}
});
ldapServer.start();
final LdapGroupsMapping mapping = new LdapGroupsMapping();
final Configuration conf = new Configuration();
conf.set(LdapGroupsMapping.LDAP_URL_KEY, "ldap://localhost:" + serverSock.getLocalPort());
conf.setInt(CONNECTION_TIMEOUT, connectionTimeoutMs);
mapping.setConf(conf);
try {
mapping.doGetGroups("hadoop", 1);
fail("The LDAP query should have timed out!");
} catch (NamingException ne) {
LOG.debug("Got the exception while LDAP querying: ", ne);
assertExceptionContains("LDAP response read timed out, timeout used:" + connectionTimeoutMs + "ms", ne);
assertFalse(ne.getMessage().contains("remaining name"));
} finally {
finLatch.countDown();
}
ldapServer.join();
}
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestLdapGroupsMapping method testLdapReadTimeout.
/**
* Test that if the {@link LdapGroupsMapping#READ_TIMEOUT} is set in the
* configuration, the LdapGroupsMapping query will timeout by this value if
* it does not get a LDAP response from the server.
*
* @throws IOException
* @throws InterruptedException
*/
@Test(timeout = 30000)
public void testLdapReadTimeout() throws IOException, InterruptedException {
// 4s
final int readTimeoutMs = 4 * 1000;
try (ServerSocket serverSock = new ServerSocket(0)) {
final CountDownLatch finLatch = new CountDownLatch(1);
// Below we create a LDAP server which will accept a client request,
// authenticate it successfully; but it will never reply to the following
// query request.
// Client of this LDAP server is expected to get a read timeout.
final Thread ldapServer = new Thread(new Runnable() {
@Override
public void run() {
try {
try (Socket clientSock = serverSock.accept()) {
IOUtils.skipFully(clientSock.getInputStream(), 1);
clientSock.getOutputStream().write(AUTHENTICATE_SUCCESS_MSG);
finLatch.await();
}
} catch (Exception e) {
e.printStackTrace();
}
}
});
ldapServer.start();
final LdapGroupsMapping mapping = new LdapGroupsMapping();
final Configuration conf = new Configuration();
conf.set(LdapGroupsMapping.LDAP_URL_KEY, "ldap://localhost:" + serverSock.getLocalPort());
conf.setInt(READ_TIMEOUT, readTimeoutMs);
mapping.setConf(conf);
try {
mapping.doGetGroups("hadoop", 1);
fail("The LDAP query should have timed out!");
} catch (NamingException ne) {
LOG.debug("Got the exception while LDAP querying: ", ne);
assertExceptionContains("LDAP response read timed out, timeout used:" + readTimeoutMs + "ms", ne);
assertExceptionContains("remaining name", ne);
} finally {
finLatch.countDown();
}
ldapServer.join();
}
}
Aggregations