use of org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream in project hadoop by apache.
the class TestEditLogJournalFailures method testSingleRequiredFailedEditsDirOnSetReadyToFlush.
/**
 * Checks that a setReadyToFlush() failure on the single required edits
 * journal surfaces as an ExitException, and that setReadyToFlush() is never
 * invoked on the non-required journal at index 1.
 * Regression test for HDFS-2874.
 */
@Test
public void testSingleRequiredFailedEditsDirOnSetReadyToFlush() throws IOException {
  // Restart the mini cluster with the first configured name dir declared
  // as a required edits dir.
  final String[] nameDirs =
      cluster.getConfiguration(0).getTrimmedStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
  shutDownMiniCluster();
  final Configuration requiredDirConf = getConf();
  requiredDirConf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, nameDirs[0]);
  requiredDirConf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, 0);
  requiredDirConf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY, 0);
  setUpMiniCluster(requiredDirConf, true);
  assertTrue(doAnEdit());

  // Break the required journal (index 0): with both flags false, its
  // stream is stubbed to throw from setReadyToFlush().
  invalidateEditsDirAtIndex(0, false, false);
  final JournalAndStream optionalJournal = getJournalAndStream(1);
  final EditLogFileOutputStream optionalStreamSpy = spyOnStream(optionalJournal);

  // Nothing has thrown an ExitException so far, and the non-required
  // journal must still be active at this point.
  assertTrue(optionalJournal.isActive());

  try {
    doAnEdit();
    fail("A single failure of a required journal should have halted the NN");
  } catch (RemoteException remote) {
    final String remoteClass = remote.getClassName();
    assertTrue(remoteClass.contains("ExitException"));
    GenericTestUtils.assertExceptionContains(
        "setReadyToFlush failed for required journal", remote);
  }

  // The required directory is listed before the non-required one and
  // failed setReadyToFlush(), so setReadyToFlush() must never have been
  // attempted on the non-required directory (HDFS-2874).
  Mockito.verify(optionalStreamSpy, Mockito.never()).setReadyToFlush();
  assertFalse(optionalJournal.isActive());
}
use of org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream in project hadoop by apache.
the class TestEditLogJournalFailures method invalidateEditsDirAtIndex.
/**
 * Injects I/O faults into the journal at position <code>index</code> by
 * stubbing the stream spy obtained from <code>spyOnStream</code>.
 *
 * @param index the index of the journal to take offline.
 * @param failOnFlush if true, <code>flush()</code> is stubbed to throw an
 *          <code>IOException</code>; if false,
 *          <code>setReadyToFlush()</code> is stubbed to throw instead.
 * @param failOnWrite if true, <code>write()</code> is additionally stubbed
 *          to throw an <code>IOException</code>.
 * @throws IOException declared because the stubbed stream methods declare it.
 */
private void invalidateEditsDirAtIndex(int index, boolean failOnFlush, boolean failOnWrite) throws IOException {
  final EditLogFileOutputStream faultyStream = spyOnStream(getJournalAndStream(index));

  if (failOnWrite) {
    doThrow(new IOException("fail on write()")).when(faultyStream).write((FSEditLogOp) any());
  }

  // Exactly one of setReadyToFlush() / flush() is made to fail.
  if (!failOnFlush) {
    doThrow(new IOException("fail on setReadyToFlush()")).when(faultyStream).setReadyToFlush();
    return;
  }
  doThrow(new IOException("fail on flush()")).when(faultyStream).flush();
}
use of org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream in project hadoop by apache.
the class TestEditLogRace method testSaveImageWhileSyncInProgress.
/**
* The logSync() method in FSEditLog is unsynchronized while syncing
* so that other threads can concurrently enqueue edits while the prior
* sync is ongoing. This test checks that the log is saved correctly
* if the saveImage occurs while the syncing thread is in the unsynchronized middle section.
*
* This replicates the following manual test proposed by Konstantin:
* I start the name-node in debugger.
* I do -mkdir and stop the debugger in logSync() just before it does flush.
* Then I enter safe mode with another client
* I start saveNamespace and stop the debugger in
* FSImage.saveFSImage() -> FSEditLog.createEditLogFile()
* -> EditLogFileOutputStream.create() ->
* after truncating the file but before writing LAYOUT_VERSION into it.
* Then I let logSync() run.
* Then I terminate the name-node.
* After that the name-node won't start, since the edits file is broken.
*
* In this automated version the debugger pause is simulated by a spied
* edit log stream whose flush() sleeps for BLOCK_TIME seconds before
* delegating to the real flush().
*/
@Test
public void testSaveImageWhileSyncInProgress() throws Exception {
Configuration conf = getConf();
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
DFSTestUtil.formatNameNode(conf);
final FSNamesystem namesystem = FSNamesystem.loadFromDisk(conf);
try {
FSImage fsimage = namesystem.getFSImage();
FSEditLog editLog = fsimage.getEditLog();
// Swap the first journal's current stream for a Mockito spy so that
// flush() can be intercepted below.
JournalAndStream jas = editLog.getJournals().get(0);
EditLogFileOutputStream spyElos = spy((EditLogFileOutputStream) jas.getCurrentStream());
jas.setCurrentStreamForTests(spyElos);
// Carries any Throwable raised on the edit thread back to the main thread.
final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
// Counted down once flush() is entered (or if the edit thread fails first).
final CountDownLatch waitToEnterFlush = new CountDownLatch(1);
final Thread doAnEditThread = new Thread() {
@Override
public void run() {
try {
LOG.info("Starting mkdirs");
namesystem.mkdirs("/test", new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
LOG.info("mkdirs complete");
} catch (Throwable ioe) {
LOG.fatal("Got exception", ioe);
deferredException.set(ioe);
// Release the main thread so it does not wait on the latch forever
// when the edit failed before reaching flush().
waitToEnterFlush.countDown();
}
}
};
// Replacement for flush(): signals the main thread, sleeps for BLOCK_TIME
// seconds, then delegates to the real flush().
Answer<Void> blockingFlush = new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
LOG.info("Flush called");
// Only delay flushes issued by the edit thread, or any flush when
// useAsyncEditLog is set (presumably because the flush then runs on a
// different thread -- TODO confirm).
if (useAsyncEditLog || Thread.currentThread() == doAnEditThread) {
LOG.info("edit thread: Telling main thread we made it to flush section...");
// Signal to main thread that the edit thread is in the racy section
waitToEnterFlush.countDown();
LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
Thread.sleep(BLOCK_TIME * 1000);
LOG.info("Going through to flush. This will allow the main thread to continue.");
}
invocation.callRealMethod();
LOG.info("Flush complete");
return null;
}
};
doAnswer(blockingFlush).when(spyElos).flush();
doAnEditThread.start();
// Wait for the edit thread to get to the logsync unsynchronized section
LOG.info("Main thread: waiting to enter flush...");
waitToEnterFlush.await();
// The latch is also released on failure, so make sure the edit thread
// did not record an exception.
assertNull(deferredException.get());
LOG.info("Main thread: detected that logSync is in unsynchronized section.");
LOG.info("Trying to enter safe mode.");
LOG.info("This should block for " + BLOCK_TIME + "sec, since flush will sleep that long");
long st = Time.now();
namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
long et = Time.now();
LOG.info("Entered safe mode");
// Make sure we really waited for the flush to complete!
// (one second of slack is allowed below BLOCK_TIME)
assertTrue(et - st > (BLOCK_TIME - 1) * 1000);
// Once we're in safe mode, save namespace.
namesystem.saveNamespace(0, 0);
LOG.info("Joining on edit thread...");
doAnEditThread.join();
assertNull(deferredException.get());
// We did 3 edits: begin, txn, and end
assertEquals(3, verifyEditLogs(namesystem, fsimage, NNStorage.getFinalizedEditsFileName(1, 3), 1));
// after the save, just the one "begin"
assertEquals(1, verifyEditLogs(namesystem, fsimage, NNStorage.getInProgressEditsFileName(4), 4));
} finally {
LOG.info("Closing nn");
if (namesystem != null)
namesystem.close();
}
}
use of org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream in project hadoop by apache.
the class TestStorageRestore method invalidateStorage.
/**
 * Simulates storage failures. Every storage directory whose root is in
 * <code>filesToInvalidate</code> is reported as failed, and each file-based
 * journal rooted at <code>path2</code> or <code>path3</code> gets its
 * current stream replaced by a spy that throws on write.
 */
public void invalidateStorage(FSImage fi, Set<File> filesToInvalidate) throws IOException {
  ArrayList<StorageDirectory> failedDirs = new ArrayList<StorageDirectory>(2);
  for (Iterator<StorageDirectory> dirs = fi.getStorage().dirIterator(); dirs.hasNext();) {
    StorageDirectory dir = dirs.next();
    if (filesToInvalidate.contains(dir.getRoot())) {
      LOG.info("causing IO error on " + dir.getRoot());
      failedDirs.add(dir);
    }
  }

  // simulate an error
  fi.getStorage().reportErrorsOnDirectories(failedDirs);

  for (JournalAndStream journal : fi.getEditLog().getJournals()) {
    // Only file-based journals have a storage directory to match against.
    if (!(journal.getManager() instanceof FileJournalManager)) {
      continue;
    }
    FileJournalManager fileManager = (FileJournalManager) journal.getManager();
    File journalRoot = fileManager.getStorageDirectory().getRoot();
    if (!journalRoot.equals(path2) && !journalRoot.equals(path3)) {
      continue;
    }
    // Install the spy first, then make every write on it fail.
    EditLogOutputStream faultyStream = spy(journal.getCurrentStream());
    journal.setCurrentStreamForTests(faultyStream);
    doThrow(new IOException("Injected fault: write"))
        .when(faultyStream).write(Mockito.<FSEditLogOp>anyObject());
  }
}
use of org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream in project hadoop by apache.
the class FSNamesystem method getNameJournalStatus.
/**
 * Builds a JSON array with one entry per journal of the edit log. Each
 * entry carries the keys "required", "disabled", "manager" and "stream".
 * An empty array is returned when there is no edit log.
 */
// NameNodeMXBean
@Override
public String getNameJournalStatus() {
  final List<Map<String, String>> report = new ArrayList<Map<String, String>>();
  final FSEditLog editLog = getFSImage().getEditLog();
  if (editLog == null) {
    return JSON.toString(report);
  }

  // The FSEditLog lock cannot be held for metrics, so this flag may be
  // false (stale) by the time it is used.
  final boolean writable = editLog.isOpenForWriteWithoutLock();
  for (JournalAndStream journal : editLog.getJournals()) {
    final Map<String, String> entry = new HashMap<String, String>();
    final String managerName = journal.getManager().toString();
    entry.put("required", String.valueOf(journal.isRequired()));
    entry.put("disabled", String.valueOf(journal.isDisabled()));
    entry.put("manager", managerName);

    // Work out the stream description: disabled journals win, then the
    // read-only case, then whatever the current output stream reports.
    final String streamStatus;
    if (journal.isDisabled()) {
      streamStatus = "Failed";
    } else if (!writable) {
      streamStatus = "open for read";
    } else {
      final EditLogOutputStream out = journal.getCurrentStream();
      streamStatus = (out == null) ? "not currently writing" : out.generateReport();
    }
    entry.put("stream", streamStatus);
    report.add(entry);
  }
  return JSON.toString(report);
}
Aggregations