Use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.
The class TestDnRespectsBlockReportSplitThreshold, method testCornerCaseAtThreshold.
/**
 * Tests the behavior when the count of blocks is exactly equal to the
 * threshold.
 */
@Test(timeout = 300000)
public void testCornerCaseAtThreshold() throws IOException, InterruptedException {
  startUpCluster(BLOCKS_IN_FILE);
  NameNode nn = cluster.getNameNode();
  DataNode dn = cluster.getDataNodes().get(0);
  // Create a file with a few blocks.
  createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
  // Insert a spy object for the NN RPC.
  DatanodeProtocolClientSideTranslatorPB nnSpy =
      InternalDataNodeTestUtils.spyOnBposToNN(dn, nn);
  // Trigger a block report so there is an interaction with the spy object.
  DataNodeTestUtils.triggerBlockReport(dn);
  ArgumentCaptor<StorageBlockReport[]> captor =
      ArgumentCaptor.forClass(StorageBlockReport[].class);
  Mockito.verify(nnSpy, times(cluster.getStoragesPerDatanode())).blockReport(
      any(DatanodeRegistration.class), anyString(), captor.capture(),
      Mockito.<BlockReportContext>anyObject());
  verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
}
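The verifyCapturedArguments helper is referenced but not included in this snippet. A minimal sketch of what such a helper might look like, assuming it checks the per-call report count and sums block counts across all captured reports (the real helper in TestDnRespectsBlockReportSplitThreshold may differ):

  // Hypothetical sketch of the verifyCapturedArguments helper used above.
  private void verifyCapturedArguments(ArgumentCaptor<StorageBlockReport[]> captor,
      int expectedReportsPerCall, int expectedTotalBlockCount) {
    int numBlocksReported = 0;
    for (StorageBlockReport[] reports : captor.getAllValues()) {
      // Each captured RPC should carry the expected number of per-storage reports.
      assertEquals(expectedReportsPerCall, reports.length);
      for (StorageBlockReport report : reports) {
        numBlocksReported += report.getBlocks().getNumberOfBlocks();
      }
    }
    // All blocks in the file should be accounted for across the calls.
    assertTrue(numBlocksReported >= expectedTotalBlockCount);
  }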
Use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.
The class TestHAStateTransitions, method testSecretManagerState.
/**
 * The secret manager needs to start/stop - the invariant should be that
 * the secret manager runs if and only if the NN is active and not in
 * safe mode. As a state diagram, we need to test all of the following
 * transitions to make sure the secret manager is started when we transition
 * into state 4, but none of the others.
 * <pre>
 *          SafeMode     Not SafeMode
 * Standby  1 <------>   2
 *          ^            ^
 *          |            |
 *          v            v
 * Active   3 <------>   4
 * </pre>
 */
@Test(timeout = 60000)
public void testSecretManagerState() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 50);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1)
      .waitSafeMode(false)
      .build();
  try {
    cluster.transitionToActive(0);
    DFSTestUtil.createFile(cluster.getFileSystem(0), TEST_FILE_PATH, 6000, (short) 1, 1L);
    cluster.getConfiguration(0).setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 60000);
    cluster.restartNameNode(0);
    NameNode nn = cluster.getNameNode(0);
    banner("Started in state 1.");
    assertTrue(nn.isStandbyState());
    assertTrue(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 1->2. Should not start secret manager");
    NameNodeAdapter.leaveSafeMode(nn);
    assertTrue(nn.isStandbyState());
    assertFalse(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 2->1. Should not start secret manager.");
    NameNodeAdapter.enterSafeMode(nn, false);
    assertTrue(nn.isStandbyState());
    assertTrue(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 1->3. Should not start secret manager.");
    nn.getRpcServer().transitionToActive(REQ_INFO);
    assertFalse(nn.isStandbyState());
    assertTrue(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 3->1. Should not start secret manager.");
    nn.getRpcServer().transitionToStandby(REQ_INFO);
    assertTrue(nn.isStandbyState());
    assertTrue(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 1->3->4. Should start secret manager.");
    nn.getRpcServer().transitionToActive(REQ_INFO);
    NameNodeAdapter.leaveSafeMode(nn);
    assertFalse(nn.isStandbyState());
    assertFalse(nn.isInSafeMode());
    assertTrue(isDTRunning(nn));
    banner("Transition 4->3. Should stop secret manager");
    NameNodeAdapter.enterSafeMode(nn, false);
    assertFalse(nn.isStandbyState());
    assertTrue(nn.isInSafeMode());
    assertFalse(isDTRunning(nn));
    banner("Transition 3->4. Should start secret manager");
    NameNodeAdapter.leaveSafeMode(nn);
    assertFalse(nn.isStandbyState());
    assertFalse(nn.isInSafeMode());
    assertTrue(isDTRunning(nn));
    for (int i = 0; i < 20; i++) {
      // Loop the last check to suss out races.
      banner("Transition 4->2. Should stop secret manager.");
      nn.getRpcServer().transitionToStandby(REQ_INFO);
      assertTrue(nn.isStandbyState());
      assertFalse(nn.isInSafeMode());
      assertFalse(isDTRunning(nn));
      banner("Transition 2->4. Should start secret manager");
      nn.getRpcServer().transitionToActive(REQ_INFO);
      assertFalse(nn.isStandbyState());
      assertFalse(nn.isInSafeMode());
      assertTrue(isDTRunning(nn));
    }
  } finally {
    cluster.shutdown();
  }
}
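The banner and isDTRunning helpers are assumed here rather than shown. A minimal sketch, assuming NameNodeAdapter exposes the delegation token secret manager and that LOG is the test class's logger (the real helpers in TestHAStateTransitions may differ in detail):

  // Hypothetical sketch: the secret manager issues delegation tokens and
  // should run only in state 4 (active and out of safe mode).
  private static boolean isDTRunning(NameNode nn) {
    return NameNodeAdapter.getDtSecretManager(nn.getNamesystem()).isRunning();
  }

  // Hypothetical sketch: make each transition easy to spot in the test log.
  private static void banner(String string) {
    LOG.info("\n\n\n\n================================================\n"
        + string
        + "\n==================================================\n\n\n");
  }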
Use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.
The class TestHAStateTransitions, method testFailoverAfterCrashDuringLogRoll.
private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, Integer.MAX_VALUE);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(0)
      .build();
  FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
  try {
    cluster.transitionToActive(0);
    NameNode nn0 = cluster.getNameNode(0);
    nn0.getRpcServer().rollEditLog();
    cluster.shutdownNameNode(0);
    createEmptyInProgressEditLog(cluster, nn0, writeHeader);
    cluster.transitionToActive(1);
  } finally {
    IOUtils.cleanup(LOG, fs);
    cluster.shutdown();
  }
}
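Since this is a private helper parameterized on writeHeader, it is presumably driven by two public test cases, one per flag value. A plausible sketch (the actual wrapper names and timeouts in TestHAStateTransitions are assumptions):

  // Hypothetical wrappers covering both crash points during a log roll.
  @Test(timeout = 30000)
  public void testFailoverAfterCrashDuringLogRoll() throws Exception {
    // Crash after the in-progress segment's header was written.
    testFailoverAfterCrashDuringLogRoll(true);
  }

  @Test(timeout = 30000)
  public void testFailoverAfterCrashDuringEmptyLogRoll() throws Exception {
    // Crash before anything was written to the new segment.
    testFailoverAfterCrashDuringLogRoll(false);
  }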
Use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.
The class TestPipelinesFailover, method testFailoverRightBeforeCommitSynchronization.
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout = 30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = newMiniCluster(conf, 3);
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);
    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    // Write a half block.
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    // Look into the block manager on the active node for the block
    // under construction.
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary = DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " + expectedPrimary);
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        InternalDataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    // Delay the commitBlockSynchronization call.
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages
    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();
    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains("Operation category WRITE is not supported", t);
    // Now, if we try again to recover the block, it should succeed on the
    // new active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
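loopRecoverLease is referenced but not shown. A sketch under the assumption that it simply retries recoverLease against the new active until it returns true, using GenericTestUtils.waitFor with a guava Supplier as Hadoop tests commonly do (names and timeouts are assumptions):

  // Hypothetical sketch: retry recoverLease until the new active NN accepts it.
  static void loopRecoverLease(final FileSystem fsOtherUser, final Path testPath)
      throws TimeoutException, InterruptedException {
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        try {
          return ((DistributedFileSystem) fsOtherUser).recoverLease(testPath);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }, 1000, 60000); // poll every second, give up after a minute
  }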
Use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.
The class TestFailureOfSharedDir, method testFailureOfSharedDir.
/**
 * Test that marking the shared edits dir as being "required" causes the NN to
 * fail if that dir can't be accessed.
 */
@Test
public void testFailureOfSharedDir() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
  // The shared edits dir will automatically be marked required.
  MiniDFSCluster cluster = null;
  File sharedEditsDir = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .checkExitOnShutdown(false)
        .build();
    cluster.waitActive();
    cluster.transitionToActive(0);
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/test1")));
    // Blow away the shared edits dir.
    URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
    sharedEditsDir = new File(sharedEditsUri);
    assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", true));
    Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
    NameNode nn1 = cluster.getNameNode(1);
    assertTrue(nn1.isStandbyState());
    assertFalse("StandBy NameNode should not go to SafeMode on resource unavailability",
        nn1.isInSafeMode());
    NameNode nn0 = cluster.getNameNode(0);
    try {
      // Make sure that subsequent operations on the NN fail.
      nn0.getRpcServer().rollEditLog();
      fail("Succeeded in rolling edit log despite shared dir being deleted");
    } catch (ExitException ee) {
      GenericTestUtils.assertExceptionContains(
          "finalize log segment 1, 3 failed for required journal", ee);
    }
    // Check that none of the local edits dirs rolled, since the shared edits
    // dir didn't roll. Regression test for HDFS-2874.
    for (URI editsUri : cluster.getNameEditsDirs(0)) {
      if (editsUri.equals(sharedEditsUri)) {
        continue;
      }
      File editsDir = new File(editsUri.getPath());
      File curDir = new File(editsDir, "current");
      GenericTestUtils.assertGlobEquals(curDir, "edits_.*",
          NNStorage.getInProgressEditsFileName(1));
    }
  } finally {
    if (sharedEditsDir != null) {
      // Restore write permissions; without this, test cleanup will fail.
      FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
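The comment in the test notes that the shared edits dir is marked required automatically in an HA setup. For reference, an edits dir can also be marked required explicitly via dfs.namenode.edits.dir.required; a minimal sketch, where requiredEditsUri is a hypothetical placeholder for one of the URIs already listed in dfs.namenode.edits.dir:

  // Sketch: explicitly mark an edits dir as required, so that losing it
  // is treated as fatal by the NameNode's resource checker.
  conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY,
      requiredEditsUri.toString()); // requiredEditsUri: placeholder URI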