Search in sources :

Example 6 with NameNode

use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.

The class TestDnRespectsBlockReportSplitThreshold, method testCornerCaseAtThreshold.

/**
   * Tests the behavior when the count of blocks is exactly equal to the
   * split threshold: the block report must still be sent as a single RPC
   * per storage, not split into multiple RPCs.
   */
@Test(timeout = 300000)
public void testCornerCaseAtThreshold() throws IOException, InterruptedException {
    startUpCluster(BLOCKS_IN_FILE);
    NameNode nn = cluster.getNameNode();
    DataNode dn = cluster.getDataNodes().get(0);
    // Create a file whose block count equals the split threshold exactly.
    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
    // Insert a spy object for the NN RPC so the report can be captured.
    DatanodeProtocolClientSideTranslatorPB nnSpy = InternalDataNodeTestUtils.spyOnBposToNN(dn, nn);
    // Trigger a block report so there is an interaction with the spy
    // object.
    DataNodeTestUtils.triggerBlockReport(dn);
    ArgumentCaptor<StorageBlockReport[]> captor = ArgumentCaptor.forClass(StorageBlockReport[].class);
    // Exactly one blockReport call per storage proves the report was not
    // split. (any(Class) replaces the deprecated Mockito anyObject().)
    Mockito.verify(nnSpy, times(cluster.getStoragesPerDatanode())).blockReport(any(DatanodeRegistration.class), anyString(), captor.capture(), Mockito.any(BlockReportContext.class));
    verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
}
Also used : DatanodeProtocolClientSideTranslatorPB(org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) StorageBlockReport(org.apache.hadoop.hdfs.server.protocol.StorageBlockReport) Test(org.junit.Test)

Example 7 with NameNode

use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.

The class TestHAStateTransitions, method testSecretManagerState.

/**
   * The secret manager needs to start/stop - the invariant should be that
   * the secret manager runs if and only if the NN is active and not in
   * safe mode. As a state diagram, we need to test all of the following
   * transitions to make sure the secret manager is started when we transition
   * into state 4, but none of the others.
   * <pre>
   *         SafeMode     Not SafeMode 
   * Standby   1 <------> 2
   *           ^          ^
   *           |          |
   *           v          v
   * Active    3 <------> 4
   * </pre>
   */
@Test(timeout = 60000)
public void testSecretManagerState() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 50);
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).waitSafeMode(false).build();
    try {
        cluster.transitionToActive(0);
        // Write some blocks, then bump the safemode extension so that after a
        // restart the NN comes up in safe mode and stays there (state 1).
        DFSTestUtil.createFile(cluster.getFileSystem(0), TEST_FILE_PATH, 6000, (short) 1, 1L);
        cluster.getConfiguration(0).setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 60000);
        cluster.restartNameNode(0);
        NameNode nn = cluster.getNameNode(0);
        banner("Started in state 1.");
        checkSecretManagerState(nn, true, true, false);
        banner("Transition 1->2. Should not start secret manager");
        NameNodeAdapter.leaveSafeMode(nn);
        checkSecretManagerState(nn, true, false, false);
        banner("Transition 2->1. Should not start secret manager.");
        NameNodeAdapter.enterSafeMode(nn, false);
        checkSecretManagerState(nn, true, true, false);
        banner("Transition 1->3. Should not start secret manager.");
        nn.getRpcServer().transitionToActive(REQ_INFO);
        checkSecretManagerState(nn, false, true, false);
        banner("Transition 3->1. Should not start secret manager.");
        nn.getRpcServer().transitionToStandby(REQ_INFO);
        checkSecretManagerState(nn, true, true, false);
        banner("Transition 1->3->4. Should start secret manager.");
        nn.getRpcServer().transitionToActive(REQ_INFO);
        NameNodeAdapter.leaveSafeMode(nn);
        checkSecretManagerState(nn, false, false, true);
        banner("Transition 4->3. Should stop secret manager");
        NameNodeAdapter.enterSafeMode(nn, false);
        checkSecretManagerState(nn, false, true, false);
        banner("Transition 3->4. Should start secret manager");
        NameNodeAdapter.leaveSafeMode(nn);
        checkSecretManagerState(nn, false, false, true);
        for (int i = 0; i < 20; i++) {
            // Loop the last check to suss out races.
            banner("Transition 4->2. Should stop secret manager.");
            nn.getRpcServer().transitionToStandby(REQ_INFO);
            checkSecretManagerState(nn, true, false, false);
            banner("Transition 2->4. Should start secret manager");
            nn.getRpcServer().transitionToActive(REQ_INFO);
            checkSecretManagerState(nn, false, false, true);
        }
    } finally {
        cluster.shutdown();
    }
}

/**
 * Asserts the NN's HA state, its safe-mode state, and whether the
 * delegation-token secret manager is currently running.
 *
 * @param nn the NameNode under test
 * @param expectStandby whether the NN should be in standby state
 * @param expectSafemode whether the NN should be in safe mode
 * @param expectDtRunning whether the secret manager should be running
 */
private void checkSecretManagerState(NameNode nn, boolean expectStandby, boolean expectSafemode, boolean expectDtRunning) throws Exception {
    assertEquals("standby state", expectStandby, nn.isStandbyState());
    assertEquals("in safe mode", expectSafemode, nn.isInSafeMode());
    assertEquals("secret manager running", expectDtRunning, isDTRunning(nn));
}
Also used : NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)

Example 8 with NameNode

use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.

The class TestHAStateTransitions, method testFailoverAfterCrashDuringLogRoll.

/**
 * Scenario: NN0 rolls its edit log and then crashes, leaving behind an empty
 * (optionally header-only) in-progress edit segment. NN1 must still be able
 * to become active despite that segment.
 */
private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) throws Exception {
    Configuration config = new Configuration();
    // Effectively disable automatic edit tailing on the standby so edits are
    // only consumed at failover time.
    config.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, Integer.MAX_VALUE);
    MiniDFSCluster dfsCluster = new MiniDFSCluster.Builder(config).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    FileSystem fileSys = HATestUtil.configureFailoverFs(dfsCluster, config);
    try {
        dfsCluster.transitionToActive(0);
        NameNode activeNN = dfsCluster.getNameNode(0);
        activeNN.getRpcServer().rollEditLog();
        // Simulate a crash right after the roll, then plant the empty
        // in-progress edit log segment.
        dfsCluster.shutdownNameNode(0);
        createEmptyInProgressEditLog(dfsCluster, activeNN, writeHeader);
        // Failing over to the other NN must succeed despite the empty segment.
        dfsCluster.transitionToActive(1);
    } finally {
        IOUtils.cleanup(LOG, fileSys);
        dfsCluster.shutdown();
    }
}
Also used : NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem)

Example 9 with NameNode

use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.

The class TestPipelinesFailover, method testFailoverRightBeforeCommitSynchronization.

/**
   * Test the scenario where the NN fails over after issuing a block
   * synchronization request, but before it is committed. The
   * DN running the recovery should then fail to commit the synchronization
   * and a later retry will succeed.
   */
@Test(timeout = 30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
    final Configuration conf = new Configuration();
    // Disable permissions so that another user can recover the lease.
    conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    FSDataOutputStream stm = null;
    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
    try {
        cluster.waitActive();
        cluster.transitionToActive(0);
        // Brief pause after the transition before writing.
        Thread.sleep(500);
        LOG.info("Starting with NN 0 active");
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        stm = fs.create(TEST_PATH);
        // write a half block
        AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
        stm.hflush();
        // Look into the block manager on the active node for the block
        // under construction.
        NameNode nn0 = cluster.getNameNode(0);
        ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
        DatanodeDescriptor expectedPrimary = DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
        LOG.info("Expecting block recovery to be triggered on DN " + expectedPrimary);
        // Find the corresponding DN daemon, and spy on its connection to the
        // active.
        DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
        DatanodeProtocolClientSideTranslatorPB nnSpy = InternalDataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
        // Delay the commitBlockSynchronization call
        DelayAnswer delayer = new DelayAnswer(LOG);
        Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(Mockito.eq(blk), // new genstamp
        Mockito.anyInt(), // new length
        Mockito.anyLong(), // close file
        Mockito.eq(true), // delete block
        Mockito.eq(false), // new targets
        (DatanodeID[]) Mockito.anyObject(), // new target storages
        (String[]) Mockito.anyObject());
        DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
        // recoverLease returns false: recovery has been started but has not
        // yet completed (the commit is held by the delayer).
        assertFalse(fsOtherUser.recoverLease(TEST_PATH));
        LOG.info("Waiting for commitBlockSynchronization call from primary");
        // Block here until the DN actually issues commitBlockSynchronization.
        delayer.waitForCall();
        LOG.info("Failing over to NN 1");
        // Fail over while the commit RPC is still parked in the delayer.
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        // Let the commitBlockSynchronization call go through, and check that
        // it failed with the correct exception.
        delayer.proceed();
        delayer.waitForResult();
        Throwable t = delayer.getThrown();
        if (t == null) {
            fail("commitBlockSynchronization call did not fail on standby");
        }
        GenericTestUtils.assertExceptionContains("Operation category WRITE is not supported", t);
        // Now, if we try again to recover the block, it should succeed on the new
        // active.
        loopRecoverLease(fsOtherUser, TEST_PATH);
        AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
    } finally {
        IOUtils.closeStream(stm);
        cluster.shutdown();
    }
}
Also used : NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) DelayAnswer(org.apache.hadoop.test.GenericTestUtils.DelayAnswer) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeProtocolClientSideTranslatorPB(org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)

Example 10 with NameNode

use of org.apache.hadoop.hdfs.server.namenode.NameNode in project hadoop by apache.

The class TestFailureOfSharedDir, method testFailureOfSharedDir.

/**
   * Test that marking the shared edits dir as being "required" causes the NN to
   * fail if that dir can't be accessed.
   */
@Test
public void testFailureOfSharedDir() throws Exception {
    Configuration conf = new Configuration();
    conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
    // The shared edits dir will automatically be marked required.
    MiniDFSCluster cluster = null;
    File sharedEditsDir = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).checkExitOnShutdown(false).build();
        cluster.waitActive();
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        // A successful write proves the cluster is healthy before we break it.
        assertTrue(fs.mkdirs(new Path("/test1")));
        // Blow away the shared edits dir.
        URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
        sharedEditsDir = new File(sharedEditsUri);
        // Make the shared edits dir unwritable (recursive chmod -w); chmod
        // returns 0 on success.
        assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", true));
        // Sleep for two resource-check intervals so the NN is guaranteed to
        // have noticed the dir is no longer writable.
        Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
        NameNode nn1 = cluster.getNameNode(1);
        assertTrue(nn1.isStandbyState());
        assertFalse("StandBy NameNode should not go to SafeMode on resource unavailability", nn1.isInSafeMode());
        NameNode nn0 = cluster.getNameNode(0);
        try {
            // Make sure that subsequent operations on the NN fail.
            nn0.getRpcServer().rollEditLog();
            fail("Succeeded in rolling edit log despite shared dir being deleted");
        } catch (ExitException ee) {
            GenericTestUtils.assertExceptionContains("finalize log segment 1, 3 failed for required journal", ee);
        }
        // dir didn't roll. Regression test for HDFS-2874.
        for (URI editsUri : cluster.getNameEditsDirs(0)) {
            if (editsUri.equals(sharedEditsUri)) {
                continue;
            }
            // Each local (non-shared) edits dir should still contain only the
            // original in-progress segment, i.e. no roll happened there.
            File editsDir = new File(editsUri.getPath());
            File curDir = new File(editsDir, "current");
            GenericTestUtils.assertGlobEquals(curDir, "edits_.*", NNStorage.getInProgressEditsFileName(1));
        }
    } finally {
        if (sharedEditsDir != null) {
            // without this test cleanup will fail
            FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) File(java.io.File) URI(java.net.URI) ExitException(org.apache.hadoop.util.ExitUtil.ExitException) Test(org.junit.Test)

Aggregations

NameNode (org.apache.hadoop.hdfs.server.namenode.NameNode)65 Test (org.junit.Test)44 Configuration (org.apache.hadoop.conf.Configuration)28 Path (org.apache.hadoop.fs.Path)22 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)17 FileSystem (org.apache.hadoop.fs.FileSystem)15 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)9 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)8 File (java.io.File)7 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)7 DatanodeProtocolClientSideTranslatorPB (org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB)7 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)6 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)6 IOException (java.io.IOException)5 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)4 BlockTokenSecretManager (org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager)4 BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager)4 DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration)4 NamenodeProtocols (org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols)4