Search in sources :

Example 61 with FSNamesystem

use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

the class TestHeartbeatHandling method testHeartbeatBlockRecovery.

/**
 * Test if
 * {@link FSNamesystem#handleHeartbeat}
 * correctly selects data node targets for block recovery.
 *
 * <p>A three-datanode mini cluster is started and a block under recovery is
 * queued on the first datanode in three scenarios: all nodes fresh, one node
 * with a stale last-update time, and all nodes with a stale last-update time.
 * After each heartbeat the locations carried by the resulting
 * {@link BlockRecoveryCommand} are checked.
 */
@Test
public void testHeartbeatBlockRecovery() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
        final String poolId = namesystem.getBlockPoolId();
        final DatanodeRegistration nodeReg1 = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
        final DatanodeDescriptor dd1 = NameNodeAdapter.getDatanode(namesystem, nodeReg1);
        dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
        final DatanodeRegistration nodeReg2 = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId);
        final DatanodeDescriptor dd2 = NameNodeAdapter.getDatanode(namesystem, nodeReg2);
        dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
        final DatanodeRegistration nodeReg3 = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId);
        final DatanodeDescriptor dd3 = NameNodeAdapter.getDatanode(namesystem, nodeReg3);
        dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
        // Take the lock before entering the try block so that the finally
        // clause can never release a lock that was not acquired.
        namesystem.writeLock();
        try {
            synchronized (hm) {
                NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem);
                NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem);
                NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem);
                final DatanodeStorageInfo[] storages = { dd1.getStorageInfos()[0], dd2.getStorageInfos()[0], dd3.getStorageInfos()[0] };

                // Test with all alive nodes.
                DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
                DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0);
                DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
                DatanodeInfo[] recoveringNodes = heartbeatAndGetRecoveryLocations(nodeReg1, dd1, namesystem, storages);
                assertEquals(3, recoveringNodes.length);
                assertEquals(dd1, recoveringNodes[0]);
                assertEquals(dd2, recoveringNodes[1]);
                assertEquals(dd3, recoveringNodes[2]);

                // Test with one stale node.
                DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
                recoveringNodes = heartbeatAndGetRecoveryLocations(nodeReg1, dd1, namesystem, storages);
                assertEquals(2, recoveringNodes.length);
                // dd2 is skipped.
                assertEquals(dd1, recoveringNodes[0]);
                assertEquals(dd3, recoveringNodes[1]);

                // Test with all stale node.
                DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000);
                // More than the default stale interval of 30 seconds.
                DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
                DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000);
                recoveringNodes = heartbeatAndGetRecoveryLocations(nodeReg1, dd1, namesystem, storages);
                // Every node was given a stale last-update time above, yet the
                // complete location list is expected back.
                // NOTE(review): presumably stale locations are only filtered out
                // when at least one location remains afterwards - confirm against
                // FSNamesystem#handleHeartbeat.
                assertEquals(3, recoveringNodes.length);
                assertEquals(dd1, recoveringNodes[0]);
                assertEquals(dd2, recoveringNodes[1]);
                assertEquals(dd3, recoveringNodes[2]);
            }
        } finally {
            namesystem.writeUnlock();
        }
    } finally {
        cluster.shutdown();
    }
}

/**
 * Queues a new block in {@code UNDER_RECOVERY} state on {@code dd}, sends one
 * heartbeat for that datanode and returns the locations of the single
 * recovering block carried by the single returned command.
 *
 * <p>Asserts that exactly one command is returned, that it is a
 * {@code DNA_RECOVERBLOCK} command, and that it holds exactly one block.
 *
 * @param nodeReg registration of the datanode that heartbeats
 * @param dd descriptor of the datanode that heartbeats
 * @param namesystem namesystem that receives the heartbeat
 * @param storages expected locations used to build the block under recovery
 * @return locations listed for the recovering block
 * @throws Exception if sending the heartbeat fails
 */
private static DatanodeInfo[] heartbeatAndGetRecoveryLocations(DatanodeRegistration nodeReg, DatanodeDescriptor dd, FSNamesystem namesystem, DatanodeStorageInfo[] storages) throws Exception {
    final BlockInfo blockInfo = new BlockInfoContiguous(new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
    blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
    dd.addBlockToBeRecovered(blockInfo);
    final DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
    assertEquals(1, cmds.length);
    assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
    final BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0];
    assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
    return recoveryCommand.getRecoveringBlocks().toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0].getLocations();
}
Also used : BlockRecoveryCommand(org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) DatanodeCommand(org.apache.hadoop.hdfs.server.protocol.DatanodeCommand) DatanodeStorage(org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) Block(org.apache.hadoop.hdfs.protocol.Block) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)

Example 62 with FSNamesystem

use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

the class TestHostFileManager method testIncludeExcludeLists.

/**
 * Checks the ALL and DEAD datanode reports of a {@link DatanodeManager} whose
 * host configuration comes from hand-built include and exclude host sets,
 * while datanodes are registered in and removed from its internal map.
 */
@Test
@SuppressWarnings("unchecked")
public void testIncludeExcludeLists() throws IOException {
    final HdfsConstants.DatanodeReportType all = HdfsConstants.DatanodeReportType.ALL;
    final HdfsConstants.DatanodeReportType dead = HdfsConstants.DatanodeReportType.DEAD;

    // Four entries are added to the include set, but it is asserted to end up
    // with only two.
    HostSet included = new HostSet();
    included.add(entry("127.0.0.1:12345"));
    included.add(entry("localhost:12345"));
    included.add(entry("127.0.0.1:12345"));
    included.add(entry("127.0.0.2"));
    HostSet excluded = new HostSet();
    excluded.add(entry("127.0.0.1:12346"));
    excluded.add(entry("127.0.30.1:12346"));
    Assert.assertEquals(2, included.size());
    Assert.assertEquals(2, excluded.size());

    HostFileManager hostFileManager = new HostFileManager();
    hostFileManager.refresh(included, excluded);

    // Datanode manager built on mocks, with the host manager injected by
    // reflection.
    BlockManager blockManager = mock(BlockManager.class);
    FSNamesystem namesystem = mock(FSNamesystem.class);
    Configuration configuration = new Configuration();
    DatanodeManager datanodeManager = new DatanodeManager(blockManager, namesystem, configuration);
    Whitebox.setInternalState(datanodeManager, "hostConfigManager", hostFileManager);
    Object rawDatanodeMap = Whitebox.getInternalState(datanodeManager, "datanodeMap");
    Map<String, DatanodeDescriptor> registered = (Map<String, DatanodeDescriptor>) rawDatanodeMap;

    // Nothing is registered yet: both included hosts are reported, and both
    // are reported as dead.
    Assert.assertEquals(2, datanodeManager.getDatanodeListForReport(all).size());
    Assert.assertEquals(2, datanodeManager.getDatanodeListForReport(dead).size());

    // Registering a datanode for each included host empties the dead report.
    DatanodeID fooId = new DatanodeID("127.0.0.1", "localhost", "uuid-foo", 12345, 1020, 1021, 1022);
    registered.put("uuid-foo", new DatanodeDescriptor(fooId));
    Assert.assertEquals(1, datanodeManager.getDatanodeListForReport(dead).size());
    DatanodeID barId = new DatanodeID("127.0.0.2", "127.0.0.2", "uuid-bar", 12345, 1020, 1021, 1022);
    registered.put("uuid-bar", new DatanodeDescriptor(barId));
    Assert.assertEquals(0, datanodeManager.getDatanodeListForReport(dead).size());

    // A third host is included and registered with a datanode marked dead.
    DatanodeID spamId = new DatanodeID("127.0.0" + ".3", "127.0.0.3", "uuid-spam", 12345, 1020, 1021, 1022);
    DatanodeDescriptor deadNode = new DatanodeDescriptor(spamId);
    DFSTestUtil.setDatanodeDead(deadNode);
    included.add(entry("127.0.0.3:12345"));
    registered.put("uuid-spam", deadNode);
    Assert.assertEquals(1, datanodeManager.getDatanodeListForReport(dead).size());

    // The dead count is expected to stay at one after the node is
    // unregistered, and again after its host is added to the exclude set.
    registered.remove("uuid-spam");
    Assert.assertEquals(1, datanodeManager.getDatanodeListForReport(dead).size());
    excluded.add(entry("127.0.0.3"));
    Assert.assertEquals(1, datanodeManager.getDatanodeListForReport(dead).size());
}
Also used : DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)

Example 63 with FSNamesystem

use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

the class TestSequentialBlockId method testTriggerBlockIdCollision.

/**
 * Test that collisions in the block ID space are handled gracefully.
 *
 * <p>A first file is written to advance the block ID counter, the counter is
 * then rewound by five, and a second file is written. The second file must
 * still receive the expected number of blocks, and its first block ID must
 * directly follow the last block ID of the first file.
 *
 * @throws IOException if the cluster cannot be started or a file operation
 *     fails
 */
@Test
public void testTriggerBlockIdCollision() throws IOException {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    try {
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        FSNamesystem fsn = cluster.getNamesystem();
        final int blockCount = 10;
        // Create a file with a few blocks to rev up the global block ID
        // counter.
        Path path1 = new Path("testBlockIdCollisionDetection_file1.dat");
        DFSTestUtil.createFile(fs, path1, IO_SIZE, BLOCK_SIZE * blockCount, BLOCK_SIZE, REPLICATION, SEED);
        List<LocatedBlock> blocks1 = DFSTestUtil.getAllBlocks(fs, path1);
        // Fail with a clear assertion, rather than an index error further
        // down, if file1 did not get the expected number of blocks.
        assertThat(blocks1.size(), is(blockCount));
        // Rewind the block ID counter in the name system object. This will result
        // in block ID collisions when we try to allocate new blocks.
        SequentialBlockIdGenerator blockIdGenerator = fsn.getBlockManager().getBlockIdManager().getBlockIdGenerator();
        blockIdGenerator.setCurrentValue(blockIdGenerator.getCurrentValue() - 5);
        // Trigger collisions by creating a new file.
        Path path2 = new Path("testBlockIdCollisionDetection_file2.dat");
        DFSTestUtil.createFile(fs, path2, IO_SIZE, BLOCK_SIZE * blockCount, BLOCK_SIZE, REPLICATION, SEED);
        List<LocatedBlock> blocks2 = DFSTestUtil.getAllBlocks(fs, path2);
        assertThat(blocks2.size(), is(blockCount));
        // Make sure that file2 block IDs start immediately after file1.
        // The last block of file1 is derived from blockCount instead of being
        // hard-coded, so the check stays correct if blockCount changes.
        long lastBlockIdOfFile1 = blocks1.get(blockCount - 1).getBlock().getBlockId();
        assertThat(blocks2.get(0).getBlock().getBlockId(), is(lastBlockIdOfFile1 + 1));
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)

Example 64 with FSNamesystem

use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

the class TestSequentialBlockGroupId method testTriggerBlockGroupIdCollisionWithLegacyBlockId.

/**
 * Test that a block group ID collision is avoided when the candidate ID is
 * already occupied by a legacy (contiguous) block.
 *
 * <p>The contiguous block ID generator is replaced with a spy that always
 * hands out one fixed ID, a contiguous file is written so that this ID is
 * taken, the block group ID generator is then reset to its initial value and
 * a second file is written under {@code ecDir}. None of the second file's
 * block IDs may equal the contiguous block's ID.
 */
@Test(timeout = 60000)
public void testTriggerBlockGroupIdCollisionWithLegacyBlockId() throws Exception {
    // Remember where the block group ID generator started, then advance it.
    long initialGroupId = blockGrpIdGenerator.getCurrentValue();
    long currentGroupBase = blockGrpIdGenerator.getCurrentValue() & ~BLOCK_GROUP_INDEX_MASK;
    blockGrpIdGenerator.skipTo(currentGroupBase + MAX_BLOCKS_IN_GROUP);
    final long occupiedId = blockGrpIdGenerator.getCurrentValue();

    // Creates contiguous block with negative blockId so that it would trigger
    // collision during blockGroup Id generation
    FSNamesystem namesystem = cluster.getNamesystem();

    // Swap the contiguous block ID generator for a spy of itself.
    SequentialBlockIdGenerator spiedGenerator = spy(namesystem.getBlockManager().getBlockIdManager().getBlockIdGenerator());
    Whitebox.setInternalState(namesystem.getBlockManager().getBlockIdManager(), "blockIdGenerator", spiedGenerator);

    // Stub generator whose nextValue() always yields the ID captured above;
    // the spy's nextValue() is delegated to it.
    SequentialBlockIdGenerator fixedIdGenerator = new SequentialBlockIdGenerator(null) {

        @Override
        public long nextValue() {
            return occupiedId;
        }
    };
    final Answer<Object> delegateToFixedId = new GenericTestUtils.DelegateAnswer(fixedIdGenerator);
    doAnswer(delegateToFixedId).when(spiedGenerator).nextValue();

    // Write the contiguous file; its single block must carry the fixed ID.
    Path contiguousFile = new Path("/testCollisionWithLegacyBlock_file1.dat");
    DFSTestUtil.createFile(fs, contiguousFile, 1024, REPLICATION, SEED);
    List<LocatedBlock> contiguousBlocks = DFSTestUtil.getAllBlocks(fs, contiguousFile);
    assertThat(contiguousBlocks.size(), is(1));
    long firstContiguousId = contiguousBlocks.get(0).getBlock().getBlockId();
    Assert.assertEquals("Unexpected BlockId!", occupiedId, firstContiguousId);

    // Reset back to the initial value to trigger collision
    blockGrpIdGenerator.setCurrentValue(initialGroupId);

    // Trigger collisions by creating a new file.
    Path groupFile = new Path(ecDir, "testCollisionWithLegacyBlock_file2.dat");
    DFSTestUtil.createFile(fs, groupFile, cellSize, fileLen, blockSize, REPLICATION, SEED);
    List<LocatedBlock> groupBlocks = DFSTestUtil.getAllBlocks(fs, groupFile);
    assertThat("Wrong BlockGrps", groupBlocks.size(), is(blockGrpCount));

    // Make sure that file1 and file2 block IDs are different
    for (LocatedBlock contiguousBlock : contiguousBlocks) {
        long contiguousId = contiguousBlock.getBlock().getBlockId();
        for (LocatedBlock groupBlock : groupBlocks) {
            long groupId = groupBlock.getBlock().getBlockId();
            assertThat("BlockGrpId mismatches!", contiguousId, is(not(groupId)));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)

Example 65 with FSNamesystem

use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.

the class TestBlocksWithNotEnoughRacks method testReduceReplFactorDueToRejoinRespectsRackPolicy.

/**
 * Test that when the excess replicas of a block are reduced due to
 * a node re-joining the cluster the rack policy is not violated.
 *
 * <p>A three-datanode cluster spanning two racks is started, a one-block file
 * with replication factor 2 is written, the only datanode on the second rack
 * is stopped and removed, and a datanode on that rack is then started again.
 * The replication and rack counts of the block are awaited after each step.
 *
 * @throws Exception if the cluster cannot be started or a wait fails
 */
@Test
public void testReduceReplFactorDueToRejoinRespectsRackPolicy() throws Exception {
    Configuration conf = getConf();
    final short replicationFactor = 2;
    final Path filePath = new Path("/testFile");
    // Last datanode is on a different rack
    String[] racks = { "/rack1", "/rack1", "/rack2" };
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(racks.length).racks(racks).build();
    try {
        // Look these up inside the try block so that the cluster is still
        // shut down if either lookup throws.
        final FSNamesystem ns = cluster.getNameNode().getNamesystem();
        final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
        // Create a file with one block
        final FileSystem fs = cluster.getFileSystem();
        DFSTestUtil.createFile(fs, filePath, 1L, replicationFactor, 1L);
        ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
        DFSTestUtil.waitForReplication(cluster, b, 2, replicationFactor, 0);
        // Make the last (cross rack) datanode look like it failed
        // to heartbeat by stopping it and calling removeDatanode.
        ArrayList<DataNode> datanodes = cluster.getDataNodes();
        assertEquals(3, datanodes.size());
        DataNode dataNode = datanodes.get(2);
        DatanodeID dnId = dataNode.getDatanodeId();
        cluster.stopDataNode(2);
        dm.removeDatanode(dnId);
        // The block gets re-replicated to another datanode so it has a
        // sufficient # replicas, but not across racks, so there should
        // be 1 rack.
        DFSTestUtil.waitForReplication(cluster, b, 1, replicationFactor, 0);
        // Start the "failed" datanode, which has a replica so the block is
        // now over-replicated and therefore a replica should be removed but
        // not on the restarted datanode as that would violate the rack policy.
        String[] rack2 = { "/rack2" };
        cluster.startDataNodes(conf, 1, true, null, rack2);
        cluster.waitActive();
        // The block now has sufficient # replicas, across racks
        DFSTestUtil.waitForReplication(cluster, b, 2, replicationFactor, 0);
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)

Aggregations

FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem): 77, Test (org.junit.Test): 59, Path (org.apache.hadoop.fs.Path): 51, FileSystem (org.apache.hadoop.fs.FileSystem): 41, Configuration (org.apache.hadoop.conf.Configuration): 37, MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 27, DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 25, HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 23, ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 19, DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 14, ArrayList (java.util.ArrayList): 12, DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration): 12, LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 9, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 7, DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7, DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 6, BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 6, File (java.io.File): 5, IOException (java.io.IOException): 5, LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5