Search in sources :

Example 71 with DatanodeDescriptor

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.

the class TestFsck method testFsckReplicaDetails.

@Test(timeout = 90000)
public void testFsckReplicaDetails() throws Exception {
    final short replFactor = 1;
    short numDn = 1;
    final long blockSize = 512;
    final long fileSize = 1024;
    String[] racks = { "/rack1" };
    String[] hosts = { "host1" };
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
    DistributedFileSystem dfs;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts).racks(racks).build();
    cluster.waitClusterUp();
    dfs = cluster.getFileSystem();
    // create a test file
    final String testFile = "/testfile";
    final Path path = new Path(testFile);
    DFSTestUtil.createFile(dfs, path, fileSize, replFactor, 1000L);
    DFSTestUtil.waitReplication(dfs, path, replFactor);
    // make sure the datanode that holds the replica is healthy before decommission
    String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
    assertTrue(fsckOut.contains("(LIVE)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // decommission datanode
    FSNamesystem fsn = cluster.getNameNode().getNamesystem();
    BlockManager bm = fsn.getBlockManager();
    final DatanodeManager dnm = bm.getDatanodeManager();
    DatanodeDescriptor dnDesc0 = dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
    dnm.getDecomManager().startDecommission(dnDesc0);
    final String dn0Name = dnDesc0.getXferAddr();
    // check the replica status while decommissioning
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // Start 2nd DataNode
    cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack2" }, new String[] { "host2" }, null, false);
    // Wait for decommission to complete (recording that it was seen in progress)
    final AtomicBoolean checkDecommissionInProgress = new AtomicBoolean(false);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            DatanodeInfo datanodeInfo = null;
            try {
                for (DatanodeInfo info : dfs.getDataNodeStats()) {
                    if (dn0Name.equals(info.getXferAddr())) {
                        datanodeInfo = info;
                    }
                }
                if (!checkDecommissionInProgress.get() && datanodeInfo != null && datanodeInfo.isDecommissionInProgress()) {
                    checkDecommissionInProgress.set(true);
                }
                if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
                    return true;
                }
            } catch (Exception e) {
                LOG.warn("Unexpected exception: " + e);
                return false;
            }
            return false;
        }
    }, 500, 30000);
    // check the replica status after decommission is done
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    DatanodeDescriptor dnDesc1 = dnm.getDatanode(cluster.getDataNodes().get(1).getDatanodeId());
    final String dn1Name = dnDesc1.getXferAddr();
    dnm.getDecomManager().startMaintenance(dnDesc1, Long.MAX_VALUE);
    // check the replica status while entering maintenance
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // check that ENTERING MAINTENANCE replicas are printed only when requested
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // Start 3rd DataNode
    cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" }, new String[] { "host3" }, null, false);
    // Wait for the 2nd datanode to reach the IN_MAINTENANCE state
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            DatanodeInfo dnInfo = null;
            try {
                for (DatanodeInfo info : dfs.getDataNodeStats()) {
                    if (dn1Name.equals(info.getXferAddr())) {
                        dnInfo = info;
                    }
                }
                if (dnInfo != null && dnInfo.isInMaintenance()) {
                    return true;
                }
            } catch (Exception e) {
                LOG.warn("Unexpected exception: " + e);
                return false;
            }
            return false;
        }
    }, 500, 30000);
    // check the replica status after the node has entered maintenance
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
    // check that IN MAINTENANCE replicas are not printed when not requested
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
}
Also used: Path (org.apache.hadoop.fs.Path), DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Matchers.anyString (org.mockito.Matchers.anyString), DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem), IOException (java.io.IOException), ChecksumException (org.apache.hadoop.fs.ChecksumException), TimeoutException (java.util.concurrent.TimeoutException), UnresolvedLinkException (org.apache.hadoop.fs.UnresolvedLinkException), FileNotFoundException (java.io.FileNotFoundException), AccessControlException (org.apache.hadoop.security.AccessControlException), DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager), Test (org.junit.Test)
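
Both anonymous Supplier waits in this test poll dfs.getDataNodeStats() until the target datanode reports the expected admin state. A minimal sketch of that pattern as a reusable helper one might add to the test class; the helper name waitForAdminState is hypothetical, it assumes java.util.function.Predicate on top of the test's existing imports, and it drops the AtomicBoolean bookkeeping the first wait uses to record that decommissioning was observed in progress:

static void waitForAdminState(final DistributedFileSystem dfs,
    final String xferAddr, final Predicate<DatanodeInfo> state)
    throws Exception {
    GenericTestUtils.waitFor(() -> {
        try {
            // scan the datanode reports for the node with the given transfer address
            for (DatanodeInfo info : dfs.getDataNodeStats()) {
                if (xferAddr.equals(info.getXferAddr()) && state.test(info)) {
                    return true;
                }
            }
        } catch (Exception e) {
            // swallow and retry; transient failures are expected mid-transition
        }
        return false;
    }, 500, 30000);
}

With this helper, the two waits above collapse to waitForAdminState(dfs, dn0Name, DatanodeInfo::isDecommissioned) and waitForAdminState(dfs, dn1Name, DatanodeInfo::isInMaintenance).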

Example 72 with DatanodeDescriptor

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.

the class TestReconstructStripedBlocks method doTestMissingStripedBlock.

/**
   * Start GROUP_SIZE + 1 datanodes and inject striped blocks into the first
   * GROUP_SIZE of them.
   * Then mark numOfBusy datanodes as busy and remove numOfMissed datanodes.
   * Then trigger the BlockManager to compute reconstruction work, so all
   * reconstruction work is scheduled on the last datanode.
   * Finally, verify the reconstruction work of the last datanode.
   */
private void doTestMissingStripedBlock(int numOfMissed, int numOfBusy) throws Exception {
    Configuration conf = new HdfsConfiguration();
    initConf(conf);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 1).build();
    try {
        cluster.waitActive();
        final int numBlocks = 4;
        DFSTestUtil.createStripedFile(cluster, filePath, dirPath, numBlocks, 1, true);
        // All blocks are located on the first GROUP_SIZE DNs; the last DN is
        // left empty by the createStripedFile util.
        // Make sure the file is complete in the NN.
        final INodeFile fileNode = cluster.getNamesystem().getFSDirectory().getINode4Write(filePath.toString()).asFile();
        assertFalse(fileNode.isUnderConstruction());
        assertTrue(fileNode.isStriped());
        BlockInfo[] blocks = fileNode.getBlocks();
        assertEquals(numBlocks, blocks.length);
        for (BlockInfo blk : blocks) {
            assertTrue(blk.isStriped());
            assertTrue(blk.isComplete());
            assertEquals(cellSize * dataBlocks, blk.getNumBytes());
            final BlockInfoStriped sb = (BlockInfoStriped) blk;
            assertEquals(groupSize, sb.numNodes());
        }
        final BlockManager bm = cluster.getNamesystem().getBlockManager();
        BlockInfo firstBlock = fileNode.getBlocks()[0];
        DatanodeStorageInfo[] storageInfos = bm.getStorages(firstBlock);
        // make numOfBusy nodes busy
        int i = 0;
        for (; i < numOfBusy; i++) {
            DatanodeDescriptor busyNode = storageInfos[i].getDatanodeDescriptor();
            for (int j = 0; j < maxReplicationStreams + 1; j++) {
                BlockManagerTestUtil.addBlockToBeReplicated(busyNode, new Block(j), new DatanodeStorageInfo[] { storageInfos[0] });
            }
        }
        // remove numOfMissed datanodes so their internal blocks go missing
        for (; i < numOfBusy + numOfMissed; i++) {
            DatanodeDescriptor missedNode = storageInfos[i].getDatanodeDescriptor();
            assertEquals(numBlocks, missedNode.numBlocks());
            bm.getDatanodeManager().removeDatanode(missedNode);
        }
        BlockManagerTestUtil.getComputedDatanodeWork(bm);
        // all the reconstruction work will be scheduled on the last DN
        DataNode lastDn = cluster.getDataNodes().get(groupSize);
        DatanodeDescriptor last = bm.getDatanodeManager().getDatanode(lastDn.getDatanodeId());
        assertEquals("Counting the number of outstanding EC tasks", numBlocks, last.getNumberOfBlocksToBeErasureCoded());
        List<BlockECReconstructionInfo> reconstruction = last.getErasureCodeCommand(numBlocks);
        for (BlockECReconstructionInfo info : reconstruction) {
            assertEquals(1, info.getTargetDnInfos().length);
            assertEquals(last, info.getTargetDnInfos()[0]);
            assertEquals(info.getSourceDnInfos().length, info.getLiveBlockIndices().length);
            if (groupSize - numOfMissed == dataBlocks) {
                // It's a QUEUE_HIGHEST_PRIORITY block, so the busy DNs will be chosen
                // to make sure we have NUM_DATA_BLOCKS DNs to do reconstruction
                // work.
                assertEquals(dataBlocks, info.getSourceDnInfos().length);
            } else {
                // The block is not at the highest priority, so the busy DNs
                // are not used as sources.
                assertEquals(groupSize - numOfMissed - numOfBusy, info.getSourceDnInfos().length);
            }
        }
    } finally {
        cluster.shutdown();
    }
}
Also used: BlockInfoStriped (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped), BlockECReconstructionInfo (org.apache.hadoop.hdfs.server.protocol.BlockECReconstructionCommand.BlockECReconstructionInfo), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), Configuration (org.apache.hadoop.conf.Configuration), HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration), DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeStorageInfo (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo), BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo), BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager), DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode), LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock), Block (org.apache.hadoop.hdfs.protocol.Block), LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock)
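
The source-count assertions at the end of this example encode a single rule: when exactly dataBlocks datanodes survive (groupSize - numOfMissed == dataBlocks), the block sits at the highest reconstruction priority and busy datanodes are drafted as sources; otherwise busy datanodes are excluded. A sketch distilling that arithmetic; this is not a BlockManager API, and the example values assume the default RS(6,3) policy (dataBlocks = 6, groupSize = 9):

static int expectedSources(int groupSize, int dataBlocks,
    int numOfMissed, int numOfBusy) {
    if (groupSize - numOfMissed == dataBlocks) {
        // QUEUE_HIGHEST_PRIORITY: busy DNs are used as sources too, so that
        // dataBlocks sources remain available for reconstruction.
        return dataBlocks;
    }
    // Normal priority: busy DNs are skipped as sources.
    return groupSize - numOfMissed - numOfBusy;
}

// expectedSources(9, 6, 3, 1) == 6  (highest priority, busy DN counted)
// expectedSources(9, 6, 1, 1) == 7  (normal priority, busy DN skipped)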

Example 73 with DatanodeDescriptor

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.

the class TestNetworkTopology method testSortByDistance.

@Test
public void testSortByDistance() throws Exception {
    DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3];
    // array contains both local node & local rack node
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[2];
    testNodes[2] = dataNodes[0];
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[0]);
    assertTrue(testNodes[1] == dataNodes[1]);
    assertTrue(testNodes[2] == dataNodes[2]);
    // array contains both local node & local rack node & decommissioned node
    DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
    dtestNodes[0] = dataNodes[8];
    dtestNodes[1] = dataNodes[12];
    dtestNodes[2] = dataNodes[11];
    dtestNodes[3] = dataNodes[9];
    dtestNodes[4] = dataNodes[10];
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dataNodes[8], dtestNodes, dtestNodes.length - 2);
    assertTrue(dtestNodes[0] == dataNodes[8]);
    assertTrue(dtestNodes[1] == dataNodes[11]);
    assertTrue(dtestNodes[2] == dataNodes[12]);
    assertTrue(dtestNodes[3] == dataNodes[9]);
    assertTrue(dtestNodes[4] == dataNodes[10]);
    // array contains local node
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[3];
    testNodes[2] = dataNodes[0];
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[0]);
    assertTrue(testNodes[1] == dataNodes[1]);
    assertTrue(testNodes[2] == dataNodes[3]);
    // array contains local rack node
    testNodes[0] = dataNodes[5];
    testNodes[1] = dataNodes[3];
    testNodes[2] = dataNodes[1];
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[1]);
    assertTrue(testNodes[1] == dataNodes[3]);
    assertTrue(testNodes[2] == dataNodes[5]);
    // array contains local rack node which happens to be in position 0
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[3];
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[1]);
    assertTrue(testNodes[1] == dataNodes[3]);
    assertTrue(testNodes[2] == dataNodes[5]);
    // Same as previous, but with a different random seed to test randomization
    testNodes[0] = dataNodes[1];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[3];
    cluster.setRandomSeed(0xDEAD);
    cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[1]);
    assertTrue(testNodes[1] == dataNodes[3]);
    assertTrue(testNodes[2] == dataNodes[5]);
    // Array of just rack-local nodes
    // Expect a random first node
    DatanodeDescriptor first = null;
    boolean foundRandom = false;
    for (int i = 5; i <= 7; i++) {
        testNodes[0] = dataNodes[5];
        testNodes[1] = dataNodes[6];
        testNodes[2] = dataNodes[7];
        cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length);
        if (first == null) {
            first = testNodes[0];
        } else {
            if (first != testNodes[0]) {
                foundRandom = true;
                break;
            }
        }
    }
    assertTrue("Expected to find a different first location", foundRandom);
    // Array of just remote nodes
    // Expect random first node
    first = null;
    // reset so the assertion below cannot pass on the previous loop's result
    foundRandom = false;
    for (int i = 1; i <= 4; i++) {
        testNodes[0] = dataNodes[13];
        testNodes[1] = dataNodes[14];
        testNodes[2] = dataNodes[15];
        cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length);
        if (first == null) {
            first = testNodes[0];
        } else {
            if (first != testNodes[0]) {
                foundRandom = true;
                break;
            }
        }
    }
    assertTrue("Expected to find a different first location", foundRandom);
    // Reader is not a datanode, but is in one of the datanodes' racks.
    testNodes[0] = dataNodes[0];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[8];
    Node rackClient = new NodeBase("/d3/r1/25.25.25");
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(rackClient, testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[8]);
    assertTrue(testNodes[1] == dataNodes[5]);
    assertTrue(testNodes[2] == dataNodes[0]);
    // Reader is not a datanode, but is in one of the datanodes' data centers.
    testNodes[0] = dataNodes[8];
    testNodes[1] = dataNodes[5];
    testNodes[2] = dataNodes[0];
    Node dcClient = new NodeBase("/d1/r2/25.25.25");
    cluster.setRandomSeed(0xDEADBEEF);
    cluster.sortByDistance(dcClient, testNodes, testNodes.length);
    assertTrue(testNodes[0] == dataNodes[0]);
    assertTrue(testNodes[1] == dataNodes[5]);
    assertTrue(testNodes[2] == dataNodes[8]);
}
Also used: DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), Test (org.junit.Test)
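
The assertions above pin down the sort contract: the reader itself comes first, then rack-local nodes, then remote ones, with ties among equal distances broken pseudo-randomly (which is why the test seeds the RNG). A self-contained sketch of that contract, assuming NetworkTopology's no-arg constructor and plain NodeBase leaves rather than this test's DatanodeDescriptor fixture:

@Test
public void testSortByDistanceSketch() {
    NetworkTopology topo = new NetworkTopology();
    Node local = new NodeBase("local", "/d1/r1");
    Node rackLocal = new NodeBase("rack-local", "/d1/r1");
    Node remote = new NodeBase("remote", "/d2/r3");
    topo.add(local);
    topo.add(rackLocal);
    topo.add(remote);
    Node[] nodes = { remote, rackLocal, local };
    // Three distinct distances, so no random tie-breaking is involved here.
    topo.sortByDistance(local, nodes, nodes.length);
    assertTrue(nodes[0] == local);     // distance 0: the reader itself
    assertTrue(nodes[1] == rackLocal); // same rack beats remote
    assertTrue(nodes[2] == remote);
}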

Example 74 with DatanodeDescriptor

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.

the class TestNetworkTopology method testContains.

@Test
public void testContains() throws Exception {
    DatanodeDescriptor nodeNotInMap = DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r4");
    for (int i = 0; i < dataNodes.length; i++) {
        assertTrue(cluster.contains(dataNodes[i]));
    }
    assertFalse(cluster.contains(nodeNotInMap));
}
Also used: DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), Test (org.junit.Test)
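
contains() tests membership by registration, not by network location: a node whose rack already exists in the map is still reported absent until add() is called for that node. A tiny sketch of that contract, under the same assumed NetworkTopology/NodeBase API as the previous sketch:

@Test
public void testContainsByRegistration() {
    NetworkTopology topo = new NetworkTopology();
    Node registered = new NodeBase("registered", "/d2/r4");
    Node stranger = new NodeBase("stranger", "/d2/r4"); // same rack, never added
    topo.add(registered);
    assertTrue(topo.contains(registered));
    assertFalse(topo.contains(stranger));
}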

Aggregations

DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 74
Test (org.junit.Test): 37
ArrayList (java.util.ArrayList): 23
DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager): 21
Path (org.apache.hadoop.fs.Path): 19
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 13
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 12
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 12
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 11
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 11
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 10
HashMap (java.util.HashMap): 9
Configuration (org.apache.hadoop.conf.Configuration): 9
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 9
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 9
Node (org.apache.hadoop.net.Node): 9
DatanodeStorageInfo (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo): 8
IOException (java.io.IOException): 7
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 7
Map (java.util.Map): 6