
Example 31 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestDataNodeVolumeFailureToleration method testConfigureMinValidVolumes.

/**
   * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
   * option, i.e. the DN shuts itself down when the number of volume
   * failures it experiences exceeds the tolerated amount.
   */
@Test
public void testConfigureMinValidVolumes() throws Exception {
    assumeNotWindows();
    // Bring up two additional datanodes that need both of their volumes
    // functioning in order to stay up.
    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0);
    conf.setTimeDuration(DFSConfigKeys.DFS_DATANODE_DISK_CHECK_MIN_GAP_KEY, 0, TimeUnit.MILLISECONDS);
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
    long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
    // Fail a volume on the 2nd DN
    File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
    DataNodeTestUtils.injectDataDirFailure(dn2Vol1);
    // Should only get two replicas (the first DN and the 3rd)
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short) 2);
    // Check that this single failure caused a DN to die.
    DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 0, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
    // If we restore the volume we should still only be able to get
    // two replicas since the DN is still considered dead.
    DataNodeTestUtils.restoreDataDirFromFailure(dn2Vol1);
    Path file2 = new Path("/test2");
    DFSTestUtil.createFile(fs, file2, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file2, (short) 2);
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) File(java.io.File) Test(org.junit.Test)
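
Every example in this section reaches the DatanodeManager through the same chain on the namenode side. Below is a minimal, self-contained sketch of that retrieval plus the live-capacity helper used above, assuming a MiniDFSCluster that is already running; the class and method names are illustrative, not part of Hadoop.

import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;

public class DatanodeManagerLookup {
    // Returns the total capacity reported by all live datanodes.
    static long liveCapacity(MiniDFSCluster cluster) throws Exception {
        // The DatanodeManager hangs off the namesystem's BlockManager.
        DatanodeManager dm = cluster.getNamesystem()
                .getBlockManager().getDatanodeManager();
        // DFSTestUtil sums capacity over the live datanode reports.
        return DFSTestUtil.getLiveDatanodeCapacity(dm);
    }
}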

Example 32 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestDecommissioningStatus method testDecommissionStatus.

/**
   * Tests Decommissioning Status in DFS.
   */
@Test
public void testDecommissionStatus() throws Exception {
    InetSocketAddress addr = new InetSocketAddress("localhost", cluster.getNameNodePort());
    DFSClient client = new DFSClient(addr, conf);
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
    assertEquals("Number of Datanodes ", 2, info.length);
    DistributedFileSystem fileSys = cluster.getFileSystem();
    DFSAdmin admin = new DFSAdmin(cluster.getConfiguration(0));
    short replicas = numDatanodes;
    //
    // Decommission one node. Verify the decommission status
    //
    Path file1 = new Path("decommission.dat");
    DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize, replicas, seed);
    Path file2 = new Path("decommission1.dat");
    FSDataOutputStream st1 = AdminStatesBaseTest.writeIncompleteFile(fileSys, file2, replicas, (short) (fileSize / blockSize));
    for (DataNode d : cluster.getDataNodes()) {
        DataNodeTestUtils.triggerBlockReport(d);
    }
    FSNamesystem fsn = cluster.getNamesystem();
    final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
    for (int iteration = 0; iteration < numDatanodes; iteration++) {
        String downnode = decommissionNode(client, iteration);
        dm.refreshNodes(conf);
        decommissionedNodes.add(downnode);
        BlockManagerTestUtil.recheckDecommissionState(dm);
        final List<DatanodeDescriptor> decommissioningNodes = dm.getDecommissioningNodes();
        if (iteration == 0) {
            assertEquals(decommissioningNodes.size(), 1);
            DatanodeDescriptor decommNode = decommissioningNodes.get(0);
            checkDecommissionStatus(decommNode, 3, 0, 1);
            checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 1), fileSys, admin);
        } else {
            assertEquals(decommissioningNodes.size(), 2);
            DatanodeDescriptor decommNode1 = decommissioningNodes.get(0);
            DatanodeDescriptor decommNode2 = decommissioningNodes.get(1);
            // This one is still 3,3,1 since it passed over the UC block 
            // earlier, before node 2 was decommed
            checkDecommissionStatus(decommNode1, 3, 3, 1);
            // This one is 4,4,2 since it has the full state
            checkDecommissionStatus(decommNode2, 4, 4, 2);
            checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 2), fileSys, admin);
        }
    }
    // Call refreshNodes on FSNamesystem with empty exclude file.
    // This will remove the datanodes from decommissioning list and
    // make them available again.
    hostsFileWriter.initExcludeHost("");
    dm.refreshNodes(conf);
    st1.close();
    AdminStatesBaseTest.cleanupFile(fileSys, file1);
    AdminStatesBaseTest.cleanupFile(fileSys, file2);
}
Also used : DFSClient(org.apache.hadoop.hdfs.DFSClient) Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) InetSocketAddress(java.net.InetSocketAddress) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) DFSAdmin(org.apache.hadoop.hdfs.tools.DFSAdmin) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test) AdminStatesBaseTest(org.apache.hadoop.hdfs.AdminStatesBaseTest)
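
The recommissioning step at the end of this test is a pattern worth isolating: clear the exclude file, then ask the DatanodeManager to re-read it. Here is a small sketch under the assumption that the HostsFileWriter helper was initialized against the same Configuration the namenode uses; the recommissionAll name is invented for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.util.HostsFileWriter;

public class RecommissionHelper {
    // Clears the exclude list so previously decommissioned nodes can rejoin.
    static void recommissionAll(HostsFileWriter hostsFileWriter,
                                DatanodeManager dm,
                                Configuration conf) throws IOException {
        hostsFileWriter.initExcludeHost("");  // empty exclude file
        dm.refreshNodes(conf);                // re-read include/exclude lists
    }
}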

Example 33 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestDefaultBlockPlacementPolicy method testPlacementWithLocalRackNodesDecommissioned.

/**
   * Verify decommissioned nodes should not be selected.
   */
@Test
public void testPlacementWithLocalRackNodesDecommissioned() throws Exception {
    String clientMachine = "client.foo.com";
    // Map client to RACK3
    String clientRack = "/RACK3";
    StaticMapping.addNodeToRack(clientMachine, clientRack);
    final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
    DatanodeDescriptor dnd3 = dnm.getDatanode(cluster.getDataNodes().get(3).getDatanodeId());
    assertEquals(dnd3.getNetworkLocation(), clientRack);
    dnm.getDecomManager().startDecommission(dnd3);
    try {
        testPlacement(clientMachine, clientRack, false);
    } finally {
        dnm.getDecomManager().stopDecommission(dnd3);
    }
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) Test(org.junit.Test)
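
The try/finally shape in this example generalizes to any check that needs a node temporarily decommissioned. A sketch of that pattern, assuming a DatanodeManager and DatanodeDescriptor obtained as above; withDecommissioned is an invented helper name.

import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;

public class DecommissionScope {
    // Runs a check while the given node is decommissioning, then restores it.
    static void withDecommissioned(DatanodeManager dnm, DatanodeDescriptor dnd,
                                   Runnable check) {
        dnm.getDecomManager().startDecommission(dnd);
        try {
            check.run();  // e.g. assert that block placement avoids the node
        } finally {
            dnm.getDecomManager().stopDecommission(dnd);
        }
    }
}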

Example 34 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestFsck method testFsckReplicaDetails.

@Test(timeout = 90000)
public void testFsckReplicaDetails() throws Exception {
    final short replFactor = 1;
    short numDn = 1;
    final long blockSize = 512;
    final long fileSize = 1024;
    String[] racks = { "/rack1" };
    String[] hosts = { "host1" };
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
    DistributedFileSystem dfs;
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts).racks(racks).build();
    cluster.waitClusterUp();
    dfs = cluster.getFileSystem();
    // create files
    final String testFile = "/testfile";
    final Path path = new Path(testFile);
    DFSTestUtil.createFile(dfs, path, fileSize, replFactor, 1000L);
    DFSTestUtil.waitReplication(dfs, path, replFactor);
    // make sure datanode that has replica is fine before decommission
    String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
    assertTrue(fsckOut.contains("(LIVE)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // decommission datanode
    FSNamesystem fsn = cluster.getNameNode().getNamesystem();
    BlockManager bm = fsn.getBlockManager();
    final DatanodeManager dnm = bm.getDatanodeManager();
    DatanodeDescriptor dnDesc0 = dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
    bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
    final String dn0Name = dnDesc0.getXferAddr();
    // check the replica status while decommissioning
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // Start 2nd DataNode
    cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack2" }, new String[] { "host2" }, null, false);
    // Wait for decommission to start
    final AtomicBoolean checkDecommissionInProgress = new AtomicBoolean(false);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            DatanodeInfo datanodeInfo = null;
            try {
                for (DatanodeInfo info : dfs.getDataNodeStats()) {
                    if (dn0Name.equals(info.getXferAddr())) {
                        datanodeInfo = info;
                    }
                }
                if (!checkDecommissionInProgress.get() && datanodeInfo != null && datanodeInfo.isDecommissionInProgress()) {
                    checkDecommissionInProgress.set(true);
                }
                if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
                    return true;
                }
            } catch (Exception e) {
                LOG.warn("Unexpected exception: " + e);
                return false;
            }
            return false;
        }
    }, 500, 30000);
    // check the replica status after decommission is done
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    DatanodeDescriptor dnDesc1 = dnm.getDatanode(cluster.getDataNodes().get(1).getDatanodeId());
    final String dn1Name = dnDesc1.getXferAddr();
    bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1, Long.MAX_VALUE);
    // check the replica status while entering maintenance
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // check entering maintenance replicas are printed only when requested
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
    // Start 3rd DataNode
    cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" }, new String[] { "host3" }, null, false);
    // Wait for the 2nd DataNode to reach the IN MAINTENANCE state
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            DatanodeInfo dnInfo = null;
            try {
                for (DatanodeInfo info : dfs.getDataNodeStats()) {
                    if (dn1Name.equals(info.getXferAddr())) {
                        dnInfo = info;
                    }
                }
                if (dnInfo != null && dnInfo.isInMaintenance()) {
                    return true;
                }
            } catch (Exception e) {
                LOG.warn("Unexpected exception: " + e);
                return false;
            }
            return false;
        }
    }, 500, 30000);
    // check the replica status after the 2nd DataNode has entered maintenance
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
    // check in maintenance replicas are not printed when not requested
    fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", "-replicaDetails");
    assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
    assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
    assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Matchers.anyString(org.mockito.Matchers.anyString) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) IOException(java.io.IOException) ChecksumException(org.apache.hadoop.fs.ChecksumException) TimeoutException(java.util.concurrent.TimeoutException) UnresolvedLinkException(org.apache.hadoop.fs.UnresolvedLinkException) FileNotFoundException(java.io.FileNotFoundException) AccessControlException(org.apache.hadoop.security.AccessControlException) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Test(org.junit.Test)
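
The two anonymous Supplier waits in this test can be written more compactly as lambdas, since GenericTestUtils.waitFor takes a single-method Supplier<Boolean>. A sketch of the first wait in that style, assuming the same dfs, dn0Name and LOG fields as above; the unused AtomicBoolean bookkeeping is dropped.

GenericTestUtils.waitFor(() -> {
    try {
        // Return true once the original datanode reports itself decommissioned.
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
            if (dn0Name.equals(info.getXferAddr()) && info.isDecommissioned()) {
                return true;
            }
        }
    } catch (Exception e) {
        LOG.warn("Unexpected exception: " + e);
    }
    return false;
}, 500, 30000);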

Example 35 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestWebHdfsDataLocality method testDataLocality.

@Test
public void testDataLocality() throws Exception {
    final Configuration conf = WebHdfsTestUtil.createConf();
    final String[] racks = { RACK0, RACK0, RACK1, RACK1, RACK2, RACK2 };
    final int nDataNodes = racks.length;
    LOG.info("nDataNodes=" + nDataNodes + ", racks=" + Arrays.asList(racks));
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(nDataNodes).racks(racks).build();
    try {
        cluster.waitActive();
        final DistributedFileSystem dfs = cluster.getFileSystem();
        final NameNode namenode = cluster.getNameNode();
        final DatanodeManager dm = namenode.getNamesystem().getBlockManager().getDatanodeManager();
        LOG.info("dm=" + dm);
        final long blocksize = DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
        final String f = "/foo";
        {
            //test CREATE
            for (int i = 0; i < nDataNodes; i++) {
                //set client address to a particular datanode
                final DataNode dn = cluster.getDataNodes().get(i);
                final String ipAddr = dm.getDatanode(dn.getDatanodeId()).getIpAddr();
                //The chosen datanode must be the same as the client address
                final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(namenode, f, PutOpParam.Op.CREATE, -1L, blocksize, null, LOCALHOST);
                Assert.assertEquals(ipAddr, chosen.getIpAddr());
            }
        }
        //create a file with one replica.
        final Path p = new Path(f);
        final FSDataOutputStream out = dfs.create(p, (short) 1);
        out.write(1);
        out.close();
        //get replica location.
        final LocatedBlocks locatedblocks = NameNodeAdapter.getBlockLocations(namenode, f, 0, 1);
        final List<LocatedBlock> lb = locatedblocks.getLocatedBlocks();
        Assert.assertEquals(1, lb.size());
        final DatanodeInfo[] locations = lb.get(0).getLocations();
        Assert.assertEquals(1, locations.length);
        final DatanodeInfo expected = locations[0];
        //For GETFILECHECKSUM, OPEN and APPEND,
        //the chosen datanode must be the same as the replica location.
        {
            //test GETFILECHECKSUM
            final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(namenode, f, GetOpParam.Op.GETFILECHECKSUM, -1L, blocksize, null, LOCALHOST);
            Assert.assertEquals(expected, chosen);
        }
        {
            //test OPEN
            final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(namenode, f, GetOpParam.Op.OPEN, 0, blocksize, null, LOCALHOST);
            Assert.assertEquals(expected, chosen);
        }
        {
            //test APPEND
            final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(namenode, f, PostOpParam.Op.APPEND, -1L, blocksize, null, LOCALHOST);
            Assert.assertEquals(expected, chosen);
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
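
The CREATE loop above hinges on mapping a running DataNode to the address the namenode has registered for it. A minimal sketch of that lookup, assuming the DatanodeManager and DataNode handles from the example; registeredIp is an invented helper name.

import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class DatanodeAddressLookup {
    // Resolves the namenode-side descriptor for a DataNode and returns
    // the IP address it registered with.
    static String registeredIp(DatanodeManager dm, DataNode dn) {
        return dm.getDatanode(dn.getDatanodeId()).getIpAddr();
    }
}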

Aggregations

DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager): 39 usages
Test (org.junit.Test): 30 usages
Path (org.apache.hadoop.fs.Path): 21 usages
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 21 usages
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 12 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 9 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 9 usages
File (java.io.File): 8 usages
ArrayList (java.util.ArrayList): 8 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 8 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 7 usages
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 7 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 6 usages
IOException (java.io.IOException): 5 usages
FileNotFoundException (java.io.FileNotFoundException): 4 usages
TimeoutException (java.util.concurrent.TimeoutException): 4 usages
ChecksumException (org.apache.hadoop.fs.ChecksumException): 4 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
UnresolvedLinkException (org.apache.hadoop.fs.UnresolvedLinkException): 4 usages