
Example 26 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestDataNodeVolumeFailureReporting method testVolFailureStatsPreservedOnNNRestart.

/**
   * Test that the NN re-learns about volume failures after a restart.
   */
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
    // Bring up two more datanodes that can tolerate 1 failure
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
    long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
    // Fail the first volume on both datanodes (we have to keep the 
    // third healthy so one node in the pipeline will not fail). 
    File dn1Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
    DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short) 2);
    ArrayList<DataNode> dns = cluster.getDataNodes();
    // The NN reports two volume failures
    DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
    checkAggregateFailuresAtNameNode(true, 2);
    checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
    checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
    // After restarting the NN, it should still see the two failures
    cluster.restartNameNode(0);
    cluster.waitActive();
    DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
    checkAggregateFailuresAtNameNode(true, 2);
    checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
    checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) File(java.io.File) Test(org.junit.Test)
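
The failure counts asserted above through checkFailuresAtNameNode() can also be read directly from the NameNode's per-datanode state. A minimal sketch, assuming DatanodeDescriptor exposes getVolumeFailures() in this Hadoop version (the other calls all appear in the examples on this page):

private static int volumeFailuresSeenByNN(MiniDFSCluster cluster, DataNode dn) {
    DatanodeManager dm =
        cluster.getNamesystem().getBlockManager().getDatanodeManager();
    // Descriptor the NameNode keeps for this datanode, updated via heartbeats.
    DatanodeDescriptor dd = dm.getDatanode(dn.getDatanodeId());
    // Number of failed volumes the datanode has reported (assumed accessor).
    return dd.getVolumeFailures();
}

In the test above this would be expected to return 1 for each of the two datanodes, both before and after the NameNode restart.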

Example 27 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestReconstructStripedBlocks method getNumberOfBlocksToBeErasureCoded.

private static int getNumberOfBlocksToBeErasureCoded(MiniDFSCluster cluster) throws Exception {
    DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
    int count = 0;
    for (DataNode dn : cluster.getDataNodes()) {
        DatanodeDescriptor dd = dm.getDatanode(dn.getDatanodeId());
        count += dd.getNumberOfBlocksToBeErasureCoded();
    }
    return count;
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode)

Example 28 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestNamenodeCapacityReport method testXceiverCountInternal.

public void testXceiverCountInternal(int minMaintenanceR) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // retry one time, if close fails
    conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, minMaintenanceR);
    MiniDFSCluster cluster = null;
    final int nodes = 8;
    final int fileCount = 5;
    final short fileRepl = 3;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(nodes).build();
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
        List<DataNode> datanodes = cluster.getDataNodes();
        final DistributedFileSystem fs = cluster.getFileSystem();
        // trigger heartbeats in case not already sent
        triggerHeartbeats(datanodes);
        // check that all nodes are live and in service
        // xceiver server adds 1 to load
        int expectedTotalLoad = nodes;
        int expectedInServiceNodes = nodes;
        int expectedInServiceLoad = nodes;
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // Ensure counts are accurate.
        for (int i = 0; i < nodes / 2; i++) {
            DataNode dn = datanodes.get(i);
            DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
            dn.shutdown();
            DFSTestUtil.setDatanodeDead(dnd);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            // Admin operations on dead nodes won't impact nodesInService metrics.
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            expectedInServiceNodes--;
            assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            stopDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
        }
        // restart the nodes to verify that counts are correct after
        // node re-registration 
        cluster.restartDataNodes();
        cluster.waitActive();
        datanodes = cluster.getDataNodes();
        expectedInServiceNodes = nodes;
        assertEquals(nodes, datanodes.size());
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i = 0; i < fileCount; i++) {
            streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i), fileRepl).getWrappedStream();
            streams[i].write("1".getBytes());
            streams[i].hsync();
            // the load for writers is 2 because both the write xceiver & packet
            // responder threads are counted in the load
            expectedTotalLoad += 2 * fileRepl;
            expectedInServiceLoad += 2 * fileRepl;
        }
        // force nodes to send load update
        triggerHeartbeats(datanodes);
        checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        // Decommission or put into maintenance a few nodes, subtract their load
        // from the expected load, and trigger a heartbeat to force a load update.
        for (int i = 0; i < fileRepl; i++) {
            expectedInServiceNodes--;
            DatanodeDescriptor dnd = dnm.getDatanode(datanodes.get(i).getDatanodeId());
            expectedInServiceLoad -= dnd.getXceiverCount();
            startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
            DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
            Thread.sleep(100);
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // Close each stream, recalculating the expected load based on whether
        // the nodes in the pipeline are still in service.
        for (int i = 0; i < fileCount; i++) {
            int adminOps = 0;
            for (DatanodeInfo dni : streams[i].getPipeline()) {
                DatanodeDescriptor dnd = dnm.getDatanode(dni);
                expectedTotalLoad -= 2;
                if (!dnd.isInService()) {
                    adminOps++;
                } else {
                    expectedInServiceLoad -= 2;
                }
            }
            try {
                streams[i].close();
            } catch (IOException ioe) {
                // close may fail when every node in the pipeline is under an
                // admin operation (decommission/maintenance); known bug for now
                if (adminOps < fileRepl) {
                    throw ioe;
                }
            }
            triggerHeartbeats(datanodes);
            // verify node count and loads 
            checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
        }
        // shutdown each node, verify node counts based on admin state
        for (int i = 0; i < nodes; i++) {
            DataNode dn = datanodes.get(i);
            dn.shutdown();
            // force it to appear dead so live count decreases
            DatanodeDescriptor dnDesc = dnm.getDatanode(dn.getDatanodeId());
            DFSTestUtil.setDatanodeDead(dnDesc);
            BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
            assertEquals(nodes - 1 - i, namesystem.getNumLiveDataNodes());
            // first few nodes are already out of service
            if (i >= fileRepl) {
                expectedInServiceNodes--;
            }
            assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
            // live nodes always report a load of 1; with no live nodes the load is 0
            double expectedXceiverAvg = (i == nodes - 1) ? 0.0 : 1.0;
            assertEquals((double) expectedXceiverAvg, getInServiceXceiverAverage(namesystem), EPSILON);
        }
        // final sanity check
        checkClusterHealth(0, namesystem, 0.0, 0, 0.0);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) IOException(java.io.IOException) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) DFSOutputStream(org.apache.hadoop.hdfs.DFSOutputStream)
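
The helpers getNumDNInService() and getInServiceXceiverAverage() are defined elsewhere in TestNamenodeCapacityReport and are not shown here. A minimal sketch, not the test's actual helper, of how the in-service xceiver average could be computed using only accessors already shown on this page (fetchDatanodes(), isInService(), getXceiverCount()):

private static double inServiceXceiverAverage(DatanodeManager dm) {
    List<DatanodeDescriptor> live = new ArrayList<>();
    List<DatanodeDescriptor> dead = new ArrayList<>();
    dm.fetchDatanodes(live, dead, false);
    int inServiceNodes = 0;
    long inServiceLoad = 0;
    for (DatanodeDescriptor dnd : live) {
        // Decommissioning and maintenance nodes are excluded from the average.
        if (dnd.isInService()) {
            inServiceNodes++;
            inServiceLoad += dnd.getXceiverCount();
        }
    }
    return inServiceNodes == 0 ? 0.0 : (double) inServiceLoad / inServiceNodes;
}

This mirrors the expectation in the loops above: shutting a node down or starting an admin operation on it removes both its node count and its xceiver count from the in-service figures.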

Example 29 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class TestNamenodeCapacityReport method testVolumeSize.

/**
   * Verifies that the capacity, DFS used, non-DFS used, and remaining space
   * reported per datanode and aggregated by the NameNode are consistent, and
   * that the configured reserved space is excluded from the reported capacity.
   */
@Test
public void testVolumeSize() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    // Set aside part of each volume's capacity as reserved (non-DFS) space
    long reserved = 10000;
    conf.setLong(DFSConfigKeys.DFS_DATANODE_DU_RESERVED_KEY, reserved);
    try {
        cluster = new MiniDFSCluster.Builder(conf).build();
        cluster.waitActive();
        final FSNamesystem namesystem = cluster.getNamesystem();
        final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
        // Ensure the data reported for each data node is right
        final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
        final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
        dm.fetchDatanodes(live, dead, false);
        assertTrue(live.size() == 1);
        long used, remaining, configCapacity, nonDFSUsed, bpUsed;
        float percentUsed, percentRemaining, percentBpUsed;
        for (final DatanodeDescriptor datanode : live) {
            used = datanode.getDfsUsed();
            remaining = datanode.getRemaining();
            nonDFSUsed = datanode.getNonDfsUsed();
            configCapacity = datanode.getCapacity();
            percentUsed = datanode.getDfsUsedPercent();
            percentRemaining = datanode.getRemainingPercent();
            bpUsed = datanode.getBlockPoolUsed();
            percentBpUsed = datanode.getBlockPoolUsedPercent();
            LOG.info("Datanode configCapacity " + configCapacity + " used " + used + " non DFS used " + nonDFSUsed + " remaining " + remaining + " perentUsed " + percentUsed + " percentRemaining " + percentRemaining);
            // There will be 5% space reserved in ext filesystem which is not
            // considered.
            assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
            assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
            assertTrue(percentRemaining == DFSUtilClient.getPercentRemaining(remaining, configCapacity));
            assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
        }
        //
        // Currently the MiniDFSCluster creates two data directories per
        // datanode, and each data directory reports the capacity of the disk
        // it lives on. Hence the capacity reported by the datanode is twice
        // the disk capacity.
        //
        // So multiply the disk capacity and reserved space by the number of
        // data directories to account for this.
        //
        final FsDatasetTestUtils utils = cluster.getFsDatasetTestUtils(0);
        int numOfDataDirs = utils.getDefaultNumOfDataDirs();
        long diskCapacity = numOfDataDirs * utils.getRawCapacity();
        reserved *= numOfDataDirs;
        configCapacity = namesystem.getCapacityTotal();
        used = namesystem.getCapacityUsed();
        nonDFSUsed = namesystem.getNonDfsUsedSpace();
        remaining = namesystem.getCapacityRemaining();
        percentUsed = namesystem.getPercentUsed();
        percentRemaining = namesystem.getPercentRemaining();
        bpUsed = namesystem.getBlockPoolUsedSpace();
        percentBpUsed = namesystem.getPercentBlockPoolUsed();
        LOG.info("Data node directory " + cluster.getDataDirectory());
        LOG.info("Name node diskCapacity " + diskCapacity + " configCapacity " + configCapacity + " reserved " + reserved + " used " + used + " remaining " + remaining + " nonDFSUsed " + nonDFSUsed + " remaining " + remaining + " percentUsed " + percentUsed + " percentRemaining " + percentRemaining + " bpUsed " + bpUsed + " percentBpUsed " + percentBpUsed);
        // Ensure new total capacity reported excludes the reserved space
        assertTrue(configCapacity == diskCapacity - reserved);
        // The configured capacity should cover used + remaining + non-DFS used;
        // the ~5% reserved by the ext filesystem is not considered here.
        assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
        // Ensure percent used is calculated from used and the present capacity
        assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
        // Ensure block pool percent used is calculated from bpUsed and the present capacity
        assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
        // Ensure percent remaining is calculated from remaining and the present capacity
        assertTrue(percentRemaining == ((float) remaining * 100.0f) / (float) configCapacity);
        // Additional test case for non-DFS used, where space reserved for
        // replicas being written must also be considered.
        final int fileCount = 5;
        final DistributedFileSystem fs = cluster.getFileSystem();
        // create streams and hsync to force datastreamers to start
        DFSOutputStream[] streams = new DFSOutputStream[fileCount];
        for (int i = 0; i < fileCount; i++) {
            streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i)).getWrappedStream();
            streams[i].write("1".getBytes());
            streams[i].hsync();
        }
        triggerHeartbeats(cluster.getDataNodes());
        assertTrue(configCapacity > (namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace()));
        // Non-DFS usage may have grown slightly in the meantime (e.g. test
        // logs), so allow a small margin of other file usage within this gap.
        assertTrue((namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace() + fileCount * fs.getDefaultBlockSize()) - configCapacity < 1 * 1024);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FsDatasetTestUtils(org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) ArrayList(java.util.ArrayList) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DFSOutputStream(org.apache.hadoop.hdfs.DFSOutputStream) Test(org.junit.Test)
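
The percentage assertions above delegate to DFSUtilClient.getPercentUsed() and getPercentRemaining(). A minimal sketch of the arithmetic being checked; the zero-capacity conventions are assumptions about DFSUtilClient's behavior rather than quotes from it:

static float percentUsed(long used, long capacity) {
    // Assumed convention: a node with no capacity reports 100% used.
    return capacity <= 0 ? 100.0f : (used * 100.0f) / capacity;
}

static float percentRemaining(long remaining, long capacity) {
    // Assumed convention: a node with no capacity reports 0% remaining.
    return capacity <= 0 ? 0.0f : (remaining * 100.0f) / capacity;
}

The final percentage assertion in the test spells out the same formula inline: percentRemaining == remaining * 100.0f / configCapacity.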

Example 30 with DatanodeManager

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.

the class FSNamesystem method getDatanodeStorageReport.

DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type) throws AccessControlException, StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    readLock();
    try {
        checkOperation(OperationCategory.UNCHECKED);
        final DatanodeManager dm = getBlockManager().getDatanodeManager();
        final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type);
        DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()];
        for (int i = 0; i < reports.length; i++) {
            final DatanodeDescriptor d = datanodes.get(i);
            reports[i] = new DatanodeStorageReport(new DatanodeInfoBuilder().setFrom(d).build(), d.getStorageReports());
        }
        return reports;
    } finally {
        readUnlock("getDatanodeStorageReport");
    }
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeManager(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager) DatanodeInfoBuilder(org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder) DatanodeStorageReport(org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport)
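
getDatanodeStorageReport() above is the server-side implementation behind the datanode storage report RPC. A minimal client-side sketch, assuming DFSClient#getDatanodeStorageReport and the StorageReport accessors used below are available in this Hadoop version, of how the reports built above might be consumed:

static void printStorageReports(DistributedFileSystem fs) throws IOException {
    DatanodeStorageReport[] reports =
        fs.getClient().getDatanodeStorageReport(HdfsConstants.DatanodeReportType.LIVE);
    for (DatanodeStorageReport r : reports) {
        System.out.println(r.getDatanodeInfo().getHostName());
        for (StorageReport sr : r.getStorageReports()) {
            // Capacity, DFS used and remaining are reported per storage volume.
            System.out.println("  " + sr.getStorage().getStorageID()
                + " capacity=" + sr.getCapacity()
                + " dfsUsed=" + sr.getDfsUsed()
                + " remaining=" + sr.getRemaining());
        }
    }
}

As the implementation shows (checkSuperuserPrivilege), the call requires superuser privilege on the NameNode.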

Aggregations

DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager)39 Test (org.junit.Test)30 Path (org.apache.hadoop.fs.Path)21 DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor)21 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)12 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)9 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)9 File (java.io.File)8 ArrayList (java.util.ArrayList)8 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)8 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)7 BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager)7 Configuration (org.apache.hadoop.conf.Configuration)6 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)6 IOException (java.io.IOException)5 FileNotFoundException (java.io.FileNotFoundException)4 TimeoutException (java.util.concurrent.TimeoutException)4 ChecksumException (org.apache.hadoop.fs.ChecksumException)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 UnresolvedLinkException (org.apache.hadoop.fs.UnresolvedLinkException)4