use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestDataNodeVolumeFailureReporting method testVolFailureStatsPreservedOnNNRestart.
/**
* Test that the NN re-learns volume failures after a restart.
*/
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
// Bring up two more datanodes that can tolerate 1 failure
cluster.startDataNodes(conf, 2, true, null, null);
cluster.waitActive();
final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
// Fail the first volume on two of the datanodes (the third datanode is
// kept healthy so at least one node in the write pipeline stays up).
File dn1Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);
Path file1 = new Path("/test1");
DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L);
DFSTestUtil.waitReplication(fs, file1, (short) 2);
ArrayList<DataNode> dns = cluster.getDataNodes();
// The NN should report two volume failures
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
// After restarting, the NN should still see the two failures
cluster.restartNameNode(0);
cluster.waitActive();
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
checkAggregateFailuresAtNameNode(true, 2);
checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}
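The assertions above go through the NN-side DatanodeDescriptor, which tracks the failed-volume count each datanode reports in its heartbeats. A minimal sketch of that lookup, assuming the same MiniDFSCluster fixture and JUnit assertions as the test (the helper name assertVolumeFailures is hypothetical; imports as in the surrounding test class):
private static void assertVolumeFailures(MiniDFSCluster cluster, DataNode dn, int expectedFailures) throws Exception {
  // Resolve the NN-side descriptor for this datanode via the DatanodeManager.
  DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
  DatanodeDescriptor dd = dm.getDatanode(dn.getDatanodeId());
  // getVolumeFailures() reflects the failed-volume count from the last heartbeat.
  assertEquals(expectedFailures, dd.getVolumeFailures());
}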
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestReconstructStripedBlocks method getNumberOfBlocksToBeErasureCoded.
private static int getNumberOfBlocksToBeErasureCoded(MiniDFSCluster cluster) throws Exception {
DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
int count = 0;
for (DataNode dn : cluster.getDataNodes()) {
DatanodeDescriptor dd = dm.getDatanode(dn.getDatanodeId());
count += dd.getNumberOfBlocksToBeErasureCoded();
}
return count;
}
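The same traversal works for the other per-datanode work counters the NN maintains. A sketch summing the pending-replication counter instead (the helper name is hypothetical; imports as in the surrounding test class):
private static int getNumberOfBlocksToBeReplicated(MiniDFSCluster cluster) throws Exception {
  DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
  int count = 0;
  for (DataNode dn : cluster.getDataNodes()) {
    // Look up the NN-side descriptor and read its pending-replication counter.
    DatanodeDescriptor dd = dm.getDatanode(dn.getDatanodeId());
    count += dd.getNumberOfBlocksToBeReplicated();
  }
  return count;
}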
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestNamenodeCapacityReport method testXceiverCountInternal.
public void testXceiverCountInternal(int minMaintenanceR) throws Exception {
Configuration conf = new HdfsConfiguration();
// retry once if close fails
conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, minMaintenanceR);
MiniDFSCluster cluster = null;
final int nodes = 8;
final int fileCount = 5;
final short fileRepl = 3;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(nodes).build();
cluster.waitActive();
final FSNamesystem namesystem = cluster.getNamesystem();
final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
List<DataNode> datanodes = cluster.getDataNodes();
final DistributedFileSystem fs = cluster.getFileSystem();
// trigger heartbeats in case not already sent
triggerHeartbeats(datanodes);
// check that all nodes are live and in service
// xceiver server adds 1 to load
int expectedTotalLoad = nodes;
int expectedInServiceNodes = nodes;
int expectedInServiceLoad = nodes;
checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
// Ensure counts are accurate.
for (int i = 0; i < nodes / 2; i++) {
DataNode dn = datanodes.get(i);
DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
dn.shutdown();
DFSTestUtil.setDatanodeDead(dnd);
BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
// Admin operations on dead nodes won't impact nodesInService metrics.
startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
expectedInServiceNodes--;
assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
stopDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
}
// restart the nodes to verify that counts are correct after
// node re-registration
cluster.restartDataNodes();
cluster.waitActive();
datanodes = cluster.getDataNodes();
expectedInServiceNodes = nodes;
assertEquals(nodes, datanodes.size());
checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
// create streams and hsync to force datastreamers to start
DFSOutputStream[] streams = new DFSOutputStream[fileCount];
for (int i = 0; i < fileCount; i++) {
streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i), fileRepl).getWrappedStream();
streams[i].write("1".getBytes());
streams[i].hsync();
// the load for writers is 2 because both the write xceiver & packet
// responder threads are counted in the load
expectedTotalLoad += 2 * fileRepl;
expectedInServiceLoad += 2 * fileRepl;
}
// force nodes to send load update
triggerHeartbeats(datanodes);
checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
// take a few nodes out of service, subtract their load from the
// expected load, and trigger heartbeats to force a load update.
for (int i = 0; i < fileRepl; i++) {
expectedInServiceNodes--;
DatanodeDescriptor dnd = dnm.getDatanode(datanodes.get(i).getDatanodeId());
expectedInServiceLoad -= dnd.getXceiverCount();
startDecommissionOrMaintenance(dnm, dnd, (i % 2 == 0));
DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
Thread.sleep(100);
checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
}
// close each stream and verify the loads; recalculate the expected
// load based on whether the nodes in the pipeline are under admin operations
for (int i = 0; i < fileCount; i++) {
int adminOps = 0;
for (DatanodeInfo dni : streams[i].getPipeline()) {
DatanodeDescriptor dnd = dnm.getDatanode(dni);
expectedTotalLoad -= 2;
if (!dnd.isInService()) {
adminOps++;
} else {
expectedInServiceLoad -= 2;
}
}
try {
streams[i].close();
} catch (IOException ioe) {
// close is currently allowed to fail when enough of the pipeline
// is under admin operations; otherwise rethrow
if (adminOps < fileRepl) {
throw ioe;
}
}
triggerHeartbeats(datanodes);
// verify node count and loads
checkClusterHealth(nodes, namesystem, expectedTotalLoad, expectedInServiceNodes, expectedInServiceLoad);
}
// shutdown each node, verify node counts based on admin state
for (int i = 0; i < nodes; i++) {
DataNode dn = datanodes.get(i);
dn.shutdown();
// force it to appear dead so live count decreases
DatanodeDescriptor dnDesc = dnm.getDatanode(dn.getDatanodeId());
DFSTestUtil.setDatanodeDead(dnDesc);
BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
assertEquals(nodes - 1 - i, namesystem.getNumLiveDataNodes());
// first few nodes are already out of service
if (i >= fileRepl) {
expectedInServiceNodes--;
}
assertEquals(expectedInServiceNodes, getNumDNInService(namesystem));
// live nodes always report a load of 1; when no live nodes remain the average is 0
double expectedXceiverAvg = (i == nodes - 1) ? 0.0 : 1.0;
assertEquals((double) expectedXceiverAvg, getInServiceXceiverAverage(namesystem), EPSILON);
}
// final sanity check
checkClusterHealth(0, namesystem, 0.0, 0, 0.0);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
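The load bookkeeping this test verifies can be recomputed directly from the DatanodeManager: every live node contributes its xceiver count, and the in-service average divides by the nodes that are neither decommissioned nor in maintenance. A hedged sketch of that calculation (hypothetical helper; fetchDatanodes is the same API used in testVolumeSize below):
private static double inServiceXceiverAverage(DatanodeManager dm) {
  List<DatanodeDescriptor> live = new ArrayList<>();
  // Collect live nodes; passing null skips the dead list, and false keeps
  // decommissioning nodes in the result.
  dm.fetchDatanodes(live, null, false);
  int load = 0;
  int inService = 0;
  for (DatanodeDescriptor dd : live) {
    if (dd.isInService()) {
      // 1 for the xceiver server, plus 2 per active writer (write xceiver + packet responder).
      load += dd.getXceiverCount();
      inService++;
    }
  }
  return inService == 0 ? 0.0 : (double) load / inService;
}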
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestNamenodeCapacityReport method testVolumeSize.
/**
* Verifies that the capacity, DFS used, non-DFS used, and remaining space
* reported by the datanode and aggregated by the namenode are consistent,
* and that the configured reserved space is excluded from the reported
* capacity.
*/
@Test
public void testVolumeSize() throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = null;
// Set aside a fixed amount of space on each volume as reserved
long reserved = 10000;
conf.setLong(DFSConfigKeys.DFS_DATANODE_DU_RESERVED_KEY, reserved);
try {
cluster = new MiniDFSCluster.Builder(conf).build();
cluster.waitActive();
final FSNamesystem namesystem = cluster.getNamesystem();
final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
// Ensure the data reported for each data node is right
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
dm.fetchDatanodes(live, dead, false);
assertTrue(live.size() == 1);
long used, remaining, configCapacity, nonDFSUsed, bpUsed;
float percentUsed, percentRemaining, percentBpUsed;
for (final DatanodeDescriptor datanode : live) {
used = datanode.getDfsUsed();
remaining = datanode.getRemaining();
nonDFSUsed = datanode.getNonDfsUsed();
configCapacity = datanode.getCapacity();
percentUsed = datanode.getDfsUsedPercent();
percentRemaining = datanode.getRemainingPercent();
bpUsed = datanode.getBlockPoolUsed();
percentBpUsed = datanode.getBlockPoolUsedPercent();
LOG.info("Datanode configCapacity " + configCapacity + " used " + used + " non DFS used " + nonDFSUsed + " remaining " + remaining + " perentUsed " + percentUsed + " percentRemaining " + percentRemaining);
// The ext filesystem reserves about 5% of space, which is not
// considered here.
assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
assertTrue(percentRemaining == DFSUtilClient.getPercentRemaining(remaining, configCapacity));
assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
}
//
// Currently two data directories are created by the data node
// in the MiniDFSCluster. Each data directory reports a capacity equal
// to the disk capacity of the partition it lives on, so the capacity
// reported by the data node is twice the disk capacity.
//
// So multiply the disk capacity and reserved space by two
// to account for this.
//
final FsDatasetTestUtils utils = cluster.getFsDatasetTestUtils(0);
int numOfDataDirs = utils.getDefaultNumOfDataDirs();
long diskCapacity = numOfDataDirs * utils.getRawCapacity();
reserved *= numOfDataDirs;
configCapacity = namesystem.getCapacityTotal();
used = namesystem.getCapacityUsed();
nonDFSUsed = namesystem.getNonDfsUsedSpace();
remaining = namesystem.getCapacityRemaining();
percentUsed = namesystem.getPercentUsed();
percentRemaining = namesystem.getPercentRemaining();
bpUsed = namesystem.getBlockPoolUsedSpace();
percentBpUsed = namesystem.getPercentBlockPoolUsed();
LOG.info("Data node directory " + cluster.getDataDirectory());
LOG.info("Name node diskCapacity " + diskCapacity + " configCapacity " + configCapacity + " reserved " + reserved + " used " + used + " remaining " + remaining + " nonDFSUsed " + nonDFSUsed + " remaining " + remaining + " percentUsed " + percentUsed + " percentRemaining " + percentRemaining + " bpUsed " + bpUsed + " percentBpUsed " + percentBpUsed);
// Ensure new total capacity reported excludes the reserved space
assertTrue(configCapacity == diskCapacity - reserved);
// Ensure used + remaining + non-DFS used does not exceed the capacity
// (the ext filesystem reserves about 5% of space, which is not
// considered here).
assertTrue(configCapacity >= (used + remaining + nonDFSUsed));
// Ensure percent used is calculated based on used and present capacity
assertTrue(percentUsed == DFSUtilClient.getPercentUsed(used, configCapacity));
// Ensure percent block pool used is calculated based on bpUsed and present capacity
assertTrue(percentBpUsed == DFSUtilClient.getPercentUsed(bpUsed, configCapacity));
// Ensure percent remaining is calculated based on remaining and present capacity
assertTrue(percentRemaining == ((float) remaining * 100.0f) / (float) configCapacity);
// Also test non-DFS used space, where space reserved for replicas
// being written must be taken into account.
final int fileCount = 5;
final DistributedFileSystem fs = cluster.getFileSystem();
// create streams and hsync to force datastreamers to start
DFSOutputStream[] streams = new DFSOutputStream[fileCount];
for (int i = 0; i < fileCount; i++) {
streams[i] = (DFSOutputStream) fs.create(new Path("/f" + i)).getWrappedStream();
streams[i].write("1".getBytes());
streams[i].hsync();
}
triggerHeartbeats(cluster.getDataNodes());
assertTrue(configCapacity > (namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace()));
// non-DFS usage might have grown slightly due to test logs, so allow up
// to 1MB of other files within this gap
assertTrue((namesystem.getCapacityUsed() + namesystem.getCapacityRemaining() + namesystem.getNonDfsUsedSpace() + fileCount * fs.getDefaultBlockSize()) - configCapacity < 1 * 1024 * 1024);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
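The percentage assertions reduce to simple arithmetic. A sketch of the formulas these checks rely on, mirroring what DFSUtilClient computes (standalone helpers for illustration):
// Percent of capacity used; reports 100% for a non-positive capacity.
static float percentUsed(long used, long capacity) {
  return capacity <= 0 ? 100.0f : (used * 100.0f) / capacity;
}
// Percent of capacity remaining; 0% for a non-positive capacity.
static float percentRemaining(long remaining, long capacity) {
  return capacity <= 0 ? 0.0f : (remaining * 100.0f) / capacity;
}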
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class FSNamesystem method getDatanodeStorageReport.
DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type) throws AccessControlException, StandbyException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
readLock();
try {
checkOperation(OperationCategory.UNCHECKED);
final DatanodeManager dm = getBlockManager().getDatanodeManager();
final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type);
DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()];
for (int i = 0; i < reports.length; i++) {
final DatanodeDescriptor d = datanodes.get(i);
reports[i] = new DatanodeStorageReport(new DatanodeInfoBuilder().setFrom(d).build(), d.getStorageReports());
}
return reports;
} finally {
readUnlock("getDatanodeStorageReport");
}
}
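On the client side this report is reachable through DFSClient (the RPC requires superuser privilege, as the checkSuperuserPrivilege() call above enforces). A sketch of dumping per-volume usage, assuming a running cluster and an fs handle that is a DistributedFileSystem:
DFSClient client = ((DistributedFileSystem) fs).getClient();
for (DatanodeStorageReport report : client.getDatanodeStorageReport(DatanodeReportType.LIVE)) {
  System.out.println(report.getDatanodeInfo().getXferAddr());
  for (StorageReport sr : report.getStorageReports()) {
    // Each StorageReport describes one storage volume on the datanode.
    System.out.println("  " + sr.getStorage().getStorageID()
        + " capacity=" + sr.getCapacity()
        + " dfsUsed=" + sr.getDfsUsed()
        + " remaining=" + sr.getRemaining());
  }
}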