Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.
The class TestFsck, method testFsckReplicaDetails:
@Test(timeout = 90000)
public void testFsckReplicaDetails() throws Exception {
  final short replFactor = 1;
  short numDn = 1;
  final long blockSize = 512;
  final long fileSize = 1024;
  String[] racks = { "/rack1" };
  String[] hosts = { "host1" };
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn)
      .hosts(hosts).racks(racks).build();
  cluster.waitClusterUp();
  final DistributedFileSystem dfs = cluster.getFileSystem();
  // create a test file
  final String testFile = "/testfile";
  final Path path = new Path(testFile);
  DFSTestUtil.createFile(dfs, path, fileSize, replFactor, 1000L);
  DFSTestUtil.waitReplication(dfs, path, replFactor);
  // make sure the datanode holding the replica is healthy before decommission
  String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
  assertTrue(fsckOut.contains("(LIVE)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // decommission the datanode holding the replica
  final DatanodeManager dnm = cluster.getNameNode().getNamesystem()
      .getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDesc0 =
      dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
  dnm.getDecomManager().startDecommission(dnDesc0);
  final String dn0Name = dnDesc0.getXferAddr();
  // check the replica status while decommissioning
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // start a 2nd DataNode so the replica can be copied off the
  // decommissioning node
  cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack2" },
      new String[] { "host2" }, null, false);
  // wait for the decommission to complete
  final AtomicBoolean checkDecommissionInProgress = new AtomicBoolean(false);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {

    @Override
    public Boolean get() {
      DatanodeInfo datanodeInfo = null;
      try {
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dn0Name.equals(info.getXferAddr())) {
            datanodeInfo = info;
          }
        }
        // remember that the node was seen in the DECOMMISSIONING state
        if (!checkDecommissionInProgress.get() && datanodeInfo != null
            && datanodeInfo.isDecommissionInProgress()) {
          checkDecommissionInProgress.set(true);
        }
        // stop waiting once the node reports itself as DECOMMISSIONED
        return datanodeInfo != null && datanodeInfo.isDecommissioned();
      } catch (Exception e) {
        LOG.warn("Unexpected exception: " + e);
        return false;
      }
    }
  }, 500, 30000);
  // check the replica status after decommission is done
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // put the 2nd datanode into maintenance
  DatanodeDescriptor dnDesc1 =
      dnm.getDatanode(cluster.getDataNodes().get(1).getDatanodeId());
  final String dn1Name = dnDesc1.getXferAddr();
  dnm.getDecomManager().startMaintenance(dnDesc1, Long.MAX_VALUE);
  // check the replica status while the node is entering maintenance
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // entering-maintenance replicas are printed only when -maintenance is given
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
      "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // start a 3rd DataNode so the maintenance replica has a live replacement
  cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" },
      new String[] { "host3" }, null, false);
  // wait for the 2nd node to reach the IN MAINTENANCE state
  GenericTestUtils.waitFor(new Supplier<Boolean>() {

    @Override
    public Boolean get() {
      DatanodeInfo dnInfo = null;
      try {
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dn1Name.equals(info.getXferAddr())) {
            dnInfo = info;
          }
        }
        return dnInfo != null && dnInfo.isInMaintenance();
      } catch (Exception e) {
        LOG.warn("Unexpected exception: " + e);
        return false;
      }
    }
  }, 500, 30000);
  // check the replica status after the node has entered maintenance
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
  // in-maintenance replicas are not printed when -maintenance is not given
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
      "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
}
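
On branches built with Java 8 or later, the anonymous Supplier used above can be collapsed into a lambda, since GenericTestUtils.waitFor only needs a single get() method. A minimal sketch of the first wait, assuming the dfs, dn0Name, checkDecommissionInProgress, and LOG variables from the test are in scope:

GenericTestUtils.waitFor(() -> {
  try {
    // find the decommissioning node by transfer address and check its state
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dn0Name.equals(info.getXferAddr())) {
        if (info.isDecommissionInProgress()) {
          checkDecommissionInProgress.set(true);
        }
        return info.isDecommissioned();
      }
    }
  } catch (Exception e) {
    LOG.warn("Unexpected exception: " + e);
  }
  return false;
}, 500, 30000);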
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.
The class TestReconstructStripedBlocks, method doTestMissingStripedBlock:
/**
 * Start GROUP_SIZE + 1 datanodes.
 * Inject striped blocks into the first GROUP_SIZE datanodes.
 * Then make numOfBusy datanodes busy and remove numOfMissed datanodes.
 * Then trigger the BlockManager to compute reconstruction work, so all
 * reconstruction work is scheduled on the last datanode.
 * Finally, verify the reconstruction work of the last datanode.
 */
private void doTestMissingStripedBlock(int numOfMissed, int numOfBusy)
    throws Exception {
  Configuration conf = new HdfsConfiguration();
  initConf(conf);
  conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
      StripedFileTestUtil.getDefaultECPolicy().getName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 1)
      .build();
  try {
    cluster.waitActive();
    final int numBlocks = 4;
    DFSTestUtil.createStripedFile(cluster, filePath, dirPath, numBlocks, 1,
        true);
    // all blocks are located on the first GROUP_SIZE DNs; the last DN is
    // left empty by the util function createStripedFile
    // make sure the file is complete in the NN
    final INodeFile fileNode = cluster.getNamesystem().getFSDirectory()
        .getINode4Write(filePath.toString()).asFile();
    assertFalse(fileNode.isUnderConstruction());
    assertTrue(fileNode.isStriped());
    BlockInfo[] blocks = fileNode.getBlocks();
    assertEquals(numBlocks, blocks.length);
    for (BlockInfo blk : blocks) {
      assertTrue(blk.isStriped());
      assertTrue(blk.isComplete());
      assertEquals(cellSize * dataBlocks, blk.getNumBytes());
      final BlockInfoStriped sb = (BlockInfoStriped) blk;
      assertEquals(groupSize, sb.numNodes());
    }
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    BlockInfo firstBlock = fileNode.getBlocks()[0];
    DatanodeStorageInfo[] storageInfos = bm.getStorages(firstBlock);
    // make numOfBusy nodes busy
    int i = 0;
    for (; i < numOfBusy; i++) {
      DatanodeDescriptor busyNode = storageInfos[i].getDatanodeDescriptor();
      for (int j = 0; j < maxReplicationStreams + 1; j++) {
        BlockManagerTestUtil.addBlockToBeReplicated(busyNode, new Block(j),
            new DatanodeStorageInfo[] { storageInfos[0] });
      }
    }
    // make numOfMissed internal blocks missing
    for (; i < numOfBusy + numOfMissed; i++) {
      DatanodeDescriptor missedNode = storageInfos[i].getDatanodeDescriptor();
      assertEquals(numBlocks, missedNode.numBlocks());
      bm.getDatanodeManager().removeDatanode(missedNode);
    }
    BlockManagerTestUtil.getComputedDatanodeWork(bm);
    // all the reconstruction work will be scheduled on the last DN
    DataNode lastDn = cluster.getDataNodes().get(groupSize);
    DatanodeDescriptor last =
        bm.getDatanodeManager().getDatanode(lastDn.getDatanodeId());
    assertEquals("Counting the number of outstanding EC tasks", numBlocks,
        last.getNumberOfBlocksToBeErasureCoded());
    List<BlockECReconstructionInfo> reconstruction =
        last.getErasureCodeCommand(numBlocks);
    for (BlockECReconstructionInfo info : reconstruction) {
      assertEquals(1, info.getTargetDnInfos().length);
      assertEquals(last, info.getTargetDnInfos()[0]);
      assertEquals(info.getSourceDnInfos().length,
          info.getLiveBlockIndices().length);
      if (groupSize - numOfMissed == dataBlocks) {
        // It's a QUEUE_HIGHEST_PRIORITY block, so the busy DNs will be
        // chosen to make sure we have NUM_DATA_BLOCKS DNs to do the
        // reconstruction work.
        assertEquals(dataBlocks, info.getSourceDnInfos().length);
      } else {
        // The block is not at the highest priority, so the busy DNs are
        // not used as sources
        assertEquals(groupSize - numOfMissed - numOfBusy,
            info.getSourceDnInfos().length);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
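
A parameterized helper like this is typically driven by small @Test wrappers that pin down the interesting (numOfMissed, numOfBusy) combinations. A hedged sketch of such callers; the method names and argument values here are illustrative, not necessarily the exact ones in TestReconstructStripedBlocks:

@Test
public void testMissingStripedBlock() throws Exception {
  // hypothetical caller: one missing internal block, no busy datanodes
  doTestMissingStripedBlock(1, 0);
}

@Test
public void testMissingStripedBlockWithBusyNode() throws Exception {
  // hypothetical caller: one missing internal block while another
  // datanode is saturated with pending replication work
  doTestMissingStripedBlock(1, 1);
}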
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.
The class TestNetworkTopology, method testSortByDistance:
@Test
public void testSortByDistance() throws Exception {
  DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3];
  // array contains both the local node & a local rack node
  testNodes[0] = dataNodes[1];
  testNodes[1] = dataNodes[2];
  testNodes[2] = dataNodes[0];
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[0]);
  assertTrue(testNodes[1] == dataNodes[1]);
  assertTrue(testNodes[2] == dataNodes[2]);
  // array contains the local node, a local rack node & decommissioned nodes
  DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
  dtestNodes[0] = dataNodes[8];
  dtestNodes[1] = dataNodes[12];
  dtestNodes[2] = dataNodes[11];
  dtestNodes[3] = dataNodes[9];
  dtestNodes[4] = dataNodes[10];
  cluster.setRandomSeed(0xDEADBEEF);
  // only the first (length - 2) entries are treated as active for sorting
  cluster.sortByDistance(dataNodes[8], dtestNodes, dtestNodes.length - 2);
  assertTrue(dtestNodes[0] == dataNodes[8]);
  assertTrue(dtestNodes[1] == dataNodes[11]);
  assertTrue(dtestNodes[2] == dataNodes[12]);
  assertTrue(dtestNodes[3] == dataNodes[9]);
  assertTrue(dtestNodes[4] == dataNodes[10]);
  // array contains the local node
  testNodes[0] = dataNodes[1];
  testNodes[1] = dataNodes[3];
  testNodes[2] = dataNodes[0];
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[0]);
  assertTrue(testNodes[1] == dataNodes[1]);
  assertTrue(testNodes[2] == dataNodes[3]);
  // array contains a local rack node
  testNodes[0] = dataNodes[5];
  testNodes[1] = dataNodes[3];
  testNodes[2] = dataNodes[1];
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[1]);
  assertTrue(testNodes[1] == dataNodes[3]);
  assertTrue(testNodes[2] == dataNodes[5]);
  // array contains a local rack node which happens to be in position 0
  testNodes[0] = dataNodes[1];
  testNodes[1] = dataNodes[5];
  testNodes[2] = dataNodes[3];
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[1]);
  assertTrue(testNodes[1] == dataNodes[3]);
  assertTrue(testNodes[2] == dataNodes[5]);
  // same as the previous case, but with a different random seed; the order
  // is still deterministic because no two nodes are at the same distance
  testNodes[0] = dataNodes[1];
  testNodes[1] = dataNodes[5];
  testNodes[2] = dataNodes[3];
  cluster.setRandomSeed(0xDEAD);
  cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[1]);
  assertTrue(testNodes[1] == dataNodes[3]);
  assertTrue(testNodes[2] == dataNodes[5]);
  // array of just rack-local nodes: expect a random first node
  DatanodeDescriptor first = null;
  boolean foundRandom = false;
  for (int i = 5; i <= 7; i++) {
    testNodes[0] = dataNodes[5];
    testNodes[1] = dataNodes[6];
    testNodes[2] = dataNodes[7];
    cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length);
    if (first == null) {
      first = testNodes[0];
    } else if (first != testNodes[0]) {
      foundRandom = true;
      break;
    }
  }
  assertTrue("Expected to find a different first location", foundRandom);
  // array of just remote nodes: expect a random first node
  first = null;
  // reset the flag so this loop verifies randomization independently
  foundRandom = false;
  for (int i = 1; i <= 4; i++) {
    testNodes[0] = dataNodes[13];
    testNodes[1] = dataNodes[14];
    testNodes[2] = dataNodes[15];
    cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length);
    if (first == null) {
      first = testNodes[0];
    } else if (first != testNodes[0]) {
      foundRandom = true;
      break;
    }
  }
  assertTrue("Expected to find a different first location", foundRandom);
  // reader is not a datanode, but is in one of the datanodes' racks
  testNodes[0] = dataNodes[0];
  testNodes[1] = dataNodes[5];
  testNodes[2] = dataNodes[8];
  Node rackClient = new NodeBase("/d3/r1/25.25.25");
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(rackClient, testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[8]);
  assertTrue(testNodes[1] == dataNodes[5]);
  assertTrue(testNodes[2] == dataNodes[0]);
  // reader is not a datanode, but is in one of the datanodes' data centers
  testNodes[0] = dataNodes[8];
  testNodes[1] = dataNodes[5];
  testNodes[2] = dataNodes[0];
  Node dcClient = new NodeBase("/d1/r2/25.25.25");
  cluster.setRandomSeed(0xDEADBEEF);
  cluster.sortByDistance(dcClient, testNodes, testNodes.length);
  assertTrue(testNodes[0] == dataNodes[0]);
  assertTrue(testNodes[1] == dataNodes[5]);
  assertTrue(testNodes[2] == dataNodes[8]);
}
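
sortByDistance orders nodes by NetworkTopology.getDistance(reader, node): 0 for the node itself, 2 for a node on the same rack, 4 for a node in the same data center on another rack, and 6 for a node in another data center in a three-level /dc/rack/host tree; nodes at equal distance are shuffled pseudo-randomly, which is why the test pins the seed. A minimal sketch of the metric, using illustrative addresses and locations rather than the fixture in this test class:

NetworkTopology topo = new NetworkTopology();
DatanodeDescriptor a = DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/d1/r1");
DatanodeDescriptor b = DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/d1/r1");
DatanodeDescriptor c = DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/d1/r2");
DatanodeDescriptor d = DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/d2/r3");
topo.add(a);
topo.add(b);
topo.add(c);
topo.add(d);
assertEquals(0, topo.getDistance(a, a)); // same node
assertEquals(2, topo.getDistance(a, b)); // same rack
assertEquals(4, topo.getDistance(a, c)); // same data center, different rack
assertEquals(6, topo.getDistance(a, d)); // different data centers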
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor in project hadoop by apache.
The class TestNetworkTopology, method testContains:
@Test
public void testContains() throws Exception {
  DatanodeDescriptor nodeNotInMap =
      DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r4");
  // every registered datanode is reported as contained in the topology
  for (DatanodeDescriptor node : dataNodes) {
    assertTrue(cluster.contains(node));
  }
  // a descriptor that was never added is not
  assertFalse(cluster.contains(nodeNotInMap));
}
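
Both TestNetworkTopology tests rely on fixture fields built in the class setup: cluster, a NetworkTopology, and dataNodes, an array of DatanodeDescriptors already registered in it. A hedged sketch of that setup; the real class registers many more nodes across several racks and data centers, and the method name, paths, and addresses below are illustrative:

private NetworkTopology cluster;
private DatanodeDescriptor[] dataNodes;

@Before
public void setupDatanodes() {
  cluster = new NetworkTopology();
  dataNodes = new DatanodeDescriptor[] {
      DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/d1/r1"),
      DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/d1/r1"),
      DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/d1/r2")
  };
  // register every descriptor so cluster.contains(...) can find it
  for (DatanodeDescriptor dn : dataNodes) {
    cluster.add(dn);
  }
}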