Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
From the class TestDataNodeVolumeFailureToleration, method testConfigureMinValidVolumes.
/**
 * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
 * option, i.e. the DN shuts itself down when the number of volume
 * failures it experiences exceeds the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeNotWindows();
  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up.
  conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0);
  conf.setTimeDuration(DFSConfigKeys.DFS_DATANODE_DISK_CHECK_MIN_GAP_KEY, 0,
      TimeUnit.MILLISECONDS);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final DatanodeManager dm = cluster.getNamesystem().getBlockManager()
      .getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
  // Fail a volume on the 2nd DN
  File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
  DataNodeTestUtils.injectDataDirFailure(dn2Vol1);
  // Should only get two replicas (the first DN and the 3rd)
  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short) 2);
  // Check that this single failure caused a DN to die.
  DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 0,
      origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);
  // If we restore the volume we should still only be able to get
  // two replicas since the DN is still considered dead.
  DataNodeTestUtils.restoreDataDirFromFailure(dn2Vol1);
  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short) 2);
}
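Every snippet on this page reaches the DatanodeManager the same way: through the namesystem's BlockManager. A minimal, self-contained sketch of that lookup against a MiniDFSCluster (assuming the hadoop-hdfs test artifact is on the classpath; the class name here is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;

public class DatanodeManagerLookup {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1).build();
    try {
      cluster.waitActive();
      // The DatanodeManager lives inside the BlockManager; there is no
      // direct accessor on the cluster or the NameNode.
      DatanodeManager dm = cluster.getNamesystem().getBlockManager()
          .getDatanodeManager();
      System.out.println("Live datanodes: " + dm.getNumLiveDataNodes());
    } finally {
      cluster.shutdown();
    }
  }
}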
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
From the class TestDecommissioningStatus, method testDecommissionStatus.
/**
* Tests Decommissioning Status in DFS.
*/
@Test
public void testDecommissionStatus() throws Exception {
  InetSocketAddress addr = new InetSocketAddress("localhost",
      cluster.getNameNodePort());
  DFSClient client = new DFSClient(addr, conf);
  DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
  assertEquals("Number of Datanodes", 2, info.length);
  DistributedFileSystem fileSys = cluster.getFileSystem();
  DFSAdmin admin = new DFSAdmin(cluster.getConfiguration(0));
  short replicas = numDatanodes;
  //
  // Decommission one node. Verify the decommission status
  //
  Path file1 = new Path("decommission.dat");
  DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize,
      replicas, seed);
  Path file2 = new Path("decommission1.dat");
  FSDataOutputStream st1 = AdminStatesBaseTest.writeIncompleteFile(fileSys,
      file2, replicas, (short) (fileSize / blockSize));
  for (DataNode d : cluster.getDataNodes()) {
    DataNodeTestUtils.triggerBlockReport(d);
  }
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  for (int iteration = 0; iteration < numDatanodes; iteration++) {
    String downnode = decommissionNode(client, iteration);
    dm.refreshNodes(conf);
    decommissionedNodes.add(downnode);
    BlockManagerTestUtil.recheckDecommissionState(dm);
    final List<DatanodeDescriptor> decommissioningNodes =
        dm.getDecommissioningNodes();
    if (iteration == 0) {
      assertEquals(1, decommissioningNodes.size());
      DatanodeDescriptor decommNode = decommissioningNodes.get(0);
      checkDecommissionStatus(decommNode, 3, 0, 1);
      checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 1),
          fileSys, admin);
    } else {
      assertEquals(2, decommissioningNodes.size());
      DatanodeDescriptor decommNode1 = decommissioningNodes.get(0);
      DatanodeDescriptor decommNode2 = decommissioningNodes.get(1);
      // This one is still 3,3,1 since it passed over the under-construction
      // block earlier, before node 2 was decommissioned.
      checkDecommissionStatus(decommNode1, 3, 3, 1);
      // This one is 4,4,2 since it has the full state.
      checkDecommissionStatus(decommNode2, 4, 4, 2);
      checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 2),
          fileSys, admin);
    }
  }
  // Call refreshNodes on FSNamesystem with an empty exclude file.
  // This removes the datanodes from the decommissioning list and
  // makes them available again.
  hostsFileWriter.initExcludeHost("");
  dm.refreshNodes(conf);
  st1.close();
  AdminStatesBaseTest.cleanupFile(fileSys, file1);
  AdminStatesBaseTest.cleanupFile(fileSys, file2);
}
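The hostsFileWriter used above is a HostsFileWriter field that the test class initializes during setup. A hedged sketch of how such a helper is typically wired up before the cluster starts (the directory and datanode address are illustrative, not part of the test above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.util.HostsFileWriter;

// Point dfs.hosts.exclude (and dfs.hosts) at files the test can rewrite,
// then start the MiniDFSCluster with this conf.
Configuration conf = new Configuration();
HostsFileWriter hostsFileWriter = new HostsFileWriter();
hostsFileWriter.initialize(conf, "work-dir/decommission");  // illustrative dir
// Decommission: write the DN into the exclude file, then have the NN
// re-read the include/exclude files.
hostsFileWriter.initExcludeHost("dnHostName:port");  // illustrative address
// ... dm.refreshNodes(conf); ...
// Re-admit: clear the exclude file and refresh again, as the test does above.
hostsFileWriter.initExcludeHost("");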
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
From the class TestDefaultBlockPlacementPolicy, method testPlacementWithLocalRackNodesDecommissioned.
/**
 * Verify that decommissioned nodes are not selected for block placement.
*/
@Test
public void testPlacementWithLocalRackNodesDecommissioned() throws Exception {
  String clientMachine = "client.foo.com";
  // Map the client to RACK3.
  String clientRack = "/RACK3";
  StaticMapping.addNodeToRack(clientMachine, clientRack);
  final DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnd3 = dnm.getDatanode(
      cluster.getDataNodes().get(3).getDatanodeId());
  assertEquals(clientRack, dnd3.getNetworkLocation());
  dnm.getDecomManager().startDecommission(dnd3);
  try {
    testPlacement(clientMachine, clientRack, false);
  } finally {
    dnm.getDecomManager().stopDecommission(dnd3);
  }
}
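StaticMapping.addNodeToRack only takes effect if StaticMapping is installed as the cluster's topology resolver before the MiniDFSCluster starts; this test class does that in its setup. A sketch of that wiring, assuming the standard configuration key:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.hadoop.net.StaticMapping;

// Install StaticMapping as the DNS-to-switch resolver, then register
// host-to-rack entries that the NameNode will consult.
Configuration conf = new Configuration();
conf.setClass(
    CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
    StaticMapping.class, DNSToSwitchMapping.class);
StaticMapping.addNodeToRack("client.foo.com", "/RACK3");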
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
From the class TestFsck, method testFsckReplicaDetails.
@Test(timeout = 90000)
public void testFsckReplicaDetails() throws Exception {
  final short replFactor = 1;
  short numDn = 1;
  final long blockSize = 512;
  final long fileSize = 1024;
  String[] racks = { "/rack1" };
  String[] hosts = { "host1" };
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
  DistributedFileSystem dfs;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts)
      .racks(racks).build();
  cluster.waitClusterUp();
  dfs = cluster.getFileSystem();
  // Create a test file.
  final String testFile = "/testfile";
  final Path path = new Path(testFile);
  DFSTestUtil.createFile(dfs, path, fileSize, replFactor, 1000L);
  DFSTestUtil.waitReplication(dfs, path, replFactor);
  // Make sure the datanode that holds the replica is healthy before
  // decommissioning it.
  String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
  assertTrue(fsckOut.contains("(LIVE)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // Decommission the datanode.
  FSNamesystem fsn = cluster.getNameNode().getNamesystem();
  BlockManager bm = fsn.getBlockManager();
  final DatanodeManager dnm = bm.getDatanodeManager();
  DatanodeDescriptor dnDesc0 = dnm.getDatanode(
      cluster.getDataNodes().get(0).getDatanodeId());
  bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
  final String dn0Name = dnDesc0.getXferAddr();
  // Check the replica status while decommissioning.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // Start a 2nd DataNode so the replica can move off the
  // decommissioning node.
  cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack2" },
      new String[] { "host2" }, null, false);
  // Wait for decommission of the 1st DataNode to complete.
  final AtomicBoolean checkDecommissionInProgress = new AtomicBoolean(false);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      DatanodeInfo datanodeInfo = null;
      try {
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dn0Name.equals(info.getXferAddr())) {
            datanodeInfo = info;
          }
        }
        if (!checkDecommissionInProgress.get() && datanodeInfo != null
            && datanodeInfo.isDecommissionInProgress()) {
          checkDecommissionInProgress.set(true);
        }
        if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
          return true;
        }
      } catch (Exception e) {
        LOG.warn("Unexpected exception: " + e);
        return false;
      }
      return false;
    }
  }, 500, 30000);
  // Check the replica status after decommission is done.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  DatanodeDescriptor dnDesc1 = dnm.getDatanode(
      cluster.getDataNodes().get(1).getDatanodeId());
  final String dn1Name = dnDesc1.getXferAddr();
  bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1,
      Long.MAX_VALUE);
  // Check the replica status while the 2nd DataNode is entering maintenance.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // ENTERING MAINTENANCE replicas are printed only when -maintenance
  // is requested.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
      "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
  // Start a 3rd DataNode so the replica can move off the maintenance node.
  cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" },
      new String[] { "host3" }, null, false);
  // Wait for the 2nd DataNode to reach the IN MAINTENANCE state.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      DatanodeInfo dnInfo = null;
      try {
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dn1Name.equals(info.getXferAddr())) {
            dnInfo = info;
          }
        }
        if (dnInfo != null && dnInfo.isInMaintenance()) {
          return true;
        }
      } catch (Exception e) {
        LOG.warn("Unexpected exception: " + e);
        return false;
      }
      return false;
    }
  }, 500, 30000);
  // Check the replica status after the 2nd DataNode is in maintenance.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-maintenance",
      "-blocks", "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
  // IN MAINTENANCE replicas are not printed when -maintenance is
  // not requested.
  fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
      "-replicaDetails");
  assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
  assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
  assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
}
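GenericTestUtils.waitFor takes a Supplier<Boolean>, which is a functional interface in both its Guava and java.util.function incarnations, so on Java 8+ the anonymous classes above can collapse to lambdas. A behavior-preserving sketch of the second wait loop (dn1Name, dfs and LOG are the variables from the surrounding test):

GenericTestUtils.waitFor(() -> {
  try {
    // Return true once the 2nd DataNode reports IN MAINTENANCE.
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dn1Name.equals(info.getXferAddr()) && info.isInMaintenance()) {
        return true;
      }
    }
  } catch (Exception e) {
    LOG.warn("Unexpected exception: " + e);
  }
  return false;
}, 500, 30000);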
Use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
From the class TestWebHdfsDataLocality, method testDataLocality.
@Test
public void testDataLocality() throws Exception {
  final Configuration conf = WebHdfsTestUtil.createConf();
  final String[] racks = { RACK0, RACK0, RACK1, RACK1, RACK2, RACK2 };
  final int nDataNodes = racks.length;
  LOG.info("nDataNodes=" + nDataNodes + ", racks=" + Arrays.asList(racks));
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(nDataNodes).racks(racks).build();
  try {
    cluster.waitActive();
    final DistributedFileSystem dfs = cluster.getFileSystem();
    final NameNode namenode = cluster.getNameNode();
    final DatanodeManager dm = namenode.getNamesystem().getBlockManager()
        .getDatanodeManager();
    LOG.info("dm=" + dm);
    final long blocksize = DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
    final String f = "/foo";
    {
      // test CREATE
      for (int i = 0; i < nDataNodes; i++) {
        // set client address to a particular datanode
        final DataNode dn = cluster.getDataNodes().get(i);
        final String ipAddr = dm.getDatanode(dn.getDatanodeId()).getIpAddr();
        // The chosen datanode must be the same as the client address.
        final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
            namenode, f, PutOpParam.Op.CREATE, -1L, blocksize, null,
            LOCALHOST);
        Assert.assertEquals(ipAddr, chosen.getIpAddr());
      }
    }
    // create a file with one replica
    final Path p = new Path(f);
    final FSDataOutputStream out = dfs.create(p, (short) 1);
    out.write(1);
    out.close();
    // get the replica location
    final LocatedBlocks locatedblocks =
        NameNodeAdapter.getBlockLocations(namenode, f, 0, 1);
    final List<LocatedBlock> lb = locatedblocks.getLocatedBlocks();
    Assert.assertEquals(1, lb.size());
    final DatanodeInfo[] locations = lb.get(0).getLocations();
    Assert.assertEquals(1, locations.length);
    final DatanodeInfo expected = locations[0];
    // For GETFILECHECKSUM, OPEN and APPEND,
    // the chosen datanode must be the same as the replica location.
    {
      // test GETFILECHECKSUM
      final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
          namenode, f, GetOpParam.Op.GETFILECHECKSUM, -1L, blocksize, null,
          LOCALHOST);
      Assert.assertEquals(expected, chosen);
    }
    {
      // test OPEN
      final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
          namenode, f, GetOpParam.Op.OPEN, 0, blocksize, null, LOCALHOST);
      Assert.assertEquals(expected, chosen);
    }
    {
      // test APPEND
      final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
          namenode, f, PostOpParam.Op.APPEND, -1L, blocksize, null, LOCALHOST);
      Assert.assertEquals(expected, chosen);
    }
  } finally {
    cluster.shutdown();
  }
}
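The null threaded through every chooseDatanode call above is the excludeDatanodes argument. A hedged sketch of exercising it, reusing namenode, f, blocksize, expected and LOCALHOST from the test; it assumes the parameter takes a comma-separated list of datanode transfer addresses and that excluding the sole replica holder makes the OPEN case fail with an IOException, as the companion testExcludeDataNodes case in this class suggests (add import java.io.IOException):

try {
  // Exclude the only node holding the replica; OPEN then has no candidate.
  NamenodeWebHdfsMethods.chooseDatanode(namenode, f, GetOpParam.Op.OPEN, 0,
      blocksize, expected.getXferAddr(), LOCALHOST);
  Assert.fail("Expected chooseDatanode to fail when every replica holder"
      + " is excluded");
} catch (IOException e) {
  // Expected: no active node contains the block.
}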