Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
The class TestMaintenanceState, method testWithNNAndDNRestart.
/**
 * Verify the following scenario:
 * a. Put a live node into maintenance => 1 maintenance, 2 live.
 * b. The maintenance node becomes dead => the block map still has 1
 *    maintenance replica and 2 live replicas.
 * c. Restart the NN => the block map only has 2 live replicas, so the NN
 *    re-replicates to restore 3 live.
 * d. Restart the maintenance DN => 1 maintenance, 3 live.
 * e. Take the node out of maintenance => over-replication => 3 live.
 */
@Test(timeout = 360000)
public void testWithNNAndDNRestart() throws Exception {
  LOG.info("Starting testWithNNAndDNRestart");
  final int numNamenodes = 1;
  final int numDatanodes = 4;
  startCluster(numNamenodes, numDatanodes);

  final Path file = new Path("/testWithNNAndDNRestart.dat");
  final int replicas = 3;
  final FileSystem fileSys = getCluster().getFileSystem(0);
  FSNamesystem ns = getCluster().getNamesystem(0);
  writeFile(fileSys, file, replicas, 1);

  DatanodeInfo nodeOutofService = takeNodeOutofService(0,
      getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
      AdminStates.IN_MAINTENANCE);
  assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
      nodeOutofService));

  DFSClient client = getDfsClient(0);
  assertEquals("All datanodes must be alive", numDatanodes,
      client.datanodeReport(DatanodeReportType.LIVE).length);

  MiniDFSCluster.DataNodeProperties dnProp =
      getCluster().stopDataNode(nodeOutofService.getXferAddr());
  DFSTestUtil.waitForDatanodeState(getCluster(),
      nodeOutofService.getDatanodeUuid(), false, 20000);
  assertEquals("maintenance node shouldn't be alive", numDatanodes - 1,
      client.datanodeReport(DatanodeReportType.LIVE).length);

  // The dead maintenance node's blocks should remain in the block map.
  assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
      nodeOutofService));

  // Restart the NN; it will restore 3 live replicas, since it doesn't
  // know the maintenance node still has the replica.
  getCluster().restartNameNode(0);
  ns = getCluster().getNamesystem(0);
  assertNull(checkWithRetry(ns, fileSys, file, replicas, null));

  // Restart the DN; the NN now sees 1 maintenance replica and 3 live replicas.
  getCluster().restartDataNode(dnProp, true);
  getCluster().waitActive();
  assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService));

  // Put the node back in service; a redundant replica should be removed.
  putNodeInService(0, nodeOutofService.getDatanodeUuid());
  assertNull(checkWithRetry(ns, fileSys, file, replicas, null));

  cleanupFile(fileSys, file);
}
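The helper getFirstBlockFirstReplicaUuid is called here but not shown on this page. A minimal sketch of what it plausibly does, built on the same raw HdfsDataInputStream API that checkFile (below) uses; the actual helper in TestMaintenanceState may differ in details:

// Hypothetical sketch: return the UUID of the first replica of the file's
// first block, by opening a raw HDFS stream and reading block locations.
static String getFirstBlockFirstReplicaUuid(FileSystem fileSys, Path name)
    throws IOException {
  HdfsDataInputStream in = (HdfsDataInputStream) fileSys.open(name);
  try {
    for (LocatedBlock blk : in.getAllBlocks()) {
      DatanodeInfo[] nodes = blk.getLocations();
      if (nodes.length > 0) {
        return nodes[0].getDatanodeUuid();
      }
    }
    return null; // no located replicas yet
  } finally {
    in.close();
  }
}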
Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
The class TestMaintenanceState, method testFileCloseAfterEnteringMaintenance.
@Test(timeout = 120000)
public void testFileCloseAfterEnteringMaintenance() throws Exception {
  LOG.info("Starting testFileCloseAfterEnteringMaintenance");
  int expirationInMs = 30 * 1000;
  int numDataNodes = 3;
  int numNameNodes = 1;
  getConf().setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, 2);
  startCluster(numNameNodes, numDataNodes);
  getCluster().waitActive();

  FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
  List<String> hosts = new ArrayList<>();
  for (DataNode dn : getCluster().getDataNodes()) {
    hosts.add(dn.getDisplayName());
    putNodeInService(0, dn.getDatanodeUuid());
  }
  assertEquals(numDataNodes, fsn.getNumLiveDataNodes());

  Path openFile = new Path("/testClosingFileInMaintenance.dat");
  // Write two blocks of data to openFile.
  writeFile(getCluster().getFileSystem(), openFile, (short) 3);

  // Write some more data and keep the file open.
  FSDataOutputStream fsDataOutputStream =
      getCluster().getFileSystem().append(openFile);
  byte[] bytes = new byte[1024];
  fsDataOutputStream.write(bytes);
  fsDataOutputStream.hsync();

  LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(
      getCluster().getNameNode(0), openFile.toString(), 0, 3 * blockSize);
  DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();

  // Request maintenance for the two datanodes that hold the last block.
  takeNodeOutofService(0,
      Lists.newArrayList(dnInfos4LastBlock[0].getDatanodeUuid(),
          dnInfos4LastBlock[1].getDatanodeUuid()),
      Time.now() + expirationInMs, null, null,
      AdminStates.ENTERING_MAINTENANCE);

  // Closing the file should succeed even while the last block's nodes
  // are entering maintenance.
  fsDataOutputStream.close();
  cleanupFile(getCluster().getFileSystem(), openFile);
}
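Because the nodes are taken out of service with an expiration of Time.now() + expirationInMs, the NameNode will move them back to NORMAL on its own once the window lapses. A hypothetical follow-up check (not part of the original test) could wait for that transition by polling the client's datanode report with org.apache.hadoop.test.GenericTestUtils:

// Hypothetical addition: poll the live-node report until no datanode is
// still entering or in maintenance, i.e. the expiration has kicked in.
DFSClient client = getDfsClient(0);
GenericTestUtils.waitFor(() -> {
  try {
    for (DatanodeInfo dn : client.datanodeReport(DatanodeReportType.LIVE)) {
      if (dn.isEnteringMaintenance() || dn.isInMaintenance()) {
        return false;
      }
    }
    return true;
  } catch (IOException e) {
    return false; // transient report failure; keep polling
  }
}, 100, expirationInMs + 30000);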
Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
The class TestMaintenanceState, method testExpectedReplication.
private void testExpectedReplication(int replicationFactor,
    int expectedReplicasInRead) throws IOException {
  setup();
  startCluster(1, 5);
  final Path file = new Path("/testExpectedReplication.dat");
  final FileSystem fileSys = getCluster().getFileSystem(0);
  final FSNamesystem ns = getCluster().getNamesystem(0);
  writeFile(fileSys, file, replicationFactor, 1);

  DatanodeInfo nodeOutofService = takeNodeOutofService(0,
      getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
      AdminStates.IN_MAINTENANCE);

  // The block should be replicated to another datanode to meet the
  // expected replication count.
  assertNull(checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
      nodeOutofService));

  cleanupFile(fileSys, file);
  teardown();
}
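Note that this is a private parameterized helper, not a test itself. A hypothetical @Test wrapper showing how it might be driven (the actual test methods and parameter values in TestMaintenanceState may differ):

@Test(timeout = 360000)
public void testExpectedReplicationWithMaintenance() throws IOException {
  // Hypothetical invocation: with replication factor 3 and one replica in
  // maintenance, reads should still see 3 replicas once the NameNode
  // re-replicates to another of the 5 datanodes.
  testExpectedReplication(3, 3);
}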
Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
The class TestMaintenanceState, method checkFile.
/*
 * Verify that the number of replicas is as expected for each block in
 * the given file.
 *
 * @return null if no failure is found, else an error message string.
 */
static String checkFile(FSNamesystem ns, FileSystem fileSys, Path name,
    int repl, DatanodeInfo expectedExcludedNode,
    DatanodeInfo expectedMaintenanceNode) throws IOException {
  // Need a raw stream.
  assertTrue("Not HDFS:" + fileSys.getUri(),
      fileSys instanceof DistributedFileSystem);
  HdfsDataInputStream dis = (HdfsDataInputStream) fileSys.open(name);
  BlockManager bm = ns.getBlockManager();
  Collection<LocatedBlock> dinfo = dis.getAllBlocks();
  String output;
  for (LocatedBlock blk : dinfo) { // for each block
    DatanodeInfo[] nodes = blk.getLocations();
    for (int j = 0; j < nodes.length; j++) { // for each replica
      if (expectedExcludedNode != null
          && nodes[j].equals(expectedExcludedNode)) {
        // The excluded node must not appear in the LocatedBlock.
        output = "For block " + blk.getBlock() + " replica on " + nodes[j]
            + " found in LocatedBlock.";
        LOG.info(output);
        return output;
      } else if (nodes[j].isInMaintenance()) {
        // An IN_MAINTENANCE node must not appear in the LocatedBlock.
        output = "For block " + blk.getBlock() + " replica on " + nodes[j]
            + " which is in maintenance state.";
        LOG.info(output);
        return output;
      }
    }
    if (repl != nodes.length) {
      output = "Wrong number of replicas for block " + blk.getBlock()
          + ": expected " + repl + ", got " + nodes.length + ", ";
      for (int j = 0; j < nodes.length; j++) { // for each replica
        output += nodes[j] + ", ";
      }
      output += "pending block # " + ns.getPendingReplicationBlocks() + ", ";
      output += "under replicated # " + ns.getUnderReplicatedBlocks() + ", ";
      if (expectedExcludedNode != null) {
        output += "excluded node " + expectedExcludedNode;
      }
      LOG.info(output);
      return output;
    }
    // Verify the block has the expected maintenance node.
    Iterator<DatanodeStorageInfo> storageInfoIter =
        bm.getStorages(blk.getBlock().getLocalBlock()).iterator();
    List<DatanodeInfo> maintenanceNodes = new ArrayList<>();
    while (storageInfoIter.hasNext()) {
      DatanodeInfo node = storageInfoIter.next().getDatanodeDescriptor();
      if (node.isMaintenance()) {
        maintenanceNodes.add(node);
      }
    }
    if (expectedMaintenanceNode != null) {
      if (!maintenanceNodes.contains(expectedMaintenanceNode)) {
        output = "No maintenance replica on " + expectedMaintenanceNode;
        LOG.info(output);
        return output;
      }
    } else if (!maintenanceNodes.isEmpty()) {
      output = "Has maintenance replica(s)";
      LOG.info(output);
      return output;
    }
  }
  return null;
}
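checkFile is the single-shot check; the tests above go through checkWithRetry, which is not shown on this page. A minimal sketch of such a wrapper, assuming a simple poll-and-sleep loop and that the in-maintenance node is expected both to be excluded from LocatedBlock and to hold a maintenance replica (the real helper may use different retry counts, intervals, or signatures):

// Hypothetical sketch of the retry wrapper: re-run checkFile until it
// reports success (null) or the retry budget is exhausted.
static String checkWithRetry(FSNamesystem ns, FileSystem fileSys, Path name,
    int repl, DatanodeInfo inMaintenanceNode) {
  String output = null;
  for (int i = 0; i < 200; i++) {
    try {
      output = checkFile(ns, fileSys, name, repl, inMaintenanceNode,
          inMaintenanceNode);
      if (output == null) {
        return null;
      }
      Thread.sleep(100); // give the NameNode time to converge
    } catch (Exception e) {
      // retry on transient failures
    }
  }
  return output;
}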
Use of org.apache.hadoop.hdfs.protocol.DatanodeInfo in project hadoop by apache.
The class TestMaintenanceState, method testTransitionToDecommission.
/**
* Transition from IN_MAINTENANCE to DECOMMISSIONED.
*/
@Test(timeout = 360000)
public void testTransitionToDecommission() throws IOException {
  LOG.info("Starting testTransitionToDecommission");
  final int numNamenodes = 1;
  final int numDatanodes = 4;
  startCluster(numNamenodes, numDatanodes);

  final Path file = new Path("testTransitionToDecommission.dat");
  final int replicas = 3;
  FileSystem fileSys = getCluster().getFileSystem(0);
  FSNamesystem ns = getCluster().getNamesystem(0);
  writeFile(fileSys, file, replicas, 1);

  DatanodeInfo nodeOutofService = takeNodeOutofService(0,
      getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
      AdminStates.IN_MAINTENANCE);
  DFSClient client = getDfsClient(0);
  assertEquals("All datanodes must be alive", numDatanodes,
      client.datanodeReport(DatanodeReportType.LIVE).length);

  // Test 1: verify that the replica in IN_MAINTENANCE state isn't in the
  // LocatedBlock.
  assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
      nodeOutofService));

  takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), 0, null,
      AdminStates.DECOMMISSIONED);

  // Test 2: after decommission completes, the replication count is
  // replicas + 1, which includes the decommissioned node.
  assertNull(checkWithRetry(ns, fileSys, file, replicas + 1, null));

  // Test 3: put the node back in service; the replication count should be
  // restored.
  putNodeInService(0, nodeOutofService.getDatanodeUuid());
  assertNull(checkWithRetry(ns, fileSys, file, replicas, null));

  cleanupFile(fileSys, file);
}
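A hypothetical extra assertion (not in the original test) could confirm the admin state directly after the decommission step: the node still appears in the LIVE report, and DatanodeInfo exposes isDecommissioned() for exactly this check.

// Hypothetical addition: verify the node is reported LIVE but DECOMMISSIONED.
for (DatanodeInfo dn : client.datanodeReport(DatanodeReportType.LIVE)) {
  if (dn.getDatanodeUuid().equals(nodeOutofService.getDatanodeUuid())) {
    assertTrue("Node should be decommissioned", dn.isDecommissioned());
  }
}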