use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestDecommissioningStatus method testDecommissionStatusAfterDNRestart.
/**
* Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
* as dead before decommission has completed. That will allow DN to resume
* the replication process after it rejoins the cluster.
*/
@Test(timeout = 120000)
public void testDecommissionStatusAfterDNRestart() throws Exception {
DistributedFileSystem fileSys = (DistributedFileSystem) cluster.getFileSystem();
// Create a file with one block. That block has one replica.
Path f = new Path("decommission.dat");
DFSTestUtil.createFile(fileSys, f, fileSize, fileSize, fileSize, (short) 1, seed);
// Find the DN that owns the only replica.
RemoteIterator<LocatedFileStatus> fileList = fileSys.listLocatedStatus(f);
BlockLocation[] blockLocations = fileList.next().getBlockLocations();
String dnName = blockLocations[0].getNames()[0];
// Decommission the DN.
FSNamesystem fsn = cluster.getNamesystem();
final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
decommissionNode(dnName);
dm.refreshNodes(conf);
// Stop the DN when decommission is in progress.
// Given DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is to 1 and the size of
// the block, it will take much longer time that test timeout value for
// the decommission to complete. So when stopDataNode is called,
// decommission should be in progress.
DataNodeProperties dataNodeProperties = cluster.stopDataNode(dnName);
final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
while (true) {
dm.fetchDatanodes(null, dead, false);
if (dead.size() == 1) {
break;
}
Thread.sleep(1000);
}
// Force removal of the dead node's blocks.
BlockManagerTestUtil.checkHeartbeat(fsn.getBlockManager());
// Force DatanodeManager to check decommission state.
BlockManagerTestUtil.recheckDecommissionState(dm);
// Verify that the DN remains in DECOMMISSION_INPROGRESS state.
assertTrue("the node should be DECOMMISSION_IN_PROGRESSS", dead.get(0).isDecommissionInProgress());
// Check DatanodeManager#getDecommissionNodes, make sure it returns
// the node as decommissioning, even if it's dead
List<DatanodeDescriptor> decomlist = dm.getDecommissioningNodes();
assertTrue("The node should be be decommissioning", decomlist.size() == 1);
// Delete the under-replicated file, which should let the
// DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
AdminStatesBaseTest.cleanupFile(fileSys, f);
BlockManagerTestUtil.recheckDecommissionState(dm);
assertTrue("the node should be decommissioned", dead.get(0).isDecommissioned());
// Add the node back
cluster.restartDataNode(dataNodeProperties, true);
cluster.waitActive();
// Call refreshNodes on FSNamesystem with empty exclude file.
// This will remove the datanodes from decommissioning list and
// make them available again.
hostsFileWriter.initExcludeHost("");
dm.refreshNodes(conf);
}
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestDecommissioningStatus method testDecommissionDeadDN.
/**
* Verify the support for decommissioning a datanode that is already dead.
* Under this scenario the datanode should immediately be marked as
* DECOMMISSIONED
*/
@Test(timeout = 120000)
public void testDecommissionDeadDN() throws Exception {
Logger log = Logger.getLogger(DecommissionManager.class);
log.setLevel(Level.DEBUG);
DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
String dnName = dnID.getXferAddr();
DataNodeProperties stoppedDN = cluster.stopDataNode(0);
DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000);
FSNamesystem fsn = cluster.getNamesystem();
final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
decommissionNode(dnName);
dm.refreshNodes(conf);
BlockManagerTestUtil.recheckDecommissionState(dm);
assertTrue(dnDescriptor.isDecommissioned());
// Add the node back
cluster.restartDataNode(stoppedDN, true);
cluster.waitActive();
// Call refreshNodes on FSNamesystem with empty exclude file to remove the
// datanode from decommissioning list and make it available again.
hostsFileWriter.initExcludeHost("");
dm.refreshNodes(conf);
}
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestDecommissionWithStriped method testDecommissionWithURBlockForSameBlockGroup.
@Test(timeout = 120000)
public void testDecommissionWithURBlockForSameBlockGroup() throws Exception {
LOG.info("Starting test testDecommissionWithURBlocksForSameBlockGroup");
final Path ecFile = new Path(ecDir, "testDecommissionWithCorruptBlocks");
int writeBytes = cellSize * dataBlocks * 2;
writeStripedFile(dfs, ecFile, writeBytes);
Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>();
LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0).get(0);
DatanodeInfo[] dnLocs = lb.getLocations();
assertEquals(dataBlocks + parityBlocks, dnLocs.length);
int decommNodeIndex = dataBlocks - 1;
int stopNodeIndex = 1;
// add the nodes which will be decommissioning
decommisionNodes.add(dnLocs[decommNodeIndex]);
// stop excess dns to avoid immediate reconstruction.
DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
List<DataNodeProperties> stoppedDns = new ArrayList<>();
for (DatanodeInfo liveDn : info) {
boolean usedNode = false;
for (DatanodeInfo datanodeInfo : dnLocs) {
if (liveDn.getXferAddr().equals(datanodeInfo.getXferAddr())) {
usedNode = true;
break;
}
}
if (!usedNode) {
DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
stoppedDns.add(cluster.stopDataNode(liveDn.getXferAddr()));
cluster.setDataNodeDead(dn.getDatanodeId());
LOG.info("stop datanode " + dn.getDatanodeId().getHostName());
}
}
DataNode dn = cluster.getDataNode(dnLocs[stopNodeIndex].getIpcPort());
cluster.stopDataNode(dnLocs[stopNodeIndex].getXferAddr());
cluster.setDataNodeDead(dn.getDatanodeId());
numDNs = numDNs - 1;
// Decommission node in a new thread. Verify that node is decommissioned.
final CountDownLatch decomStarted = new CountDownLatch(0);
Thread decomTh = new Thread() {
public void run() {
try {
decomStarted.countDown();
decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED);
} catch (Exception e) {
LOG.error("Exception while decommissioning", e);
Assert.fail("Shouldn't throw exception!");
}
}
;
};
int deadDecomissioned = fsn.getNumDecomDeadDataNodes();
int liveDecomissioned = fsn.getNumDecomLiveDataNodes();
decomTh.start();
decomStarted.await(5, TimeUnit.SECONDS);
// grace period to trigger decommissioning call
Thread.sleep(3000);
// start datanode so that decommissioning live node will be finished
for (DataNodeProperties dnp : stoppedDns) {
cluster.restartDataNode(dnp);
LOG.info("Restarts stopped datanode:{} to trigger block reconstruction", dnp.datanode);
}
cluster.waitActive();
LOG.info("Waiting to finish decommissioning node:{}", decommisionNodes);
// waiting 20secs to finish decommission
decomTh.join(20000);
LOG.info("Finished decommissioning node:{}", decommisionNodes);
assertEquals(deadDecomissioned, fsn.getNumDecomDeadDataNodes());
assertEquals(liveDecomissioned + decommisionNodes.size(), fsn.getNumDecomLiveDataNodes());
// Ensure decommissioned datanode is not automatically shutdown
DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
assertEquals("All datanodes must be alive", numDNs, client.datanodeReport(DatanodeReportType.LIVE).length);
assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs));
StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, null, blockGroupSize);
cleanupFile(dfs, ecFile);
}
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestDataTransferKeepalive method testSlowReader.
/**
* Test for the case where the client beings to read a long block, but doesn't
* read bytes off the stream quickly. The datanode should time out sending the
* chunks and the transceiver should die, even if it has a long keepalive.
*/
@Test(timeout = 300000)
public void testSlowReader() throws Exception {
// Set a client socket cache expiry time much longer than
// the datanode-side expiration time.
final long CLIENT_EXPIRY_MS = 600000L;
Configuration clientConf = new Configuration(conf);
clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(cluster.getURI(), clientConf);
// Restart the DN with a shorter write timeout.
DataNodeProperties props = cluster.stopDataNode(0);
props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY, WRITE_TIMEOUT);
props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 120000);
assertTrue(cluster.restartDataNode(props, true));
dn = cluster.getDataNodes().get(0);
// Wait for heartbeats to avoid a startup race where we
// try to write the block while the DN is still starting.
cluster.triggerHeartbeats();
DFSTestUtil.createFile(fs, TEST_FILE, 1024 * 1024 * 8L, (short) 1, 0L);
FSDataInputStream stm = fs.open(TEST_FILE);
stm.read();
assertXceiverCount(1);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
// the xceiver to exit.
return getXceiverCountWithoutServer() == 0;
}
}, 500, 50000);
IOUtils.closeStream(stm);
}
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestDataNodeMultipleRegistrations method testDNWithInvalidStorageWithHA.
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
MiniDFSNNTopology top = new MiniDFSNNTopology().addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1")).addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));
top.setFederation(true);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top).numDataNodes(0).build();
try {
cluster.startDataNodes(conf, 1, true, null, null);
// let the initialization be complete
Thread.sleep(10000);
DataNode dn = cluster.getDataNodes().get(0);
assertTrue("Datanode should be running", dn.isDatanodeUp());
assertEquals("BPOfferService should be running", 1, dn.getAllBpOs().size());
DataNodeProperties dnProp = cluster.stopDataNode(0);
cluster.getNameNode(0).stop();
cluster.getNameNode(1).stop();
Configuration nn1 = cluster.getConfiguration(0);
Configuration nn2 = cluster.getConfiguration(1);
// setting up invalid cluster
StartupOption.FORMAT.setClusterId("cluster-2");
DFSTestUtil.formatNameNode(nn1);
MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1), FSNamesystem.getNamespaceDirs(nn2), nn2);
cluster.restartNameNode(0, false);
cluster.restartNameNode(1, false);
cluster.restartDataNode(dnProp);
// let the initialization be complete
Thread.sleep(10000);
dn = cluster.getDataNodes().get(0);
assertFalse("Datanode should have shutdown as only service failed", dn.isDatanodeUp());
} finally {
cluster.shutdown();
}
}
Aggregations