use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestNodeCount method testNodeCount.
@Test(timeout = 60000)
public void testNodeCount() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  // avoid invalidation by startup delay in order to make test non-transient
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY, 60);
  // reduce intervals to make test execution time shorter
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  // start a mini dfs cluster of 2 nodes
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    final FileSystem fs = cluster.getFileSystem();
    // populate the cluster with a one-block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    // keep a copy of all datanode descriptors
    final DatanodeDescriptor[] datanodes = hm.getDatanodes();
    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    // bring down the first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());
    // make sure the NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), datanode.getXferAddr());
    // the block will be re-replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    // check that an excess replica is detected
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
      checkTimeout("excess replicas not detected");
    }
    // find a non-excess node
    DatanodeDescriptor nonExcessDN = null;
    for (DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
      final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
      final BlockInfo info = new BlockInfoContiguous(block.getLocalBlock(), (short) 0);
      if (!bm.isExcess(dn, info)) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);
    // bring down the non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
    // make sure the NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), nonExcessDN.getXferAddr());
    // the block should be re-replicated
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
      checkTimeout("live replica count not correct", 1000);
    }
    // restart the stopped datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    // check that both excess replicas are detected
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
      checkTimeout("excess replica count not equal to 2");
    }
  } finally {
    cluster.shutdown();
  }
}
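Every snippet on this page is built around the same round trip: stopDataNode returns a DataNodeProperties handle that captures the stopped datanode's configuration and storage, and restartDataNode later consumes that handle to bring the same node back. A minimal, self-contained sketch of just that idiom (the class name and the 2-node cluster size are illustrative, not taken from any one test above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class DataNodeRestartSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    try {
      cluster.waitActive();
      // stopDataNode detaches datanode 0 but keeps everything needed to
      // revive it in the returned handle
      DataNodeProperties dnprop = cluster.stopDataNode(0);
      // ... exercise the namenode while the datanode is down ...
      // restartDataNode consumes the handle; waitActive blocks until the
      // cluster reports the node live again
      cluster.restartDataNode(dnprop);
      cluster.waitActive();
    } finally {
      cluster.shutdown();
    }
  }
}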
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestOverReplicatedBlocks method testProcesOverReplicateBlock.
/** Test that processOverReplicatedBlock can handle corrupt replicas.
 * It makes sure corrupt replicas are not treated as valid ones, which
 * would lead the NN to delete valid replicas while keeping the
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    cluster.corruptReplica(0, block);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove the block scanner's cursor file to trigger block scanning
    File scanCursor = new File(
        new File(MiniDFSCluster.getFinalizedDir(cluster.getInstanceStorageDir(0, 0),
            cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait up to one minute for the deletion to succeed
    for (int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() + " in one minute",
          i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
    }
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = InternalDataNodeTestUtils.getDNRegistrationForBP(
        cluster.getDataNodes().get(2), blockPoolId);
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized (hm) {
        // set the live datanodes' remaining space to 0 so they will be
        // chosen for deletion when the block becomes over-replicated
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }
        // decrease the replication factor to 1
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short) 1);
        // the corrupt replica won't be chosen as the excess one;
        // without 4910 the number of live replicas would be 0 and the block lost
        assertEquals(1, bm.countNodes(bm.getStoredBlock(block.getLocalBlock())).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
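The hand-rolled initializeTimeout/checkTimeout loops in TestNodeCount above can also be expressed with GenericTestUtils.waitFor, the polling helper that TestPendingCorruptDnMessages uses further down this page. A sketch under that assumption; the helper name waitForLiveReplicas and the poll intervals are illustrative:

import com.google.common.base.Supplier; // waitFor takes a Guava Supplier in this era of Hadoop
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.test.GenericTestUtils;

class ReplicaWaitSketch {
  // Poll the block manager until the block reaches the expected number of
  // live replicas; waitFor throws TimeoutException if it never does.
  static void waitForLiveReplicas(final MiniDFSCluster cluster,
      final ExtendedBlock block, final int expected) throws Exception {
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        final BlockManager bm = cluster.getNamesystem().getBlockManager();
        return bm.countNodes(bm.getStoredBlock(block.getLocalBlock()))
            .liveReplicas() == expected;
      }
    }, 500, 30000); // check every 500 ms, give up after 30 s
  }
}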
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestProcessCorruptBlocks method testByAddingAnExtraDataNode.
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor of the file. In this test, that
 * condition is achieved by increasing the number of good replicas through
 * replication onto a new datanode.
 * The test strategy:
 *   Bring up a cluster with 3 datanodes.
 *   Create a file with replication factor 3.
 *   Corrupt one replica of a block of the file.
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *   (the corrupt replica should not be removed, since the number of good
 *   replicas (2) is less than the replication factor (3)).
 *   Start a new datanode.
 *   Verify that a new replica is created and the corrupt replica is
 *   removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  // keep the fourth datanode stopped; it is restarted later to host the new replica
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);
    // the corrupt replica is kept while good replicas (2) < replication factor (3)
    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());
    // bring the spare datanode back so a third good replica can be created
    cluster.restartDataNode(dnPropsFourth);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
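countReplicas is a private helper of TestProcessCorruptBlocks that the snippet does not show. A plausible implementation, an assumption sketched from the BlockManager calls used elsewhere on this page, would be:

// Assumed shape of the helper: resolve the stored block, then ask the
// block manager for its replica breakdown (live, corrupt, excess, ...).
private NumberReplicas countReplicas(final FSNamesystem namesystem,
    ExtendedBlock block) {
  final BlockManager bm = namesystem.getBlockManager();
  return bm.countNodes(bm.getStoredBlock(block.getLocalBlock()));
}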
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestPendingCorruptDnMessages method testChangedStorageId.
@Test(timeout = 60000)
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException, TimeoutException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();
  try {
    cluster.transitionToActive(0);
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
    // Change the gen stamp of the block on the datanode to go back in time
    // (gen stamps start at 1000).
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    cluster.changeGenStampOfBlock(0, block, 900);
    // Run the directory scanner to update the datanode's volumeMap.
    DataNodeTestUtils.runDirectoryScanner(cluster.getDataNodes().get(0));
    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));
    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        return cluster.getNamesystem(1).getBlockManager()
            .getPendingDataNodeMessageCount() == 1;
      }
    }, 1000, 30000);
    final String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
    assertNotNull(oldStorageId);
    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));
    // Wait until the DN re-registers with a new storage ID, at which point
    // the pending message keyed on the old storage ID should be purged.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        final String newStorageId = getRegisteredDatanodeUid(cluster, 1);
        return newStorageId != null && !newStorageId.equals(oldStorageId);
      }
    }, 1000, 30000);
    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
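The two HA tests on this page (this one and TestStandbyIsHot below) share the same scaffolding: a two-namenode cluster built from MiniDFSNNTopology.simpleHATopology(), an explicit transitionToActive call, and a client created via HATestUtil.configureFailoverFs so it follows failovers. Reduced to its skeleton (the test body is elided):

MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())  // two NNs, one nameservice
    .numDataNodes(1)
    .build();
cluster.transitionToActive(0);                          // NN 0 serves clients
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
// ... test body ...
cluster.transitionToStandby(0);                         // controlled failover
cluster.transitionToActive(1);                          // NN 1 takes over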
use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in project hadoop by apache.
the class TestStandbyIsHot method testDatanodeRestarts.
/**
* Regression test for HDFS-2795:
* - Start an HA cluster with a DN.
* - Write several blocks to the FS with replication 1.
* - Shutdown the DN
* - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
* - Restart the DN.
* In the bug, the standby node would only very slowly notice the blocks returning
* to the cluster.
*/
@Test(timeout = 60000)
public void testDatanodeRestarts() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  // We read from the standby to watch block locations
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1)
      .build();
  try {
    NameNode nn0 = cluster.getNameNode(0);
    NameNode nn1 = cluster.getNameNode(1);
    cluster.transitionToActive(0);
    // Create 5 blocks.
    DFSTestUtil.createFile(cluster.getFileSystem(0), TEST_FILE_PATH, 5 * 1024, (short) 1, 1L);
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    // Stop the DN.
    DataNode dn = cluster.getDataNodes().get(0);
    String dnName = dn.getDatanodeId().getXferAddr();
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // Make sure both NNs register it as dead.
    BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
    BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
    // The SBN will not have any blocks in its neededReplication queue
    // since the SBN doesn't process replication.
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has no replicas",
        0, locs.get(0).getLocations().length);
    cluster.restartDataNode(dnProps);
    // Wait for both NNs to re-register the DN.
    cluster.waitActive(0);
    cluster.waitActive(1);
    cluster.waitFirstBRCompleted(0, 10000);
    cluster.waitFirstBRCompleted(1, 10000);
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    locs = nn1.getRpcServer().getBlockLocations(TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has replicas again",
        1, locs.get(0).getLocations().length);
  } finally {
    cluster.shutdown();
  }
}
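Finally, note how TestNodeCount and TestStandbyIsHot avoid waiting out the heartbeat-expiry interval after stopping a datanode: BlockManagerTestUtil.noticeDeadDatanode marks the node dead on the namenode immediately, and updateState recomputes the counters the assertions read. The fragment below restates that step in isolation; dn and cluster are assumed to come from the surrounding test, and getNameNode() without an index implies a single-namenode cluster:

String dnName = dn.getDatanodeId().getXferAddr();
DataNodeProperties dnProps = cluster.stopDataNode(0);
// force the NN's view of the node instead of waiting for heartbeat expiry
BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), dnName);
// refresh under-replication counts before asserting on them
BlockManagerTestUtil.updateState(cluster.getNamesystem().getBlockManager());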