Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
From class TestOverReplicatedBlocks, method testChooseReplicaToDelete.
/**
 * The test verifies that the replica chosen for deletion is on the node
 * with the oldest heartbeat, when that heartbeat is older than the
 * tolerable heartbeat interval.
 * It creates a file with several blocks and replication factor 4.
 * The last DN is configured to send heartbeats rarely.
 *
 * The test waits until the tolerable heartbeat interval expires, then reduces
 * the replication factor of the file. All replica deletions should be scheduled
 * on the last node. No replicas will actually be deleted, since the last DN
 * doesn't send heartbeats.
 */
@Test
public void testChooseReplicaToDelete() throws Exception {
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
Configuration conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
fs = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
final BlockManager bm = namesystem.getBlockManager();
// make the extra DataNode heartbeat only once every 300 seconds
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
cluster.startDataNodes(conf, 1, true, null, null, null);
DataNode lastDN = cluster.getDataNodes().get(3);
DatanodeRegistration dnReg = InternalDataNodeTestUtils.getDNRegistrationForBP(lastDN, namesystem.getBlockPoolId());
String lastDNid = dnReg.getDatanodeUuid();
final Path fileName = new Path("/foo2");
DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short) 4, 0L);
DFSTestUtil.waitReplication(fs, fileName, (short) 4);
// Wait for tolerable number of heartbeats plus one
DatanodeDescriptor nodeInfo = null;
long lastHeartbeat = 0;
long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000 * (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
do {
nodeInfo = bm.getDatanodeManager().getDatanode(dnReg);
lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
} while (monotonicNow() - lastHeartbeat < waitTime);
fs.setReplication(fileName, (short) 3);
BlockLocation[] locs = fs.getFileBlockLocations(fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
// All replicas for deletion should be scheduled on lastDN.
// And should not actually be deleted, because lastDN does not heartbeat.
namesystem.readLock();
final int dnBlocks = bm.getExcessSize4Testing(dnReg.getDatanodeUuid());
assertEquals("Replicas on node " + lastDNid + " should have been deleted", SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks);
namesystem.readUnlock();
for (BlockLocation location : locs) {
assertEquals("Block should still have 4 replicas", 4, location.getNames().length);
}
} finally {
if (fs != null)
fs.close();
if (cluster != null)
cluster.shutdown();
}
}
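The wait loop above spins until the slow DataNode's last heartbeat is older than the tolerable interval. A minimal stand-alone sketch of that threshold arithmetic is shown below, assuming the default Hadoop values (a 3-second heartbeat interval and a tolerance multiplier of 4); the class and variable names are illustrative only, not part of the test.

import org.apache.hadoop.hdfs.DFSConfigKeys;

public class HeartbeatStalenessSketch {
  public static void main(String[] args) {
    // dfs.heartbeat.interval defaults to 3 seconds
    long intervalMs = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000;
    // the tolerance multiplier defaults to 4; the test waits one extra interval on top
    long waitMs = intervalMs
        * (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
    // With the defaults this prints 15000: only after roughly 15 seconds of silence
    // should the slow node be preferred when a replica is chosen for deletion.
    System.out.println("Expected wait before the heartbeat counts as stale: "
        + waitMs + " ms");
  }
}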
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
From class TestOverReplicatedBlocks, method testProcesOverReplicateBlock.
/** Tests that processOverReplicatedBlock can handle corrupt replicas correctly.
 * It makes sure that corrupt replicas are not treated as valid ones,
 * which would otherwise let the NN delete valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
Configuration conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
conf.set(DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
FileSystem fs = cluster.getFileSystem();
try {
final Path fileName = new Path("/foo1");
DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
DFSTestUtil.waitReplication(fs, fileName, (short) 3);
// corrupt the block on datanode 0
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
cluster.corruptReplica(0, block);
DataNodeProperties dnProps = cluster.stopDataNode(0);
// remove block scanner log to trigger block scanning
File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(cluster.getInstanceStorageDir(0, 0), cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(), "scanner.cursor");
// wait up to one minute for the deletion to succeed
for (int i = 0; !scanCursor.delete(); i++) {
assertTrue("Could not delete " + scanCursor.getAbsolutePath() + " in one minute", i < 60);
try {
Thread.sleep(1000);
} catch (InterruptedException ignored) {
}
}
// restart the datanode so the corrupt replica will be detected
cluster.restartDataNode(dnProps);
DFSTestUtil.waitReplication(fs, fileName, (short) 2);
String blockPoolId = cluster.getNamesystem().getBlockPoolId();
final DatanodeID corruptDataNode = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), blockPoolId);
final FSNamesystem namesystem = cluster.getNamesystem();
final BlockManager bm = namesystem.getBlockManager();
final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
try {
namesystem.writeLock();
synchronized (hm) {
// set the remaining space of the datanodes holding valid replicas to 0
// so that they would be preferred for deletion when over-replication occurs
String corruptMachineName = corruptDataNode.getXferAddr();
for (DatanodeDescriptor datanode : hm.getDatanodes()) {
if (!corruptMachineName.equals(datanode.getXferAddr())) {
datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
datanode.updateHeartbeat(BlockManagerTestUtil.getStorageReportsForDatanode(datanode), 0L, 0L, 0, 0, null);
}
}
// decrease the replication factor to 1;
NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short) 1);
// the corrupt replica won't be chosen as the excess one
// without 4910 the number of live replicas would be 0: block gets lost
assertEquals(1, bm.countNodes(bm.getStoredBlock(block.getLocalBlock())).liveReplicas());
}
} finally {
namesystem.writeUnlock();
}
} finally {
cluster.shutdown();
}
}
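The loop inside the write lock above biases the over-replication logic by reporting zero remaining space on every DataNode except the one holding the corrupt replica. A hedged helper sketch extracting just that step is shown below; it reuses only the calls already present in the test, but the class and method names (ExcessReplicaTestHelpers, drainOtherDatanodes) are illustrative, not Hadoop API.

import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.HeartbeatManager;

class ExcessReplicaTestHelpers {
  // The caller is expected to hold the FSNamesystem write lock and to
  // synchronize on hm, exactly as the test above does.
  static void drainOtherDatanodes(HeartbeatManager hm, String keepXferAddr) {
    for (DatanodeDescriptor datanode : hm.getDatanodes()) {
      if (!keepXferAddr.equals(datanode.getXferAddr())) {
        // capacity=100, dfsUsed=100, remaining=0, blockPoolUsed=100
        datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
        datanode.updateHeartbeat(
            BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
            0L, 0L, 0, 0, null);
      }
    }
  }
}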
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
From class TestDatanodeManager, method testNumVersionsReportedCorrect.
/**
* This test sends a random sequence of node registrations and node removals
* to the DatanodeManager (of nodes with different IDs and versions), and
* checks that the DatanodeManager keeps a correct count of different software
* versions at all times.
*/
@Test
public void testNumVersionsReportedCorrect() throws IOException {
//Create the DatanodeManager which will be tested
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
DatanodeManager dm = mockDatanodeManager(fsn, new Configuration());
//Seed the RNG with a known value so test failures are easier to reproduce
Random rng = new Random();
int seed = rng.nextInt();
rng = new Random(seed);
LOG.info("Using seed " + seed + " for testing");
//A map of the Storage IDs to the DN registration it was registered with
HashMap<String, DatanodeRegistration> sIdToDnReg = new HashMap<String, DatanodeRegistration>();
for (int i = 0; i < NUM_ITERATIONS; ++i) {
//On roughly every third iteration, randomly remove a node (if any are registered)
if (rng.nextBoolean() && i % 3 == 0 && sIdToDnReg.size() != 0) {
//Pick a random node.
int randomIndex = rng.nextInt() % sIdToDnReg.size();
//Iterate to that random position
Iterator<Map.Entry<String, DatanodeRegistration>> it = sIdToDnReg.entrySet().iterator();
for (int j = 0; j < randomIndex - 1; ++j) {
it.next();
}
DatanodeRegistration toRemove = it.next().getValue();
LOG.info("Removing node " + toRemove.getDatanodeUuid() + " ip " + toRemove.getXferAddr() + " version : " + toRemove.getSoftwareVersion());
//Remove that random node
dm.removeDatanode(toRemove);
it.remove();
} else {
// Otherwise register a node. This node may be a new or an already-registered one.
//Pick a random storageID to register.
String storageID = "someStorageID" + rng.nextInt(5000);
DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class);
Mockito.when(dr.getDatanodeUuid()).thenReturn(storageID);
//If this storageID had already been registered before
if (sIdToDnReg.containsKey(storageID)) {
dr = sIdToDnReg.get(storageID);
//Half of the times, change the IP address
if (rng.nextBoolean()) {
dr.setIpAddr(dr.getIpAddr() + "newIP");
}
} else {
//This storageID has never been registered
//Ensure IP address is unique to storageID
String ip = "someIP" + storageID;
Mockito.when(dr.getIpAddr()).thenReturn(ip);
Mockito.when(dr.getXferAddr()).thenReturn(ip + ":9000");
Mockito.when(dr.getXferPort()).thenReturn(9000);
}
//Pick a random version to register with
Mockito.when(dr.getSoftwareVersion()).thenReturn("version" + rng.nextInt(5));
LOG.info("Registering node storageID: " + dr.getDatanodeUuid() + ", version: " + dr.getSoftwareVersion() + ", IP address: " + dr.getXferAddr());
//Register this random node
dm.registerDatanode(dr);
sIdToDnReg.put(storageID, dr);
}
//Verify DatanodeManager still has the right count
Map<String, Integer> mapToCheck = dm.getDatanodesSoftwareVersions();
//Decrement the count for each registered node's version; mapToCheck should end up empty
for (Entry<String, DatanodeRegistration> it : sIdToDnReg.entrySet()) {
String ver = it.getValue().getSoftwareVersion();
if (!mapToCheck.containsKey(ver)) {
throw new AssertionError("The correct number of datanodes of a " + "version was not found on iteration " + i);
}
mapToCheck.put(ver, mapToCheck.get(ver) - 1);
if (mapToCheck.get(ver) == 0) {
mapToCheck.remove(ver);
}
}
for (Entry<String, Integer> entry : mapToCheck.entrySet()) {
LOG.info("Still in map: " + entry.getKey() + " has " + entry.getValue());
}
assertEquals("The map of version counts returned by DatanodeManager was" + " not what it was expected to be on iteration " + i, 0, mapToCheck.size());
}
}
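The verification loop above decrements the reported counts and then asserts that the map is empty. An equivalent, arguably more direct check builds the expected histogram from the registrations that were performed and compares whole maps. The following is a hedged sketch of that idea, assuming Java 8 (for Map.merge); the class and method names are hypothetical.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import static org.junit.Assert.assertEquals;

class VersionCountCheck {
  static void assertVersionCounts(DatanodeManager dm,
      Map<String, DatanodeRegistration> registered) {
    // Build the expected version -> count histogram from what we registered.
    Map<String, Integer> expected = new HashMap<>();
    for (DatanodeRegistration reg : registered.values()) {
      expected.merge(reg.getSoftwareVersion(), 1, Integer::sum);
    }
    // DatanodeManager should report exactly the same counts.
    assertEquals(expected, dm.getDatanodesSoftwareVersions());
  }
}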
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
From class TestDatanodeManager, method testRejectUnresolvedDatanodes.
@Test(timeout = 100000)
public void testRejectUnresolvedDatanodes() throws IOException {
//Create the DatanodeManager which will be tested
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
Configuration conf = new Configuration();
//Set configuration property for rejecting unresolved topology mapping
conf.setBoolean(DFSConfigKeys.DFS_REJECT_UNRESOLVED_DN_TOPOLOGY_MAPPING_KEY, true);
//set TestDatanodeManager.MyResolver to be used for topology resolving
conf.setClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, TestDatanodeManager.MyResolver.class, DNSToSwitchMapping.class);
//create DatanodeManager
DatanodeManager dm = mockDatanodeManager(fsn, conf);
//storageID to register.
String storageID = "someStorageID-123";
DatanodeRegistration dr = Mockito.mock(DatanodeRegistration.class);
Mockito.when(dr.getDatanodeUuid()).thenReturn(storageID);
try {
//Register this node
dm.registerDatanode(dr);
Assert.fail("Expected an UnresolvedTopologyException");
} catch (UnresolvedTopologyException ute) {
LOG.info("Expected - topology is not resolved and " + "registration is rejected.");
} catch (Exception e) {
Assert.fail("Expected an UnresolvedTopologyException");
}
}
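The test configures TestDatanodeManager.MyResolver as the topology resolver, but that class is not shown in this excerpt. A minimal hypothetical stand-in, assuming the resolver simply fails to resolve every host so that registration is rejected when dfs.namenode.reject-unresolved-dn-topology-mapping is true, might look like the sketch below.

import java.util.List;
import org.apache.hadoop.net.DNSToSwitchMapping;

public class RejectingResolverSketch implements DNSToSwitchMapping {
  @Override
  public List<String> resolve(List<String> names) {
    // Returning null signals that the topology could not be resolved; with the
    // reject-unresolved setting enabled, DatanodeManager.registerDatanode is
    // then expected to throw UnresolvedTopologyException.
    return null;
  }

  @Override
  public void reloadCachedMappings() {
    // no cached state in this sketch
  }

  @Override
  public void reloadCachedMappings(List<String> names) {
    // no cached state in this sketch
  }
}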
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
From class TestHeartbeatHandling, method testHeartbeat.
/**
 * Tests whether
 * {@link FSNamesystem#handleHeartbeat}
 * picks up replication and/or invalidation requests and observes the
 * per-heartbeat maximum limits.
 */
@Test
public void testHeartbeat() throws Exception {
final Configuration conf = new HdfsConfiguration();
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
cluster.waitActive();
final FSNamesystem namesystem = cluster.getNamesystem();
final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
final String poolId = namesystem.getBlockPoolId();
final DatanodeRegistration nodeReg = InternalDataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
final String storageID = DatanodeStorage.generateUuid();
dd.updateStorage(new DatanodeStorage(storageID));
final int REMAINING_BLOCKS = 1;
final int MAX_REPLICATE_LIMIT = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS;
final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS;
final DatanodeStorageInfo[] ONE_TARGET = { dd.getStorageInfo(storageID) };
try {
namesystem.writeLock();
synchronized (hm) {
for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) {
dd.addBlockToBeReplicated(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET);
}
DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
assertEquals(1, cmds.length);
assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) {
blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
}
dd.addBlocksToBeInvalidated(blockList);
cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
assertEquals(2, cmds.length);
assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);
cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
assertEquals(2, cmds.length);
assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);
cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
assertEquals(1, cmds.length);
assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
assertEquals(0, cmds.length);
}
} finally {
namesystem.writeUnlock();
}
} finally {
cluster.shutdown();
}
}
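The assertions above follow a simple pattern: with a per-heartbeat command limit L and 2*L + 1 queued items, three consecutive heartbeats drain the queue in batches of L, L and 1. A small stand-alone sketch of that arithmetic is below, assuming the defaults the test falls back to (a replication stream limit of 2 and a block invalidate limit of 1000); it is illustrative only and does not touch any Hadoop API.

public class HeartbeatBatchSketch {
  public static void main(String[] args) {
    int replicateLimit = 2;      // value the test uses when DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY is unset
    int invalidateLimit = 1000;  // assumed DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT
    for (int limit : new int[] { replicateLimit, invalidateLimit }) {
      int queued = 2 * limit + 1; // REMAINING_BLOCKS == 1 in the test
      while (queued > 0) {
        // Each heartbeat returns a command with at most 'limit' blocks,
        // so the queue drains as limit, limit, 1.
        int batch = Math.min(limit, queued);
        System.out.println("heartbeat returns a command with " + batch + " blocks");
        queued -= batch;
      }
    }
  }
}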