Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
The class TestHeartbeatHandling, method testHeartbeatBlockRecovery.
/**
 * Test if
 * {@link FSNamesystem#handleHeartbeat}
 * correctly selects data node targets for block recovery.
 */
@Test
public void testHeartbeatBlockRecovery() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  try {
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final HeartbeatManager hm = namesystem.getBlockManager()
        .getDatanodeManager().getHeartbeatManager();
    final String poolId = namesystem.getBlockPoolId();
    final DatanodeRegistration nodeReg1 = InternalDataNodeTestUtils
        .getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
    final DatanodeDescriptor dd1 =
        NameNodeAdapter.getDatanode(namesystem, nodeReg1);
    dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
    final DatanodeRegistration nodeReg2 = InternalDataNodeTestUtils
        .getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId);
    final DatanodeDescriptor dd2 =
        NameNodeAdapter.getDatanode(namesystem, nodeReg2);
    dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
    final DatanodeRegistration nodeReg3 = InternalDataNodeTestUtils
        .getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId);
    final DatanodeDescriptor dd3 =
        NameNodeAdapter.getDatanode(namesystem, nodeReg3);
    dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
    try {
      namesystem.writeLock();
      synchronized (hm) {
        NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem);
        NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem);
        NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem);
        // Test with all alive nodes.
        DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
        DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0);
        DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
        final DatanodeStorageInfo[] storages = {
            dd1.getStorageInfos()[0],
            dd2.getStorageInfos()[0],
            dd3.getStorageInfos()[0] };
        BlockInfo blockInfo = new BlockInfoContiguous(
            new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
        blockInfo.convertToBlockUnderConstruction(
            BlockUCState.UNDER_RECOVERY, storages);
        dd1.addBlockToBeRecovered(blockInfo);
        DatanodeCommand[] cmds =
            NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem)
                .getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
        BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0];
        assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
        DatanodeInfo[] recoveringNodes = recoveryCommand.getRecoveringBlocks()
            .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
            .getLocations();
        assertEquals(3, recoveringNodes.length);
        assertEquals(recoveringNodes[0], dd1);
        assertEquals(recoveringNodes[1], dd2);
        assertEquals(recoveringNodes[2], dd3);
        // Test with one stale node.
        DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
        // More than the default stale interval of 30 seconds.
        DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
        DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
        blockInfo = new BlockInfoContiguous(
            new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
        blockInfo.convertToBlockUnderConstruction(
            BlockUCState.UNDER_RECOVERY, storages);
        dd1.addBlockToBeRecovered(blockInfo);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem)
            .getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
        recoveryCommand = (BlockRecoveryCommand) cmds[0];
        assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
        recoveringNodes = recoveryCommand.getRecoveringBlocks()
            .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
            .getLocations();
        assertEquals(2, recoveringNodes.length);
        // dd2 is skipped.
        assertEquals(recoveringNodes[0], dd1);
        assertEquals(recoveringNodes[1], dd3);
        // Test with all stale nodes.
        DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000);
        // More than the default stale interval of 30 seconds.
        DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
        DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000);
        blockInfo = new BlockInfoContiguous(
            new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
        blockInfo.convertToBlockUnderConstruction(
            BlockUCState.UNDER_RECOVERY, storages);
        dd1.addBlockToBeRecovered(blockInfo);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem)
            .getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
        recoveryCommand = (BlockRecoveryCommand) cmds[0];
        assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
        recoveringNodes = recoveryCommand.getRecoveringBlocks()
            .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
            .getLocations();
        // All three replicas are returned: dd1 just heartbeated, so it is
        // not stale when the recovery list is built, and with at most one
        // non-stale replica left after filtering, the NameNode falls back
        // to using all replicas for the recovery.
        assertEquals(3, recoveringNodes.length);
        assertEquals(recoveringNodes[0], dd1);
        assertEquals(recoveringNodes[1], dd2);
        assertEquals(recoveringNodes[2], dd3);
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
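For context, the target selection these assertions exercise happens inside DatanodeManager.handleHeartbeat on the NameNode. A simplified sketch of that logic (paraphrased, not the exact Hadoop source; staleInterval is the configured 30-second default):
// Keep only replicas whose datanode is not stale.
List<DatanodeStorageInfo> recoveryLocations = new ArrayList<>();
for (DatanodeStorageInfo storage : storages) {
  if (!storage.getDatanodeDescriptor().isStale(staleInterval)) {
    recoveryLocations.add(storage);
  }
}
// If at most one non-stale replica is left, fall back to all replicas
// and let the primary datanode deal with failures. This is why the
// all-stale case above still sees three locations.
DatanodeInfo[] recoveryInfos = recoveryLocations.size() > 1
    ? DatanodeStorageInfo.toDatanodeInfos(recoveryLocations)
    : DatanodeStorageInfo.toDatanodeInfos(storages);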
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
The class TestHostFileManager, method testIncludeExcludeLists.
@Test
@SuppressWarnings("unchecked")
public void testIncludeExcludeLists() throws IOException {
  BlockManager bm = mock(BlockManager.class);
  FSNamesystem fsn = mock(FSNamesystem.class);
  Configuration conf = new Configuration();
  HostFileManager hm = new HostFileManager();
  HostSet includedNodes = new HostSet();
  HostSet excludedNodes = new HostSet();
  includedNodes.add(entry("127.0.0.1:12345"));
  includedNodes.add(entry("localhost:12345"));
  includedNodes.add(entry("127.0.0.1:12345"));
  includedNodes.add(entry("127.0.0.2"));
  excludedNodes.add(entry("127.0.0.1:12346"));
  excludedNodes.add(entry("127.0.30.1:12346"));
  Assert.assertEquals(2, includedNodes.size());
  Assert.assertEquals(2, excludedNodes.size());
  hm.refresh(includedNodes, excludedNodes);
  DatanodeManager dm = new DatanodeManager(bm, fsn, conf);
  Whitebox.setInternalState(dm, "hostConfigManager", hm);
  Map<String, DatanodeDescriptor> dnMap = (Map<String, DatanodeDescriptor>)
      Whitebox.getInternalState(dm, "datanodeMap");
  // After de-duplication there are two unique entries in the include list
  // (localhost:12345 resolves to 127.0.0.1:12345); with no datanodes
  // registered yet, both are reported as dead.
  Assert.assertEquals(2, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.ALL).size());
  Assert.assertEquals(2, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
  dnMap.put("uuid-foo", new DatanodeDescriptor(new DatanodeID(
      "127.0.0.1", "localhost", "uuid-foo", 12345, 1020, 1021, 1022)));
  Assert.assertEquals(1, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
  dnMap.put("uuid-bar", new DatanodeDescriptor(new DatanodeID(
      "127.0.0.2", "127.0.0.2", "uuid-bar", 12345, 1020, 1021, 1022)));
  Assert.assertEquals(0, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
  DatanodeDescriptor spam = new DatanodeDescriptor(new DatanodeID(
      "127.0.0.3", "127.0.0.3", "uuid-spam", 12345, 1020, 1021, 1022));
  DFSTestUtil.setDatanodeDead(spam);
  includedNodes.add(entry("127.0.0.3:12345"));
  dnMap.put("uuid-spam", spam);
  Assert.assertEquals(1, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
  dnMap.remove("uuid-spam");
  Assert.assertEquals(1, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
  excludedNodes.add(entry("127.0.0.3"));
  Assert.assertEquals(1, dm.getDatanodeListForReport(
      HdfsConstants.DatanodeReportType.DEAD).size());
}
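The entry(...) helper used above is defined elsewhere in TestHostFileManager; it is essentially a one-line wrapper that parses a "host[:port]" string into an InetSocketAddress via HostFileManager's entry parser, roughly as sketched below (the "dummy" file/line arguments are placeholders the parser only uses for error reporting):
// Sketch of the test's entry() helper; parseEntry turns "host[:port]"
// into an InetSocketAddress, using port 0 when no port is given.
private static InetSocketAddress entry(String e) {
  return HostFileManager.parseEntry("dummy", "dummy", e);
}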
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
The class TestSequentialBlockId, method testTriggerBlockIdCollision.
/**
 * Test that collisions in the block ID space are handled gracefully.
 *
 * @throws IOException
 */
@Test
public void testTriggerBlockIdCollision() throws IOException {
  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  try {
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    FSNamesystem fsn = cluster.getNamesystem();
    final int blockCount = 10;
    // Create a file with a few blocks to rev up the global block ID
    // counter.
    Path path1 = new Path("testBlockIdCollisionDetection_file1.dat");
    DFSTestUtil.createFile(fs, path1, IO_SIZE, BLOCK_SIZE * blockCount,
        BLOCK_SIZE, REPLICATION, SEED);
    List<LocatedBlock> blocks1 = DFSTestUtil.getAllBlocks(fs, path1);
    // Rewind the block ID counter in the name system object. This will
    // result in block ID collisions when we try to allocate new blocks.
    SequentialBlockIdGenerator blockIdGenerator =
        fsn.getBlockManager().getBlockIdManager().getBlockIdGenerator();
    blockIdGenerator.setCurrentValue(blockIdGenerator.getCurrentValue() - 5);
    // Trigger collisions by creating a new file.
    Path path2 = new Path("testBlockIdCollisionDetection_file2.dat");
    DFSTestUtil.createFile(fs, path2, IO_SIZE, BLOCK_SIZE * blockCount,
        BLOCK_SIZE, REPLICATION, SEED);
    List<LocatedBlock> blocks2 = DFSTestUtil.getAllBlocks(fs, path2);
    assertThat(blocks2.size(), is(blockCount));
    // Make sure that file2 block IDs start immediately after file1.
    assertThat(blocks2.get(0).getBlock().getBlockId(),
        is(blocks1.get(9).getBlock().getBlockId() + 1));
  } finally {
    cluster.shutdown();
  }
}
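The graceful handling being tested lives in SequentialBlockIdGenerator.nextValue() on the NameNode: when the rewound counter produces an ID that already belongs to a stored block, the generator skips ahead. A paraphrased sketch (isValidBlock stands for the generator's internal lookup against the BlockManager):
// Sketch of SequentialBlockIdGenerator's collision handling.
@Override // SequentialNumber
public long nextValue() {
  Block b = new Block(super.nextValue());
  // The next sequential ID may conflict with an existing block (e.g.
  // after the counter was rewound, as in the test above). Skip over
  // the conflicts until a free ID is found.
  while (isValidBlock(b)) {
    b.setBlockId(super.nextValue());
  }
  return b.getBlockId();
}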
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
The class TestSequentialBlockGroupId, method testTriggerBlockGroupIdCollisionWithLegacyBlockId.
/**
 * Test collisions in the blockGroup ID space when an ID is already
 * occupied by a legacy (contiguous) block.
 */
@Test(timeout = 60000)
public void testTriggerBlockGroupIdCollisionWithLegacyBlockId() throws Exception {
  long blockGroupIdInitialValue = blockGrpIdGenerator.getCurrentValue();
  blockGrpIdGenerator.skipTo(
      (blockGrpIdGenerator.getCurrentValue() & ~BLOCK_GROUP_INDEX_MASK)
          + MAX_BLOCKS_IN_GROUP);
  final long curBlockGroupIdValue = blockGrpIdGenerator.getCurrentValue();
  // Create a contiguous block with a negative blockId so that it triggers
  // a collision during blockGroup ID generation.
  FSNamesystem fsn = cluster.getNamesystem();
  // Replace SequentialBlockIdGenerator with a spy.
  SequentialBlockIdGenerator blockIdGenerator =
      spy(fsn.getBlockManager().getBlockIdManager().getBlockIdGenerator());
  Whitebox.setInternalState(fsn.getBlockManager().getBlockIdManager(),
      "blockIdGenerator", blockIdGenerator);
  SequentialBlockIdGenerator spySequentialBlockIdGenerator =
      new SequentialBlockIdGenerator(null) {
        @Override
        public long nextValue() {
          return curBlockGroupIdValue;
        }
      };
  final Answer<Object> delegator =
      new GenericTestUtils.DelegateAnswer(spySequentialBlockIdGenerator);
  doAnswer(delegator).when(blockIdGenerator).nextValue();
  Path path1 = new Path("/testCollisionWithLegacyBlock_file1.dat");
  DFSTestUtil.createFile(fs, path1, 1024, REPLICATION, SEED);
  List<LocatedBlock> contiguousBlocks = DFSTestUtil.getAllBlocks(fs, path1);
  assertThat(contiguousBlocks.size(), is(1));
  Assert.assertEquals("Unexpected BlockId!", curBlockGroupIdValue,
      contiguousBlocks.get(0).getBlock().getBlockId());
  // Reset back to the initial value to trigger a collision.
  blockGrpIdGenerator.setCurrentValue(blockGroupIdInitialValue);
  // Trigger collisions by creating a new file.
  Path path2 = new Path(ecDir, "testCollisionWithLegacyBlock_file2.dat");
  DFSTestUtil.createFile(fs, path2, cellSize, fileLen, blockSize,
      REPLICATION, SEED);
  List<LocatedBlock> blocks2 = DFSTestUtil.getAllBlocks(fs, path2);
  assertThat("Wrong BlockGrps", blocks2.size(), is(blockGrpCount));
  // Make sure that file1 and file2 block IDs are different.
  for (LocatedBlock locBlock1 : contiguousBlocks) {
    long blockId1 = locBlock1.getBlock().getBlockId();
    for (LocatedBlock locBlock2 : blocks2) {
      long blockId2 = locBlock2.getBlock().getBlockId();
      assertThat("BlockGrpId mismatches!", blockId1, is(not(blockId2)));
    }
  }
}
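The mask arithmetic at the top of the test relies on how HDFS lays out striped (erasure-coded) block group IDs: the low index bits of a group ID are zero, and member blocks are numbered by adding their index within the group. A sketch of that layout, assuming the usual constants for striped blocks (a group holds at most 16 blocks, indexed by the low 4 bits):
// Sketch of the striped block-group ID layout the test manipulates.
static final long BLOCK_GROUP_INDEX_MASK = 15;  // low 4 bits = block index
static final long MAX_BLOCKS_IN_GROUP = 16;

static long blockIdInGroup(long blockGroupId, int indexInGroup) {
  // A valid group ID has its index bits zeroed.
  assert (blockGroupId & BLOCK_GROUP_INDEX_MASK) == 0;
  assert indexInGroup >= 0 && indexInGroup < MAX_BLOCKS_IN_GROUP;
  return blockGroupId + indexInGroup;
}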
Use of org.apache.hadoop.hdfs.server.namenode.FSNamesystem in project hadoop by apache.
The class TestBlocksWithNotEnoughRacks, method testReduceReplFactorDueToRejoinRespectsRackPolicy.
/*
 * Test that when the excess replicas of a block are reduced due to
 * a node re-joining the cluster, the rack policy is not violated.
 */
@Test
public void testReduceReplFactorDueToRejoinRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack.
  String[] racks = { "/rack1", "/rack1", "/rack2" };
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
  try {
    // Create a file with one block.
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
    // Make the last (cross-rack) datanode look like it failed to
    // heartbeat by stopping it and calling removeDatanode.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(3, datanodes.size());
    DataNode dataNode = datanodes.get(2);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(2);
    dm.removeDatanode(dnId);
    // The block gets re-replicated to another datanode, so it has
    // sufficient replicas, but not across racks: there should be 1 rack.
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 0);
    // Restart the "failed" datanode, which holds a replica. The block is
    // now over-replicated, so a replica should be removed, but not from
    // the restarted datanode, as that would violate the rack policy.
    String[] rack2 = { "/rack2" };
    cluster.startDataNodes(conf, 1, true, null, rack2);
    cluster.waitActive();
    // The block now has sufficient replicas, across racks.
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
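The getConf() helper is not shown in this snippet; in TestBlocksWithNotEnoughRacks it mainly shortens the NameNode's timing intervals so replication decisions happen quickly. A plausible minimal version (the exact keys and values here are assumptions, not the verbatim source):
// Sketch of a getConf() helper for fast replication tests; key choices
// are assumed, not copied from the original test class.
private Configuration getConf() {
  Configuration conf = new HdfsConfiguration();
  // Shorten the heartbeat interval so the NameNode learns of dead
  // datanodes quickly and issues replication work promptly.
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
  // Re-check pending replication work frequently.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
  return conf;
}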