Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache: class TestFsck, method testFsckMissingECFile.
@Test(timeout = 300000)
public void testFsckMissingECFile() throws Exception {
  DistributedFileSystem fs = null;
  int dataBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumDataUnits();
  int parityBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits();
  int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize();
  int totalSize = dataBlocks + parityBlocks;
  conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
      StripedFileTestUtil.getDefaultECPolicy().getName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(totalSize).build();
  fs = cluster.getFileSystem();
  // create file
  Path ecDirPath = new Path("/striped");
  fs.mkdir(ecDirPath, FsPermission.getDirDefault());
  fs.getClient().setErasureCodingPolicy(ecDirPath.toString(),
      StripedFileTestUtil.getDefaultECPolicy().getName());
  Path file = new Path(ecDirPath, "missing");
  final int length = cellSize * dataBlocks;
  final byte[] bytes = StripedFileTestUtil.generateBytes(length);
  DFSTestUtil.writeFile(fs, file, bytes);
  // make an unrecoverable ec file with missing blocks
  ArrayList<DataNode> dns = cluster.getDataNodes();
  DatanodeID dnId;
  for (int i = 0; i < parityBlocks + 1; i++) {
    dnId = dns.get(i).getDatanodeId();
    cluster.stopDataNode(dnId.getXferAddr());
    cluster.setDataNodeDead(dnId);
  }
  waitForUnrecoverableBlockGroup(conf);
  String outStr = runFsck(conf, 1, true, "/", "-files", "-blocks", "-locations");
  assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
  assertTrue(outStr.contains("Live_repl=" + (dataBlocks - 1)));
  assertTrue(outStr.contains("Under-erasure-coded block groups:\t0"));
  outStr = runFsck(conf, -1, true, "/", "-list-corruptfileblocks");
  assertTrue(outStr.contains("has 1 CORRUPT files"));
}
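The runFsck helper used above is defined elsewhere in TestFsck. As a rough, hedged sketch of what such a helper typically does (the body below is illustrative, not the project's exact code), it runs the DFSck tool over the given paths, captures its output into a string, and optionally asserts on the exit code:

// Hedged sketch of a runFsck-style helper: run DFSck with the given arguments,
// capture its stdout, and optionally check the returned exit code.
static String runFsck(Configuration conf, int expectedErrCode,
    boolean checkErrorCode, String... path) throws Exception {
  ByteArrayOutputStream bStream = new ByteArrayOutputStream();
  PrintStream out = new PrintStream(bStream, true);
  int errCode = ToolRunner.run(new DFSck(conf, out), path);
  if (checkErrorCode) {
    assertEquals(expectedErrCode, errCode);
  }
  out.flush();
  return bStream.toString();
}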
Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache: class TestListCorruptFileBlocks, method testMaxCorruptFiles.
/**
 * Test that NN.listCorruptFiles() returns the right number of results.
 * The corrupt blocks are detected by the BlockPoolSliceScanner.
 * Also test that DFS.listCorruptFileBlocks can make multiple successive
 * calls.
 */
@Test(timeout = 300000)
public void testMaxCorruptFiles() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    // datanode sends block reports
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
    cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    final int maxCorruptFileBlocks =
        FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
    // create three times as many single-block files as the listing limit
    DFSTestUtil util = new DFSTestUtil.Builder().setName("testMaxCorruptFiles")
        .setNumFiles(maxCorruptFileBlocks * 3).setMaxLevels(1).setMaxSize(512)
        .build();
    util.createFiles(fs, "/srcdat2", (short) 1);
    util.waitReplication(fs, "/srcdat2", (short) 1);
    // verify that there are no bad blocks.
    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles =
        namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    assertTrue("Namenode has " + badFiles.size()
        + " corrupt files. Expecting none.", badFiles.size() == 0);
    // Now deliberately remove blocks from all files
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i = 0; i < 4; i++) {
      for (int j = 0; j <= 1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        LOG.info("Removing files from " + data_dir);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
        if (metadataFiles == null)
          continue;
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }
    // Run the directoryScanner to update the DataNode's volumeMap
    DataNode dn = cluster.getDataNodes().get(0);
    DataNodeTestUtils.runDirectoryScanner(dn);
    // Occasionally the BlockPoolSliceScanner can run before we have removed
    // the blocks. Restart the Datanode to trigger the scanner into running
    // once more.
    LOG.info("Restarting Datanode to trigger BlockPoolSliceScanner");
    cluster.restartDataNodes();
    cluster.waitActive();
    badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    while (badFiles.size() < maxCorruptFileBlocks) {
      LOG.info("# of corrupt files is: " + badFiles.size());
      Thread.sleep(10000);
      badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    }
    badFiles = namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting "
        + maxCorruptFileBlocks + ".", badFiles.size() == maxCorruptFileBlocks);
    CorruptFileBlockIterator iter =
        (CorruptFileBlockIterator) fs.listCorruptFileBlocks(new Path("/srcdat2"));
    int corruptPaths = countPaths(iter);
    assertTrue("Expected more than " + maxCorruptFileBlocks
        + " corrupt file blocks but got " + corruptPaths,
        corruptPaths > maxCorruptFileBlocks);
    assertTrue("Iterator should have made more than 1 call but made "
        + iter.getCallsMade(), iter.getCallsMade() > 1);
    util.cleanup(fs, "/srcdat2");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
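The manual sleep loop above could equally be written with GenericTestUtils.waitFor, which the decommissioning and lease-recovery examples below already use. A minimal, hedged sketch (illustrative only; it assumes the namenode and maxCorruptFileBlocks locals are still in scope):

// Hedged sketch: poll every 10 seconds, for up to 5 minutes, until the
// NameNode reports at least maxCorruptFileBlocks corrupt files.
GenericTestUtils.waitFor(new Supplier<Boolean>() {
  @Override
  public Boolean get() {
    try {
      return namenode.getNamesystem()
          .listCorruptFileBlocks("/srcdat2", null).size() >= maxCorruptFileBlocks;
    } catch (IOException e) {
      return false;
    }
  }
}, 10000, 300000);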
Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache: class TestNameNodeMXBean, method testDecommissioningNodes.
@Test(timeout = 120000)
public void testDecommissioningNodes() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 30);
  MiniDFSCluster cluster = null;
  HostsFileWriter hostsFileWriter = new HostsFileWriter();
  hostsFileWriter.initialize(conf, "temp/TestNameNodeMXBean");
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    cluster.waitActive();
    FSNamesystem fsn = cluster.getNameNode().namesystem;
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName mxbeanName =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
    List<String> hosts = new ArrayList<>();
    for (DataNode dn : cluster.getDataNodes()) {
      hosts.add(dn.getDisplayName());
    }
    hostsFileWriter.initIncludeHosts(hosts.toArray(new String[hosts.size()]));
    fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
    // 1. Verify Live nodes
    String liveNodesInfo = (String) (mbs.getAttribute(mxbeanName, "LiveNodes"));
    Map<String, Map<String, Object>> liveNodes =
        (Map<String, Map<String, Object>>) JSON.parse(liveNodesInfo);
    assertEquals(fsn.getLiveNodes(), liveNodesInfo);
    assertEquals(fsn.getNumLiveDataNodes(), liveNodes.size());
    for (Map<String, Object> liveNode : liveNodes.values()) {
      assertTrue(liveNode.containsKey("lastContact"));
      assertTrue(liveNode.containsKey("xferaddr"));
    }
    // Add the 1st DataNode to Decommission list
    hostsFileWriter.initExcludeHost(
        cluster.getDataNodes().get(0).getDisplayName());
    fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
    // Wait for the DecommissionManager to complete refresh nodes
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        try {
          String decomNodesInfo =
              (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
          Map<String, Map<String, Object>> decomNodes =
              (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
          if (decomNodes.size() > 0) {
            return true;
          }
        } catch (Exception e) {
          return false;
        }
        return false;
      }
    }, 1000, 60000);
    // 2. Verify Decommission InProgress nodes
    String decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
    Map<String, Map<String, Object>> decomNodes =
        (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
    assertEquals(fsn.getDecomNodes(), decomNodesInfo);
    assertEquals(fsn.getNumDecommissioningDataNodes(), decomNodes.size());
    assertEquals(0, fsn.getNumDecomLiveDataNodes());
    assertEquals(0, fsn.getNumDecomDeadDataNodes());
    // Wait for the DecommissionManager to complete check
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        if (fsn.getNumDecomLiveDataNodes() == 1) {
          return true;
        }
        return false;
      }
    }, 1000, 60000);
    // 3. Verify Decommissioned nodes
    decomNodesInfo = (String) (mbs.getAttribute(mxbeanName, "DecomNodes"));
    decomNodes = (Map<String, Map<String, Object>>) JSON.parse(decomNodesInfo);
    assertEquals(0, decomNodes.size());
    assertEquals(fsn.getDecomNodes(), decomNodesInfo);
    assertEquals(1, fsn.getNumDecomLiveDataNodes());
    assertEquals(0, fsn.getNumDecomDeadDataNodes());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
    hostsFileWriter.cleanup();
  }
}
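Both waits above poll JSON-valued NameNode MXBean attributes and parse them the same way. A small helper keeps that pattern in one place; this is a hedged sketch (the helper name is an assumption, and it reuses the JSON parser the test already imports):

// Hedged sketch: fetch a JSON-valued attribute from the NameNodeInfo MXBean
// and parse it into a map keyed by node name.
@SuppressWarnings("unchecked")
private static Map<String, Map<String, Object>> getNodeMap(MBeanServer mbs,
    ObjectName mxbeanName, String attribute) throws Exception {
  String json = (String) mbs.getAttribute(mxbeanName, attribute);
  return (Map<String, Map<String, Object>>) JSON.parse(json);
}

With such a helper, the three verification steps reduce to calls like getNodeMap(mbs, mxbeanName, "LiveNodes") and getNodeMap(mbs, mxbeanName, "DecomNodes").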
Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache: class DFSTestUtil, method addBlockToFile.
/**
 * Adds a block or a striped block group to a file.
 * This method only manipulates NameNode state for the file and the block,
 * without injecting any data into the DataNodes.
 * It does, however, mimic the incremental block reports the DataNodes
 * would send.
 * You should disable periodic heartbeats before using this.
 * @param isStripedBlock whether the added block is a striped block group
 * @param dataNodes DataNodes to host the block or striped block group
 * @param previous previous block in the file
 * @param numStripes number of stripes in each block group
 * @param len block size when adding a non-striped block
 * @return the added block or block group
 */
public static Block addBlockToFile(boolean isStripedBlock,
    List<DataNode> dataNodes, DistributedFileSystem fs, FSNamesystem ns,
    String file, INodeFile fileNode, String clientName, ExtendedBlock previous,
    int numStripes, int len) throws Exception {
  fs.getClient().namenode.addBlock(file, clientName, previous, null,
      fileNode.getId(), null, null);
  final BlockInfo lastBlock = fileNode.getLastBlock();
  final int groupSize = fileNode.getPreferredBlockReplication();
  assert dataNodes.size() >= groupSize;
  // 1. RECEIVING_BLOCK IBR
  for (int i = 0; i < groupSize; i++) {
    DataNode dn = dataNodes.get(i);
    final Block block = new Block(lastBlock.getBlockId() + i, 0,
        lastBlock.getGenerationStamp());
    DatanodeStorage storage = new DatanodeStorage(UUID.randomUUID().toString());
    StorageReceivedDeletedBlocks[] reports = DFSTestUtil.makeReportForReceivedBlock(
        block, ReceivedDeletedBlockInfo.BlockStatus.RECEIVING_BLOCK, storage);
    for (StorageReceivedDeletedBlocks report : reports) {
      ns.processIncrementalBlockReport(dn.getDatanodeId(), report);
    }
  }
  final ErasureCodingPolicy ecPolicy = fs.getErasureCodingPolicy(new Path(file));
  // 2. RECEIVED_BLOCK IBR
  long blockSize = isStripedBlock ? numStripes * ecPolicy.getCellSize() : len;
  for (int i = 0; i < groupSize; i++) {
    DataNode dn = dataNodes.get(i);
    final Block block = new Block(lastBlock.getBlockId() + i, blockSize,
        lastBlock.getGenerationStamp());
    DatanodeStorage storage = new DatanodeStorage(UUID.randomUUID().toString());
    StorageReceivedDeletedBlocks[] reports = DFSTestUtil.makeReportForReceivedBlock(
        block, ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, storage);
    for (StorageReceivedDeletedBlocks report : reports) {
      ns.processIncrementalBlockReport(dn.getDatanodeId(), report);
    }
  }
  long bytes = isStripedBlock
      ? numStripes * ecPolicy.getCellSize() * ecPolicy.getNumDataUnits() : len;
  lastBlock.setNumBytes(bytes);
  return lastBlock;
}
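A hypothetical call site might look like the following; the variable names and the way fileNode is obtained are illustrative assumptions, not taken from a specific test:

// Hedged sketch: append one striped block group (numStripes stripes) to an
// already-created file without writing any real data to the DataNodes.
FSNamesystem ns = cluster.getNamesystem();
// Obtain the file's INodeFile from the directory tree; the exact accessor
// is an assumption and may differ between Hadoop versions.
INodeFile fileNode = ns.getFSDirectory().getINode(filePath.toString()).asFile();
Block lastBlock = DFSTestUtil.addBlockToFile(true, cluster.getDataNodes(),
    dfs, ns, filePath.toString(), fileNode, dfs.getClient().getClientName(),
    null, numStripes, 0);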
Use of org.apache.hadoop.hdfs.server.datanode.DataNode in project hadoop by apache: class TestLeaseRecovery2, method hardLeaseRecoveryRestartHelper.
public void hardLeaseRecoveryRestartHelper(boolean doRename, int size)
    throws Exception {
  if (size < 0) {
    size = AppendTestUtil.nextInt(FILE_SIZE + 1);
  }
  // create a file
  String fileStr = "/hardLeaseRecovery";
  AppendTestUtil.LOG.info("filestr=" + fileStr);
  Path filePath = new Path(fileStr);
  FSDataOutputStream stm =
      dfs.create(filePath, true, BUF_SIZE, REPLICATION_NUM, BLOCK_SIZE);
  assertTrue(dfs.dfs.exists(fileStr));
  // write bytes into the file.
  AppendTestUtil.LOG.info("size=" + size);
  stm.write(buffer, 0, size);
  String originalLeaseHolder =
      NameNodeAdapter.getLeaseHolderForPath(cluster.getNameNode(), fileStr);
  assertFalse("original lease holder should not be the NN",
      originalLeaseHolder.equals(HdfsServerConstants.NAMENODE_LEASE_HOLDER));
  // hflush file
  AppendTestUtil.LOG.info("hflush");
  stm.hflush();
  // check visible length
  final HdfsDataInputStream in = (HdfsDataInputStream) dfs.open(filePath);
  Assert.assertEquals(size, in.getVisibleLength());
  in.close();
  if (doRename) {
    fileStr += ".renamed";
    Path renamedPath = new Path(fileStr);
    assertTrue(dfs.rename(filePath, renamedPath));
    filePath = renamedPath;
  }
  // kill the lease renewal thread
  AppendTestUtil.LOG.info("leasechecker.interruptAndJoin()");
  dfs.dfs.getLeaseRenewer().interruptAndJoin();
  // Disable heartbeats from the DataNodes so that the blocks
  // won't actually get completed during lease recovery.
  for (DataNode dn : cluster.getDataNodes()) {
    DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
  }
  // set the hard limit to be 1 second
  cluster.setLeasePeriod(LONG_LEASE_PERIOD, SHORT_LEASE_PERIOD);
  // Make sure lease recovery begins.
  final String path = fileStr;
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return HdfsServerConstants.NAMENODE_LEASE_HOLDER.equals(
          NameNodeAdapter.getLeaseHolderForPath(cluster.getNameNode(), path));
    }
  }, (int) SHORT_LEASE_PERIOD, (int) SHORT_LEASE_PERIOD * 10);
  // Normally, the in-progress edit log would be finalized by
  // FSEditLog#endCurrentLogSegment. For testing purposes, we
  // disable that here.
  FSEditLog spyLog = spy(cluster.getNameNode().getFSImage().getEditLog());
  doNothing().when(spyLog).endCurrentLogSegment(Mockito.anyBoolean());
  DFSTestUtil.setEditLogForTesting(cluster.getNamesystem(), spyLog);
  cluster.restartNameNode(false);
  checkLease(fileStr, size);
  // Let the DNs send heartbeats again.
  for (DataNode dn : cluster.getDataNodes()) {
    DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false);
  }
  cluster.waitActive();
  // set the hard limit to be 1 second, to initiate lease recovery.
  cluster.setLeasePeriod(LONG_LEASE_PERIOD, SHORT_LEASE_PERIOD);
  // wait for lease recovery to complete
  LocatedBlocks locatedBlocks;
  do {
    Thread.sleep(SHORT_LEASE_PERIOD);
    locatedBlocks = dfs.dfs.getLocatedBlocks(fileStr, 0L, size);
  } while (locatedBlocks.isUnderConstruction());
  assertEquals(size, locatedBlocks.getFileLength());
  // make sure that the client can't write data anymore.
  try {
    stm.write('b');
    stm.hflush();
    fail("Should not be able to flush after we've lost the lease");
  } catch (IOException e) {
LOG.info("Expceted exception on write/hflush", e);
}
  try {
    stm.close();
    fail("Should not be able to close after we've lost the lease");
  } catch (IOException e) {
    LOG.info("Expected exception on close", e);
  }
  // verify data
  AppendTestUtil.LOG.info("File size is good. Now validating sizes from datanodes...");
  AppendTestUtil.checkFullFile(dfs, filePath, size, buffer, fileStr);
}
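For context, TestLeaseRecovery2 drives this helper from small @Test methods. A hedged sketch of such a caller (the test name and timeout value are assumptions):

// Hedged sketch: exercise hard lease recovery across a NameNode restart,
// letting the helper pick a random write size.
@Test(timeout = 100000)
public void testHardLeaseRecoveryAfterNameNodeRestart() throws Exception {
  hardLeaseRecoveryRestartHelper(false, -1);
}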