Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
Class TestFsck, method testFsckMove.
@Test
public void testFsckMove() throws Exception {
final int dfsBlockSize = 1024;
final int numDatanodes = 4;
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, dfsBlockSize);
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, (5 * dfsBlockSize) + (dfsBlockSize - 1), 5 * dfsBlockSize);
FileSystem fs = null;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).build();
String topDir = "/srcdat";
fs = cluster.getFileSystem();
cluster.waitActive();
util.createFiles(fs, topDir);
util.waitReplication(fs, topDir, (short) 3);
String outStr = runFsck(conf, 0, true, "/");
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
String[] fileNames = util.getFileNames(topDir);
CorruptedTestFile[] ctFiles = new CorruptedTestFile[] {
    new CorruptedTestFile(fileNames[0], Sets.newHashSet(0), dfsClient, numDatanodes, dfsBlockSize),
    new CorruptedTestFile(fileNames[1], Sets.newHashSet(2, 3), dfsClient, numDatanodes, dfsBlockSize),
    new CorruptedTestFile(fileNames[2], Sets.newHashSet(4), dfsClient, numDatanodes, dfsBlockSize),
    new CorruptedTestFile(fileNames[3], Sets.newHashSet(0, 1, 2, 3), dfsClient, numDatanodes, dfsBlockSize),
    new CorruptedTestFile(fileNames[4], Sets.newHashSet(1, 2, 3, 4), dfsClient, numDatanodes, dfsBlockSize)
};
int totalMissingBlocks = 0;
for (CorruptedTestFile ctFile : ctFiles) {
  totalMissingBlocks += ctFile.getTotalMissingBlocks();
}
for (CorruptedTestFile ctFile : ctFiles) {
  ctFile.removeBlocks(cluster);
}
// Wait for fsck to discover all the missing blocks
while (true) {
  outStr = runFsck(conf, 1, false, "/");
  String numMissing = null;
  String numCorrupt = null;
  for (String line : outStr.split(LINE_SEPARATOR)) {
    Matcher m = NUM_MISSING_BLOCKS_PATTERN.matcher(line);
    if (m.matches()) {
      numMissing = m.group(1);
    }
    m = NUM_CORRUPT_BLOCKS_PATTERN.matcher(line);
    if (m.matches()) {
      numCorrupt = m.group(1);
    }
    if (numMissing != null && numCorrupt != null) {
      break;
    }
  }
  if (numMissing == null || numCorrupt == null) {
    throw new IOException("failed to find number of missing or corrupt blocks in fsck output.");
  }
  if (numMissing.equals(Integer.toString(totalMissingBlocks))) {
    assertTrue(numCorrupt.equals(Integer.toString(0)));
    assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
    break;
  }
  try {
    Thread.sleep(100);
  } catch (InterruptedException ignore) {
  }
}
// Copy the non-corrupt blocks of corruptFileName to lost+found.
outStr = runFsck(conf, 1, false, "/", "-move");
LOG.info("WATERMELON: outStr = " + outStr);
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
// Make sure the block files were properly copied from the datanodes
// to lost+found
for (CorruptedTestFile ctFile : ctFiles) {
  ctFile.checkSalvagedRemains();
}
// Fix the filesystem by removing corruptFileName
outStr = runFsck(conf, 1, true, "/", "-delete");
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
// Check to make sure we have a healthy filesystem
outStr = runFsck(conf, 0, true, "/");
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
util.cleanup(fs, topDir);
}
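The examples on this page all call a runFsck helper that TestFsck defines elsewhere and that is not shown here. A minimal sketch of such a helper, assuming it wraps the standard org.apache.hadoop.hdfs.tools.DFSck tool via ToolRunner (the parameter names below are assumptions, not the exact upstream signature):

// Sketch of a runFsck-style helper: runs DFSck with the given arguments,
// captures the tool's output, and optionally asserts on the exit code.
// Requires java.io.ByteArrayOutputStream, java.io.PrintStream,
// org.apache.hadoop.util.ToolRunner and org.apache.hadoop.hdfs.tools.DFSck.
static String runFsck(Configuration conf, int expectedErrCode,
    boolean checkErrorCode, String... path) throws Exception {
  ByteArrayOutputStream bStream = new ByteArrayOutputStream();
  PrintStream out = new PrintStream(bStream, true);
  int errCode = ToolRunner.run(new DFSck(conf, out), path);
  if (checkErrorCode) {
    assertEquals(expectedErrCode, errCode);
  }
  return bStream.toString();
}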
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
Class TestFsck, method testFsckWithMaintenanceReplicas.
/**
* Test that blocks on maintenance hosts are not shown as missing.
*/
@Test(timeout = 90000)
public void testFsckWithMaintenanceReplicas() throws Exception {
final short replFactor = 2;
short numDn = 2;
final long blockSize = 512;
String[] hosts = { "host1", "host2" };
String[] racks = { "/rack1", "/rack2" };
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, replFactor);
DistributedFileSystem dfs;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts).racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
dfs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", dfs);
DFSTestUtil util = new DFSTestUtil.Builder().setName(getClass().getSimpleName()).setNumFiles(1).build();
// create files
final String testFile = "/testfile";
final Path path = new Path(testFile);
util.createFile(dfs, path, 1024, replFactor, 1000L);
util.waitReplication(dfs, path, replFactor);
StringBuilder sb = new StringBuilder();
for (LocatedBlock lb : util.getAllBlocks(dfs, path)) {
  sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
}
String[] bIds = sb.toString().split(" ");
//make sure datanode that has replica is fine before maintenance
String outStr = runFsck(conf, 0, true, testFile);
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager();
DatanodeManager dnm = bm.getDatanodeManager();
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
bm.getDatanodeManager().getDecomManager().startMaintenance(dn, Long.MAX_VALUE);
final String dnName = dn.getXferAddr();
// wait for the node to enter maintenance state
GenericTestUtils.waitFor(new Supplier<Boolean>() {
  @Override
  public Boolean get() {
    DatanodeInfo datanodeInfo = null;
    try {
      for (DatanodeInfo info : dfs.getDataNodeStats()) {
        if (dnName.equals(info.getXferAddr())) {
          datanodeInfo = info;
        }
      }
      if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
        // verify fsck returns Healthy status
        String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
        assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
        return true;
      }
    } catch (Exception e) {
      LOG.warn("Unexpected exception: " + e);
      return false;
    }
    return false;
  }
}, 500, 30000);
// Start a 3rd DataNode and wait for the node to reach the IN_MAINTENANCE state
cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" }, new String[] { "host3" }, null, false);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
  @Override
  public Boolean get() {
    DatanodeInfo datanodeInfo = null;
    try {
      for (DatanodeInfo info : dfs.getDataNodeStats()) {
        if (dnName.equals(info.getXferAddr())) {
          datanodeInfo = info;
        }
      }
      if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
        return true;
      }
    } catch (Exception e) {
      LOG.warn("Unexpected exception: " + e);
      return false;
    }
    return false;
  }
}, 500, 30000);
// verify fsck returns Healthy status
String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
// verify fsck returns Healthy status even without maintenance option
fsckOut = runFsck(conf, 0, true, testFile);
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
}
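The two anonymous Supplier blocks above poll dfs.getDataNodeStats() until the tracked datanode reports the desired maintenance state. With Java 8 the same wait can be written more compactly as a lambda; a sketch, assuming the dfs, dnName and LOG variables from the test above are in scope inside a test method that declares throws Exception:

// Sketch: wait until the tracked datanode reports IN_MAINTENANCE, lambda form.
GenericTestUtils.waitFor(() -> {
  try {
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dnName.equals(info.getXferAddr()) && info.isInMaintenance()) {
        return true;
      }
    }
  } catch (IOException e) {
    LOG.warn("Unexpected exception: " + e);
  }
  return false;
}, 500, 30000);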
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
Class TestFsck, method testFsck.
/** do fsck. */
@Test
public void testFsck() throws Exception {
DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFsck").setNumFiles(20).build();
FileSystem fs = null;
final long precision = 1L;
conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, precision);
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
fs = cluster.getFileSystem();
final String fileName = "/srcdat";
util.createFiles(fs, fileName);
util.waitReplication(fs, fileName, (short) 3);
final Path file = new Path(fileName);
long aTime = fs.getFileStatus(file).getAccessTime();
Thread.sleep(precision);
setupAuditLogs();
String outStr = runFsck(conf, 0, true, "/");
verifyAuditLogs();
assertEquals(aTime, fs.getFileStatus(file).getAccessTime());
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
shutdownCluster();
// restart the cluster; bring up namenode but not the data nodes
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(false).build();
outStr = runFsck(conf, 1, true, "/");
// expect the result to be corrupt
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
System.out.println(outStr);
// bring up data nodes & cleanup cluster
cluster.startDataNodes(conf, 4, true, null, null);
cluster.waitActive();
cluster.waitClusterUp();
fs = cluster.getFileSystem();
util.cleanup(fs, "/srcdat");
}
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
Class TestListCorruptFileBlocks, method testlistCorruptFileBlocks.
// deliberately remove blocks from a file and validate the list-corrupt-file-blocks API
@Test(timeout = 300000)
public void testlistCorruptFileBlocks() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
// datanode scans directories
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
FileSystem fs = null;
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil util = new DFSTestUtil.Builder().setName("testGetCorruptFiles").setNumFiles(3).setMaxLevels(1).setMaxSize(1024).build();
util.createFiles(fs, "/corruptData");
final NameNode namenode = cluster.getNameNode();
Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null);
int numCorrupt = corruptFileBlocks.size();
assertTrue(numCorrupt == 0);
// delete the blocks
String bpid = cluster.getNamesystem().getBlockPoolId();
for (int i = 0; i < 4; i++) {
  for (int j = 0; j <= 1; j++) {
    File storageDir = cluster.getInstanceStorageDir(i, j);
    File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
    if (metadataFiles == null) {
      continue;
    }
    for (File metadataFile : metadataFiles) {
      File blockFile = Block.metaToBlockFile(metadataFile);
      LOG.info("Deliberately removing file " + blockFile.getName());
      assertTrue("Cannot remove file.", blockFile.delete());
      LOG.info("Deliberately removing file " + metadataFile.getName());
      assertTrue("Cannot remove file.", metadataFile.delete());
    }
  }
}
int count = 0;
corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null);
numCorrupt = corruptFileBlocks.size();
while (numCorrupt < 3) {
  Thread.sleep(1000);
  corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null);
  numCorrupt = corruptFileBlocks.size();
  count++;
  if (count > 30) {
    break;
  }
}
// Validate we get all the corrupt files
LOG.info("Namenode has bad files. " + numCorrupt);
assertTrue(numCorrupt == 3);
// test the paging here
FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks.toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
// now get the 2nd and 3rd files that are corrupt
String[] cookie = new String[] { "1" };
Collection<FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", cookie);
FSNamesystem.CorruptFileBlockInfo[] ncfb = nextCorruptFileBlocks.toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
numCorrupt = nextCorruptFileBlocks.size();
assertTrue(numCorrupt == 2);
assertTrue(ncfb[0].block.getBlockName().equalsIgnoreCase(cfb[1].block.getBlockName()));
corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", cookie);
numCorrupt = corruptFileBlocks.size();
assertTrue(numCorrupt == 0);
// Do a listing on a dir which doesn't have any corrupt blocks and
// validate
util.createFiles(fs, "/goodData");
corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/goodData", null);
numCorrupt = corruptFileBlocks.size();
assertTrue(numCorrupt == 0);
util.cleanup(fs, "/corruptData");
util.cleanup(fs, "/goodData");
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
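The cookie array above is how listCorruptFileBlocks pages through results: the namenode reads the starting position from cookieTab[0], returns the next batch, and updates the cookie in place. A sketch of draining all pages with that protocol, assuming a namenode handle like the one in the test above and a test method that declares throws Exception:

// Sketch: page through all corrupt file blocks under /corruptData using the cookie.
String[] pagingCookie = new String[] { null };
int total = 0;
while (true) {
  Collection<FSNamesystem.CorruptFileBlockInfo> batch =
      namenode.getNamesystem().listCorruptFileBlocks("/corruptData", pagingCookie);
  if (batch.isEmpty()) {
    break;
  }
  total += batch.size();
}
LOG.info("Found " + total + " corrupt file blocks in total");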
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by apache.
Class TestListCorruptFileBlocks, method testlistCorruptFileBlocksDFS.
/**
* Test listCorruptFileBlocks in DistributedFileSystem.
*/
@Test(timeout = 300000)
public void testlistCorruptFileBlocksDFS() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
// datanode scans directories
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
FileSystem fs = null;
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DistributedFileSystem dfs = (DistributedFileSystem) fs;
DFSTestUtil util = new DFSTestUtil.Builder().setName("testGetCorruptFiles").setNumFiles(3).setMaxLevels(1).setMaxSize(1024).build();
util.createFiles(fs, "/corruptData");
RemoteIterator<Path> corruptFileBlocks = dfs.listCorruptFileBlocks(new Path("/corruptData"));
int numCorrupt = countPaths(corruptFileBlocks);
assertTrue(numCorrupt == 0);
// delete the blocks
String bpid = cluster.getNamesystem().getBlockPoolId();
// Loop through the data directories per datanode (2)
for (int i = 0; i < 2; i++) {
  File storageDir = cluster.getInstanceStorageDir(0, i);
  File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
  List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
  if (metadataFiles == null) {
    continue;
  }
  for (File metadataFile : metadataFiles) {
    File blockFile = Block.metaToBlockFile(metadataFile);
    LOG.info("Deliberately removing file " + blockFile.getName());
    assertTrue("Cannot remove file.", blockFile.delete());
    LOG.info("Deliberately removing file " + metadataFile.getName());
    assertTrue("Cannot remove file.", metadataFile.delete());
  }
}
int count = 0;
corruptFileBlocks = dfs.listCorruptFileBlocks(new Path("/corruptData"));
numCorrupt = countPaths(corruptFileBlocks);
while (numCorrupt < 3) {
  Thread.sleep(1000);
  corruptFileBlocks = dfs.listCorruptFileBlocks(new Path("/corruptData"));
  numCorrupt = countPaths(corruptFileBlocks);
  count++;
  if (count > 30) {
    break;
  }
}
// Validate we get all the corrupt files
LOG.info("Namenode has bad files. " + numCorrupt);
assertTrue(numCorrupt == 3);
util.cleanup(fs, "/corruptData");
util.cleanup(fs, "/goodData");
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
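countPaths is another TestListCorruptFileBlocks helper that is not shown on this page. A plausible minimal version simply drains the RemoteIterator returned by DistributedFileSystem.listCorruptFileBlocks; the name matches the call sites above, but the body here is an assumption:

// Sketch of a countPaths-style helper: count the entries in a RemoteIterator<Path>.
static int countPaths(RemoteIterator<Path> iter) throws IOException {
  int count = 0;
  while (iter.hasNext()) {
    LOG.info("PATH: " + iter.next().toUri().getPath());
    count++;
  }
  return count;
}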