Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by Apache.
Class TestFsck, method testBlockIdCKDecommission.
/**
 * Test for blockIdCK with datanode decommission.
 */
@Test
public void testBlockIdCKDecommission() throws Exception {
  final short replFactor = 1;
  short numDn = 2;
  final long blockSize = 512;
  boolean checkDecommissionInProgress = false;
  String[] racks = {"/rack1", "/rack2"};
  String[] hosts = {"host1", "host2"};
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
  DistributedFileSystem dfs;
  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(numDn).hosts(hosts).racks(racks).build();
  assertNotNull("Failed Cluster Creation", cluster);
  cluster.waitClusterUp();
  dfs = cluster.getFileSystem();
  assertNotNull("Failed to get FileSystem", dfs);
  DFSTestUtil util = new DFSTestUtil.Builder()
      .setName(getClass().getSimpleName()).setNumFiles(1).build();
  // create files
  final String pathString = "/testfile";
  final Path path = new Path(pathString);
  util.createFile(dfs, path, 1024, replFactor, 1000L);
  util.waitReplication(dfs, path, replFactor);
  StringBuilder sb = new StringBuilder();
  for (LocatedBlock lb : util.getAllBlocks(dfs, path)) {
    sb.append(lb.getBlock().getLocalBlock().getBlockName()).append(" ");
  }
  String[] bIds = sb.toString().split(" ");
  // make sure the datanode that has the replica is fine before decommission
  String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  // decommission the datanode
  FSNamesystem fsn = cluster.getNameNode().getNamesystem();
  BlockManager bm = fsn.getBlockManager();
  ExtendedBlock eb = util.getFirstBlock(dfs, path);
  BlockCollection bc = null;
  try {
    fsn.writeLock();
    BlockInfo bi = bm.getStoredBlock(eb.getLocalBlock());
    bc = fsn.getBlockCollection(bi);
  } finally {
    fsn.writeUnlock();
  }
  DatanodeDescriptor dn = bc.getBlocks()[0].getDatanode(0);
  bm.getDatanodeManager().getDecomManager().startDecommission(dn);
  String dnName = dn.getXferAddr();
  // wait for decommissioning to start
  DatanodeInfo datanodeInfo = null;
  int count = 0;
  do {
    Thread.sleep(2000);
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dnName.equals(info.getXferAddr())) {
        datanodeInfo = info;
      }
    }
    // check the DECOMMISSIONING status only once
    if (!checkDecommissionInProgress && datanodeInfo != null
        && datanodeInfo.isDecommissionInProgress()) {
      String fsckOut = runFsck(conf, 3, true, "/", "-blockId", bIds[0]);
      assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONING_STATUS));
      checkDecommissionInProgress = true;
    }
  } while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
  // once decommission completes, fsck should report DECOMMISSIONED
  String fsckOut = runFsck(conf, 2, true, "/", "-blockId", bIds[0]);
  assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
}
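Each of these fsck tests drives the checker through a runFsck helper that is defined elsewhere in TestFsck and is not shown on this page. The sketch below is only an approximation of such a helper, inferred from how it is called here (path varargs, an expected exit code, and a flag controlling whether that code is asserted); it is not the verbatim Hadoop implementation.

// Sketch only: runs fsck in-process via the DFSck tool and returns its captured output.
// Assumes java.io.ByteArrayOutputStream/PrintStream, org.apache.hadoop.hdfs.tools.DFSck,
// and org.apache.hadoop.util.ToolRunner; the parameter names are assumptions.
static String runFsck(Configuration conf, int expectedErrCode,
    boolean checkErrorCode, String... path) throws Exception {
  ByteArrayOutputStream bStream = new ByteArrayOutputStream();
  PrintStream out = new PrintStream(bStream, true);
  int errCode = ToolRunner.run(new DFSck(conf, out), path);
  if (checkErrorCode) {
    assertEquals(expectedErrCode, errCode);
  }
  out.flush();
  return bStream.toString();
}

With a helper shaped like this, runFsck(conf, 0, true, "/") returns the fsck report for the root path and fails the test if the exit code is not 0.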
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by Apache.
Class TestFsck, method testFsckWithDecommissionedReplicas.
/**
 * Test that blocks on decommissioning hosts are not shown as missing.
 */
@Test
public void testFsckWithDecommissionedReplicas() throws Exception {
  final short replFactor = 1;
  short numDn = 2;
  final long blockSize = 512;
  final long fileSize = 1024;
  boolean checkDecommissionInProgress = false;
  String[] racks = {"/rack1", "/rack2"};
  String[] hosts = {"host1", "host2"};
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
  DistributedFileSystem dfs;
  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(numDn).hosts(hosts).racks(racks).build();
  assertNotNull("Failed Cluster Creation", cluster);
  cluster.waitClusterUp();
  dfs = cluster.getFileSystem();
  assertNotNull("Failed to get FileSystem", dfs);
  DFSTestUtil util = new DFSTestUtil.Builder()
      .setName(getClass().getSimpleName()).setNumFiles(1).build();
  // create files
  final String testFile = "/testfile";
  final Path path = new Path(testFile);
  util.createFile(dfs, path, fileSize, replFactor, 1000L);
  util.waitReplication(dfs, path, replFactor);
  // make sure the datanode that has the replica is fine before decommission
  String outStr = runFsck(conf, 0, true, testFile);
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
  // decommission the datanode
  FSNamesystem fsn = cluster.getNameNode().getNamesystem();
  BlockManager bm = fsn.getBlockManager();
  ExtendedBlock eb = util.getFirstBlock(dfs, path);
  BlockCollection bc = null;
  try {
    fsn.writeLock();
    BlockInfo bi = bm.getStoredBlock(eb.getLocalBlock());
    bc = fsn.getBlockCollection(bi);
  } finally {
    fsn.writeUnlock();
  }
  DatanodeDescriptor dn = bc.getBlocks()[0].getDatanode(0);
  bm.getDatanodeManager().getDecomManager().startDecommission(dn);
  String dnName = dn.getXferAddr();
  // wait for decommissioning to start
  DatanodeInfo datanodeInfo = null;
  int count = 0;
  do {
    Thread.sleep(2000);
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dnName.equals(info.getXferAddr())) {
        datanodeInfo = info;
      }
    }
    // the replica should be reported as healthy (0)
    // instead of corrupt (1) while decommissioning is in progress
    if (!checkDecommissionInProgress && datanodeInfo != null
        && datanodeInfo.isDecommissionInProgress()) {
      String fsckOut = runFsck(conf, 0, true, testFile);
      checkDecommissionInProgress = true;
    }
  } while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
  // the replica status should still be healthy (0) after decommission is done
  String fsckOut = runFsck(conf, 0, true, testFile);
}
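Both decommission tests above poll with Thread.sleep in a do-while loop. If the test class can depend on org.apache.hadoop.test.GenericTestUtils, the wait could instead be expressed with its waitFor utility. The following is a hedged sketch of that alternative, reusing the dfs and dnName variables from the surrounding test and assuming a waitFor overload that accepts a lambda; the 2-second poll interval and 90-second timeout are illustrative values, not taken from the original tests.

// Sketch: wait (polling every 2 s, up to 90 s) for the datanode to finish decommissioning.
GenericTestUtils.waitFor(() -> {
  try {
    for (DatanodeInfo info : dfs.getDataNodeStats()) {
      if (dnName.equals(info.getXferAddr())) {
        return info.isDecommissioned();
      }
    }
  } catch (IOException e) {
    return false; // retry on transient RPC failures
  }
  return false;
}, 2000, 90000);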
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by Apache.
Class TestFsck, method testFsckPermission.
/** Test fsck with permission set on inodes. */
@Test
public void testFsckPermission() throws Exception {
  final DFSTestUtil util = new DFSTestUtil.Builder()
      .setName(getClass().getSimpleName()).setNumFiles(20).build();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
  // Create a cluster with the current user, write some files
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  final MiniDFSCluster c2 = cluster;
  final String dir = "/dfsck";
  final Path dirpath = new Path(dir);
  final FileSystem fs = c2.getFileSystem();
  util.createFiles(fs, dir);
  util.waitReplication(fs, dir, (short) 3);
  fs.setPermission(dirpath, new FsPermission((short) 0700));
  // run DFSck as another user, should fail with a permission issue
  UserGroupInformation fakeUGI = UserGroupInformation.createUserForTesting(
      "ProbablyNotARealUserName", new String[] {"ShangriLa"});
  fakeUGI.doAs(new PrivilegedExceptionAction<Object>() {
    @Override
    public Object run() throws Exception {
      System.out.println(runFsck(conf, -1, true, dir));
      return null;
    }
  });
  // set permission and try DFSck again as the fake user, should succeed
  fs.setPermission(dirpath, new FsPermission((short) 0777));
  fakeUGI.doAs(new PrivilegedExceptionAction<Object>() {
    @Override
    public Object run() throws Exception {
      final String outStr = runFsck(conf, 0, true, dir);
      System.out.println(outStr);
      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
      return null;
    }
  });
  util.cleanup(fs, dir);
}
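Taken together, these examples show the usual DFSTestUtil lifecycle: configure a builder, create files, wait for replication, verify, and clean up. The condensed sketch below restates that pattern in one place; the directory name, file count, and replication factor are placeholder values, and fs/cluster are assumed to be set up as in the tests above.

// Sketch of the DFSTestUtil usage pattern that recurs in these tests.
DFSTestUtil util = new DFSTestUtil.Builder()
    .setName("ExampleUsage")   // prefix for the generated test files
    .setNumFiles(5)            // number of files to create
    .setMaxLevels(1)           // directory depth
    .setMaxSize(512)           // maximum file size in bytes
    .build();
FileSystem fs = cluster.getFileSystem();
util.createFiles(fs, "/exampledir");                  // populate the directory
util.waitReplication(fs, "/exampledir", (short) 3);   // block until replication reaches 3
util.checkFiles(fs, "/exampledir");                   // re-read and verify file contents
util.cleanup(fs, "/exampledir");                      // delete everything that was created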
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by Apache.
Class TestListCorruptFileBlocks, method testListCorruptFilesCorruptedBlock.
/** Check that nn.getCorruptFiles() returns a file that has corrupted blocks. */
@Test(timeout = 300000)
public void testListCorruptFilesCorruptedBlock() throws Exception {
  MiniDFSCluster cluster = null;
  Random random = new Random();
  try {
    Configuration conf = new HdfsConfiguration();
    // datanode scans directories
    conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
    // datanode sends block reports
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
    // Set short retry timeouts so this test runs faster
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
    cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    // create two files with one block each
    DFSTestUtil util = new DFSTestUtil.Builder()
        .setName("testCorruptFilesCorruptedBlock").setNumFiles(2)
        .setMaxLevels(1).setMaxSize(512).build();
    util.createFiles(fs, "/srcdat10");
    // fetch bad file list from namenode. There should be none.
    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles =
        namenode.getNamesystem().listCorruptFileBlocks("/", null);
    assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
        badFiles.size() == 0);
    // Now deliberately corrupt one block
    String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(0, 1);
    File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    assertTrue("data directory does not exist", data_dir.exists());
    List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
    assertTrue("Data directory does not contain any blocks or there was an IO error",
        metaFiles != null && !metaFiles.isEmpty());
    File metaFile = metaFiles.get(0);
    RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
    FileChannel channel = file.getChannel();
    long position = channel.size() - 2;
    int length = 2;
    byte[] buffer = new byte[length];
    random.nextBytes(buffer);
    channel.write(ByteBuffer.wrap(buffer), position);
    file.close();
    LOG.info("Deliberately corrupting file " + metaFile.getName()
        + " at offset " + position + " length " + length);
    // read all files to trigger detection of the corrupted replica
    try {
      util.checkFiles(fs, "/srcdat10");
    } catch (BlockMissingException e) {
      System.out.println("Received BlockMissingException as expected.");
    } catch (IOException e) {
      assertTrue("Corrupted replicas not handled properly. "
          + "Expecting BlockMissingException but received IOException " + e, false);
    }
    // fetch bad file list from namenode. There should be one file.
    badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null);
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
        badFiles.size() == 1);
    util.cleanup(fs, "/srcdat10");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
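The corruption step in this test, and in the safe-mode variant that follows, is the same few lines: overwrite the last two bytes of a block metadata (.meta) file so the replica fails checksum verification on the next read. Factored into a helper it might look like the sketch below; the method name and the length parameter are illustrative, not part of the Hadoop test class.

// Sketch: overwrite the last `length` bytes of a block metadata file with random data,
// which makes the replica's checksum verification fail on the next read.
private static void corruptMetaFileTail(File metaFile, int length) throws IOException {
  Random random = new Random();
  try (RandomAccessFile file = new RandomAccessFile(metaFile, "rw")) {
    FileChannel channel = file.getChannel();
    long position = channel.size() - length;
    byte[] buffer = new byte[length];
    random.nextBytes(buffer);
    channel.write(ByteBuffer.wrap(buffer), position);
  }
}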
Use of org.apache.hadoop.hdfs.DFSTestUtil in project hadoop by Apache.
Class TestListCorruptFileBlocks, method testListCorruptFileBlocksInSafeMode.
/**
 * Check that listCorruptFileBlocks works while the namenode is still in safemode.
 */
@Test(timeout = 300000)
public void testListCorruptFileBlocksInSafeMode() throws Exception {
  MiniDFSCluster cluster = null;
  Random random = new Random();
  try {
    Configuration conf = new HdfsConfiguration();
    // datanode scans directories
    conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
    // datanode sends block reports
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
    // never leave safemode automatically
    conf.setFloat(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 1.5f);
    // start populating repl queues immediately
    conf.setFloat(DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 0f);
    // Set short retry timeouts so this test runs faster
    conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
    cluster = new MiniDFSCluster.Builder(conf).waitSafeMode(false).build();
    cluster.getNameNodeRpc().setSafeMode(
        HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, false);
    FileSystem fs = cluster.getFileSystem();
    // create two files with one block each
    DFSTestUtil util = new DFSTestUtil.Builder()
        .setName("testListCorruptFileBlocksInSafeMode").setNumFiles(2)
        .setMaxLevels(1).setMaxSize(512).build();
    util.createFiles(fs, "/srcdat10");
    // fetch bad file list from namenode. There should be none.
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles =
        cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
    assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
        badFiles.size() == 0);
    // Now deliberately corrupt one block
    File storageDir = cluster.getInstanceStorageDir(0, 0);
    File data_dir = MiniDFSCluster.getFinalizedDir(storageDir,
        cluster.getNamesystem().getBlockPoolId());
    assertTrue("data directory does not exist", data_dir.exists());
    List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
    assertTrue("Data directory does not contain any blocks or there was an IO error",
        metaFiles != null && !metaFiles.isEmpty());
    File metaFile = metaFiles.get(0);
    RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
    FileChannel channel = file.getChannel();
    long position = channel.size() - 2;
    int length = 2;
    byte[] buffer = new byte[length];
    random.nextBytes(buffer);
    channel.write(ByteBuffer.wrap(buffer), position);
    file.close();
    LOG.info("Deliberately corrupting file " + metaFile.getName()
        + " at offset " + position + " length " + length);
    // read all files to trigger detection of the corrupted replica
    try {
      util.checkFiles(fs, "/srcdat10");
    } catch (BlockMissingException e) {
      System.out.println("Received BlockMissingException as expected.");
    } catch (IOException e) {
      assertTrue("Corrupted replicas not handled properly. "
          + "Expecting BlockMissingException but received IOException " + e, false);
    }
    // fetch bad file list from namenode. There should be one file.
    badFiles = cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
        badFiles.size() == 1);
    // restart namenode
    cluster.restartNameNode(0);
    fs = cluster.getFileSystem();
    // wait until replication queues have been initialized
    while (!cluster.getNameNode().namesystem.getBlockManager().isPopulatingReplQueues()) {
      try {
        LOG.info("waiting for replication queues");
        Thread.sleep(1000);
      } catch (InterruptedException ignore) {
      }
    }
    // read all files to trigger detection of the corrupted replica
    try {
      util.checkFiles(fs, "/srcdat10");
    } catch (BlockMissingException e) {
      System.out.println("Received BlockMissingException as expected.");
    } catch (IOException e) {
      assertTrue("Corrupted replicas not handled properly. "
          + "Expecting BlockMissingException but received IOException " + e, false);
    }
    // fetch bad file list from namenode. There should be one file.
    badFiles = cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
        badFiles.size() == 1);
    // check that we are still in safe mode
    assertTrue("Namenode is not in safe mode", cluster.getNameNode().isInSafeMode());
    // now leave safe mode so that we can clean up
    cluster.getNameNodeRpc().setSafeMode(
        HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, false);
    util.cleanup(fs, "/srcdat10");
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
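These two tests read the corrupt-file list through NameNode internals (getNamesystem().listCorruptFileBlocks). For reference, the public FileSystem API exposes a similar query; a brief sketch of using it is below, assuming fs still points at the test cluster and that exactly one corrupt file is expected, as in the assertions above.

// Sketch: list corrupt files through the public FileSystem API instead of FSNamesystem.
RemoteIterator<Path> corrupt = fs.listCorruptFileBlocks(new Path("/srcdat10"));
int corruptCount = 0;
while (corrupt.hasNext()) {
  LOG.info("Corrupt file reported by the NameNode: " + corrupt.next());
  corruptCount++;
}
assertEquals("Expected exactly one corrupt file", 1, corruptCount);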