use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.
the class TestDataNodeErasureCodingMetrics method getComputedDatanodeWork.
/**
 * Polls the NameNode's BlockManager until it reports a positive amount of
 * computed datanode work, or until the attempt budget is used up.
 *
 * @return the last work count returned by
 *         BlockManagerTestUtil.getComputedDatanodeWork; 0 if no attempt
 *         produced any work
 */
private int getComputedDatanodeWork() throws IOException, InterruptedException {
  final BlockManager blockManager = cluster.getNamesystem().getBlockManager();
  int computed = 0;
  int attemptsLeft = 20;
  // Giving a grace period to compute datanode work: up to 20 attempts,
  // pausing 500 ms after every attempt that found nothing.
  for (; attemptsLeft > 0; attemptsLeft--) {
    computed = BlockManagerTestUtil.getComputedDatanodeWork(blockManager);
    if (computed > 0) {
      // Leave without decrementing so the log shows the attempts remaining.
      break;
    }
    Thread.sleep(500);
  }
  LOG.info("Computed datanode work: " + computed + ", retries: " + attemptsLeft);
  return computed;
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.
the class TestDataNodeVolumeFailure method testVolumeFailure.
/*
 * Verify the number of blocks and files are correct after volume failure,
 * and that we can replicate to both datanodes even after a single volume
 * failure if the configuration parameter allows this.
 *
 * Outline: create and replicate a file, simulate a failed volume by deleting
 * the blocks under data3's finalized directory and marking it read-only,
 * force access to the blocks, wait for the datanode to report exactly one
 * failed storage location, push that to the NameNode via a heartbeat, then
 * verify block/file counts and create a second file at full replication.
 */
@Test(timeout = 120000)
public void testVolumeFailure() throws Exception {
System.out.println("Data dir: is " + dataDir.getPath());
// Data dir structure is dataDir/data[1-4]/[current,tmp...]
// data1,2 is for datanode 1, data2,3 - datanode2
// NOTE(review): "data2,3" above is presumably a typo for "data3,4", since
// data3 is failed below and the second datanode is the one checked - confirm.
String filename = "/test.txt";
Path filePath = new Path(filename);
// we use only small number of blocks to avoid creating subdirs in the data dir..
int filesize = block_size * blocks_num;
DFSTestUtil.createFile(fs, filePath, filesize, repl, 1L);
DFSTestUtil.waitReplication(fs, filePath, repl);
System.out.println("file " + filename + "(size " + filesize + ") is created and replicated");
// fail the volume
// delete/make non-writable one of the directories (failed volume)
data_fail = new File(dataDir, "data3");
failedDir = MiniDFSCluster.getFinalizedDir(data_fail, cluster.getNamesystem().getBlockPoolId());
// Abort the test if the finalized dir exists but its blocks cannot be removed.
if (failedDir.exists() && //!FileUtil.fullyDelete(failedDir)
!deteteBlocks(failedDir)) {
throw new IOException("Could not delete hdfs directory '" + failedDir + "'");
}
// Mark both the volume root and its finalized dir read-only; the boolean
// results of setReadOnly() are not checked here.
data_fail.setReadOnly();
failedDir.setReadOnly();
System.out.println("Deleteing " + failedDir.getPath() + "; exist=" + failedDir.exists());
// access all the blocks on the "failed" DataNode,
// we need to make sure that the "failed" volume is being accessed -
// and that will cause failure, blocks removal, "emergency" block report
triggerFailure(filename, filesize);
// DN eventually have latest volume failure information for next heartbeat
final DataNode dn = cluster.getDataNodes().get(1);
// Poll every 10 ms, for at most 30 s, until the datanode's failure summary
// lists exactly one failed storage location.
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
final VolumeFailureSummary summary = dn.getFSDataset().getVolumeFailureSummary();
return summary != null && summary.getFailedStorageLocations() != null && summary.getFailedStorageLocations().length == 1;
}
}, 10, 30 * 1000);
// trigger DN to send heartbeat
DataNodeTestUtils.triggerHeartbeat(dn);
final BlockManager bm = cluster.getNamesystem().getBlockManager();
// trigger NN to handle the heartbeat
BlockManagerTestUtil.checkHeartbeat(bm);
// NN now should have latest volume failure
assertEquals(1, cluster.getNamesystem().getVolumeFailuresTotal());
// verify number of blocks and files...
verify(filename, filesize);
// create another file (with one volume failed).
System.out.println("creating file test1.txt");
Path fileName1 = new Path("/test1.txt");
DFSTestUtil.createFile(fs, fileName1, filesize, repl, 1L);
// should be able to replicate to both nodes (2 DN, repl=2)
DFSTestUtil.waitReplication(fs, fileName1, repl);
System.out.println("file " + fileName1.getName() + " is created and replicated");
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.
the class TestReconstructStripedBlocks method testCountLiveReplicas.
/**
 * make sure the NN can detect the scenario where there are enough number of
 * internal blocks (>=9 by default) but there is still missing data/parity
 * block.
 *
 * The test stops one datanode and waits for reconstruction, restarts it,
 * stops a second datanode, restarts the NameNode, and then checks that the
 * last block group ends up with {@code groupSize} live replicas covering
 * every block index in [0, groupSize).
 */
@Test
public void testCountLiveReplicas() throws Exception {
  final HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
  conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
  try {
    // Everything after the cluster is built runs inside the try so that a
    // failure in waitActive()/getFileSystem() still shuts the cluster down.
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    fs.mkdirs(dirPath);
    fs.setErasureCodingPolicy(dirPath, StripedFileTestUtil.getDefaultECPolicy().getName());
    DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
    // stop a dn
    LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
    LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
    DatanodeInfo dnToStop = block.getLocations()[0];
    MiniDFSCluster.DataNodeProperties dnProp = cluster.stopDataNode(dnToStop.getXferAddr());
    cluster.setDataNodeDead(dnToStop);
    // wait for reconstruction to happen
    DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
    // bring the dn back: 10 internal blocks now
    cluster.restartDataNode(dnProp);
    cluster.waitActive();
    // stop another dn: 9 internal blocks, but only cover 8 real one
    dnToStop = block.getLocations()[1];
    cluster.stopDataNode(dnToStop.getXferAddr());
    cluster.setDataNodeDead(dnToStop);
    // currently namenode is able to track the missing block. but restart NN
    cluster.restartNameNode(true);
    for (DataNode dn : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerBlockReport(dn);
    }
    // Fetched after the restart, so these refer to the new NameNode instance.
    FSNamesystem fsn = cluster.getNamesystem();
    BlockManager bm = fsn.getBlockManager();
    // wait 3 running cycles of redundancy monitor
    Thread.sleep(3000);
    for (DataNode dn : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerHeartbeat(dn);
    }
    // check if NN can detect the missing internal block and finish the
    // reconstruction
    StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
    boolean reconstructed = false;
    // Up to 5 checks, one second apart, for the live replica count to recover.
    for (int i = 0; i < 5; i++) {
      NumberReplicas num = null;
      fsn.readLock();
      try {
        // Read the directory through the same namesystem whose lock is held.
        BlockInfo blockInfo = fsn.getFSDirectory().getINode4Write(filePath.toString()).asFile().getLastBlock();
        num = bm.countNodes(blockInfo);
      } finally {
        fsn.readUnlock();
      }
      if (num.liveReplicas() >= groupSize) {
        reconstructed = true;
        break;
      } else {
        Thread.sleep(1000);
      }
    }
    Assert.assertTrue(reconstructed);
    // Every block index in [0, groupSize) must be reported for the last group.
    blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
    block = (LocatedStripedBlock) blks.getLastLocatedBlock();
    BitSet bitSet = new BitSet(groupSize);
    for (byte index : block.getBlockIndices()) {
      bitSet.set(index);
    }
    for (int i = 0; i < groupSize; i++) {
      Assert.assertTrue(bitSet.get(i));
    }
  } finally {
    cluster.shutdown();
  }
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.
the class TestReconstructStripedBlocks method test2RecoveryTasksForSameBlockGroup.
/**
 * Checks that losing a second internal block of a striped block group does
 * not schedule a second reconstruction task while the first one is still
 * pending: both the to-be-erasure-coded count and the pending reconstruction
 * count are expected to stay at 1.
 */
@Test
public void test2RecoveryTasksForSameBlockGroup() throws Exception {
  final String ecPolicyName = StripedFileTestUtil.getDefaultECPolicy().getName();
  Configuration configuration = new HdfsConfiguration();
  configuration.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1000);
  configuration.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1000);
  configuration.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  configuration.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, ecPolicyName);
  cluster = new MiniDFSCluster.Builder(configuration).numDataNodes(groupSize + 2).build();
  try {
    cluster.waitActive();
    DistributedFileSystem dfs = cluster.getFileSystem();
    BlockManager blockManager = cluster.getNamesystem().getBlockManager();
    dfs.getClient().setErasureCodingPolicy("/", ecPolicyName);

    // Write one full stripe of zero bytes: dataBlocks * blockSize.
    int length = dataBlocks * blockSize;
    Path target = new Path("/test2RecoveryTasksForSameBlockGroup");
    final byte[] zeros = new byte[length];
    DFSTestUtil.writeFile(dfs, target, zeros);

    LocatedBlocks located = dfs.getClient().getLocatedBlocks(target.toString(), 0);
    LocatedStripedBlock blockGroup = (LocatedStripedBlock) located.get(0);
    LocatedBlock[] internalBlocks =
        StripedBlockUtil.parseStripedBlockGroup(blockGroup, cellSize, dataBlocks, parityBlocks);

    // Nothing is scheduled or pending before any datanode is lost.
    assertEquals(0, getNumberOfBlocksToBeErasureCoded(cluster));
    assertEquals(0, blockManager.getPendingReconstructionBlocksCount());

    // missing 1 block, so 1 task should be scheduled
    DatanodeInfo firstLost = internalBlocks[0].getLocations()[0];
    cluster.stopDataNode(firstLost.getName());
    cluster.setDataNodeDead(firstLost);
    BlockManagerTestUtil.getComputedDatanodeWork(blockManager);
    assertEquals(1, getNumberOfBlocksToBeErasureCoded(cluster));
    assertEquals(1, blockManager.getPendingReconstructionBlocksCount());

    // missing another block, but no new task should be scheduled because
    // previous task isn't finished.
    DatanodeInfo secondLost = internalBlocks[1].getLocations()[0];
    cluster.stopDataNode(secondLost.getName());
    cluster.setDataNodeDead(secondLost);
    BlockManagerTestUtil.getComputedDatanodeWork(blockManager);
    assertEquals(1, getNumberOfBlocksToBeErasureCoded(cluster));
    assertEquals(1, blockManager.getPendingReconstructionBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.
the class NamenodeWebHdfsMethods method chooseDatanode.
/**
 * Chooses the datanode that should serve a WebHDFS operation.
 *
 * <ul>
 * <li>CREATE: if {@code remoteAddr} resolves to a known datanode, ask the
 * block manager for a target near that node and return the first one.</li>
 * <li>OPEN, GETFILECHECKSUM, APPEND: return the best node among the
 * locations of the block at the open offset (OPEN) or at the last byte
 * of the file (other ops), when the file is non-empty.</li>
 * <li>Otherwise, or when the above yields nothing: a random node from the
 * network topology that is not excluded.</li>
 * </ul>
 *
 * @param namenode the NameNode handling the request
 * @param path path of the file the operation targets
 * @param op the WebHDFS operation
 * @param openOffset read offset; only checked and used for OPEN
 * @param blocksize block size passed to target selection for CREATE
 * @param excludeDatanodes datanodes to avoid, as {@code host} or
 *        {@code host:port} entries split by
 *        StringUtils.getTrimmedStringCollection; may be null
 * @param remoteAddr address of the client, used to look up a local datanode
 * @return the chosen datanode; NOTE(review): presumably null when the random
 *         choice finds no eligible node - confirm against NetworkTopology
 * @throws FileNotFoundException if the file does not exist for a read/append
 * @throws IOException if the namesystem is not available yet or the OPEN
 *         offset is outside the file
 */
@VisibleForTesting
static DatanodeInfo chooseDatanode(final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset, final long blocksize, final String excludeDatanodes, final String remoteAddr) throws IOException {
  FSNamesystem fsn = namenode.getNamesystem();
  if (fsn == null) {
    throw new IOException("Namesystem has not been intialized yet.");
  }
  final BlockManager bm = fsn.getBlockManager();
  HashSet<Node> excludes = new HashSet<Node>();
  if (excludeDatanodes != null) {
    for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
      final int idx = host.indexOf(":");
      final Node excluded;
      if (idx != -1) {
        excluded = bm.getDatanodeManager().getDatanodeByXferAddr(host.substring(0, idx), Integer.parseInt(host.substring(idx + 1)));
      } else {
        excluded = bm.getDatanodeManager().getDatanodeByHost(host);
      }
      // The lookup can come back null for a datanode the NameNode does not
      // know (the CREATE branch below null-checks the same kind of lookup).
      // Skip such entries so that a null never reaches the exclude set used
      // by target selection.
      if (excluded != null) {
        excludes.add(excluded);
      }
    }
  }
  if (op == PutOpParam.Op.CREATE) {
    //choose a datanode near to client
    final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(remoteAddr);
    if (clientNode != null) {
      final DatanodeStorageInfo[] storages = bm.chooseTarget4WebHDFS(path, clientNode, excludes, blocksize);
      if (storages.length > 0) {
        return storages[0].getDatanodeDescriptor();
      }
    }
  } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM || op == PostOpParam.Op.APPEND) {
    //choose a datanode containing a replica
    final NamenodeProtocols np = getRPCServer(namenode);
    final HdfsFileStatus status = np.getFileInfo(path);
    if (status == null) {
      throw new FileNotFoundException("File " + path + " not found.");
    }
    final long len = status.getLen();
    if (op == GetOpParam.Op.OPEN) {
      // Offset 0 is accepted for an empty file; otherwise it must be < len.
      if (openOffset < 0L || (openOffset >= len && len > 0)) {
        throw new IOException("Offset=" + openOffset + " out of the range [0, " + len + "); " + op + ", path=" + path);
      }
    }
    if (len > 0) {
      // Non-OPEN ops look at the block holding the last byte of the file.
      final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
      final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
      final int count = locations.locatedBlockCount();
      if (count > 0) {
        return bestNode(locations.get(0).getLocations(), excludes);
      }
    }
  }
  // Fallback: any non-excluded node in the cluster topology.
  return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT, excludes);
}
Aggregations