Use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.
The class TestReconstructStripedBlocks, method testCountLiveReplicas.
/**
 * Make sure the NN can detect the scenario where there is a sufficient
 * number of internal blocks (>= 9 by default) but a data/parity block is
 * still missing.
 */
@Test
public void testCountLiveReplicas() throws Exception {
  final HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
  conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
      StripedFileTestUtil.getDefaultECPolicy().getName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
  cluster.waitActive();
  DistributedFileSystem fs = cluster.getFileSystem();
  try {
    fs.mkdirs(dirPath);
    fs.setErasureCodingPolicy(dirPath,
        StripedFileTestUtil.getDefaultECPolicy().getName());
    DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
    // stop a DN
    LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
    LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
    DatanodeInfo dnToStop = block.getLocations()[0];
    MiniDFSCluster.DataNodeProperties dnProp =
        cluster.stopDataNode(dnToStop.getXferAddr());
    cluster.setDataNodeDead(dnToStop);
    // wait for reconstruction to happen
    DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
    // bring the DN back: 10 internal blocks now
    cluster.restartDataNode(dnProp);
    cluster.waitActive();
    // stop another DN: 9 internal blocks remain, but they cover only 8
    // distinct blocks
    dnToStop = block.getLocations()[1];
    cluster.stopDataNode(dnToStop.getXferAddr());
    cluster.setDataNodeDead(dnToStop);
    // the NameNode can currently track the missing block, but restart it anyway
    cluster.restartNameNode(true);
    for (DataNode dn : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerBlockReport(dn);
    }
    FSNamesystem fsn = cluster.getNamesystem();
    BlockManager bm = fsn.getBlockManager();
    // wait for 3 cycles of the redundancy monitor
    Thread.sleep(3000);
    for (DataNode dn : cluster.getDataNodes()) {
      DataNodeTestUtils.triggerHeartbeat(dn);
    }
    // check whether the NN can detect the missing internal block and finish
    // the reconstruction
    StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
    boolean reconstructed = false;
    for (int i = 0; i < 5; i++) {
      NumberReplicas num = null;
      fsn.readLock();
      try {
        BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory()
            .getINode4Write(filePath.toString()).asFile().getLastBlock();
        num = bm.countNodes(blockInfo);
      } finally {
        fsn.readUnlock();
      }
      if (num.liveReplicas() >= groupSize) {
        reconstructed = true;
        break;
      } else {
        Thread.sleep(1000);
      }
    }
    Assert.assertTrue(reconstructed);
    blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
    block = (LocatedStripedBlock) blks.getLastLocatedBlock();
    BitSet bitSet = new BitSet(groupSize);
    for (byte index : block.getBlockIndices()) {
      bitSet.set(index);
    }
    for (int i = 0; i < groupSize; i++) {
      Assert.assertTrue(bitSet.get(i));
    }
  } finally {
    cluster.shutdown();
  }
}
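The polling loop at the end of this test shows the general pattern: BlockManager state must be read under the FSNamesystem read lock. As a minimal sketch, that check can be factored into a reusable helper; the helper name and parameters below are ours for illustration, not from the source.

// Sketch: poll until a file's last block reports at least `expected` live
// replicas. `waitForLiveReplicas` is a hypothetical name; the types and
// calls are the same ones used in the test above.
private static boolean waitForLiveReplicas(MiniDFSCluster cluster,
    FSNamesystem fsn, BlockManager bm, Path filePath, int expected,
    int attempts) throws Exception {
  for (int i = 0; i < attempts; i++) {
    NumberReplicas num;
    // BlockManager must be queried under the namesystem read lock
    fsn.readLock();
    try {
      BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory()
          .getINode4Write(filePath.toString()).asFile().getLastBlock();
      num = bm.countNodes(blockInfo);
    } finally {
      fsn.readUnlock();
    }
    if (num.liveReplicas() >= expected) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}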
Use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.
The class NamenodeFsck, method blockIdCK.
/**
 * Check block information given a blockId number.
 */
public void blockIdCK(String blockId) {
  if (blockId == null) {
    out.println("Please provide valid blockId!");
    return;
  }
  try {
    // get blockInfo
    Block block = new Block(Block.getBlockId(blockId));
    // find which file this block belongs to
    BlockInfo blockInfo = blockManager.getStoredBlock(block);
    if (blockInfo == null) {
      out.println("Block " + blockId + " " + NONEXISTENT_STATUS);
      LOG.warn("Block " + blockId + " " + NONEXISTENT_STATUS);
      return;
    }
    final INodeFile iNode = namenode.getNamesystem().getBlockCollection(blockInfo);
    NumberReplicas numberReplicas = blockManager.countNodes(blockInfo);
    out.println("Block Id: " + blockId);
    out.println("Block belongs to: " + iNode.getFullPathName());
    out.println("No. of Expected Replica: " +
        blockManager.getExpectedRedundancyNum(blockInfo));
    out.println("No. of live Replica: " + numberReplicas.liveReplicas());
    out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
    out.println("No. of stale Replica: " +
        numberReplicas.replicasOnStaleNodes());
    out.println("No. of decommissioned Replica: " +
        numberReplicas.decommissioned());
    out.println("No. of decommissioning Replica: " +
        numberReplicas.decommissioning());
    if (this.showMaintenanceState) {
      out.println("No. of entering maintenance Replica: " +
          numberReplicas.liveEnteringMaintenanceReplicas());
      out.println("No. of in maintenance Replica: " +
          numberReplicas.maintenanceNotForReadReplicas());
    }
    out.println("No. of corrupted Replica: " + numberReplicas.corruptReplicas());
    // record datanodes that have a corrupted replica of the block
    Collection<DatanodeDescriptor> corruptionRecord = null;
    if (blockManager.getCorruptReplicas(block) != null) {
      corruptionRecord = blockManager.getCorruptReplicas(block);
    }
    // report the block's replica status on each datanode
    for (int idx = (blockInfo.numNodes() - 1); idx >= 0; idx--) {
      DatanodeDescriptor dn = blockInfo.getDatanode(idx);
      out.print("Block replica on datanode/rack: " + dn.getHostName() +
          dn.getNetworkLocation() + " ");
      if (corruptionRecord != null && corruptionRecord.contains(dn)) {
        out.print(CORRUPT_STATUS + "\t ReasonCode: " +
            blockManager.getCorruptReason(block, dn));
      } else if (dn.isDecommissioned()) {
        out.print(DECOMMISSIONED_STATUS);
      } else if (dn.isDecommissionInProgress()) {
        out.print(DECOMMISSIONING_STATUS);
      } else if (this.showMaintenanceState && dn.isEnteringMaintenance()) {
        out.print(ENTERING_MAINTENANCE_STATUS);
      } else if (this.showMaintenanceState && dn.isInMaintenance()) {
        out.print(IN_MAINTENANCE_STATUS);
      } else {
        out.print(HEALTHY_STATUS);
      }
      out.print("\n");
    }
  } catch (Exception e) {
    String errMsg = "Fsck on blockId '" + blockId + "'";
    LOG.warn(errMsg, e);
    out.println(e.getMessage());
    out.print("\n\n" + errMsg);
  }
}
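The method prints one line per replica-state counter. As a compact sketch, the same breakdown can be produced as a single string using only the NumberReplicas getters that appear above; the helper name is hypothetical, for illustration.

// Sketch (hypothetical helper, not from the source): summarize a block's
// replica states in one line, with the same getters blockIdCK uses.
static String summarizeReplicas(BlockManager blockManager, BlockInfo blockInfo) {
  NumberReplicas nr = blockManager.countNodes(blockInfo);
  return String.format(
      "live=%d excess=%d stale=%d corrupt=%d decommissioned=%d decommissioning=%d",
      nr.liveReplicas(), nr.excessReplicas(), nr.replicasOnStaleNodes(),
      nr.corruptReplicas(), nr.decommissioned(), nr.decommissioning());
}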
Use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.
The class TestAddStripedBlockInFBR, method testAddBlockInFullBlockReport.
@Test
public void testAddBlockInFullBlockReport() throws Exception {
  BlockManager spy = Mockito.spy(cluster.getNamesystem().getBlockManager());
  // let the NN ignore one DataNode's incremental block reports (IBRs)
  final DataNode dn = cluster.getDataNodes().get(0);
  final DatanodeID datanodeID = dn.getDatanodeId();
  Mockito.doNothing().when(spy)
      .processIncrementalBlockReport(Mockito.eq(datanodeID), Mockito.any());
  Whitebox.setInternalState(cluster.getNamesystem(), "blockManager", spy);
  final Path ecDir = new Path("/ec");
  final Path repDir = new Path("/rep");
  dfs.mkdirs(ecDir);
  dfs.mkdirs(repDir);
  dfs.getClient().setErasureCodingPolicy(ecDir.toString(),
      StripedFileTestUtil.getDefaultECPolicy().getName());
  // create several non-EC files and one EC file
  final Path[] repFiles = new Path[groupSize];
  for (int i = 0; i < groupSize; i++) {
    repFiles[i] = new Path(repDir, "f" + i);
    DFSTestUtil.createFile(dfs, repFiles[i], 1L, (short) 3, 0L);
  }
  final Path ecFile = new Path(ecDir, "f");
  DFSTestUtil.createFile(dfs, ecFile, cellSize * dataBlocks, (short) 1, 0L);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {

    @Override
    public Boolean get() {
      try {
        // trigger the DNs' full block reports (FBRs), which add the
        // block-DN mapping
        cluster.triggerBlockReports();
        // make sure the NN has the correct block-DN mapping
        BlockInfoStriped blockInfo = (BlockInfoStriped) cluster.getNamesystem()
            .getFSDirectory().getINode(ecFile.toString()).asFile().getLastBlock();
        NumberReplicas nr = spy.countNodes(blockInfo);
        return nr.excessReplicas() == 0 && nr.liveReplicas() == groupSize;
      } catch (Exception ignored) {
        // ignore and retry
      }
      return false;
    }
  }, 3000, 60000);
}
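On Java 8 and later the anonymous Supplier can be written as a lambda. A sketch of the equivalent wait, assuming the same cluster, spy, ecFile, and groupSize fields as the test above:

// Equivalent polling loop written as a lambda; behavior matches the
// anonymous Supplier in the test.
GenericTestUtils.waitFor(() -> {
  try {
    cluster.triggerBlockReports();
    BlockInfoStriped blockInfo = (BlockInfoStriped) cluster.getNamesystem()
        .getFSDirectory().getINode(ecFile.toString()).asFile().getLastBlock();
    NumberReplicas nr = spy.countNodes(blockInfo);
    return nr.excessReplicas() == 0 && nr.liveReplicas() == groupSize;
  } catch (Exception ignored) {
    return false;
  }
}, 3000, 60000);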
Use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.
The class NamenodeFsck, method collectBlocksSummary.
private void collectBlocksSummary(String parent, HdfsFileStatus file,
    Result res, LocatedBlocks blocks) throws IOException {
  String path = file.getFullName(parent);
  boolean isOpen = blocks.isUnderConstruction();
  if (isOpen && !showOpenFiles) {
    return;
  }
  int missing = 0;
  int corrupt = 0;
  long missize = 0;
  long corruptSize = 0;
  int underReplicatedPerFile = 0;
  int misReplicatedPerFile = 0;
  StringBuilder report = new StringBuilder();
  int blockNumber = 0;
  final LocatedBlock lastBlock = blocks.getLastLocatedBlock();
  for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
    ExtendedBlock block = lBlk.getBlock();
    if (!blocks.isLastBlockComplete() && lastBlock != null &&
        lastBlock.getBlock().equals(block)) {
      // this block is under construction
      continue;
    }
    final BlockInfo storedBlock =
        blockManager.getStoredBlock(block.getLocalBlock());
    final int minReplication = blockManager.getMinStorageNum(storedBlock);
    // count decommissioned / decommissioning replicas
    NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
    int decommissionedReplicas = numberReplicas.decommissioned();
    int decommissioningReplicas = numberReplicas.decommissioning();
    int enteringMaintenanceReplicas =
        numberReplicas.liveEnteringMaintenanceReplicas();
    int inMaintenanceReplicas =
        numberReplicas.maintenanceNotForReadReplicas();
    res.decommissionedReplicas += decommissionedReplicas;
    res.decommissioningReplicas += decommissioningReplicas;
    res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
    res.inMaintenanceReplicas += inMaintenanceReplicas;
    // count total replicas
    int liveReplicas = numberReplicas.liveReplicas();
    int totalReplicasPerBlock = liveReplicas + decommissionedReplicas +
        decommissioningReplicas + enteringMaintenanceReplicas +
        inMaintenanceReplicas;
    res.totalReplicas += totalReplicasPerBlock;
    boolean isMissing;
    if (storedBlock.isStriped()) {
      isMissing = totalReplicasPerBlock < minReplication;
    } else {
      isMissing = totalReplicasPerBlock == 0;
    }
    // count expected replicas
    short targetFileReplication;
    if (file.getErasureCodingPolicy() != null) {
      assert storedBlock instanceof BlockInfoStriped;
      targetFileReplication =
          ((BlockInfoStriped) storedBlock).getRealTotalBlockNum();
    } else {
      targetFileReplication = file.getReplication();
    }
    res.numExpectedReplicas += targetFileReplication;
    // count blocks below the minimum replication
    if (totalReplicasPerBlock < minReplication) {
      res.numUnderMinReplicatedBlocks++;
    }
    // count excess replicas / over-replicated blocks
    if (liveReplicas > targetFileReplication) {
      res.excessiveReplicas += (liveReplicas - targetFileReplication);
      res.numOverReplicatedBlocks += 1;
    }
    // count corrupt blocks
    boolean isCorrupt = lBlk.isCorrupt();
    if (isCorrupt) {
      res.addCorrupt(block.getNumBytes());
      corrupt++;
      corruptSize += block.getNumBytes();
      out.print("\n" + path + ": CORRUPT blockpool " +
          block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
    }
    // count minimally replicated blocks
    if (totalReplicasPerBlock >= minReplication) {
      res.numMinReplicatedBlocks++;
    }
    // count missing replicas / under-replicated blocks
    if (totalReplicasPerBlock < targetFileReplication && !isMissing) {
      res.missingReplicas += (targetFileReplication - totalReplicasPerBlock);
      res.numUnderReplicatedBlocks += 1;
      underReplicatedPerFile++;
      if (!showFiles) {
        out.print("\n" + path + ": ");
      }
      out.println(" Under replicated " + block + ". Target Replicas is " +
          targetFileReplication + " but found " + liveReplicas +
          " live replica(s), " + decommissionedReplicas +
          " decommissioned replica(s), " + decommissioningReplicas +
          " decommissioning replica(s)" +
          (this.showMaintenanceState ? (", " + enteringMaintenanceReplicas +
          " entering maintenance replica(s) and " + inMaintenanceReplicas +
          " in maintenance replica(s).") : "."));
    }
    // count mis-replicated blocks
    BlockPlacementStatus blockPlacementStatus =
        bpPolicies.getPolicy(lBlk.getBlockType())
            .verifyBlockPlacement(lBlk.getLocations(), targetFileReplication);
    if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
      res.numMisReplicatedBlocks++;
      misReplicatedPerFile++;
      if (!showFiles) {
        if (underReplicatedPerFile == 0) {
          out.println();
        }
        out.print(path + ": ");
      }
      out.println(" Replica placement policy is violated for " + block +
          ". " + blockPlacementStatus.getErrorDescription());
    }
    // storage-type summary
    if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
      countStorageTypeSummary(file, lBlk);
    }
    // report
    String blkName = block.toString();
    report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
    if (isMissing && !isCorrupt) {
      // If the block is corrupt, all its available replicas are corrupt in
      // the replication case, and in the EC case the block group is
      // unrecoverable due to some corrupt internal blocks. We don't mark it
      // as missing then, because those available replicas/internal blocks
      // might still be accessible: the block might have been incorrectly
      // marked as corrupt by client machines.
      report.append(" MISSING!");
      res.addMissing(blkName, block.getNumBytes());
      missing++;
      missize += block.getNumBytes();
      if (storedBlock.isStriped()) {
        report.append(" Live_repl=" + liveReplicas);
        String info = getReplicaInfo(storedBlock);
        if (!info.isEmpty()) {
          report.append(" ").append(info);
        }
      }
    } else {
      report.append(" Live_repl=" + liveReplicas);
      String info = getReplicaInfo(storedBlock);
      if (!info.isEmpty()) {
        report.append(" ").append(info);
      }
    }
    report.append('\n');
    blockNumber++;
  }
  // display under-construction block info
  if (!blocks.isLastBlockComplete() && lastBlock != null) {
    ExtendedBlock block = lastBlock.getBlock();
    String blkName = block.toString();
    BlockInfo storedBlock =
        blockManager.getStoredBlock(block.getLocalBlock());
    DatanodeStorageInfo[] storages = storedBlock
        .getUnderConstructionFeature().getExpectedStorageLocations();
    report.append('\n');
    report.append("Under Construction Block:\n");
    report.append(blockNumber).append(". ").append(blkName);
    report.append(" len=").append(block.getNumBytes());
    report.append(" Expected_repl=" + storages.length);
    String info = getReplicaInfo(storedBlock);
    if (!info.isEmpty()) {
      report.append(" ").append(info);
    }
  }
  // count corrupt files & move or delete them if requested
  if ((missing > 0) || (corrupt > 0)) {
    if (!showFiles) {
      if (missing > 0) {
        out.print("\n" + path + ": MISSING " + missing +
            " blocks of total size " + missize + " B.");
      }
      if (corrupt > 0) {
        out.print("\n" + path + ": CORRUPT " + corrupt +
            " blocks of total size " + corruptSize + " B.");
      }
    }
    res.corruptFiles++;
    if (isOpen) {
      LOG.info("Fsck: ignoring open file " + path);
    } else {
      if (doMove) {
        copyBlocksToLostFound(parent, file, blocks);
      }
      if (doDelete) {
        deleteCorruptedFile(path);
      }
    }
  }
  if (showFiles) {
    if (missing > 0 || corrupt > 0) {
      if (missing > 0) {
        out.print(" MISSING " + missing + " blocks of total size " +
            missize + " B\n");
      }
      if (corrupt > 0) {
        out.print(" CORRUPT " + corrupt + " blocks of total size " +
            corruptSize + " B\n");
      }
    } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
      out.print(" OK\n");
    }
    if (showBlocks) {
      out.print(report + "\n");
    }
  }
}
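The replica accounting in this long method reduces to a small amount of arithmetic. As a condensed sketch (hypothetical helper; the logic mirrors the method body above): every replica that is not corrupt or excess counts toward the total, and "missing" means below the minimum storage number for striped blocks but zero replicas for replicated ones.

// Sketch: the missing-block test distilled from collectBlocksSummary.
static boolean isBlockMissing(BlockManager blockManager, BlockInfo storedBlock) {
  NumberReplicas nr = blockManager.countNodes(storedBlock);
  // live + decommissioned + decommissioning + maintenance states
  int total = nr.liveReplicas() + nr.decommissioned() + nr.decommissioning()
      + nr.liveEnteringMaintenanceReplicas() + nr.maintenanceNotForReadReplicas();
  return storedBlock.isStriped()
      ? total < blockManager.getMinStorageNum(storedBlock)
      : total == 0;
}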
Use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.
The class TestReadOnlySharedStorage, method validateNumberReplicas.
private void validateNumberReplicas(int expectedReplicas) throws IOException {
  NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
  assertThat(numberReplicas.liveReplicas(), is(expectedReplicas));
  assertThat(numberReplicas.excessReplicas(), is(0));
  assertThat(numberReplicas.corruptReplicas(), is(0));
  assertThat(numberReplicas.decommissionedAndDecommissioning(), is(0));
  assertThat(numberReplicas.replicasOnStaleNodes(), is(0));
  BlockManagerTestUtil.updateState(blockManager);
  assertThat(blockManager.getUnderReplicatedBlocksCount(), is(0L));
  assertThat(blockManager.getExcessBlocksCount(), is(0L));
}
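For completeness: the assertThat/is assertion style used here relies on JUnit 4 plus Hamcrest. A sketch of the static imports this helper assumes (inferred from the assertion style; they are not shown in the snippet):

// Imports assumed by the helper above.
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;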