use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.
the class NamenodeFsck method collectBlocksSummary.
/**
 * Walks every located block of one file, folds per-block replica statistics
 * into {@code res}, and prints the per-file fsck findings to {@code out}.
 *
 * <p>A file whose last block is not complete has that block excluded from
 * the statistics; it is reported separately in an "Under Construction Block"
 * section of the per-file block report. Open files are skipped entirely
 * unless {@code showOpenFiles} is set.
 *
 * <p>If at least one block is missing or corrupt the file is counted as a
 * corrupt file and, when the file is not open, the configured move/delete
 * actions are applied.
 *
 * @param parent path of the directory containing the file; combined with
 *               {@code file} to build the full path used in the output
 * @param file   status of the file being checked
 * @param res    accumulator that receives the replica/block counters
 * @param blocks located blocks of the file
 * @throws IOException if one of the underlying block-manager or
 *                     move/delete helper calls fails
 */
private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res, LocatedBlocks blocks) throws IOException {
  String path = file.getFullName(parent);
  boolean isOpen = blocks.isUnderConstruction();
  if (isOpen && !showOpenFiles) {
    return;
  }
  // Per-file tallies; the cluster-wide totals live in res.
  int missing = 0;
  int corrupt = 0;
  long missize = 0;
  long corruptSize = 0;
  int underReplicatedPerFile = 0;
  int misReplicatedPerFile = 0;
  StringBuilder report = new StringBuilder();
  int blockNumber = 0;
  final LocatedBlock lastBlock = blocks.getLastLocatedBlock();
  for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
    ExtendedBlock block = lBlk.getBlock();
    if (!blocks.isLastBlockComplete() && lastBlock != null && lastBlock.getBlock().equals(block)) {
      // it is under construction; reported after the loop instead
      continue;
    }
    final BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
    final int minReplication = blockManager.getMinStorageNum(storedBlock);
    // count decommissionedReplicas / decommissioningReplicas
    NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
    int decommissionedReplicas = numberReplicas.decommissioned();
    int decommissioningReplicas = numberReplicas.decommissioning();
    int enteringMaintenanceReplicas = numberReplicas.liveEnteringMaintenanceReplicas();
    int inMaintenanceReplicas = numberReplicas.maintenanceNotForReadReplicas();
    res.decommissionedReplicas += decommissionedReplicas;
    res.decommissioningReplicas += decommissioningReplicas;
    res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
    res.inMaintenanceReplicas += inMaintenanceReplicas;
    // count total replicas: live plus every decommission/maintenance state
    int liveReplicas = numberReplicas.liveReplicas();
    int totalReplicasPerBlock = liveReplicas + decommissionedReplicas + decommissioningReplicas + enteringMaintenanceReplicas + inMaintenanceReplicas;
    res.totalReplicas += totalReplicasPerBlock;
    // A striped block is missing once it drops below the minimum storage
    // count; a non-striped block only when no replica is left at all.
    boolean isMissing;
    if (storedBlock.isStriped()) {
      isMissing = totalReplicasPerBlock < minReplication;
    } else {
      isMissing = totalReplicasPerBlock == 0;
    }
    // count expected replicas
    short targetFileReplication;
    if (file.getErasureCodingPolicy() != null) {
      assert storedBlock instanceof BlockInfoStriped;
      targetFileReplication = ((BlockInfoStriped) storedBlock).getRealTotalBlockNum();
    } else {
      targetFileReplication = file.getReplication();
    }
    res.numExpectedReplicas += targetFileReplication;
    // count under min repl'd blocks
    if (totalReplicasPerBlock < minReplication) {
      res.numUnderMinReplicatedBlocks++;
    }
    // count excessive Replicas / over replicated blocks
    if (liveReplicas > targetFileReplication) {
      res.excessiveReplicas += (liveReplicas - targetFileReplication);
      res.numOverReplicatedBlocks += 1;
    }
    // count corrupt blocks
    boolean isCorrupt = lBlk.isCorrupt();
    if (isCorrupt) {
      res.addCorrupt(block.getNumBytes());
      corrupt++;
      corruptSize += block.getNumBytes();
      out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
    }
    // count minimally replicated blocks
    if (totalReplicasPerBlock >= minReplication) {
      res.numMinReplicatedBlocks++;
    }
    // count missing replicas / under replicated blocks
    if (totalReplicasPerBlock < targetFileReplication && !isMissing) {
      res.missingReplicas += (targetFileReplication - totalReplicasPerBlock);
      res.numUnderReplicatedBlocks += 1;
      underReplicatedPerFile++;
      if (!showFiles) {
        out.print("\n" + path + ": ");
      }
      // The maintenance counts are appended as ", <n> entering ... and <m> in
      // maintenance ..." so that each number is separated from the preceding
      // "replica(s)" and sits next to its own label.
      out.println(" Under replicated " + block + ". Target Replicas is " + targetFileReplication + " but found " + liveReplicas + " live replica(s), " + decommissionedReplicas + " decommissioned replica(s), " + decommissioningReplicas + " decommissioning replica(s)" + (this.showMaintenanceState ? (", " + enteringMaintenanceReplicas + " entering maintenance replica(s) and " + inMaintenanceReplicas + " in maintenance replica(s).") : "."));
    }
    // count mis replicated blocks
    BlockPlacementStatus blockPlacementStatus = bpPolicies.getPolicy(lBlk.getBlockType()).verifyBlockPlacement(lBlk.getLocations(), targetFileReplication);
    if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
      res.numMisReplicatedBlocks++;
      misReplicatedPerFile++;
      if (!showFiles) {
        // Start a new line only if the under-replication branch has not
        // already printed something for this file.
        if (underReplicatedPerFile == 0) {
          out.println();
        }
        out.print(path + ": ");
      }
      out.println(" Replica placement policy is violated for " + block + ". " + blockPlacementStatus.getErrorDescription());
    }
    // count storage summary
    if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
      countStorageTypeSummary(file, lBlk);
    }
    // report
    String blkName = block.toString();
    report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
    if (isMissing && !isCorrupt) {
      // If the block is corrupted, it means all its available replicas are
      // corrupted in the case of replication, and it means the state of the
      // block group is unrecoverable due to some corrupted internal blocks in
      // the case of EC. We don't mark it as missing given these available
      // replicas/internal-blocks might still be accessible as the block might
      // be incorrectly marked as corrupted by client machines.
      report.append(" MISSING!");
      res.addMissing(blkName, block.getNumBytes());
      missing++;
      missize += block.getNumBytes();
      if (storedBlock.isStriped()) {
        report.append(" Live_repl=" + liveReplicas);
        String info = getReplicaInfo(storedBlock);
        if (!info.isEmpty()) {
          report.append(" ").append(info);
        }
      }
    } else {
      report.append(" Live_repl=" + liveReplicas);
      String info = getReplicaInfo(storedBlock);
      if (!info.isEmpty()) {
        report.append(" ").append(info);
      }
    }
    report.append('\n');
    blockNumber++;
  }
  // display under construction block info.
  if (!blocks.isLastBlockComplete() && lastBlock != null) {
    ExtendedBlock block = lastBlock.getBlock();
    String blkName = block.toString();
    BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
    DatanodeStorageInfo[] storages = storedBlock.getUnderConstructionFeature().getExpectedStorageLocations();
    report.append('\n');
    report.append("Under Construction Block:\n");
    report.append(blockNumber).append(". ").append(blkName);
    report.append(" len=").append(block.getNumBytes());
    report.append(" Expected_repl=" + storages.length);
    String info = getReplicaInfo(storedBlock);
    if (!info.isEmpty()) {
      report.append(" ").append(info);
    }
  }
  // count corrupt file & move or delete if necessary
  if ((missing > 0) || (corrupt > 0)) {
    if (!showFiles) {
      if (missing > 0) {
        out.print("\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
      }
      if (corrupt > 0) {
        out.print("\n" + path + ": CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B.");
      }
    }
    res.corruptFiles++;
    if (isOpen) {
      // Open files are never moved or deleted.
      LOG.info("Fsck: ignoring open file " + path);
    } else {
      if (doMove) {
        copyBlocksToLostFound(parent, file, blocks);
      }
      if (doDelete) {
        deleteCorruptedFile(path);
      }
    }
  }
  if (showFiles) {
    if (missing > 0 || corrupt > 0) {
      if (missing > 0) {
        out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
      }
      if (corrupt > 0) {
        out.print(" CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B\n");
      }
    } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
      out.print(" OK\n");
    }
    if (showBlocks) {
      out.print(report + "\n");
    }
  }
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.
the class DFSTestUtil method getExpectedPrimaryNode.
/**
 * Picks the datanode that is expected to drive recovery of a block which is
 * still under construction on the given NameNode.
 *
 * <p>Among the block's expected storage locations the one whose datanode
 * reports the largest monotonic last-update time is chosen; on a tie the
 * location listed first wins.
 *
 * @param nn  NameNode whose block manager is consulted for the block
 * @param blk block that must not be complete yet
 * @return the descriptor of the datanode expected to act as primary
 */
public static DatanodeDescriptor getExpectedPrimaryNode(NameNode nn, ExtendedBlock blk) {
  final BlockInfo info = nn.getNamesystem().getBlockManager().getStoredBlock(blk.getLocalBlock());
  assertTrue("Block " + blk + " should be under construction, " + "got: " + info, !info.isComplete());
  // The replica with the freshest heartbeat is expected to be put in charge
  // of the synchronization / recovery protocol.
  final DatanodeStorageInfo[] locations = info.getUnderConstructionFeature().getExpectedStorageLocations();
  DatanodeDescriptor primary = locations[0].getDatanodeDescriptor();
  long newestUpdate = primary.getLastUpdateMonotonic();
  for (int idx = 1; idx < locations.length; idx++) {
    final DatanodeDescriptor candidate = locations[idx].getDatanodeDescriptor();
    final long candidateUpdate = candidate.getLastUpdateMonotonic();
    if (candidateUpdate > newestUpdate) {
      primary = candidate;
      newestUpdate = candidateUpdate;
    }
  }
  return primary;
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.
the class TestDatanodeRegistration method testForcedRegistration.
// IBRs are async operations to free up IPC handlers. This means the IBR
// response will not contain non-IPC level exceptions - which in practice
// should not occur other than dead/unregistered node which will trigger a
// re-registration. If a non-IPC exception does occur, the safety net is
// a forced re-registration on the next heartbeat.
//
// The test walks a single datanode through heartbeats, block reports and
// incremental block reports and checks after each step whether the
// datanode descriptor is registered and whether the datanode's
// registration object was replaced.
@Test
public void testForcedRegistration() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY, 4);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, Integer.MAX_VALUE);
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  try {
    cluster.waitActive();
    cluster.getHttpUri(0);
    FSNamesystem fsn = cluster.getNamesystem();
    String bpId = fsn.getBlockPoolId();
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeDescriptor dnd = NameNodeAdapter.getDatanode(fsn, dn.getDatanodeId());
    // Heartbeats are driven explicitly through waitForHeartbeat below.
    DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
    DatanodeStorageInfo storage = dnd.getStorageInfos()[0];
    // registration should not change after heartbeat.
    assertTrue(dnd.isRegistered());
    DatanodeRegistration lastReg = dn.getDNRegistrationForBP(bpId);
    waitForHeartbeat(dn, dnd);
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // force a re-registration on next heartbeat.
    dnd.setForceRegistration(true);
    assertFalse(dnd.isRegistered());
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    DatanodeRegistration newReg = dn.getDNRegistrationForBP(bpId);
    assertNotSame(lastReg, newReg);
    lastReg = newReg;
    // registration should not change on subsequent heartbeats.
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    assertTrue(waitForBlockReport(dn, dnd));
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // check that block report is not processed and registration didn't change.
    dnd.setForceRegistration(true);
    assertFalse(waitForBlockReport(dn, dnd));
    assertFalse(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // heartbeat should trigger re-registration, and next block report should
    // not change registration.
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    newReg = dn.getDNRegistrationForBP(bpId);
    assertNotSame(lastReg, newReg);
    lastReg = newReg;
    assertTrue(waitForBlockReport(dn, dnd));
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // registration doesn't change.
    ExtendedBlock eb = new ExtendedBlock(bpId, 1234);
    dn.notifyNamenodeDeletedBlock(eb, storage.getStorageID());
    DataNodeTestUtils.triggerDeletionReport(dn);
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // a failed IBR will effectively unregister the node.
    boolean failed = false;
    try {
      // pass null to cause a failure since there aren't any easy failure
      // modes since it shouldn't happen.
      fsn.processIncrementalBlockReport(lastReg, null);
    } catch (NullPointerException npe) {
      failed = true;
    }
    assertTrue("didn't fail", failed);
    assertFalse(dnd.isRegistered());
    // should remain unregistered until next heartbeat.
    dn.notifyNamenodeDeletedBlock(eb, storage.getStorageID());
    DataNodeTestUtils.triggerDeletionReport(dn);
    assertFalse(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    assertNotSame(lastReg, dn.getDNRegistrationForBP(bpId));
  } finally {
    // Stop the mini cluster even when an assertion fails, so this test does
    // not leave a running cluster behind for the tests that follow.
    cluster.shutdown();
  }
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.
the class TestFileCorruption method updateAllStorages.
/**
 * Feeds a synthetic storage report into every storage of every datanode
 * returned by the block manager's datanode manager.
 *
 * <p>Each report is a {@link DatanodeStorage} of type DISK in state FAILED
 * whose ID is a random integer in {@code [0, 100]} rendered as a string; it
 * is applied through {@code DatanodeStorageInfo.updateFromStorage}.
 *
 * @param bm block manager whose datanodes are updated
 */
private void updateAllStorages(BlockManager bm) {
  final Random random = new Random();
  for (DatanodeDescriptor dd : bm.getDatanodeManager().getDatanodes()) {
    for (DatanodeStorageInfo info : dd.getStorageInfos()) {
      // The random value is used as the storage ID of the synthetic report.
      String storageId = Integer.toString(random.nextInt(101));
      DatanodeStorage storage = new DatanodeStorage(storageId, DatanodeStorage.State.FAILED, StorageType.DISK);
      info.updateFromStorage(storage);
    }
  }
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.
the class TestDFSNetworkTopology method testGetSubtreeStorageCount.
/**
 * Checks the DISK subtree storage counters of the inner nodes /l2, /l2/d3,
 * /l2/d3/r1 and /l2/d3/r3 before a datanode is added under /l2/d3/r1, after
 * it is added, and after it is removed again.
 * @throws Exception
 */
@Test
public void testGetSubtreeStorageCount() throws Exception {
  // The node is added to and removed from rack /l2/d3/r1, so /l2, /l2/d3 and
  // /l2/d3/r1 are expected to change. /l2/d3/r3 is only a reference that
  // must keep its count throughout.
  final Node nodeL2 = CLUSTER.getNode("/l2");
  final Node nodeD3 = CLUSTER.getNode("/l2/d3");
  final Node nodeR1 = CLUSTER.getNode("/l2/d3/r1");
  final Node nodeR3 = CLUSTER.getNode("/l2/d3/r3");
  assertTrue(nodeL2 instanceof DFSTopologyNodeImpl);
  assertTrue(nodeD3 instanceof DFSTopologyNodeImpl);
  assertTrue(nodeR1 instanceof DFSTopologyNodeImpl);
  assertTrue(nodeR3 instanceof DFSTopologyNodeImpl);
  final DFSTopologyNodeImpl level2 = (DFSTopologyNodeImpl) nodeL2;
  final DFSTopologyNodeImpl dataCenter3 = (DFSTopologyNodeImpl) nodeD3;
  final DFSTopologyNodeImpl rack1 = (DFSTopologyNodeImpl) nodeR1;
  final DFSTopologyNodeImpl rack3 = (DFSTopologyNodeImpl) nodeR3;
  final StorageType disk = StorageType.DISK;

  // Baseline counts.
  assertEquals(4, level2.getSubtreeStorageCount(disk));
  assertEquals(2, dataCenter3.getSubtreeStorageCount(disk));
  assertEquals(1, rack1.getSubtreeStorageCount(disk));
  assertEquals(1, rack3.getSubtreeStorageCount(disk));

  final DatanodeDescriptor addedNode = DFSTestUtil.createDatanodeStorageInfo("StorageID", "1.2.3.4", "/l2/d3/r1", "newhost").getDatanodeDescriptor();
  CLUSTER.add(addedNode);
  // With one more storage under /l2/d3/r1, every ancestor inner node should
  // report a DISK count that is one higher.
  assertEquals(5, level2.getSubtreeStorageCount(disk));
  assertEquals(3, dataCenter3.getSubtreeStorageCount(disk));
  assertEquals(2, rack1.getSubtreeStorageCount(disk));
  assertEquals(1, rack3.getSubtreeStorageCount(disk));

  CLUSTER.remove(addedNode);
  // Removing the node should bring the counts back to the baseline.
  assertEquals(4, level2.getSubtreeStorageCount(disk));
  assertEquals(2, dataCenter3.getSubtreeStorageCount(disk));
  assertEquals(1, rack1.getSubtreeStorageCount(disk));
  assertEquals(1, rack3.getSubtreeStorageCount(disk));
}
Aggregations