Example 21 with DatanodeStorageInfo

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.

the class NamenodeFsck method collectBlocksSummary.

private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res, LocatedBlocks blocks) throws IOException {
    String path = file.getFullName(parent);
    boolean isOpen = blocks.isUnderConstruction();
    if (isOpen && !showOpenFiles) {
        return;
    }
    int missing = 0;
    int corrupt = 0;
    long missize = 0;
    long corruptSize = 0;
    int underReplicatedPerFile = 0;
    int misReplicatedPerFile = 0;
    StringBuilder report = new StringBuilder();
    int blockNumber = 0;
    final LocatedBlock lastBlock = blocks.getLastLocatedBlock();
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
        ExtendedBlock block = lBlk.getBlock();
        if (!blocks.isLastBlockComplete() && lastBlock != null && lastBlock.getBlock().equals(block)) {
            // it is under construction
            continue;
        }
        final BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
        final int minReplication = blockManager.getMinStorageNum(storedBlock);
        // count decommissionedReplicas / decommissioningReplicas
        NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
        int decommissionedReplicas = numberReplicas.decommissioned();
        int decommissioningReplicas = numberReplicas.decommissioning();
        int enteringMaintenanceReplicas = numberReplicas.liveEnteringMaintenanceReplicas();
        int inMaintenanceReplicas = numberReplicas.maintenanceNotForReadReplicas();
        res.decommissionedReplicas += decommissionedReplicas;
        res.decommissioningReplicas += decommissioningReplicas;
        res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
        res.inMaintenanceReplicas += inMaintenanceReplicas;
        // count total replicas
        int liveReplicas = numberReplicas.liveReplicas();
        int totalReplicasPerBlock = liveReplicas + decommissionedReplicas + decommissioningReplicas + enteringMaintenanceReplicas + inMaintenanceReplicas;
        res.totalReplicas += totalReplicasPerBlock;
        boolean isMissing;
        if (storedBlock.isStriped()) {
            isMissing = totalReplicasPerBlock < minReplication;
        } else {
            isMissing = totalReplicasPerBlock == 0;
        }
        // count expected replicas
        short targetFileReplication;
        if (file.getErasureCodingPolicy() != null) {
            assert storedBlock instanceof BlockInfoStriped;
            targetFileReplication = ((BlockInfoStriped) storedBlock).getRealTotalBlockNum();
        } else {
            targetFileReplication = file.getReplication();
        }
        res.numExpectedReplicas += targetFileReplication;
        // count under min repl'd blocks
        if (totalReplicasPerBlock < minReplication) {
            res.numUnderMinReplicatedBlocks++;
        }
        // count excessive Replicas / over replicated blocks
        if (liveReplicas > targetFileReplication) {
            res.excessiveReplicas += (liveReplicas - targetFileReplication);
            res.numOverReplicatedBlocks += 1;
        }
        // count corrupt blocks
        boolean isCorrupt = lBlk.isCorrupt();
        if (isCorrupt) {
            res.addCorrupt(block.getNumBytes());
            corrupt++;
            corruptSize += block.getNumBytes();
            out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
        }
        // count minimally replicated blocks
        if (totalReplicasPerBlock >= minReplication)
            res.numMinReplicatedBlocks++;
        // count missing replicas / under replicated blocks
        if (totalReplicasPerBlock < targetFileReplication && !isMissing) {
            res.missingReplicas += (targetFileReplication - totalReplicasPerBlock);
            res.numUnderReplicatedBlocks += 1;
            underReplicatedPerFile++;
            if (!showFiles) {
                out.print("\n" + path + ": ");
            }
            out.println(" Under replicated " + block + ". Target Replicas is " + targetFileReplication + " but found " + liveReplicas + " live replica(s), " + decommissionedReplicas + " decommissioned replica(s), " + decommissioningReplicas + " decommissioning replica(s)" + (this.showMaintenanceState ? (enteringMaintenanceReplicas + ", entering maintenance replica(s) and " + inMaintenanceReplicas + " in maintenance replica(s).") : "."));
        }
        // count mis replicated blocks
        BlockPlacementStatus blockPlacementStatus = bpPolicies.getPolicy(lBlk.getBlockType()).verifyBlockPlacement(lBlk.getLocations(), targetFileReplication);
        if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
            res.numMisReplicatedBlocks++;
            misReplicatedPerFile++;
            if (!showFiles) {
                if (underReplicatedPerFile == 0)
                    out.println();
                out.print(path + ": ");
            }
            out.println(" Replica placement policy is violated for " + block + ". " + blockPlacementStatus.getErrorDescription());
        }
        // count storage summary
        if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
            countStorageTypeSummary(file, lBlk);
        }
        // report
        String blkName = block.toString();
        report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
        if (isMissing && !isCorrupt) {
            // If the block is corrupted, it means all its available replicas are
            // corrupted in the case of replication, and it means the state of the
            // block group is unrecoverable due to some corrupted internal blocks in
            // the case of EC. We don't mark it as missing given these available
            // replicas/internal-blocks might still be accessible as the block might
            // be incorrectly marked as corrupted by client machines.
            report.append(" MISSING!");
            res.addMissing(blkName, block.getNumBytes());
            missing++;
            missize += block.getNumBytes();
            if (storedBlock.isStriped()) {
                report.append(" Live_repl=" + liveReplicas);
                String info = getReplicaInfo(storedBlock);
                if (!info.isEmpty()) {
                    report.append(" ").append(info);
                }
            }
        } else {
            report.append(" Live_repl=" + liveReplicas);
            String info = getReplicaInfo(storedBlock);
            if (!info.isEmpty()) {
                report.append(" ").append(info);
            }
        }
        report.append('\n');
        blockNumber++;
    }
    //display under construction block info.
    if (!blocks.isLastBlockComplete() && lastBlock != null) {
        ExtendedBlock block = lastBlock.getBlock();
        String blkName = block.toString();
        BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
        DatanodeStorageInfo[] storages = storedBlock.getUnderConstructionFeature().getExpectedStorageLocations();
        report.append('\n');
        report.append("Under Construction Block:\n");
        report.append(blockNumber).append(". ").append(blkName);
        report.append(" len=").append(block.getNumBytes());
        report.append(" Expected_repl=" + storages.length);
        String info = getReplicaInfo(storedBlock);
        if (!info.isEmpty()) {
            report.append(" ").append(info);
        }
    }
    // count corrupt file & move or delete if necessary
    if ((missing > 0) || (corrupt > 0)) {
        if (!showFiles) {
            if (missing > 0) {
                out.print("\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
            }
            if (corrupt > 0) {
                out.print("\n" + path + ": CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B.");
            }
        }
        res.corruptFiles++;
        if (isOpen) {
            LOG.info("Fsck: ignoring open file " + path);
        } else {
            if (doMove)
                copyBlocksToLostFound(parent, file, blocks);
            if (doDelete)
                deleteCorruptedFile(path);
        }
    }
    if (showFiles) {
        if (missing > 0 || corrupt > 0) {
            if (missing > 0) {
                out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
            }
            if (corrupt > 0) {
                out.print(" CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B\n");
            }
        } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
            out.print(" OK\n");
        }
        if (showBlocks) {
            out.print(report + "\n");
        }
    }
}
Also used : BlockInfoStriped(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) BlockPlacementStatus(org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo)
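
A note on the missing-block logic above: for a striped (erasure-coded) block the method treats the block group as missing once fewer than the minimum number of internal blocks survive, while for a contiguous block it is missing only when no replicas remain at all. The following is a rough, self-contained illustration of that accounting with plain values instead of NumberReplicas and Result; the names and numbers are ours, not from the Hadoop source.

// Illustrative sketch only: mirrors the per-block arithmetic in
// collectBlocksSummary without any Hadoop types.
public class ReplicaAccountingSketch {
    public static void main(String[] args) {
        int live = 2, decommissioned = 1, decommissioning = 0;
        int enteringMaintenance = 0, inMaintenance = 0;
        // totalReplicasPerBlock in the method above
        int total = live + decommissioned + decommissioning
                + enteringMaintenance + inMaintenance;
        short target = 3;        // targetFileReplication
        int minReplication = 1;  // blockManager.getMinStorageNum(storedBlock)
        boolean striped = false; // contiguous block in this sketch
        // Striped: missing once fewer than minReplication internal blocks remain.
        // Contiguous: missing only when no replicas remain at all.
        boolean isMissing = striped ? total < minReplication : total == 0;
        if (live > target) {
            System.out.println("excess replicas: " + (live - target));
        }
        if (total < target && !isMissing) {
            System.out.println("missing replicas: " + (target - total));
        }
    }
}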

Example 22 with DatanodeStorageInfo

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.

the class DFSTestUtil method getExpectedPrimaryNode.

/**
   * @return the node which is expected to run the recovery of the
   * given block, which is known to be under construction inside the
   * given NameNode.
   */
public static DatanodeDescriptor getExpectedPrimaryNode(NameNode nn, ExtendedBlock blk) {
    BlockManager bm0 = nn.getNamesystem().getBlockManager();
    BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock());
    assertTrue("Block " + blk + " should be under construction, " + "got: " + storedBlock, !storedBlock.isComplete());
    // We expect that the replica with the most recent heart beat will be
    // the one to be in charge of the synchronization / recovery protocol.
    final DatanodeStorageInfo[] storages = storedBlock.getUnderConstructionFeature().getExpectedStorageLocations();
    DatanodeStorageInfo expectedPrimary = storages[0];
    long mostRecentLastUpdate = expectedPrimary.getDatanodeDescriptor().getLastUpdateMonotonic();
    for (int i = 1; i < storages.length; i++) {
        final long lastUpdate = storages[i].getDatanodeDescriptor().getLastUpdateMonotonic();
        if (lastUpdate > mostRecentLastUpdate) {
            expectedPrimary = storages[i];
            mostRecentLastUpdate = lastUpdate;
        }
    }
    return expectedPrimary.getDatanodeDescriptor();
}
Also used : DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) ReceivedDeletedBlockInfo(org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo)
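
In the Hadoop test suite this helper is typically used to find out which DataNode the NameNode will ask to coordinate block recovery, so a test can delay or stop exactly that node before triggering recovery. A rough usage sketch follows; it assumes a running MiniDFSCluster named cluster and a file at path that is still open for write, and those names are ours rather than part of any particular test.

// Hypothetical usage sketch; `cluster` and `path` are assumed to exist.
NameNode nn = cluster.getNameNode();
FileSystem fs = cluster.getFileSystem();
ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, path);
DatanodeDescriptor primary = DFSTestUtil.getExpectedPrimaryNode(nn, blk);
// The replica with the most recent heartbeat coordinates recovery, so a
// failover test could, for example, stop that exact DataNode:
// cluster.stopDataNode(primary.getXferAddr());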

Example 23 with DatanodeStorageInfo

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.

the class TestDatanodeRegistration method testForcedRegistration.

// IBRs are async operations to free up IPC handlers.  This means the IBR
// response will not contain non-IPC level exceptions - which in practice
// should not occur other than dead/unregistered node which will trigger a
// re-registration.  If a non-IPC exception does occur, the safety net is
// a forced re-registration on the next heartbeat.
@Test
public void testForcedRegistration() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY, 4);
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, Integer.MAX_VALUE);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    cluster.getHttpUri(0);
    FSNamesystem fsn = cluster.getNamesystem();
    String bpId = fsn.getBlockPoolId();
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeDescriptor dnd = NameNodeAdapter.getDatanode(fsn, dn.getDatanodeId());
    DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
    DatanodeStorageInfo storage = dnd.getStorageInfos()[0];
    // registration should not change after heartbeat.
    assertTrue(dnd.isRegistered());
    DatanodeRegistration lastReg = dn.getDNRegistrationForBP(bpId);
    waitForHeartbeat(dn, dnd);
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // force a re-registration on next heartbeat.
    dnd.setForceRegistration(true);
    assertFalse(dnd.isRegistered());
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    DatanodeRegistration newReg = dn.getDNRegistrationForBP(bpId);
    assertNotSame(lastReg, newReg);
    lastReg = newReg;
    // registration should not change on subsequent heartbeats.
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    assertTrue(waitForBlockReport(dn, dnd));
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // check that block report is not processed and registration didn't change.
    dnd.setForceRegistration(true);
    assertFalse(waitForBlockReport(dn, dnd));
    assertFalse(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // heartbeat should trigger re-registration, and next block report should
    // not change registration.
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    newReg = dn.getDNRegistrationForBP(bpId);
    assertNotSame(lastReg, newReg);
    lastReg = newReg;
    assertTrue(waitForBlockReport(dn, dnd));
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // registration doesn't change.
    ExtendedBlock eb = new ExtendedBlock(bpId, 1234);
    dn.notifyNamenodeDeletedBlock(eb, storage.getStorageID());
    DataNodeTestUtils.triggerDeletionReport(dn);
    assertTrue(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    // a failed IBR will effectively unregister the node.
    boolean failed = false;
    try {
        // pass null to cause a failure; there is no easier failure mode to
        // trigger, since this shouldn't happen in normal operation.
        fsn.processIncrementalBlockReport(lastReg, null);
    } catch (NullPointerException npe) {
        failed = true;
    }
    assertTrue("didn't fail", failed);
    assertFalse(dnd.isRegistered());
    // should remain unregistered until next heartbeat.
    dn.notifyNamenodeDeletedBlock(eb, storage.getStorageID());
    DataNodeTestUtils.triggerDeletionReport(dn);
    assertFalse(dnd.isRegistered());
    assertSame(lastReg, dn.getDNRegistrationForBP(bpId));
    waitForHeartbeat(dn, dnd);
    assertTrue(dnd.isRegistered());
    assertNotSame(lastReg, dn.getDNRegistrationForBP(bpId));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeRegistration(org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) Test(org.junit.Test)
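
The test depends on two private helpers, waitForHeartbeat and waitForBlockReport, that are not reproduced on this page. A minimal sketch of what the heartbeat helper could look like is shown below, built on DataNodeTestUtils.triggerHeartbeat; the helper body is our reconstruction, not the original.

// Hypothetical reconstruction of waitForHeartbeat: force a heartbeat and poll
// until the NameNode-side descriptor has processed it, i.e. its monotonic
// last-update timestamp has advanced.
private void waitForHeartbeat(DataNode dn, DatanodeDescriptor dnd) throws Exception {
    final long lastUpdate = dnd.getLastUpdateMonotonic();
    DataNodeTestUtils.triggerHeartbeat(dn);
    long deadline = System.currentTimeMillis() + 30000;
    while (dnd.getLastUpdateMonotonic() <= lastUpdate) {
        if (System.currentTimeMillis() > deadline) {
            throw new AssertionError("heartbeat not processed within 30 seconds");
        }
        Thread.sleep(100);
    }
}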

Example 24 with DatanodeStorageInfo

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.

the class TestFileCorruption method updateAllStorages.

private void updateAllStorages(BlockManager bm) {
    for (DatanodeDescriptor dd : bm.getDatanodeManager().getDatanodes()) {
        Set<DatanodeStorageInfo> setInfos = new HashSet<DatanodeStorageInfo>();
        DatanodeStorageInfo[] infos = dd.getStorageInfos();
        Random random = new Random();
        for (int i = 0; i < infos.length; i++) {
            int blkId = random.nextInt(101);
            DatanodeStorage storage = new DatanodeStorage(Integer.toString(blkId), DatanodeStorage.State.FAILED, StorageType.DISK);
            infos[i].updateFromStorage(storage);
            setInfos.add(infos[i]);
        }
    }
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) Random(java.util.Random) DatanodeStorage(org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) HashSet(java.util.HashSet)

Example 25 with DatanodeStorageInfo

use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo in project hadoop by apache.

the class TestDFSNetworkTopology method testGetSubtreeStorageCount.

/**
   * Tests getting subtree storage counts, and see whether it is correct when
   * we update subtree.
   * @throws Exception
   */
@Test
public void testGetSubtreeStorageCount() throws Exception {
    // add and remove a node to rack /l2/d3/r1. So all the inner nodes /l2,
    // /l2/d3 and /l2/d3/r1 should be affected. /l2/d3/r3 should still be the
    // same, only checked as a reference
    Node l2 = CLUSTER.getNode("/l2");
    Node l2d3 = CLUSTER.getNode("/l2/d3");
    Node l2d3r1 = CLUSTER.getNode("/l2/d3/r1");
    Node l2d3r3 = CLUSTER.getNode("/l2/d3/r3");
    assertTrue(l2 instanceof DFSTopologyNodeImpl);
    assertTrue(l2d3 instanceof DFSTopologyNodeImpl);
    assertTrue(l2d3r1 instanceof DFSTopologyNodeImpl);
    assertTrue(l2d3r3 instanceof DFSTopologyNodeImpl);
    DFSTopologyNodeImpl innerl2 = (DFSTopologyNodeImpl) l2;
    DFSTopologyNodeImpl innerl2d3 = (DFSTopologyNodeImpl) l2d3;
    DFSTopologyNodeImpl innerl2d3r1 = (DFSTopologyNodeImpl) l2d3r1;
    DFSTopologyNodeImpl innerl2d3r3 = (DFSTopologyNodeImpl) l2d3r3;
    assertEquals(4, innerl2.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(2, innerl2d3.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(1, innerl2d3r1.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(1, innerl2d3r3.getSubtreeStorageCount(StorageType.DISK));
    DatanodeStorageInfo storageInfo = DFSTestUtil.createDatanodeStorageInfo("StorageID", "1.2.3.4", "/l2/d3/r1", "newhost");
    DatanodeDescriptor newNode = storageInfo.getDatanodeDescriptor();
    CLUSTER.add(newNode);
    // after adding a storage to /l2/d3/r1, ancestor inner node should have
    // DISK count incremented by 1.
    assertEquals(5, innerl2.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(3, innerl2d3.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(2, innerl2d3r1.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(1, innerl2d3r3.getSubtreeStorageCount(StorageType.DISK));
    CLUSTER.remove(newNode);
    assertEquals(4, innerl2.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(2, innerl2d3.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(1, innerl2d3r1.getSubtreeStorageCount(StorageType.DISK));
    assertEquals(1, innerl2d3r3.getSubtreeStorageCount(StorageType.DISK));
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) Node(org.apache.hadoop.net.Node) Test(org.junit.Test)
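
The CLUSTER field and the initial /l2 topology are built elsewhere in TestDFSNetworkTopology. The sketch below shows the kind of setup this test assumes, using DFSNetworkTopology and DFSTestUtil.createDatanodeStorageInfo; the rack names, storage IDs, and expected counts here are ours, not the test's actual fixture.

// Hypothetical setup sketch; the real test builds a larger multi-rack
// topology in its setup method.
DFSNetworkTopology cluster = DFSNetworkTopology.getInstance(new Configuration());
DatanodeStorageInfo s1 =
    DFSTestUtil.createDatanodeStorageInfo("s1", "10.0.0.1", "/l2/d3/r1", "host1");
DatanodeStorageInfo s2 =
    DFSTestUtil.createDatanodeStorageInfo("s2", "10.0.0.2", "/l2/d3/r3", "host2");
cluster.add(s1.getDatanodeDescriptor());
cluster.add(s2.getDatanodeDescriptor());
// Inner nodes such as /l2 and /l2/d3 are created implicitly; each rack now
// contributes one DISK storage to its ancestors' subtree counts.
DFSTopologyNodeImpl d3 = (DFSTopologyNodeImpl) cluster.getNode("/l2/d3");
System.out.println(d3.getSubtreeStorageCount(StorageType.DISK)); // expect 2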

Aggregations

DatanodeStorageInfo (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) 25
Test (org.junit.Test) 10
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock) 8
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) 8
BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) 7
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock) 6
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) 6
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode) 6
ArrayList (java.util.ArrayList) 5
DatanodeStorage (org.apache.hadoop.hdfs.server.protocol.DatanodeStorage) 5
Configuration (org.apache.hadoop.conf.Configuration) 4
Block (org.apache.hadoop.hdfs.protocol.Block) 4
Node (org.apache.hadoop.net.Node) 4
IOException (java.io.IOException) 3
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo) 3
BlockECReconstructionInfo (org.apache.hadoop.hdfs.server.protocol.BlockECReconstructionCommand.BlockECReconstructionInfo) 3
DatanodeRegistration (org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration) 3
FileNotFoundException (java.io.FileNotFoundException) 2
HashSet (java.util.HashSet) 2
StorageType (org.apache.hadoop.fs.StorageType) 2