Example 1 with NumberReplicas

use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.

From the class TestReconstructStripedBlocks, method testCountLiveReplicas:

/**
   * Make sure the NN can detect the scenario where there are enough
   * internal blocks (>= 9 by default) but a data/parity block is still
   * missing.
   */
@Test
public void testCountLiveReplicas() throws Exception {
    final HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    try {
        fs.mkdirs(dirPath);
        fs.setErasureCodingPolicy(dirPath, StripedFileTestUtil.getDefaultECPolicy().getName());
        DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
        // stop a dn
        LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        DatanodeInfo dnToStop = block.getLocations()[0];
        MiniDFSCluster.DataNodeProperties dnProp = cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // wait for reconstruction to happen
        DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
        // bring the dn back: 10 internal blocks now
        cluster.restartDataNode(dnProp);
        cluster.waitActive();
        // stop another dn: 9 internal blocks remain, but they cover only 8 distinct ones
        dnToStop = block.getLocations()[1];
        cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // the namenode can currently track the missing block; restart the NN so it must rediscover it
        cluster.restartNameNode(true);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(dn);
        }
        FSNamesystem fsn = cluster.getNamesystem();
        BlockManager bm = fsn.getBlockManager();
        // wait for 3 cycles of the redundancy monitor (interval is set to 1s above)
        Thread.sleep(3000);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(dn);
        }
        // check if NN can detect the missing internal block and finish the
        // reconstruction
        StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
        boolean reconstructed = false;
        for (int i = 0; i < 5; i++) {
            NumberReplicas num = null;
            fsn.readLock();
            try {
                BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory().getINode4Write(filePath.toString()).asFile().getLastBlock();
                num = bm.countNodes(blockInfo);
            } finally {
                fsn.readUnlock();
            }
            if (num.liveReplicas() >= groupSize) {
                reconstructed = true;
                break;
            } else {
                Thread.sleep(1000);
            }
        }
        Assert.assertTrue(reconstructed);
        blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        BitSet bitSet = new BitSet(groupSize);
        for (byte index : block.getBlockIndices()) {
            bitSet.set(index);
        }
        for (int i = 0; i < groupSize; i++) {
            Assert.assertTrue(bitSet.get(i));
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) BitSet(java.util.BitSet) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) Test(org.junit.Test)
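
Distilled from the test above, the core pattern is: take the namesystem read lock, resolve the file's last BlockInfo, and ask the BlockManager to count its nodes. A minimal sketch, with a helper name of our own choosing (this method is not part of Hadoop):

import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;

// Illustrative helper (not in Hadoop): live-replica count of a file's last block.
static int countLiveReplicasOfLastBlock(FSNamesystem fsn, String path) throws Exception {
    BlockManager bm = fsn.getBlockManager();
    // hold the namesystem read lock while touching the block map,
    // exactly as testCountLiveReplicas does above
    fsn.readLock();
    try {
        BlockInfo last = fsn.getFSDirectory()
                .getINode4Write(path).asFile().getLastBlock();
        return bm.countNodes(last).liveReplicas();
    } finally {
        fsn.readUnlock();
    }
}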

Example 2 with NumberReplicas

use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.

From the class NamenodeFsck, method blockIdCK:

/**
   * Check block information given a block ID.
   */
public void blockIdCK(String blockId) {
    if (blockId == null) {
        out.println("Please provide valid blockId!");
        return;
    }
    try {
        // get blockInfo
        Block block = new Block(Block.getBlockId(blockId));
        // find which file this block belongs to
        BlockInfo blockInfo = blockManager.getStoredBlock(block);
        if (blockInfo == null) {
            out.println("Block " + blockId + " " + NONEXISTENT_STATUS);
            LOG.warn("Block " + blockId + " " + NONEXISTENT_STATUS);
            return;
        }
        final INodeFile iNode = namenode.getNamesystem().getBlockCollection(blockInfo);
        NumberReplicas numberReplicas = blockManager.countNodes(blockInfo);
        out.println("Block Id: " + blockId);
        out.println("Block belongs to: " + iNode.getFullPathName());
        out.println("No. of Expected Replica: " + blockManager.getExpectedRedundancyNum(blockInfo));
        out.println("No. of live Replica: " + numberReplicas.liveReplicas());
        out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
        out.println("No. of stale Replica: " + numberReplicas.replicasOnStaleNodes());
        out.println("No. of decommissioned Replica: " + numberReplicas.decommissioned());
        out.println("No. of decommissioning Replica: " + numberReplicas.decommissioning());
        if (this.showMaintenanceState) {
            out.println("No. of entering maintenance Replica: " + numberReplicas.liveEnteringMaintenanceReplicas());
            out.println("No. of in maintenance Replica: " + numberReplicas.maintenanceNotForReadReplicas());
        }
        out.println("No. of corrupted Replica: " + numberReplicas.corruptReplicas());
        // record datanodes that hold a corrupt replica of this block
        Collection<DatanodeDescriptor> corruptionRecord =
                blockManager.getCorruptReplicas(block);
        // report the status of each block replica on its datanode
        for (int idx = (blockInfo.numNodes() - 1); idx >= 0; idx--) {
            DatanodeDescriptor dn = blockInfo.getDatanode(idx);
            out.print("Block replica on datanode/rack: " + dn.getHostName() + dn.getNetworkLocation() + " ");
            if (corruptionRecord != null && corruptionRecord.contains(dn)) {
                out.print(CORRUPT_STATUS + "\t ReasonCode: " + blockManager.getCorruptReason(block, dn));
            } else if (dn.isDecommissioned()) {
                out.print(DECOMMISSIONED_STATUS);
            } else if (dn.isDecommissionInProgress()) {
                out.print(DECOMMISSIONING_STATUS);
            } else if (this.showMaintenanceState && dn.isEnteringMaintenance()) {
                out.print(ENTERING_MAINTENANCE_STATUS);
            } else if (this.showMaintenanceState && dn.isInMaintenance()) {
                out.print(IN_MAINTENANCE_STATUS);
            } else {
                out.print(HEALTHY_STATUS);
            }
            out.print("\n");
        }
    } catch (Exception e) {
        String errMsg = "Fsck on blockId '" + blockId + "'";
        LOG.warn(errMsg, e);
        out.println(e.getMessage());
        out.print("\n\n" + errMsg);
    }
}
Also used : DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) Block(org.apache.hadoop.hdfs.protocol.Block) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) UnresolvedLinkException(org.apache.hadoop.fs.UnresolvedLinkException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException)
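
The fsck path above shows the general lookup flow: parse the numeric ID, resolve the stored BlockInfo through the BlockManager, and only then read the per-state counters off NumberReplicas. A minimal sketch of just the lookup; the helper name is illustrative:

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;

// Illustrative helper: replica counts for a block ID string, or null if unknown.
static NumberReplicas countByBlockId(BlockManager bm, String blockId) {
    // Block.getBlockId(String) parses the numeric ID out of a block name
    Block block = new Block(Block.getBlockId(blockId));
    BlockInfo info = bm.getStoredBlock(block);
    return info == null ? null : bm.countNodes(info);
}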

Example 3 with NumberReplicas

use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.

From the class TestAddStripedBlockInFBR, method testAddBlockInFullBlockReport:

@Test
public void testAddBlockInFullBlockReport() throws Exception {
    BlockManager spy = Mockito.spy(cluster.getNamesystem().getBlockManager());
    // let NN ignore one DataNode's IBR
    final DataNode dn = cluster.getDataNodes().get(0);
    final DatanodeID datanodeID = dn.getDatanodeId();
    Mockito.doNothing().when(spy).processIncrementalBlockReport(Mockito.eq(datanodeID), Mockito.any());
    Whitebox.setInternalState(cluster.getNamesystem(), "blockManager", spy);
    final Path ecDir = new Path("/ec");
    final Path repDir = new Path("/rep");
    dfs.mkdirs(ecDir);
    dfs.mkdirs(repDir);
    dfs.getClient().setErasureCodingPolicy(ecDir.toString(), StripedFileTestUtil.getDefaultECPolicy().getName());
    // create several non-EC files and one EC file
    final Path[] repFiles = new Path[groupSize];
    for (int i = 0; i < groupSize; i++) {
        repFiles[i] = new Path(repDir, "f" + i);
        DFSTestUtil.createFile(dfs, repFiles[i], 1L, (short) 3, 0L);
    }
    final Path ecFile = new Path(ecDir, "f");
    DFSTestUtil.createFile(dfs, ecFile, cellSize * dataBlocks, (short) 1, 0L);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            try {
                // trigger dn's FBR. The FBR will add block-dn mapping.
                cluster.triggerBlockReports();
                // make sure NN has correct block-dn mapping
                BlockInfoStriped blockInfo = (BlockInfoStriped) cluster.getNamesystem().getFSDirectory().getINode(ecFile.toString()).asFile().getLastBlock();
                NumberReplicas nr = spy.countNodes(blockInfo);
                return nr.excessReplicas() == 0 && nr.liveReplicas() == groupSize;
            } catch (Exception ignored) {
            // Ignore the exception
            }
            return false;
        }
    }, 3000, 60000);
}
Also used : Path(org.apache.hadoop.fs.Path) BlockInfoStriped(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped) DatanodeID(org.apache.hadoop.hdfs.protocol.DatanodeID) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) IOException(java.io.IOException) Test(org.junit.Test)
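
The anonymous Supplier above reduces to a simple polling loop: trigger full block reports, then re-check countNodes() until the expected mapping appears. The same wait, factored into a hypothetical helper (the lambda form assumes a Java 8 target):

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
import org.apache.hadoop.test.GenericTestUtils;

// Illustrative helper: poll until the NN maps the whole block group.
static void waitForFullGroup(MiniDFSCluster cluster, BlockManager bm,
        BlockInfo blockInfo, int groupSize) throws Exception {
    GenericTestUtils.waitFor(() -> {
        try {
            // re-send full block reports so the NN rebuilds block-dn mappings
            cluster.triggerBlockReports();
            NumberReplicas nr = bm.countNodes(blockInfo);
            return nr.excessReplicas() == 0 && nr.liveReplicas() == groupSize;
        } catch (Exception ignored) {
            return false;
        }
    }, 3000, 60000);
}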

Example 4 with NumberReplicas

use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.

From the class NamenodeFsck, method collectBlocksSummary:

private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res, LocatedBlocks blocks) throws IOException {
    String path = file.getFullName(parent);
    boolean isOpen = blocks.isUnderConstruction();
    if (isOpen && !showOpenFiles) {
        return;
    }
    int missing = 0;
    int corrupt = 0;
    long missize = 0;
    long corruptSize = 0;
    int underReplicatedPerFile = 0;
    int misReplicatedPerFile = 0;
    StringBuilder report = new StringBuilder();
    int blockNumber = 0;
    final LocatedBlock lastBlock = blocks.getLastLocatedBlock();
    for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
        ExtendedBlock block = lBlk.getBlock();
        if (!blocks.isLastBlockComplete() && lastBlock != null && lastBlock.getBlock().equals(block)) {
            // it is under construction
            continue;
        }
        final BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
        final int minReplication = blockManager.getMinStorageNum(storedBlock);
        // count decommissionedReplicas / decommissioningReplicas
        NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
        int decommissionedReplicas = numberReplicas.decommissioned();
        int decommissioningReplicas = numberReplicas.decommissioning();
        int enteringMaintenanceReplicas = numberReplicas.liveEnteringMaintenanceReplicas();
        int inMaintenanceReplicas = numberReplicas.maintenanceNotForReadReplicas();
        res.decommissionedReplicas += decommissionedReplicas;
        res.decommissioningReplicas += decommissioningReplicas;
        res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
        res.inMaintenanceReplicas += inMaintenanceReplicas;
        // count total replicas
        int liveReplicas = numberReplicas.liveReplicas();
        int totalReplicasPerBlock = liveReplicas + decommissionedReplicas + decommissioningReplicas + enteringMaintenanceReplicas + inMaintenanceReplicas;
        res.totalReplicas += totalReplicasPerBlock;
        boolean isMissing;
        if (storedBlock.isStriped()) {
            isMissing = totalReplicasPerBlock < minReplication;
        } else {
            isMissing = totalReplicasPerBlock == 0;
        }
        // count expected replicas
        short targetFileReplication;
        if (file.getErasureCodingPolicy() != null) {
            assert storedBlock instanceof BlockInfoStriped;
            targetFileReplication = ((BlockInfoStriped) storedBlock).getRealTotalBlockNum();
        } else {
            targetFileReplication = file.getReplication();
        }
        res.numExpectedReplicas += targetFileReplication;
        // count under min repl'd blocks
        if (totalReplicasPerBlock < minReplication) {
            res.numUnderMinReplicatedBlocks++;
        }
        // count excessive Replicas / over replicated blocks
        if (liveReplicas > targetFileReplication) {
            res.excessiveReplicas += (liveReplicas - targetFileReplication);
            res.numOverReplicatedBlocks += 1;
        }
        // count corrupt blocks
        boolean isCorrupt = lBlk.isCorrupt();
        if (isCorrupt) {
            res.addCorrupt(block.getNumBytes());
            corrupt++;
            corruptSize += block.getNumBytes();
            out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
        }
        // count minimally replicated blocks
        if (totalReplicasPerBlock >= minReplication)
            res.numMinReplicatedBlocks++;
        // count missing replicas / under replicated blocks
        if (totalReplicasPerBlock < targetFileReplication && !isMissing) {
            res.missingReplicas += (targetFileReplication - totalReplicasPerBlock);
            res.numUnderReplicatedBlocks += 1;
            underReplicatedPerFile++;
            if (!showFiles) {
                out.print("\n" + path + ": ");
            }
            out.println(" Under replicated " + block + ". Target Replicas is " + targetFileReplication + " but found " + liveReplicas + " live replica(s), " + decommissionedReplicas + " decommissioned replica(s), " + decommissioningReplicas + " decommissioning replica(s)" + (this.showMaintenanceState ? (enteringMaintenanceReplicas + ", entering maintenance replica(s) and " + inMaintenanceReplicas + " in maintenance replica(s).") : "."));
        }
        // count mis replicated blocks
        BlockPlacementStatus blockPlacementStatus = bpPolicies.getPolicy(lBlk.getBlockType()).verifyBlockPlacement(lBlk.getLocations(), targetFileReplication);
        if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
            res.numMisReplicatedBlocks++;
            misReplicatedPerFile++;
            if (!showFiles) {
                if (underReplicatedPerFile == 0)
                    out.println();
                out.print(path + ": ");
            }
            out.println(" Replica placement policy is violated for " + block + ". " + blockPlacementStatus.getErrorDescription());
        }
        // count storage summary
        if (this.showStoragePolcies && lBlk.getStorageTypes() != null) {
            countStorageTypeSummary(file, lBlk);
        }
        // report
        String blkName = block.toString();
        report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
        if (isMissing && !isCorrupt) {
            // If the block is corrupt, then in the replicated case all of its
            // available replicas are corrupt, and in the EC case the block group
            // is unrecoverable due to corrupt internal blocks. We don't mark it
            // as missing, because the available replicas/internal blocks might
            // still be accessible: the block might have been incorrectly marked
            // as corrupt by client machines.
            report.append(" MISSING!");
            res.addMissing(blkName, block.getNumBytes());
            missing++;
            missize += block.getNumBytes();
            if (storedBlock.isStriped()) {
                report.append(" Live_repl=" + liveReplicas);
                String info = getReplicaInfo(storedBlock);
                if (!info.isEmpty()) {
                    report.append(" ").append(info);
                }
            }
        } else {
            report.append(" Live_repl=" + liveReplicas);
            String info = getReplicaInfo(storedBlock);
            if (!info.isEmpty()) {
                report.append(" ").append(info);
            }
        }
        report.append('\n');
        blockNumber++;
    }
    // display under-construction block info
    if (!blocks.isLastBlockComplete() && lastBlock != null) {
        ExtendedBlock block = lastBlock.getBlock();
        String blkName = block.toString();
        BlockInfo storedBlock = blockManager.getStoredBlock(block.getLocalBlock());
        DatanodeStorageInfo[] storages = storedBlock.getUnderConstructionFeature().getExpectedStorageLocations();
        report.append('\n');
        report.append("Under Construction Block:\n");
        report.append(blockNumber).append(". ").append(blkName);
        report.append(" len=").append(block.getNumBytes());
        report.append(" Expected_repl=" + storages.length);
        String info = getReplicaInfo(storedBlock);
        if (!info.isEmpty()) {
            report.append(" ").append(info);
        }
    }
    // count corrupt file & move or delete if necessary
    if ((missing > 0) || (corrupt > 0)) {
        if (!showFiles) {
            if (missing > 0) {
                out.print("\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
            }
            if (corrupt > 0) {
                out.print("\n" + path + ": CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B.");
            }
        }
        res.corruptFiles++;
        if (isOpen) {
            LOG.info("Fsck: ignoring open file " + path);
        } else {
            if (doMove)
                copyBlocksToLostFound(parent, file, blocks);
            if (doDelete)
                deleteCorruptedFile(path);
        }
    }
    if (showFiles) {
        if (missing > 0 || corrupt > 0) {
            if (missing > 0) {
                out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
            }
            if (corrupt > 0) {
                out.print(" CORRUPT " + corrupt + " blocks of total size " + corruptSize + " B\n");
            }
        } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
            out.print(" OK\n");
        }
        if (showBlocks) {
            out.print(report + "\n");
        }
    }
}
Also used : BlockInfoStriped(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped) ExtendedBlock(org.apache.hadoop.hdfs.protocol.ExtendedBlock) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) BlockPlacementStatus(org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo)
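
The arithmetic fsck uses to judge a block's health is worth isolating: the total replica count is the sum of the live, decommissioned, decommissioning, and the two maintenance counters, and a non-striped block counts as missing only when that total is zero. A minimal restatement; the helper names are ours:

import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;

// Illustrative: total replica count as summed in collectBlocksSummary above.
static int totalReplicas(NumberReplicas nr) {
    return nr.liveReplicas()
            + nr.decommissioned()
            + nr.decommissioning()
            + nr.liveEnteringMaintenanceReplicas()
            + nr.maintenanceNotForReadReplicas();
}

// A replicated (non-striped) block counts as missing only with zero replicas.
static boolean isMissingReplicated(NumberReplicas nr) {
    return totalReplicas(nr) == 0;
}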

Example 5 with NumberReplicas

use of org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas in project hadoop by apache.

From the class TestReadOnlySharedStorage, method validateNumberReplicas:

private void validateNumberReplicas(int expectedReplicas) throws IOException {
    NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
    assertThat(numberReplicas.liveReplicas(), is(expectedReplicas));
    assertThat(numberReplicas.excessReplicas(), is(0));
    assertThat(numberReplicas.corruptReplicas(), is(0));
    assertThat(numberReplicas.decommissionedAndDecommissioning(), is(0));
    assertThat(numberReplicas.replicasOnStaleNodes(), is(0));
    BlockManagerTestUtil.updateState(blockManager);
    assertThat(blockManager.getUnderReplicatedBlocksCount(), is(0L));
    assertThat(blockManager.getExcessBlocksCount(), is(0L));
}
Also used : NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas)
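
For reference, the aggregate getter asserted above combines two of the finer-grained counters printed in the fsck example; this is our reading of the API rather than something this snippet spells out:

// assumption: decommissionedAndDecommissioning() is the sum of the two parts
assert numberReplicas.decommissionedAndDecommissioning()
        == numberReplicas.decommissioned() + numberReplicas.decommissioning();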

Aggregations

NumberReplicas (org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas)7 Test (org.junit.Test)4 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)3 BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo)3 IOException (java.io.IOException)2 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)2 ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock)2 BlockInfoStriped (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped)2 BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager)2 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)2 FileNotFoundException (java.io.FileNotFoundException)1 BitSet (java.util.BitSet)1 Path (org.apache.hadoop.fs.Path)1 UnresolvedLinkException (org.apache.hadoop.fs.UnresolvedLinkException)1 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)1 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)1 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)1 Block (org.apache.hadoop.hdfs.protocol.Block)1 DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID)1 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)1