
Example 21 with BlockManager

use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.

the class TestDataNodeErasureCodingMetrics method getComputedDatanodeWork.

private int getComputedDatanodeWork() throws IOException, InterruptedException {
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    // Giving a grace period to compute datanode work.
    int workCount = 0;
    int retries = 20;
    while (retries > 0) {
        workCount = BlockManagerTestUtil.getComputedDatanodeWork(bm);
        if (workCount > 0) {
            break;
        }
        retries--;
        Thread.sleep(500);
    }
    LOG.info("Computed datanode work: " + workCount + ", retries: " + retries);
    return workCount;
}
Also used : BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager)
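
A caller might use this polling helper as follows. This is a hypothetical usage sketch, not part of the Hadoop source: the test name and the corruption step are assumptions, and it relies only on the helper above plus JUnit's Assert.

// Hypothetical usage sketch: after removing an internal block, verify that the
// NameNode eventually schedules reconstruction work for some datanode.
// getComputedDatanodeWork() polls for up to ~10 seconds, so a zero result
// means no work was ever computed.
@Test(timeout = 120000)
public void testReconstructionWorkIsScheduled() throws Exception {
    // ... corrupt or remove an internal block here ...
    int workCount = getComputedDatanodeWork();
    Assert.assertTrue("NameNode never scheduled reconstruction work", workCount > 0);
}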

Example 22 with BlockManager

use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.

the class TestDataNodeVolumeFailure method testVolumeFailure.

/*
   * Verify the number of blocks and files are correct after volume failure,
   * and that we can replicate to both datanodes even after a single volume
   * failure if the configuration parameter allows this.
   */
@Test(timeout = 120000)
public void testVolumeFailure() throws Exception {
    System.out.println("Data dir: is " + dataDir.getPath());
    // Data dir structure is dataDir/data[1-4]/[current,tmp...]
    // data1,2 is for datanode 1, data2,3 - datanode2 
    String filename = "/test.txt";
    Path filePath = new Path(filename);
    // we use only a small number of blocks to avoid creating subdirs in the data dir
    int filesize = block_size * blocks_num;
    DFSTestUtil.createFile(fs, filePath, filesize, repl, 1L);
    DFSTestUtil.waitReplication(fs, filePath, repl);
    System.out.println("file " + filename + "(size " + filesize + ") is created and replicated");
    // fail the volume
    // delete/make non-writable one of the directories (failed volume)
    data_fail = new File(dataDir, "data3");
    failedDir = MiniDFSCluster.getFinalizedDir(data_fail, cluster.getNamesystem().getBlockPoolId());
    // (alternative: FileUtil.fullyDelete(failedDir))
    if (failedDir.exists() && !deteteBlocks(failedDir)) {
        throw new IOException("Could not delete hdfs directory '" + failedDir + "'");
    }
    data_fail.setReadOnly();
    failedDir.setReadOnly();
    System.out.println("Deleteing " + failedDir.getPath() + "; exist=" + failedDir.exists());
    // access all the blocks on the "failed" DataNode, 
    // we need to make sure that the "failed" volume is being accessed - 
    // and that will cause failure, blocks removal, "emergency" block report
    triggerFailure(filename, filesize);
    // the DN eventually has the latest volume failure information for the next heartbeat
    final DataNode dn = cluster.getDataNodes().get(1);
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            final VolumeFailureSummary summary = dn.getFSDataset().getVolumeFailureSummary();
            return summary != null && summary.getFailedStorageLocations() != null && summary.getFailedStorageLocations().length == 1;
        }
    }, 10, 30 * 1000);
    // trigger DN to send heartbeat
    DataNodeTestUtils.triggerHeartbeat(dn);
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    // trigger the NN to handle the heartbeat
    BlockManagerTestUtil.checkHeartbeat(bm);
    // the NN should now have the latest volume failure count
    assertEquals(1, cluster.getNamesystem().getVolumeFailuresTotal());
    // verify number of blocks and files...
    verify(filename, filesize);
    // create another file (with one volume failed).
    System.out.println("creating file test1.txt");
    Path fileName1 = new Path("/test1.txt");
    DFSTestUtil.createFile(fs, fileName1, filesize, repl, 1L);
    // should be able to replicate to both nodes (2 DN, repl=2)
    DFSTestUtil.waitReplication(fs, fileName1, repl);
    System.out.println("file " + fileName1.getName() + " is created and replicated");
}
Also used : Path(org.apache.hadoop.fs.Path) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) IOException(java.io.IOException) File(java.io.File) VolumeFailureSummary(org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary) Test(org.junit.Test)
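
On Java 8+, the anonymous Supplier passed to GenericTestUtils.waitFor above can be written as a lambda, since the Supplier type it accepts is a single-method interface. A minimal equivalent sketch, assuming the same dn variable is in scope:

// Equivalent lambda form of the anonymous Supplier above: wait until the
// DataNode reports exactly one failed storage location.
GenericTestUtils.waitFor(() -> {
    VolumeFailureSummary summary = dn.getFSDataset().getVolumeFailureSummary();
    return summary != null && summary.getFailedStorageLocations() != null
        && summary.getFailedStorageLocations().length == 1;
}, 10, 30 * 1000);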

Example 23 with BlockManager

use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.

the class TestReconstructStripedBlocks method testCountLiveReplicas.

/**
   * Make sure the NN can detect the scenario where there is a sufficient number
   * of internal blocks (>= 9 by default) but a data/parity block is still
   * missing.
   */
@Test
public void testCountLiveReplicas() throws Exception {
    final HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    try {
        fs.mkdirs(dirPath);
        fs.setErasureCodingPolicy(dirPath, StripedFileTestUtil.getDefaultECPolicy().getName());
        DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
        // stop a dn
        LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        DatanodeInfo dnToStop = block.getLocations()[0];
        MiniDFSCluster.DataNodeProperties dnProp = cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // wait for reconstruction to happen
        DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
        // bring the dn back: 10 internal blocks now
        cluster.restartDataNode(dnProp);
        cluster.waitActive();
        // stop another dn: 9 internal blocks remain, but they cover only 8 distinct ones
        dnToStop = block.getLocations()[1];
        cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // the NameNode is currently able to track the missing block; now restart the NN
        cluster.restartNameNode(true);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(dn);
        }
        FSNamesystem fsn = cluster.getNamesystem();
        BlockManager bm = fsn.getBlockManager();
        // wait for 3 cycles of the redundancy monitor
        Thread.sleep(3000);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(dn);
        }
        // check if NN can detect the missing internal block and finish the
        // reconstruction
        StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
        boolean reconstructed = false;
        for (int i = 0; i < 5; i++) {
            NumberReplicas num = null;
            fsn.readLock();
            try {
                BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory().getINode4Write(filePath.toString()).asFile().getLastBlock();
                num = bm.countNodes(blockInfo);
            } finally {
                fsn.readUnlock();
            }
            if (num.liveReplicas() >= groupSize) {
                reconstructed = true;
                break;
            } else {
                Thread.sleep(1000);
            }
        }
        Assert.assertTrue(reconstructed);
        blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        BitSet bitSet = new BitSet(groupSize);
        for (byte index : block.getBlockIndices()) {
            bitSet.set(index);
        }
        for (int i = 0; i < groupSize; i++) {
            Assert.assertTrue(bitSet.get(i));
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) BitSet(java.util.BitSet) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) Test(org.junit.Test)
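
The read-locked replica count inside the retry loop could be factored into a small helper. The following is a sketch, not part of the Hadoop test; it assumes the same imports as above (FSNamesystem, BlockManager, BlockInfo, IOException) and mirrors the locking pattern used there, since countNodes is called under the namesystem read lock in the test.

// Hypothetical helper: count live replicas of the last block of a file while
// holding the FSNamesystem read lock, exactly as the loop above does inline.
private static int countLiveReplicasOfLastBlock(FSNamesystem fsn, BlockManager bm,
        String path) throws IOException {
    fsn.readLock();
    try {
        BlockInfo lastBlock =
            fsn.getFSDirectory().getINode4Write(path).asFile().getLastBlock();
        return bm.countNodes(lastBlock).liveReplicas();
    } finally {
        fsn.readUnlock();
    }
}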

Example 24 with BlockManager

use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.

the class TestReconstructStripedBlocks method test2RecoveryTasksForSameBlockGroup.

@Test
public void test2RecoveryTasksForSameBlockGroup() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1000);
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1000);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
    try {
        cluster.waitActive();
        DistributedFileSystem fs = cluster.getFileSystem();
        BlockManager bm = cluster.getNamesystem().getBlockManager();
        fs.getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName());
        int fileLen = dataBlocks * blockSize;
        Path p = new Path("/test2RecoveryTasksForSameBlockGroup");
        final byte[] data = new byte[fileLen];
        DFSTestUtil.writeFile(fs, p, data);
        LocatedStripedBlock lb = (LocatedStripedBlock) fs.getClient().getLocatedBlocks(p.toString(), 0).get(0);
        LocatedBlock[] lbs = StripedBlockUtil.parseStripedBlockGroup(lb, cellSize, dataBlocks, parityBlocks);
        assertEquals(0, getNumberOfBlocksToBeErasureCoded(cluster));
        assertEquals(0, bm.getPendingReconstructionBlocksCount());
        // missing 1 block, so 1 task should be scheduled
        DatanodeInfo dn0 = lbs[0].getLocations()[0];
        cluster.stopDataNode(dn0.getName());
        cluster.setDataNodeDead(dn0);
        BlockManagerTestUtil.getComputedDatanodeWork(bm);
        assertEquals(1, getNumberOfBlocksToBeErasureCoded(cluster));
        assertEquals(1, bm.getPendingReconstructionBlocksCount());
        // missing another block, but no new task should be scheduled because
        // previous task isn't finished.
        DatanodeInfo dn1 = lbs[1].getLocations()[0];
        cluster.stopDataNode(dn1.getName());
        cluster.setDataNodeDead(dn1);
        BlockManagerTestUtil.getComputedDatanodeWork(bm);
        assertEquals(1, getNumberOfBlocksToBeErasureCoded(cluster));
        assertEquals(1, bm.getPendingReconstructionBlocksCount());
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) Test(org.junit.Test)
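
The stop-a-datanode-and-recompute steps repeated for dn0 and dn1 could be folded into a single helper. A sketch under the assumption that it lives in the same test class; the helper name and parameters are hypothetical, and it uses only calls already shown above.

// Hypothetical helper: stop a datanode, mark it dead on the NameNode, and force
// the redundancy monitor to compute new work so the pending-reconstruction
// counters can be asserted right away.
private static void stopDataNodeAndRecomputeWork(MiniDFSCluster cluster,
        BlockManager bm, DatanodeInfo dn) throws Exception {
    cluster.stopDataNode(dn.getName());
    cluster.setDataNodeDead(dn);
    BlockManagerTestUtil.getComputedDatanodeWork(bm);
}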

Example 25 with BlockManager

use of org.apache.hadoop.hdfs.server.blockmanagement.BlockManager in project hadoop by apache.

the class NamenodeWebHdfsMethods method chooseDatanode.

@VisibleForTesting
static DatanodeInfo chooseDatanode(final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset, final long blocksize, final String excludeDatanodes, final String remoteAddr) throws IOException {
    FSNamesystem fsn = namenode.getNamesystem();
    if (fsn == null) {
        throw new IOException("Namesystem has not been intialized yet.");
    }
    final BlockManager bm = fsn.getBlockManager();
    HashSet<Node> excludes = new HashSet<Node>();
    if (excludeDatanodes != null) {
        for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
            int idx = host.indexOf(":");
            if (idx != -1) {
                excludes.add(bm.getDatanodeManager().getDatanodeByXferAddr(host.substring(0, idx), Integer.parseInt(host.substring(idx + 1))));
            } else {
                excludes.add(bm.getDatanodeManager().getDatanodeByHost(host));
            }
        }
    }
    if (op == PutOpParam.Op.CREATE) {
        // choose a datanode near the client
        final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(remoteAddr);
        if (clientNode != null) {
            final DatanodeStorageInfo[] storages = bm.chooseTarget4WebHDFS(path, clientNode, excludes, blocksize);
            if (storages.length > 0) {
                return storages[0].getDatanodeDescriptor();
            }
        }
    } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM || op == PostOpParam.Op.APPEND) {
        //choose a datanode containing a replica 
        final NamenodeProtocols np = getRPCServer(namenode);
        final HdfsFileStatus status = np.getFileInfo(path);
        if (status == null) {
            throw new FileNotFoundException("File " + path + " not found.");
        }
        final long len = status.getLen();
        if (op == GetOpParam.Op.OPEN) {
            if (openOffset < 0L || (openOffset >= len && len > 0)) {
                throw new IOException("Offset=" + openOffset + " out of the range [0, " + len + "); " + op + ", path=" + path);
            }
        }
        if (len > 0) {
            final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
            final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
            final int count = locations.locatedBlockCount();
            if (count > 0) {
                return bestNode(locations.get(0).getLocations(), excludes);
            }
        }
    }
    return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT, excludes);
}
Also used : NamenodeProtocols(org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Node(org.apache.hadoop.net.Node) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
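
The exclude-list handling above splits each comma-separated entry on the first colon to decide between a transfer-address lookup (host:port) and a plain host lookup. A simplified, standalone illustration of just that parsing step; the hostnames, port, and class name are made up, and the DatanodeManager lookups are replaced with prints.

// Standalone sketch of the excludeDatanodes parsing, not Hadoop code.
public class ExcludeListParsing {
    public static void main(String[] args) {
        String excludeDatanodes = "dn1.example.com,dn2.example.com:9866";
        for (String host : excludeDatanodes.split(",")) {
            host = host.trim();
            int idx = host.indexOf(':');
            if (idx != -1) {
                // host:port entry -> looked up via getDatanodeByXferAddr(host, port)
                String name = host.substring(0, idx);
                int port = Integer.parseInt(host.substring(idx + 1));
                System.out.println("exclude by xfer addr: " + name + ":" + port);
            } else {
                // host-only entry -> looked up via getDatanodeByHost(host)
                System.out.println("exclude by host: " + host);
            }
        }
    }
}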

Aggregations

BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 47 usages
Test (org.junit.Test): 33 usages
Path (org.apache.hadoop.fs.Path): 21 usages
BlockInfo (org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo): 13 usages
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 12 usages
IOException (java.io.IOException): 11 usages
Configuration (org.apache.hadoop.conf.Configuration): 11 usages
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 11 usages
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 11 usages
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 10 usages
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 9 usages
Block (org.apache.hadoop.hdfs.protocol.Block): 8 usages
FileNotFoundException (java.io.FileNotFoundException): 7 usages
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7 usages
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 7 usages
LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 6 usages
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 6 usages
DatanodeManager (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager): 6 usages