Example 1 with CachedBlock

Use of org.apache.hadoop.hdfs.server.namenode.CachedBlock in project hadoop by apache.

From the class CacheReplicationMonitor, method rescanCachedBlockMap.

/**
   * Scan through the cached block map.
   * Any blocks which are under-replicated should be assigned new Datanodes.
   * Blocks that are over-replicated should be removed from Datanodes.
   */
private void rescanCachedBlockMap() {
    // Remove pendingCached blocks that will make DN out-of-capacity.
    Set<DatanodeDescriptor> datanodes = blockManager.getDatanodeManager().getDatanodes();
    for (DatanodeDescriptor dn : datanodes) {
        long remaining = dn.getCacheRemaining();
        for (Iterator<CachedBlock> it = dn.getPendingCached().iterator(); it.hasNext(); ) {
            CachedBlock cblock = it.next();
            BlockInfo blockInfo = blockManager.getStoredBlock(new Block(cblock.getBlockId()));
            if (blockInfo == null) {
                // Cannot find this block on the NameNode, skip this block from
                // capacity calculation. Later logic will handle this block.
                LOG.debug("Block {}: cannot be found in block manager and hence" + " skipped from calculation for node {}.", cblock.getBlockId(), dn.getDatanodeUuid());
                continue;
            }
            if (blockInfo.getNumBytes() > remaining) {
                LOG.debug("Block {}: removing from PENDING_CACHED for node {} " + "because it cannot fit in remaining cache size {}.", cblock.getBlockId(), dn.getDatanodeUuid(), remaining);
                it.remove();
            } else {
                remaining -= blockInfo.getNumBytes();
            }
        }
    }
    for (Iterator<CachedBlock> cbIter = cachedBlocks.iterator(); cbIter.hasNext(); ) {
        scannedBlocks++;
        CachedBlock cblock = cbIter.next();
        List<DatanodeDescriptor> pendingCached = cblock.getDatanodes(Type.PENDING_CACHED);
        List<DatanodeDescriptor> cached = cblock.getDatanodes(Type.CACHED);
        List<DatanodeDescriptor> pendingUncached = cblock.getDatanodes(Type.PENDING_UNCACHED);
        // Remove nodes from PENDING_UNCACHED if they were actually uncached.
        for (Iterator<DatanodeDescriptor> iter = pendingUncached.iterator(); iter.hasNext(); ) {
            DatanodeDescriptor datanode = iter.next();
            if (!cblock.isInList(datanode.getCached())) {
                LOG.trace("Block {}: removing from PENDING_UNCACHED for node {} " + "because the DataNode uncached it.", cblock.getBlockId(), datanode.getDatanodeUuid());
                datanode.getPendingUncached().remove(cblock);
                iter.remove();
            }
        }
        BlockInfo blockInfo = blockManager.getStoredBlock(new Block(cblock.getBlockId()));
        String reason = findReasonForNotCaching(cblock, blockInfo);
        int neededCached = 0;
        if (reason != null) {
            LOG.trace("Block {}: can't cache block because it is {}", cblock.getBlockId(), reason);
        } else {
            neededCached = cblock.getReplication();
        }
        int numCached = cached.size();
        if (numCached >= neededCached) {
            // If we have enough replicas, drop all pending cached.
            for (Iterator<DatanodeDescriptor> iter = pendingCached.iterator(); iter.hasNext(); ) {
                DatanodeDescriptor datanode = iter.next();
                datanode.getPendingCached().remove(cblock);
                iter.remove();
                LOG.trace("Block {}: removing from PENDING_CACHED for node {} " + "because we already have {} cached replicas and we only" + " need {}", cblock.getBlockId(), datanode.getDatanodeUuid(), numCached, neededCached);
            }
        }
        if (numCached < neededCached) {
            // If we don't have enough replicas, drop all pending uncached.
            for (Iterator<DatanodeDescriptor> iter = pendingUncached.iterator(); iter.hasNext(); ) {
                DatanodeDescriptor datanode = iter.next();
                datanode.getPendingUncached().remove(cblock);
                iter.remove();
                LOG.trace("Block {}: removing from PENDING_UNCACHED for node {} " + "because we only have {} cached replicas and we need " + "{}", cblock.getBlockId(), datanode.getDatanodeUuid(), numCached, neededCached);
            }
        }
        int neededUncached = numCached - (pendingUncached.size() + neededCached);
        if (neededUncached > 0) {
            addNewPendingUncached(neededUncached, cblock, cached, pendingUncached);
        } else {
            int additionalCachedNeeded = neededCached - (numCached + pendingCached.size());
            if (additionalCachedNeeded > 0) {
                addNewPendingCached(additionalCachedNeeded, cblock, cached, pendingCached);
            }
        }
        if ((neededCached == 0) && pendingUncached.isEmpty() && pendingCached.isEmpty()) {
            // we have nothing more to do with this block.
            LOG.trace("Block {}: removing from cachedBlocks, since neededCached " + "== 0, and pendingUncached and pendingCached are empty.", cblock.getBlockId());
            cbIter.remove();
        }
    }
}
Also used : CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock), Block (org.apache.hadoop.hdfs.protocol.Block)
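
The decisions in rescanCachedBlockMap reduce to simple arithmetic over the CACHED, PENDING_CACHED, and PENDING_UNCACHED counts. Below is a minimal, self-contained sketch of just that arithmetic; the class and the decide helper are illustrative names, not part of Hadoop.

/** A sketch of the replica arithmetic performed by rescanCachedBlockMap. */
class CacheRescanArithmetic {

    /**
     * Decide how many replicas to uncache or cache, given the current counts.
     * Mirrors the neededUncached / additionalCachedNeeded computation above.
     */
    static String decide(int numCached, int neededCached, int pendingCached, int pendingUncached) {
        // Replicas beyond the target, minus those already queued for uncaching.
        int neededUncached = numCached - (pendingUncached + neededCached);
        if (neededUncached > 0) {
            return "uncache " + neededUncached + " replica(s)";
        }
        // Replicas still missing, counting those already queued for caching.
        int additionalCachedNeeded = neededCached - (numCached + pendingCached);
        if (additionalCachedNeeded > 0) {
            return "cache " + additionalCachedNeeded + " more replica(s)";
        }
        return "steady state";
    }

    public static void main(String[] args) {
        // 4 cached, target 2, nothing pending: two replicas should be uncached.
        System.out.println(decide(4, 2, 0, 0));
        // 1 cached, target 3, 1 already pending: one more replica is needed.
        System.out.println(decide(1, 3, 1, 0));
    }
}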

Example 2 with CachedBlock

Use of org.apache.hadoop.hdfs.server.namenode.CachedBlock in project hadoop by apache.

From the class DatanodeManager, method getCacheCommand.

/**
   * Convert a CachedBlockList into a DatanodeCommand with a list of blocks.
   *
   * @param list       The {@link CachedBlocksList} of blocks to include.
   * @param action     The action to perform in the command.
   * @param poolId     The block pool id.
   * @return           A DatanodeCommand to be sent back to the DN, or null if
   *                   there is nothing to be done.
   */
private DatanodeCommand getCacheCommand(CachedBlocksList list, int action, String poolId) {
    int length = list.size();
    if (length == 0) {
        return null;
    }
    // Copy the block IDs into a primitive array for the command.
    long[] blockIds = new long[length];
    int i = 0;
    for (CachedBlock cachedBlock : list) {
        blockIds[i++] = cachedBlock.getBlockId();
    }
    return new BlockIdCommand(action, poolId, blockIds);
}
Also used : CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock)
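
The conversion itself is just a copy of block IDs into a primitive array, with null signalling that no command needs to be sent. A minimal sketch of the same pattern, using a plain List<Long> in place of CachedBlocksList (the class and helper names are hypothetical):

import java.util.Arrays;
import java.util.List;

/** A sketch of the list-to-array conversion performed by getCacheCommand. */
class BlockIdArrayDemo {

    /** Copies block IDs out of the list; returns null when there is nothing to do. */
    static long[] toBlockIdArray(List<Long> list) {
        if (list.isEmpty()) {
            // Mirrors getCacheCommand returning null instead of an empty command.
            return null;
        }
        long[] blockIds = new long[list.size()];
        int i = 0;
        for (long blockId : list) {
            blockIds[i++] = blockId;
        }
        return blockIds;
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(toBlockIdArray(List.of(0L, 1L, 2L))));
        System.out.println(Arrays.toString(toBlockIdArray(List.of())));
    }
}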

Example 3 with CachedBlock

Use of org.apache.hadoop.hdfs.server.namenode.CachedBlock in project hadoop by apache.

From the class TestCachedBlocksList, method testSingleList.

@Test(timeout = 60000)
public void testSingleList() {
    DatanodeDescriptor dn = new DatanodeDescriptor(new DatanodeID("127.0.0.1", "localhost", "abcd", 5000, 5001, 5002, 5003));
    CachedBlock[] blocks = new CachedBlock[] { new CachedBlock(0L, (short) 1, true), new CachedBlock(1L, (short) 1, true), new CachedBlock(2L, (short) 1, true) };
    // check that lists are empty
    Assert.assertTrue("expected pending cached list to start off empty.", !dn.getPendingCached().iterator().hasNext());
    Assert.assertTrue("expected cached list to start off empty.", !dn.getCached().iterator().hasNext());
    Assert.assertTrue("expected pending uncached list to start off empty.", !dn.getPendingUncached().iterator().hasNext());
    // add a block to the back
    Assert.assertTrue(dn.getCached().add(blocks[0]));
    Assert.assertTrue("expected pending cached list to still be empty.", !dn.getPendingCached().iterator().hasNext());
    Assert.assertEquals("failed to insert blocks[0]", blocks[0], dn.getCached().iterator().next());
    Assert.assertTrue("expected pending uncached list to still be empty.", !dn.getPendingUncached().iterator().hasNext());
    // add another block to the back
    Assert.assertTrue(dn.getCached().add(blocks[1]));
    Iterator<CachedBlock> iter = dn.getCached().iterator();
    Assert.assertEquals(blocks[0], iter.next());
    Assert.assertEquals(blocks[1], iter.next());
    Assert.assertTrue(!iter.hasNext());
    // add a block to the front
    Assert.assertTrue(dn.getCached().addFirst(blocks[2]));
    iter = dn.getCached().iterator();
    Assert.assertEquals(blocks[2], iter.next());
    Assert.assertEquals(blocks[0], iter.next());
    Assert.assertEquals(blocks[1], iter.next());
    Assert.assertTrue(!iter.hasNext());
    // remove a block from the middle
    Assert.assertTrue(dn.getCached().remove(blocks[0]));
    iter = dn.getCached().iterator();
    Assert.assertEquals(blocks[2], iter.next());
    Assert.assertEquals(blocks[1], iter.next());
    Assert.assertTrue(!iter.hasNext());
    // remove all blocks
    dn.getCached().clear();
    Assert.assertTrue("expected cached list to be empty after clear.", !dn.getPendingCached().iterator().hasNext());
}
Also used : DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID), CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock), Test (org.junit.Test)
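
The assertions above pin down the deque-like ordering contract of the cached list: add appends, addFirst prepends, and remove deletes from the middle. The same sequence can be reproduced with a plain java.util.LinkedList standing in for the intrusive CachedBlocksList (a sketch, not the real class):

import java.util.LinkedList;

/**
 * Reproduces the ordering the assertions above check, with a plain LinkedList
 * standing in for the intrusive CachedBlocksList.
 */
class ListOrderingDemo {
    public static void main(String[] args) {
        LinkedList<Long> cached = new LinkedList<>();
        // add appends to the back, addFirst prepends to the front.
        cached.add(0L);
        cached.add(1L);
        cached.addFirst(2L);
        // remove(Object) deletes the element with value 0 from the middle.
        cached.remove(0L);
        // Iteration order after the operations above: [2, 1]
        System.out.println(cached);
    }
}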

Example 4 with CachedBlock

Use of org.apache.hadoop.hdfs.server.namenode.CachedBlock in project hadoop by apache.

From the class TestCachedBlocksList, method testMultipleLists.

@Test(timeout = 60000)
public void testMultipleLists() {
    DatanodeDescriptor[] datanodes = new DatanodeDescriptor[] { new DatanodeDescriptor(new DatanodeID("127.0.0.1", "localhost", "abcd", 5000, 5001, 5002, 5003)), new DatanodeDescriptor(new DatanodeID("127.0.1.1", "localhost", "efgh", 6000, 6001, 6002, 6003)) };
    CachedBlocksList[] lists = new CachedBlocksList[] { datanodes[0].getPendingCached(), datanodes[0].getCached(), datanodes[1].getPendingCached(), datanodes[1].getCached(), datanodes[1].getPendingUncached() };
    final int NUM_BLOCKS = 8000;
    CachedBlock[] blocks = new CachedBlock[NUM_BLOCKS];
    for (int i = 0; i < NUM_BLOCKS; i++) {
        blocks[i] = new CachedBlock(i, (short) i, true);
    }
    Random r = new Random(654);
    for (CachedBlocksList list : lists) {
        testAddElementsToList(list, blocks);
    }
    for (CachedBlocksList list : lists) {
        testRemoveElementsFromList(r, list, blocks);
    }
}
Also used : DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor), DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID), CachedBlocksList (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList), Random (java.util.Random), CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock), Test (org.junit.Test)
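
What makes this test interesting is that the same CachedBlock objects are added to five lists across two datanodes. CachedBlocksList is an intrusive collection, so membership is recorded on the element itself rather than in per-list wrapper nodes. The sketch below models only that membership bookkeeping with illustrative classes; the real implementation keeps prev/next pointers on the element for O(1) removal.

import java.util.ArrayList;
import java.util.List;

/**
 * A stripped-down model of intrusive-list membership: the element itself
 * records which lists contain it, so one object can join several lists
 * without wrapper allocations. Illustrative classes, not the Hadoop ones.
 */
class IntrusiveDemo {

    static class Element {
        final long id;
        final List<IntrusiveList> memberships = new ArrayList<>();

        Element(long id) {
            this.id = id;
        }

        boolean isInList(IntrusiveList list) {
            return memberships.contains(list);
        }
    }

    static class IntrusiveList {
        final List<Element> elements = new ArrayList<>();

        boolean add(Element e) {
            if (e.isInList(this)) {
                // Already a member; intrusive lists reject double insertion.
                return false;
            }
            elements.add(e);
            e.memberships.add(this);
            return true;
        }

        boolean remove(Element e) {
            if (!elements.remove(e)) {
                return false;
            }
            e.memberships.remove(this);
            return true;
        }
    }

    public static void main(String[] args) {
        IntrusiveList cachedOnDn0 = new IntrusiveList();
        IntrusiveList pendingCachedOnDn1 = new IntrusiveList();
        Element block = new Element(42L);
        // The same element object joins two lists at once.
        cachedOnDn0.add(block);
        pendingCachedOnDn1.add(block);
        System.out.println(block.isInList(cachedOnDn0));
        System.out.println(block.isInList(pendingCachedOnDn1));
    }
}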

Example 5 with CachedBlock

Use of org.apache.hadoop.hdfs.server.namenode.CachedBlock in project hadoop by apache.

From the class CacheReplicationMonitor, method addNewPendingCached.

/**
   * Add new entries to the PendingCached list.
   *
   * @param neededCached     The number of replicas that need to be cached.
   * @param cachedBlock      The block which needs to be cached.
   * @param cached           A list of DataNodes currently caching the block.
   * @param pendingCached    A list of DataNodes that will soon cache the
   *                         block.
   */
private void addNewPendingCached(final int neededCached, CachedBlock cachedBlock, List<DatanodeDescriptor> cached, List<DatanodeDescriptor> pendingCached) {
    // To figure out which replicas can be cached, we consult the
    // blocksMap.  We don't want to try to cache a corrupt replica, though.
    BlockInfo blockInfo = blockManager.getStoredBlock(new Block(cachedBlock.getBlockId()));
    if (blockInfo == null) {
        LOG.debug("Block {}: can't add new cached replicas," + " because there is no record of this block " + "on the NameNode.", cachedBlock.getBlockId());
        return;
    }
    if (!blockInfo.isComplete()) {
        LOG.debug("Block {}: can't cache this block, because it is not yet" + " complete.", cachedBlock.getBlockId());
        return;
    }
    // Filter the list of replicas to only the valid targets
    List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
    int numReplicas = blockInfo.getCapacity();
    Collection<DatanodeDescriptor> corrupt = blockManager.getCorruptReplicas(blockInfo);
    int outOfCapacity = 0;
    for (int i = 0; i < numReplicas; i++) {
        DatanodeDescriptor datanode = blockInfo.getDatanode(i);
        if (datanode == null) {
            continue;
        }
        if (!datanode.isInService()) {
            continue;
        }
        if (corrupt != null && corrupt.contains(datanode)) {
            continue;
        }
        if (pendingCached.contains(datanode) || cached.contains(datanode)) {
            continue;
        }
        long pendingBytes = 0;
        // Subtract pending cached blocks from effective capacity
        Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
        while (it.hasNext()) {
            CachedBlock cBlock = it.next();
            BlockInfo info = blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
            if (info != null) {
                pendingBytes -= info.getNumBytes();
            }
        }
        it = datanode.getPendingUncached().iterator();
        // Add pending uncached blocks back to effective capacity
        while (it.hasNext()) {
            CachedBlock cBlock = it.next();
            BlockInfo info = blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
            if (info != null) {
                pendingBytes += info.getNumBytes();
            }
        }
        long pendingCapacity = pendingBytes + datanode.getCacheRemaining();
        if (pendingCapacity < blockInfo.getNumBytes()) {
            LOG.trace("Block {}: DataNode {} is not a valid possibility " + "because the block has size {}, but the DataNode only has {} " + "bytes of cache remaining ({} pending bytes, {} already cached.)", blockInfo.getBlockId(), datanode.getDatanodeUuid(), blockInfo.getNumBytes(), pendingCapacity, pendingBytes, datanode.getCacheRemaining());
            outOfCapacity++;
            continue;
        }
        possibilities.add(datanode);
    }
    List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities, neededCached, blockManager.getDatanodeManager().getStaleInterval());
    for (DatanodeDescriptor datanode : chosen) {
        LOG.trace("Block {}: added to PENDING_CACHED on DataNode {}", blockInfo.getBlockId(), datanode.getDatanodeUuid());
        pendingCached.add(datanode);
        boolean added = datanode.getPendingCached().add(cachedBlock);
        assert added;
    }
    // We were unable to satisfy the requested replication factor
    if (neededCached > chosen.size()) {
        LOG.debug("Block {}: we only have {} of {} cached replicas." + " {} DataNodes have insufficient cache capacity.", blockInfo.getBlockId(), (cachedBlock.getReplication() - neededCached + chosen.size()), cachedBlock.getReplication(), outOfCapacity);
    }
}
Also used : CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock), Block (org.apache.hadoop.hdfs.protocol.Block), LinkedList (java.util.LinkedList)
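
The capacity check above nets out in-flight work: bytes already queued for caching reduce the effective free space, while bytes queued for uncaching give it back. A minimal sketch of that computation (the class and helper names are illustrative):

/** A sketch of the effective-capacity check in addNewPendingCached. */
class PendingCapacityDemo {

    /**
     * A datanode can accept a block only if its remaining cache, adjusted for
     * in-flight work, still covers the block: bytes already queued for caching
     * are subtracted, bytes queued for uncaching are added back.
     */
    static boolean hasPendingCapacity(long cacheRemaining, long pendingCachedBytes,
            long pendingUncachedBytes, long blockSize) {
        long pendingCapacity = cacheRemaining - pendingCachedBytes + pendingUncachedBytes;
        return pendingCapacity >= blockSize;
    }

    public static void main(String[] args) {
        // 100 MB free, 80 MB queued to cache, 30 MB queued to uncache:
        // effective capacity is 50 MB, so a 64 MB block does not fit.
        System.out.println(hasPendingCapacity(100L << 20, 80L << 20, 30L << 20, 64L << 20));
    }
}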

Aggregations

CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock): 6
Block (org.apache.hadoop.hdfs.protocol.Block): 3
DatanodeID (org.apache.hadoop.hdfs.protocol.DatanodeID): 2
DatanodeDescriptor (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor): 2
Test (org.junit.Test): 2
LinkedList (java.util.LinkedList): 1
Random (java.util.Random): 1
CachedBlocksList (org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList): 1
CachePool (org.apache.hadoop.hdfs.server.namenode.CachePool): 1