Search in sources :

Example 1 with CachePool

Use of org.apache.hadoop.hdfs.server.namenode.CachePool in the Apache Hadoop project.

In the class CacheReplicationMonitor, method rescanFile.

/**
 * Applies a CacheDirective to a single file.
 *
 * <p>The directive's "needed" counters are always incremented. If the
 * directive's pool then reports more bytes needed than its limit, the method
 * returns without visiting any block. Otherwise every block in state COMPLETE
 * is registered in (or refreshed within) {@code cachedBlocks}, and the
 * directive's "cached" counters are updated from what is already cached.
 *
 * @param directive The CacheDirective to apply.
 * @param file The file.
 */
private void rescanFile(CacheDirective directive, INodeFile file) {
    final BlockInfo[] fileBlocks = file.getBlocks();

    // Account for this file in the "needed" statistics. Under-construction
    // blocks are never cached, so the last UC block is left out of the size.
    directive.addFilesNeeded(1);
    final long bytesWanted = file.computeFileSizeNotIncludingLastUcBlock() * directive.getReplication();
    directive.addBytesNeeded(bytesWanted);

    // The pool's bytesNeeded is incremented as we scan. Once the accumulated
    // demand (this file included) is over the pool's limit, skip caching
    // this file.
    final CachePool owningPool = directive.getPool();
    if (owningPool.getBytesNeeded() > owningPool.getLimit()) {
        LOG.debug("Directive {}: not scanning file {} because "
                + "bytesNeeded for pool {} is {}, but the pool's limit is {}",
            directive.getId(), file.getFullPathName(), owningPool.getPoolName(),
            owningPool.getBytesNeeded(), owningPool.getLimit());
        return;
    }

    long bytesCached = 0;
    for (BlockInfo info : fileBlocks) {
        // Only blocks in state COMPLETE are candidates for caching.
        if (!info.getBlockUCState().equals(BlockUCState.COMPLETE)) {
            LOG.trace("Directive {}: can't cache block {} because it is in state "
                    + "{}, not COMPLETE.",
                directive.getId(), info, info.getBlockUCState());
            continue;
        }

        Block blk = new Block(info.getBlockId());
        CachedBlock candidate = new CachedBlock(blk.getBlockId(), directive.getReplication(), mark);
        CachedBlock tracked = cachedBlocks.get(candidate);
        if (tracked != null) {
            // Already tracked: credit the directive with the bytes cached so
            // far, capped at the replication it asked for. This assumes every
            // replica of a COMPLETE block has the same length on each
            // datanode. Two directives covering the same block each count it
            // in their own bytesCached.
            List<DatanodeDescriptor> holders = tracked.getDatanodes(Type.CACHED);
            bytesCached += Math.min(holders.size(), directive.getReplication()) * info.getNumBytes();

            // Overwrite the entry's replication and mark when either:
            //  1. its mark differs from this scan's mark, meaning it has not
            //     been updated during this scan and its contents are no
            //     longer valid; or
            //  2. its replication is lower than what this directive asks for,
            //     so it must be raised to the directive's level.
            boolean staleMark = mark != tracked.getMark();
            if (staleMark || tracked.getReplication() < directive.getReplication()) {
                tracked.setReplicationAndMark(directive.getReplication(), mark);
            }
        } else {
            // First sighting of this block: start tracking the new entry.
            cachedBlocks.put(candidate);
            tracked = candidate;
        }
        LOG.trace("Directive {}: setting replication for block {} to {}",
            directive.getId(), info, tracked.getReplication());
    }

    // Publish the "cached" statistics; the file counts as cached only when
    // every needed byte is accounted for.
    directive.addBytesCached(bytesCached);
    if (bytesCached == bytesWanted) {
        directive.addFilesCached(1);
    }
    LOG.debug("Directive {}: caching {}: {}/{} bytes",
        directive.getId(), file.getFullPathName(), bytesCached, bytesWanted);
}
Also used : CachedBlock(org.apache.hadoop.hdfs.server.namenode.CachedBlock) CachedBlock(org.apache.hadoop.hdfs.server.namenode.CachedBlock) Block(org.apache.hadoop.hdfs.protocol.Block) CachePool(org.apache.hadoop.hdfs.server.namenode.CachePool)

Aggregations

Block (org.apache.hadoop.hdfs.protocol.Block)1 CachePool (org.apache.hadoop.hdfs.server.namenode.CachePool)1 CachedBlock (org.apache.hadoop.hdfs.server.namenode.CachedBlock)1