
Example 11 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project kitten by cloudera.

the class HDFSFileFinder method getNumBytesOfGlobHeldByDatanodes.

public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf) throws IOException {
    FileSystem fs = p.getFileSystem(conf);
    HashMap<String, Long> bytesHeld = Maps.newHashMap();
    for (FileStatus f : fs.globStatus(p)) {
        // Resolve block locations for each matched file, not for the glob pattern itself.
        BlockLocation[] bls = fs.getFileBlockLocations(f, 0, f.getLen());
        for (BlockLocation bl : bls) {
            long l = bl.getLength();
            // Credit this block's bytes to every datanode holding a replica.
            for (String name : bl.getNames()) {
                Long held = bytesHeld.get(name);
                bytesHeld.put(name, held == null ? l : held + l);
            }
        }
    }
    return bytesHeld;
}
Also used: FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation)
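
A minimal caller sketch for the method above. The class name GlobUsageReport, the glob pattern, and the printed report are illustrative assumptions, not part of the kitten source:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class GlobUsageReport {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical glob pattern; any HDFS glob works here.
        Path glob = new Path("/data/logs/*.avro");
        Map<String, Long> held = HDFSFileFinder.getNumBytesOfGlobHeldByDatanodes(glob, conf);
        for (Map.Entry<String, Long> e : held.entrySet()) {
            System.out.println(e.getKey() + " holds " + e.getValue() + " bytes");
        }
    }
}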

Example 12 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class InputStriper method splitFor.

/**
   * @param inputDir Pool used to resolve block locations.
   * @param bytes Target byte count
   * @param nLocs Number of block locations per split.
   * @return A set of files satisfying the byte count, with locations weighted
   *         to the dominating proportion of input bytes.
   */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs) throws IOException {
    final ArrayList<Path> paths = new ArrayList<Path>();
    final ArrayList<Long> start = new ArrayList<Long>();
    final ArrayList<Long> length = new ArrayList<Long>();
    final HashMap<String, Double> sb = new HashMap<String, Double>();
    // The codec factory is loop-invariant; build it once instead of per iteration.
    final CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    do {
        paths.add(current.getPath());
        start.add(currentStart);
        final long fromFile = Math.min(bytes, current.getLen() - currentStart);
        length.add(fromFile);
        // Weight each host by the fraction of the requested bytes its blocks cover.
        for (BlockLocation loc : inputDir.locationsFor(current, currentStart, fromFile)) {
            final double tedium = loc.getLength() / (1.0 * bytes);
            for (String host : loc.getHosts()) {
                Double j = sb.get(host);
                sb.put(host, null == j ? tedium : j.doubleValue() + tedium);
            }
        }
        currentStart += fromFile;
        bytes -= fromFile;
        // Switch to a new file if
        //  - the current file is uncompressed and completely used
        //  - the current file is compressed
        CompressionCodec codec = compressionCodecs.getCodec(current.getPath());
        if (current.getLen() - currentStart == 0 || codec != null) {
            current = files.get(++idx % files.size());
            currentStart = 0;
        }
    } while (bytes > 0);
    // Rank hosts by accumulated weight and keep the top nLocs as the split's locations.
    final ArrayList<Entry<String, Double>> sort = new ArrayList<Entry<String, Double>>(sb.entrySet());
    Collections.sort(sort, hostRank);
    final String[] hosts = new String[Math.min(nLocs, sort.size())];
    for (int i = 0; i < nLocs && i < sort.size(); ++i) {
        hosts[i] = sort.get(i).getKey();
    }
    return new CombineFileSplit(paths.toArray(new Path[0]), toLongArray(start), toLongArray(length), hosts);
}
Also used: Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CombineFileSplit(org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) BlockLocation(org.apache.hadoop.fs.BlockLocation) Entry(java.util.Map.Entry) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
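
The snippet relies on two helpers not shown here, hostRank and toLongArray. A plausible reconstruction of both, written to match how they are used above (a descending sort by accumulated weight, and a list-to-primitive conversion); the exact upstream bodies may differ:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Map.Entry;

// Sketch: order hosts by accumulated weight, heaviest first, so the
// top nLocs entries dominate the input bytes.
static final Comparator<Entry<String, Double>> hostRank =
    new Comparator<Entry<String, Double>>() {
        @Override
        public int compare(Entry<String, Double> a, Entry<String, Double> b) {
            return Double.compare(b.getValue(), a.getValue()); // descending
        }
    };

// Sketch: unbox an ArrayList<Long> into the long[] that CombineFileSplit expects.
static long[] toLongArray(ArrayList<Long> list) {
    final long[] ret = new long[list.size()];
    for (int i = 0; i < ret.length; ++i) {
        ret[i] = list.get(i);
    }
    return ret;
}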

Example 13 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestFilePool method testStriper.

@Test
public void testStriper() throws Exception {
    final Random r = new Random();
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.getLocal(conf).getRaw();
    conf.setLong(FilePool.GRIDMIX_MIN_FILE, 3 * 1024);
    // Stub out block resolution so the test does not depend on real block placement.
    final FilePool pool = new FilePool(conf, base) {

        @Override
        public BlockLocation[] locationsFor(FileStatus stat, long start, long len) throws IOException {
            return new BlockLocation[] { new BlockLocation() };
        }
    };
    pool.refresh();
    final int expectedPoolSize = (NFILES / 2 * (NFILES / 2 + 1) - 6) * 1024;
    final InputStriper striper = new InputStriper(pool, expectedPoolSize);
    // Consume the pool in randomly sized chunks; each split must match its requested byte count.
    int last = 0;
    for (int i = 0; i < expectedPoolSize; last = Math.min(expectedPoolSize - i, r.nextInt(expectedPoolSize))) {
        checkSplitEq(fs, striper.splitFor(pool, last, 0), last);
        i += last;
    }
    // A single split covering the entire pool must also check out.
    final InputStriper striper2 = new InputStriper(pool, expectedPoolSize);
    checkSplitEq(fs, striper2.splitFor(pool, expectedPoolSize, 0), expectedPoolSize);
}
Also used: FileStatus(org.apache.hadoop.fs.FileStatus) Random(java.util.Random) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
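
The stub works because a no-arg BlockLocation carries no hosts and zero length, so splitFor's host weighting contributes nothing and only the byte accounting is exercised. A standalone sketch illustrating the default values (not part of the test itself):

import org.apache.hadoop.fs.BlockLocation;

public class DefaultBlockLocationDemo {
    public static void main(String[] args) throws Exception {
        BlockLocation bl = new BlockLocation();
        // Prints hosts=0 length=0: the stubbed location adds no host weights.
        System.out.println("hosts=" + bl.getHosts().length + " length=" + bl.getLength());
    }
}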

Example 14 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class MultiFileSplit method getLocations.

public String[] getLocations() throws IOException {
    HashSet<String> hostSet = new HashSet<String>();
    for (Path file : getPaths()) {
        FileSystem fs = file.getFileSystem(getJob());
        FileStatus status = fs.getFileStatus(file);
        BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, status.getLen());
        // Only the hosts of each file's first block contribute to this split's locations.
        if (blkLocations != null && blkLocations.length > 0) {
            addToSet(hostSet, blkLocations[0].getHosts());
        }
    }
    return hostSet.toArray(new String[hostSet.size()]);
}
Also used: Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation) HashSet(java.util.HashSet)
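
The addToSet helper is not shown above; a likely one-liner, reconstructed from the call site (the actual method body in MultiFileSplit may differ):

// Sketch: fold an array of hostnames into the accumulating set.
private void addToSet(Set<String> set, String[] array) {
    for (String s : array) {
        set.add(s);
    }
}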

Example 15 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class FileInputFormat method getBlockIndex.

protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
    for (int i = 0; i < blkLocations.length; i++) {
        // is the offset inside this block?
        if ((blkLocations[i].getOffset() <= offset) && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
            return i;
        }
    }
    BlockLocation last = blkLocations[blkLocations.length - 1];
    long fileLength = last.getOffset() + last.getLength() - 1;
    throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}
Also used: BlockLocation(org.apache.hadoop.fs.BlockLocation)
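
This is how getBlockIndex is used when building splits: find the block containing a split's start offset and take that block's hosts as the split's preferred locations. The helper below is a hedged illustration of that pattern; the method name hostsForSplit and its parameters are assumptions, not lifted from FileInputFormat:

// Sketch: resolve a split's preferred hosts via getBlockIndex,
// mirroring what FileInputFormat.getSplits does per split.
String[] hostsForSplit(FileSystem fs, FileStatus file, long splitStart) throws IOException {
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
    int blkIndex = getBlockIndex(blkLocations, splitStart);
    return blkLocations[blkIndex].getHosts();
}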

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation): 88
Path (org.apache.hadoop.fs.Path): 41
FileStatus (org.apache.hadoop.fs.FileStatus): 30
Test (org.junit.Test): 29
FileSystem (org.apache.hadoop.fs.FileSystem): 16
ArrayList (java.util.ArrayList): 14
Configuration (org.apache.hadoop.conf.Configuration): 14
IOException (java.io.IOException): 10
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 10
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 7
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 7
InetSocketAddress (java.net.InetSocketAddress): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 5
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 5
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5
IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation): 5
IgfsPath (org.apache.ignite.igfs.IgfsPath): 5
HashMap (java.util.HashMap): 4
Random (java.util.Random): 4