Use of org.apache.hadoop.fs.BlockLocation in project kitten by cloudera.
The class HDFSFileFinder, method getNumBytesOfGlobHeldByDatanodes.
public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf) throws IOException {
  FileSystem fs = p.getFileSystem(conf);
  HashMap<String, Long> bytesHeld = Maps.newHashMap();
  for (FileStatus f : fs.globStatus(p)) {
    BlockLocation[] bls = fs.getFileBlockLocations(p, 0, f.getLen());
    if (bls.length > 0) {
      for (BlockLocation bl : bls) {
        // Credit this block's length to every datanode that holds a replica of it.
        long l = bl.getLength();
        for (String name : bl.getNames()) {
          if (bytesHeld.containsKey(name))
            bytesHeld.put(name, bytesHeld.get(name) + l);
          else
            bytesHeld.put(name, l);
        }
      }
    }
  }
  return bytesHeld;
}
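For context, a minimal usage sketch; the glob pattern and output handling below are hypothetical and not part of the project code.

// Hypothetical caller: report how many bytes of the matched files each datanode holds.
Configuration conf = new Configuration();
Path glob = new Path("/data/logs/*.avro"); // hypothetical glob
Map<String, Long> bytesByNode = HDFSFileFinder.getNumBytesOfGlobHeldByDatanodes(glob, conf);
for (Map.Entry<String, Long> e : bytesByNode.entrySet()) {
  System.out.println(e.getKey() + " holds " + e.getValue() + " bytes");
}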
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class InputStriper, method splitFor.
/**
 * @param inputDir Pool used to resolve block locations.
 * @param bytes Target byte count.
 * @param nLocs Number of block locations per split.
 * @return A set of files satisfying the byte count, with locations weighted
 *         to the dominating proportion of input bytes.
 */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs) throws IOException {
  final ArrayList<Path> paths = new ArrayList<Path>();
  final ArrayList<Long> start = new ArrayList<Long>();
  final ArrayList<Long> length = new ArrayList<Long>();
  final HashMap<String, Double> sb = new HashMap<String, Double>();
  do {
    paths.add(current.getPath());
    start.add(currentStart);
    final long fromFile = Math.min(bytes, current.getLen() - currentStart);
    length.add(fromFile);
    // Weight each host by the fraction of the requested bytes covered by its blocks.
    for (BlockLocation loc : inputDir.locationsFor(current, currentStart, fromFile)) {
      final double tedium = loc.getLength() / (1.0 * bytes);
      for (String l : loc.getHosts()) {
        Double j = sb.get(l);
        if (null == j) {
          sb.put(l, tedium);
        } else {
          sb.put(l, j.doubleValue() + tedium);
        }
      }
    }
    currentStart += fromFile;
    bytes -= fromFile;
    // Switch to a new file if
    //  - the current file is uncompressed and completely used
    //  - the current file is compressed
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecs.getCodec(current.getPath());
    if (current.getLen() - currentStart == 0 || codec != null) {
      current = files.get(++idx % files.size());
      currentStart = 0;
    }
  } while (bytes > 0);
  // Rank hosts by accumulated weight and keep at most nLocs of them.
  final ArrayList<Entry<String, Double>> sort = new ArrayList<Entry<String, Double>>(sb.entrySet());
  Collections.sort(sort, hostRank);
  final String[] hosts = new String[Math.min(nLocs, sort.size())];
  for (int i = 0; i < nLocs && i < sort.size(); ++i) {
    hosts[i] = sort.get(i).getKey();
  }
  return new CombineFileSplit(paths.toArray(new Path[0]), toLongArray(start), toLongArray(length), hosts);
}
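The hostRank comparator used in the sort above is defined elsewhere in InputStriper and is not part of this excerpt; a minimal sketch, assuming it orders host entries by descending accumulated weight:

// Sketch (assumption): rank hosts so the most heavily weighted come first.
static final Comparator<Entry<String, Double>> hostRank =
    new Comparator<Entry<String, Double>>() {
      public int compare(Entry<String, Double> a, Entry<String, Double> b) {
        final double va = a.getValue();
        final double vb = b.getValue();
        return va > vb ? -1 : va < vb ? 1 : 0;
      }
    };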
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestFilePool, method testStriper.
@Test
public void testStriper() throws Exception {
  final Random r = new Random();
  final Configuration conf = new Configuration();
  final FileSystem fs = FileSystem.getLocal(conf).getRaw();
  conf.setLong(FilePool.GRIDMIX_MIN_FILE, 3 * 1024);
  // Stub out block location lookups; the striper only needs a placeholder location.
  final FilePool pool = new FilePool(conf, base) {
    @Override
    public BlockLocation[] locationsFor(FileStatus stat, long start, long len) throws IOException {
      return new BlockLocation[] { new BlockLocation() };
    }
  };
  pool.refresh();
  final int expectedPoolSize = (NFILES / 2 * (NFILES / 2 + 1) - 6) * 1024;
  final InputStriper striper = new InputStriper(pool, expectedPoolSize);
  // Consume the pool in randomly sized requests and verify each returned split.
  int last = 0;
  for (int i = 0; i < expectedPoolSize; last = Math.min(expectedPoolSize - i, r.nextInt(expectedPoolSize))) {
    checkSplitEq(fs, striper.splitFor(pool, last, 0), last);
    i += last;
  }
  final InputStriper striper2 = new InputStriper(pool, expectedPoolSize);
  checkSplitEq(fs, striper2.splitFor(pool, expectedPoolSize, 0), expectedPoolSize);
}
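checkSplitEq is a helper defined elsewhere in TestFilePool and not shown in this excerpt; a minimal sketch of the kind of check it performs, written as an assumption rather than the actual test code:

// Sketch (assumption): verify the split's recorded lengths add up to the requested bytes.
void checkSplitEq(FileSystem fs, CombineFileSplit split, long bytes) throws Exception {
  long splitBytes = 0L;
  for (int i = 0; i < split.getNumPaths(); ++i) {
    splitBytes += split.getLength(i);
    // Each chunk must fit inside the file it was taken from.
    assertTrue(split.getLength(i) <= fs.getFileStatus(split.getPath(i)).getLen());
  }
  assertEquals(bytes, splitBytes);
}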
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class MultiFileSplit, method getLocations.
public String[] getLocations() throws IOException {
  HashSet<String> hostSet = new HashSet<String>();
  for (Path file : getPaths()) {
    FileSystem fs = file.getFileSystem(getJob());
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(status, 0, status.getLen());
    if (blkLocations != null && blkLocations.length > 0) {
      // Only the hosts of each file's first block contribute to the split's locations.
      addToSet(hostSet, blkLocations[0].getHosts());
    }
  }
  return hostSet.toArray(new String[hostSet.size()]);
}
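A hypothetical call site for illustration, assuming the MultiFileSplit(JobConf, Path[], long[]) constructor; the paths and lengths below are made up.

// Hypothetical: ask a two-file split where its data is likely to be local.
JobConf job = new JobConf();
Path[] files = { new Path("/data/a.txt"), new Path("/data/b.txt") }; // hypothetical paths
long[] lengths = { 1024L, 2048L };
MultiFileSplit split = new MultiFileSplit(job, files, lengths);
String[] hosts = split.getLocations(); // union of hosts holding each file's first block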
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class FileInputFormat, method getBlockIndex.
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset) && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")");
}
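For context, a sketch of how this method is typically used when computing splits: the block containing a split's start offset supplies the candidate hosts for that split. The file path and split offset below are hypothetical.

// Hypothetical: pick hosts for the split that starts at splitOffset.
FileStatus file = fs.getFileStatus(new Path("/data/input.txt")); // hypothetical input file
BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
long splitOffset = 128L * 1024 * 1024; // e.g. the start of the second 128 MB split
int blkIndex = getBlockIndex(blkLocations, splitOffset);
String[] splitHosts = blkLocations[blkIndex].getHosts();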