Search in sources :

Example 41 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project drill by apache.

the class TestAffinityCalculator method buildBlockLocations2.

/**
 * Builds four single-host block locations, each {@code blockSize} bytes long.
 *
 * <p>Every entry of {@code hosts} is overwritten in place with {@code "host" + i}; a parallel
 * array of names is filled with {@code "host:" + port}.
 *
 * <p>Offsets are handed out in the slot order 0, 1, 3, 2: slot 3 starts at
 * {@code 2 * blockSize} and slot 2 starts at {@code 3 * blockSize}, so the returned array is
 * not sorted by offset. NOTE(review): presumably deliberate, to exercise out-of-order
 * blocks -- confirm against the test that consumes this fixture.
 *
 * @param hosts array whose contents are replaced; indices 0..3 are read
 * @param blockSize length of every block, also used as the offset stride
 * @return an array of four block locations
 */
public BlockLocation[] buildBlockLocations2(String[] hosts, long blockSize) {
    final int hostCount = hosts.length;
    final String[] names = new String[hostCount];
    int h = 0;
    while (h < hostCount) {
        hosts[h] = "host" + h;
        names[h] = "host:" + port;
        h++;
    }
    // The n-th offset (n * blockSize) goes to slot fillOrder[n].
    final int[] fillOrder = { 0, 1, 3, 2 };
    final BlockLocation[] result = new BlockLocation[4];
    for (int n = 0; n < fillOrder.length; n++) {
        final int slot = fillOrder[n];
        final String[] blockNames = { names[slot] };
        final String[] blockHosts = { hosts[slot] };
        result[slot] = new BlockLocation(blockNames, blockHosts, blockSize * n, blockSize);
    }
    return result;
}
Also used : BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 42 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project drill by apache.

the class TestAffinityCalculator method buildBlockLocations.

/**
 * Builds three consecutive block locations, each {@code blockSize} bytes long and each
 * listing three hosts.
 *
 * <p>Every entry of {@code hosts} is overwritten in place with {@code "host" + i}; a parallel
 * array of names is filled with {@code "host:" + port}. Block {@code b} starts at offset
 * {@code b * blockSize}. The host indices used are (0, 1, 2), (0, 2, 3) and (0, 1, 3).
 *
 * @param hosts array whose contents are replaced; indices 0..3 are read
 * @param blockSize length of every block, also used as the offset stride
 * @return an array of three block locations
 */
public BlockLocation[] buildBlockLocations(String[] hosts, long blockSize) {
    final int hostCount = hosts.length;
    final String[] names = new String[hostCount];
    int h = 0;
    while (h < hostCount) {
        hosts[h] = "host" + h;
        names[h] = "host:" + port;
        h++;
    }
    // Host indices backing each block, in block order.
    final int[][] replicas = { { 0, 1, 2 }, { 0, 2, 3 }, { 0, 1, 3 } };
    final BlockLocation[] result = new BlockLocation[3];
    for (int b = 0; b < replicas.length; b++) {
        final int[] r = replicas[b];
        final String[] blockNames = { names[r[0]], names[r[1]], names[r[2]] };
        final String[] blockHosts = { hosts[r[0]], hosts[r[1]], hosts[r[2]] };
        result[b] = new BlockLocation(blockNames, blockHosts, blockSize * b, blockSize);
    }
    return result;
}
Also used : BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 43 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project drill by apache.

the class BlockMapBuilder method buildBlockMap.

/**
 * Builds a mapping from file byte ranges to the block location covering each range, and
 * stores it in {@code blockMapMap} under the file's path.
 *
 * <p>Each block reported by the file system becomes the half-open range
 * {@code [offset, offset + length)}. The whole operation is timed with the
 * {@code BLOCK_MAP_BUILDER_TIMER} metric; the timer is stopped in a {@code finally} block so
 * that it is also recorded when the lookup or the map construction fails.
 *
 * @param status the file whose block locations are requested
 * @return the range map built for the file
 * @throws IOException if the file system fails to return the block locations
 */
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(FileStatus status) throws IOException {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    try {
        final BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        final ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
        for (BlockLocation block : blocks) {
            final long start = block.getOffset();
            final long end = start + block.getLength();
            blockMapBuilder.put(Range.closedOpen(start, end), block);
        }
        final ImmutableRangeMap<Long, BlockLocation> blockMap = blockMapBuilder.build();
        blockMapMap.put(status.getPath(), blockMap);
        return blockMap;
    } finally {
        // Always stop the timer, even if an exception propagates out of the block above.
        context.stop();
    }
}
Also used : Timer(com.codahale.metrics.Timer) ImmutableRangeMap(com.google.common.collect.ImmutableRangeMap) BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 44 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project drill by apache.

the class BlockMapBuilder method getEndpointByteMap.

/**
 * Calculates, for a given FileWork, how many of its bytes are available on each drillbit
 * endpoint.
 *
 * <p>The work unit's byte range {@code [start, start + length)} is intersected with the
 * file's block map. For every overlapping block, the size of the overlap is added to each
 * endpoint that {@code getDrillBitEndpoint} resolves from the block's hosts; hosts that
 * resolve to {@code null} are logged and skipped.
 *
 * @param work the FileWork to calculate endpoint bytes for
 * @return the per-endpoint byte counts
 * @throws IOException declared by this method; note that an {@code IOException} raised while
 *         reading a block's hosts is rethrown wrapped in a {@code RuntimeException}
 */
public EndpointByteMap getEndpointByteMap(FileWork work) throws IOException {
    final Stopwatch timer = Stopwatch.createStarted();
    final ImmutableRangeMap<Long, BlockLocation> fileBlocks = getBlockMap(new Path(work.getPath()));
    final EndpointByteMapImpl bytesPerEndpoint = new EndpointByteMapImpl();
    final long workStart = work.getStart();
    final long workEnd = workStart + work.getLength();
    final Range<Long> workRange = Range.closedOpen(workStart, workEnd);
    // Restrict the block map to the blocks that overlap this unit of work.
    final Map<Range<Long>, BlockLocation> overlapping = fileBlocks.subRangeMap(workRange).asMapOfRanges();
    for (Map.Entry<Range<Long>, BlockLocation> entry : overlapping.entrySet()) {
        final String[] blockHosts;
        try {
            blockHosts = entry.getValue().getHosts();
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to get hosts for block location", ioe);
        }
        final Range<Long> overlap = workRange.intersection(entry.getKey());
        final long overlapBytes = overlap.upperEndpoint() - overlap.lowerEndpoint();
        // Credit the overlapping bytes to every host of this block that runs a drillbit.
        for (int i = 0; i < blockHosts.length; i++) {
            final String host = blockHosts[i];
            final DrillbitEndpoint endpoint = getDrillBitEndpoint(host);
            if (endpoint == null) {
                logger.info("Failure finding Drillbit running on host {}.  Skipping affinity to that host.", host);
                continue;
            }
            bytesPerEndpoint.add(endpoint, overlapBytes);
        }
    }
    logger.debug("FileWork group ({},{}) max bytes {}", work.getPath(), work.getStart(), bytesPerEndpoint.getMaxBytes());
    logger.debug("Took {} ms to set endpoint bytes", timer.stop().elapsed(TimeUnit.MILLISECONDS));
    return bytesPerEndpoint;
}
Also used : Path(org.apache.hadoop.fs.Path) Stopwatch(com.google.common.base.Stopwatch) IOException(java.io.IOException) BlockLocation(org.apache.hadoop.fs.BlockLocation) Range(com.google.common.collect.Range) DrillbitEndpoint(org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint) HashMap(java.util.HashMap) ImmutableRangeMap(com.google.common.collect.ImmutableRangeMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 45 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project incubator-systemml by apache.

the class RemoteParForColocatedFileSplit method getLocations.

/**
 * Get the list of hostnames where the input split is located.
 *
 * <p>The first record of this split is read as a compact task string and parsed into a
 * {@code Task}. For a SET task, the block hosts of the file behind every iteration value are
 * counted; otherwise (RANGE) only the files behind the first two iteration entries are
 * counted. The counted hosts are then passed to {@code getTopHosts}.
 *
 * @return the hosts selected by {@code getTopHosts} from the collected counts
 * @throws IOException if reading the task record or querying the file system fails
 */
@Override
public String[] getLocations() throws IOException {
    final JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    final FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);
    // read the task string (first record of this split)
    final LongWritable recordKey = new LongWritable();
    final Text recordValue = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(recordKey, recordValue);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    // parse the task
    final Task task = Task.parseCompactString(recordValue.toString());
    // collect host occurrences over all relevant files
    final HashMap<String, Integer> hostCounts = new HashMap<>();
    if (task.getType() == TaskType.SET) {
        for (IntObject iteration : task.getIterations()) {
            final String partFile = _fname + "/" + ((iteration.getLongValue() - 1) / _blen + 1);
            final FileStatus partStatus = fs.getFileStatus(new Path(partFile));
            final BlockLocation[] partBlocks = fs.getFileBlockLocations(partStatus, 0, partStatus.getLen());
            for (BlockLocation location : partBlocks) {
                countHosts(hostCounts, location.getHosts());
            }
        }
    } else {
        // TaskType.RANGE
        // since this is a serial process, we use just the first iteration
        // as a heuristic for location information
        final long rangeFrom = task.getIterations().get(0).getLongValue();
        final long rangeTo = task.getIterations().get(1).getLongValue();
        final long[] bounds = { rangeFrom, rangeTo };
        for (long bound : bounds) {
            final String partFile = _fname + "/" + ((bound - 1) / _blen + 1);
            final FileStatus partStatus = fs.getFileStatus(new Path(partFile));
            final BlockLocation[] partBlocks = fs.getFileBlockLocations(partStatus, 0, partStatus.getLen());
            for (BlockLocation location : partBlocks) {
                countHosts(hostCounts, location.getHosts());
            }
        }
    }
    // majority consensus on top host
    return getTopHosts(hostCounts);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) BlockLocation(org.apache.hadoop.fs.BlockLocation) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) FileSystem(org.apache.hadoop.fs.FileSystem) NLineInputFormat(org.apache.hadoop.mapred.lib.NLineInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation)88 Path (org.apache.hadoop.fs.Path)41 FileStatus (org.apache.hadoop.fs.FileStatus)30 Test (org.junit.Test)29 FileSystem (org.apache.hadoop.fs.FileSystem)16 ArrayList (java.util.ArrayList)14 Configuration (org.apache.hadoop.conf.Configuration)14 IOException (java.io.IOException)10 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)10 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)7 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)7 InetSocketAddress (java.net.InetSocketAddress)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)5 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)5 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)5 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)5 IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation)5 IgfsPath (org.apache.ignite.igfs.IgfsPath)5 HashMap (java.util.HashMap)4 Random (java.util.Random)4