Search in sources:

Example 81 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project asterixdb by apache.

From the class HDFSUtils, method getSplits.

/**
     * Instead of creating the split using the input format, we do it manually
     * This function returns fileSplits (1 per hdfs file block) irrespective of the number of partitions
     * and the produced splits only cover intersection between current files in hdfs and files stored internally
     * in AsterixDB
     * 1. NoOp means appended file
     * 2. AddOp means new file
     * 3. UpdateOp means the delta of a file
     *
     * @return
     * @throws IOException
     */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<>();
    // Create files splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // file was deleted at some point, skip to next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.ADD_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(), (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength() : (file.getSize() - block.getOffset()), block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.NO_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName() == file.getFileName() && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block interact with delta -> Create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength, block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files.clear();
    files.addAll(orderedExternalFiles);
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) FileSplit(org.apache.hadoop.mapred.FileSplit) BlockLocation(org.apache.hadoop.fs.BlockLocation) ExternalFile(org.apache.asterix.external.indexing.ExternalFile)

Example 82 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project drill by apache.

From the class TestAffinityCalculator, method testBuildRangeMap.

//  @Test
//  public void testSetEndpointBytes(@Injectable final FileSystem fs, @Injectable final FileStatus file) throws Throwable{
//    final long blockSize = 256*1024*1024;
//    LinkedList<ParquetGroupScan.RowGroupInfo> rowGroups = new LinkedList<>();
//    int numberOfHosts = 4;
//    int numberOfBlocks = 3;
//    String port = "1234";
//    String[] hosts = new String[numberOfHosts];
//
//    final BlockLocation[] blockLocations = buildBlockLocations(hosts, blockSize);
//    final LinkedList<CoordinationProtos.DrillbitEndpoint> endPoints = buildEndpoints(numberOfHosts);
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 3);
//
//    new NonStrictExpectations() {{
//      fs.getFileBlockLocations(file, 0, 3*blockSize); result = blockLocations;
//      fs.getFileStatus(new Path(path)); result = file;
//      file.getLen(); result = 3*blockSize;
//    }};
//
//
//    BlockMapBuilder ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    ParquetGroupScan.RowGroupInfo rg = rowGroups.get(0);
//    Long b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertNull(b);
//
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 2);
//
//    ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    rg = rowGroups.get(0);
//    b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize*3/2,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertEquals(blockSize / 2, b.longValue());
//
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 6);
//
//    ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    rg = rowGroups.get(0);
//    b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize/2,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertNull(b);
//  }
@Test
public void testBuildRangeMap() {
    BlockLocation[] blocks = buildBlockLocations(new String[4], 256 * 1024 * 1024);
    long tA = System.nanoTime();
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    ImmutableRangeMap<Long, BlockLocation> map = blockMapBuilder.build();
    long tB = System.nanoTime();
    System.out.println(String.format("Took %f ms to build range map", (tB - tA) / 1e6));
}
Also used : ImmutableRangeMap(com.google.common.collect.ImmutableRangeMap) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test) ExecTest(org.apache.drill.exec.ExecTest)

Example 83 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project ignite by apache.

From the class IgniteHadoopFileSystem, method getFileBlockLocations.

/** {@inheritDoc} */
@Override
public BlockLocation[] getFileBlockLocations(Path path, long start, long len) throws IOException {
    A.notNull(path, "path");
    IgfsPath igfsPath = convert(path);
    enterBusy();
    try {
        long now = System.currentTimeMillis();
        List<IgfsBlockLocation> affinity = new ArrayList<>(rmtClient.affinity(igfsPath, start, len));
        BlockLocation[] arr = new BlockLocation[affinity.size()];
        for (int i = 0; i < arr.length; i++) arr[i] = convert(affinity.get(i));
        if (LOG.isDebugEnabled())
            LOG.debug("Fetched file locations [path=" + path + ", fetchTime=" + (System.currentTimeMillis() - now) + ", locations=" + Arrays.asList(arr) + ']');
        return arr;
    } finally {
        leaveBusy();
    }
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) ArrayList(java.util.ArrayList) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation) BlockLocation(org.apache.hadoop.fs.BlockLocation) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation) HadoopIgfsEndpoint(org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint)

Example 84 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project ignite by apache.

From the class IgniteHadoopFileSystem, method getFileBlockLocations (FileStatus overload).

/** {@inheritDoc} */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus status, long start, long len) throws IOException {
    A.notNull(status, "status");
    enterBusy();
    try {
        IgfsPath path = convert(status.getPath());
        long now = System.currentTimeMillis();
        List<IgfsBlockLocation> affinity = new ArrayList<>(rmtClient.affinity(path, start, len));
        BlockLocation[] arr = new BlockLocation[affinity.size()];
        for (int i = 0; i < arr.length; i++) arr[i] = convert(affinity.get(i));
        if (LOG.isDebugEnabled())
            LOG.debug("Fetched file locations [path=" + path + ", fetchTime=" + (System.currentTimeMillis() - now) + ", locations=" + Arrays.asList(arr) + ']');
        return arr;
    } catch (FileNotFoundException ignored) {
        return EMPTY_BLOCK_LOCATIONS;
    } finally {
        leaveBusy();
    }
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation) BlockLocation(org.apache.hadoop.fs.BlockLocation) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation)

Example 85 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project ignite by apache.

From the class IgniteHadoopFileSystemAbstractSelfTest, method testGetFileBlockLocationsIfFileStatusReferenceNotExistingPath.

/** @throws Exception If failed. */
public void testGetFileBlockLocationsIfFileStatusReferenceNotExistingPath() throws Exception {
    Path path = new Path("someFile");
    fs.create(path).close();
    final FileStatus status = fs.getFileStatus(path);
    fs.delete(path, true);
    BlockLocation[] locations = fs.getFileBlockLocations(status, 1, 2);
    assertEquals(0, locations.length);
}
Also used : Path(org.apache.hadoop.fs.Path) IgfsPath(org.apache.ignite.igfs.IgfsPath) FileStatus(org.apache.hadoop.fs.FileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) IgfsBlockLocation(org.apache.ignite.igfs.IgfsBlockLocation)

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation)87 Path (org.apache.hadoop.fs.Path)41 FileStatus (org.apache.hadoop.fs.FileStatus)30 Test (org.junit.Test)29 FileSystem (org.apache.hadoop.fs.FileSystem)16 Configuration (org.apache.hadoop.conf.Configuration)14 ArrayList (java.util.ArrayList)13 IOException (java.io.IOException)10 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)10 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)7 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)7 InetSocketAddress (java.net.InetSocketAddress)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)5 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)5 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)5 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)5 IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation)5 IgfsPath (org.apache.ignite.igfs.IgfsPath)5 HashMap (java.util.HashMap)4 Random (java.util.Random)4