Search in sources :

Example 66 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project hadoop, by Apache.

This example is taken from the class DFSUtilClient, method locatedBlocks2Locations.

/**
 * Converts a list of {@link LocatedBlock} into an array of
 * {@link BlockLocation}.
 *
 * @param blocks the located blocks to convert; may be {@code null}
 * @return the converted array; empty when the input is {@code null} or empty
 */
public static BlockLocation[] locatedBlocks2Locations(List<LocatedBlock> blocks) {
    if (blocks == null) {
        return new BlockLocation[0];
    }
    final int blockCount = blocks.size();
    final BlockLocation[] result = new BlockLocation[blockCount];
    int pos = 0;
    for (LocatedBlock block : blocks) {
        assert pos < blockCount : "Incorrect index";
        final DatanodeInfo[] nodes = block.getLocations();
        final int nodeCount = nodes.length;
        final String[] hostNames = new String[nodeCount];
        final String[] transferAddresses = new String[nodeCount];
        final String[] rackPaths = new String[nodeCount];
        for (int n = 0; n < nodeCount; n++) {
            hostNames[n] = nodes[n].getHostName();
            transferAddresses[n] = nodes[n].getXferAddr();
            // The rack path combines the datanode's network location with
            // its transfer address.
            NodeBase node = new NodeBase(transferAddresses[n], nodes[n].getNetworkLocation());
            rackPaths[n] = node.toString();
        }
        final DatanodeInfo[] cachedNodes = block.getCachedLocations();
        final String[] cachedHostNames = new String[cachedNodes.length];
        for (int c = 0; c < cachedNodes.length; c++) {
            cachedHostNames[c] = cachedNodes[c].getHostName();
        }
        result[pos++] = new BlockLocation(transferAddresses, hostNames, cachedHostNames,
                rackPaths, block.getStorageIDs(), block.getStorageTypes(),
                block.getStartOffset(), block.getBlockSize(), block.isCorrupt());
    }
    return result;
}
Also used : NodeBase(org.apache.hadoop.net.NodeBase) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 67 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project alluxio, by Alluxio.

This example is taken from the class HdfsUnderFileSystem, method getFileLocations.

@Override
public List<String> getFileLocations(String path, FileLocationOptions options) throws IOException {
    // When HDFS is remote to the workers, short circuit without querying the locations.
    if (Configuration.getBoolean(PropertyKey.UNDERFS_HDFS_REMOTE)) {
        return null;
    }
    List<String> hostNames = new ArrayList<>();
    try {
        Path hdfsPath = new Path(path);
        FileStatus status = mFileSystem.getFileStatus(hdfsPath);
        // Only the block covering the requested offset is needed, hence length 1.
        BlockLocation[] locations = mFileSystem.getFileBlockLocations(status, options.getOffset(), 1);
        if (locations.length > 0) {
            Collections.addAll(hostNames, locations[0].getHosts());
        }
    } catch (IOException e) {
        LOG.error("Unable to get file location for {}", path, e);
    }
    return hostNames;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) UnderFileStatus(alluxio.underfs.UnderFileStatus) ArrayList(java.util.ArrayList) IOException(java.io.IOException) BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 68 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project alluxio, by Alluxio.

This example is taken from the class AbstractFileSystem, method getFileBlockLocations.

@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    if (file == null) {
        return null;
    }
    if (mStatistics != null) {
        mStatistics.incrementReadOps(1);
    }
    AlluxioURI alluxioPath = new AlluxioURI(HadoopUtils.getPathWithoutScheme(file.getPath()));
    List<BlockLocation> result = new ArrayList<>();
    for (FileBlockInfo info : getFileBlocks(alluxioPath)) {
        long blockStart = info.getOffset();
        long blockLength = info.getBlockInfo().getLength();
        long blockEnd = blockStart + blockLength;
        // Skip blocks that have no overlap with [start, start + len].
        if (blockEnd < start || blockStart > start + len) {
            continue;
        }
        ArrayList<String> workerNames = new ArrayList<>();
        ArrayList<String> workerHosts = new ArrayList<>();
        // In-memory block locations reported by Alluxio workers.
        for (alluxio.wire.BlockLocation location : info.getBlockInfo().getLocations()) {
            HostAndPort address = HostAndPort.fromParts(location.getWorkerAddress().getHost(), location.getWorkerAddress().getDataPort());
            workerNames.add(address.toString());
            workerHosts.add(address.getHostText());
        }
        // Locations reported by the under file system.
        for (String ufsLocation : info.getUfsLocations()) {
            workerNames.add(ufsLocation);
            workerHosts.add(HostAndPort.fromString(ufsLocation).getHostText());
        }
        result.add(new BlockLocation(CommonUtils.toStringArray(workerNames), CommonUtils.toStringArray(workerHosts), blockStart, blockLength));
    }
    return result.toArray(new BlockLocation[result.size()]);
}
Also used : ArrayList(java.util.ArrayList) FileBlockInfo(alluxio.wire.FileBlockInfo) BlockLocation(org.apache.hadoop.fs.BlockLocation) HostAndPort(com.google.common.net.HostAndPort) AlluxioURI(alluxio.AlluxioURI)

Example 69 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project hadoop, by Apache.

This example is taken from the class TestMRCJCFileInputFormat, method testLocality.

@Test
public void testLocality() throws Exception {
    JobConf job = new JobConf(conf);
    dfs = newDFSCluster(job);
    FileSystem fs = dfs.getFileSystem();
    System.out.println("FileSystem " + fs.getUri());
    Path inputDir = new Path("/foo/");
    String fileName = "part-0000";
    createInputs(fs, inputDir, fileName);
    // Split the input directory using a text file input format.
    TextInputFormat.addInputPath(job, inputDir);
    TextInputFormat inFormat = new TextInputFormat();
    inFormat.configure(job);
    InputSplit[] splits = inFormat.getSplits(job, 1);
    FileStatus fileStatus = fs.getFileStatus(new Path(inputDir, fileName));
    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    System.out.println("Made splits");
    // Each split must map to one block: offsets, lengths and hosts must agree.
    for (int i = 0; i < splits.length; ++i) {
        FileSplit fileSplit = (FileSplit) splits[i];
        System.out.println("File split: " + fileSplit);
        for (String h : fileSplit.getLocations()) {
            System.out.println("Location: " + h);
        }
        System.out.println("Block: " + locations[i]);
        assertEquals(locations[i].getOffset(), fileSplit.getStart());
        assertEquals(locations[i].getLength(), fileSplit.getLength());
        String[] blockLocs = locations[i].getHosts();
        String[] splitLocs = fileSplit.getLocations();
        assertEquals(2, blockLocs.length);
        assertEquals(2, splitLocs.length);
        // The two hosts must match in either order.
        boolean sameOrder = blockLocs[0].equals(splitLocs[0]) && blockLocs[1].equals(splitLocs[1]);
        boolean swappedOrder = blockLocs[1].equals(splitLocs[0]) && blockLocs[0].equals(splitLocs[1]);
        assertTrue(sameOrder || swappedOrder);
    }
    assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES, 1, job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.Test)

Example 70 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in the project hadoop, by Apache.

This example is taken from the class SwiftNativeFileSystem, method getFileBlockLocations.

/**
 * Return an array containing hostnames, offset and size of
 * portions of the given file.  For a nonexistent
 * file or regions, null will be returned.
 * <p>
 * This call is most helpful with DFS, where it returns
 * hostnames of machines that contain the given file.
 * <p>
 * The FileSystem will simply return an elt containing 'localhost'.
 */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    // Argument checks.
    if (file == null) {
        return null;
    }
    if (start < 0 || len < 0) {
        throw new IllegalArgumentException("Negative start or len parameter to getFileBlockLocations");
    }
    if (file.getLen() <= start) {
        return new BlockLocation[0];
    }
    // A Swift file larger than 5GB is split into multiple objects; each
    // object may have its own location, depending on the remote server,
    // so list the sub-paths and collect every object's locations.
    final FileStatus[] fileBlocks = store.listSubPaths(file.getPath(), false, true);
    List<URI> objectLocations = new ArrayList<URI>();
    if (fileBlocks.length > 1) {
        for (FileStatus blockStatus : fileBlocks) {
            // Skip the entry that is the file itself (the manifest object).
            if (SwiftObjectPath.fromPath(uri, blockStatus.getPath()).equals(SwiftObjectPath.fromPath(uri, file.getPath()))) {
                continue;
            }
            objectLocations.addAll(store.getObjectLocation(blockStatus.getPath()));
        }
    } else {
        objectLocations = store.getObjectLocation(file.getPath());
    }
    if (objectLocations.isEmpty()) {
        LOG.debug("No locations returned for " + file.getPath());
        // No locations were returned for the object;
        // fall back to the superclass-style 'localhost' answer.
        String[] name = { SwiftProtocolConstants.BLOCK_LOCATION };
        String[] host = { "localhost" };
        String[] topology = { SwiftProtocolConstants.TOPOLOGY_PATH };
        return new BlockLocation[] { new BlockLocation(name, host, topology, 0, file.getLen()) };
    }
    final int locationCount = objectLocations.size();
    final String[] names = new String[locationCount];
    final String[] hosts = new String[locationCount];
    int idx = 0;
    for (URI location : objectLocations) {
        hosts[idx] = location.getHost();
        names[idx] = location.getAuthority();
        idx++;
    }
    return new BlockLocation[] { new BlockLocation(names, hosts, 0, file.getLen()) };
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(org.apache.hadoop.fs.BlockLocation) URI(java.net.URI)

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation)88 Path (org.apache.hadoop.fs.Path)41 FileStatus (org.apache.hadoop.fs.FileStatus)30 Test (org.junit.Test)29 FileSystem (org.apache.hadoop.fs.FileSystem)16 ArrayList (java.util.ArrayList)14 Configuration (org.apache.hadoop.conf.Configuration)14 IOException (java.io.IOException)10 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)10 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)7 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)7 InetSocketAddress (java.net.InetSocketAddress)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)5 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)5 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)5 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)5 IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation)5 IgfsPath (org.apache.ignite.igfs.IgfsPath)5 HashMap (java.util.HashMap)4 Random (java.util.Random)4