Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class DFSUtilClient, method locatedBlocks2Locations.
/**
 * Convert a List<LocatedBlock> to BlockLocation[].
 * @param blocks a List<LocatedBlock> to be converted
 * @return converted array of BlockLocation
 */
public static BlockLocation[] locatedBlocks2Locations(List<LocatedBlock> blocks) {
  if (blocks == null) {
    return new BlockLocation[0];
  }
  int nrBlocks = blocks.size();
  BlockLocation[] blkLocations = new BlockLocation[nrBlocks];
  if (nrBlocks == 0) {
    return blkLocations;
  }
  int idx = 0;
  for (LocatedBlock blk : blocks) {
    assert idx < nrBlocks : "Incorrect index";
    DatanodeInfo[] locations = blk.getLocations();
    String[] hosts = new String[locations.length];
    String[] xferAddrs = new String[locations.length];
    String[] racks = new String[locations.length];
    for (int hCnt = 0; hCnt < locations.length; hCnt++) {
      hosts[hCnt] = locations[hCnt].getHostName();
      xferAddrs[hCnt] = locations[hCnt].getXferAddr();
      NodeBase node = new NodeBase(xferAddrs[hCnt],
          locations[hCnt].getNetworkLocation());
      racks[hCnt] = node.toString();
    }
    DatanodeInfo[] cachedLocations = blk.getCachedLocations();
    String[] cachedHosts = new String[cachedLocations.length];
    for (int i = 0; i < cachedLocations.length; i++) {
      cachedHosts[i] = cachedLocations[i].getHostName();
    }
    blkLocations[idx] = new BlockLocation(xferAddrs, hosts, cachedHosts, racks,
        blk.getStorageIDs(), blk.getStorageTypes(), blk.getStartOffset(),
        blk.getBlockSize(), blk.isCorrupt());
    idx++;
  }
  return blkLocations;
}
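A minimal usage sketch of the conversion above; the file path and the dfsClient handle are assumptions for illustration, not part of the snippet:
// Hypothetical caller: fetch the located blocks for the first 1 MB of a file
// via DFSClient, then convert them to the generic BlockLocation form.
LocatedBlocks located = dfsClient.getLocatedBlocks("/user/data/part-0000", 0, 1024 * 1024);
BlockLocation[] locs = DFSUtilClient.locatedBlocks2Locations(located.getLocatedBlocks());
for (BlockLocation loc : locs) {
  System.out.println(loc.getOffset() + " -> " + Arrays.toString(loc.getHosts()));
}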
Use of org.apache.hadoop.fs.BlockLocation in project alluxio by Alluxio.
The class HdfsUnderFileSystem, method getFileLocations.
@Override
public List<String> getFileLocations(String path, FileLocationOptions options)
    throws IOException {
  // If the HDFS under storage is not co-located with Alluxio
  // workers, short circuit without querying the locations
  if (Configuration.getBoolean(PropertyKey.UNDERFS_HDFS_REMOTE)) {
    return null;
  }
  List<String> ret = new ArrayList<>();
  try {
    FileStatus fStatus = mFileSystem.getFileStatus(new Path(path));
    BlockLocation[] bLocations =
        mFileSystem.getFileBlockLocations(fStatus, options.getOffset(), 1);
    if (bLocations.length > 0) {
      String[] names = bLocations[0].getHosts();
      Collections.addAll(ret, names);
    }
  } catch (IOException e) {
    LOG.error("Unable to get file location for {}", path, e);
  }
  return ret;
}
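A hedged sketch of how a caller might use this; the ufs handle and the hdfs:// path are assumptions:
// Hypothetical caller: ask the under storage which hosts serve the block at offset 0.
List<String> hosts = ufs.getFileLocations("hdfs://namenode:8020/data/part-0000",
    FileLocationOptions.defaults().setOffset(0));
if (hosts == null) {
  // UNDERFS_HDFS_REMOTE was set: the under storage is remote, so locality
  // information is not meaningful and any worker may be used.
}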
Use of org.apache.hadoop.fs.BlockLocation in project alluxio by Alluxio.
The class AbstractFileSystem, method getFileBlockLocations.
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
    throws IOException {
  if (file == null) {
    return null;
  }
  if (mStatistics != null) {
    mStatistics.incrementReadOps(1);
  }
  AlluxioURI path = new AlluxioURI(HadoopUtils.getPathWithoutScheme(file.getPath()));
  List<FileBlockInfo> blocks = getFileBlocks(path);
  List<BlockLocation> blockLocations = new ArrayList<>();
  for (FileBlockInfo fileBlockInfo : blocks) {
    long offset = fileBlockInfo.getOffset();
    long end = offset + fileBlockInfo.getBlockInfo().getLength();
    // Check if there is any overlap between [start, start + len] and [offset, end]
    if (end >= start && offset <= start + len) {
      ArrayList<String> names = new ArrayList<>();
      ArrayList<String> hosts = new ArrayList<>();
      // add the existing in-memory block locations
      for (alluxio.wire.BlockLocation location
          : fileBlockInfo.getBlockInfo().getLocations()) {
        HostAndPort address = HostAndPort.fromParts(
            location.getWorkerAddress().getHost(),
            location.getWorkerAddress().getDataPort());
        names.add(address.toString());
        hosts.add(address.getHostText());
      }
      // add under file system locations
      for (String location : fileBlockInfo.getUfsLocations()) {
        names.add(location);
        hosts.add(HostAndPort.fromString(location).getHostText());
      }
      blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names),
          CommonUtils.toStringArray(hosts), offset,
          fileBlockInfo.getBlockInfo().getLength()));
    }
  }
  BlockLocation[] ret = new BlockLocation[blockLocations.size()];
  blockLocations.toArray(ret);
  return ret;
}
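A short sketch of retrieving these locations through the standard Hadoop FileSystem API; the master address and file path are assumptions:
// Hypothetical caller: resolve an alluxio:// path via the plain Hadoop API.
// The Configuration here is org.apache.hadoop.conf.Configuration.
FileSystem fs = FileSystem.get(URI.create("alluxio://master:19998/"), new Configuration());
FileStatus status = fs.getFileStatus(new Path("/data/part-0000"));
BlockLocation[] locs = fs.getFileBlockLocations(status, 0, status.getLen());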
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestMRCJCFileInputFormat, method testLocality.
@Test
public void testLocality() throws Exception {
  JobConf job = new JobConf(conf);
  dfs = newDFSCluster(job);
  FileSystem fs = dfs.getFileSystem();
  System.out.println("FileSystem " + fs.getUri());
  Path inputDir = new Path("/foo/");
  String fileName = "part-0000";
  createInputs(fs, inputDir, fileName);
  // split it using a file input format
  TextInputFormat.addInputPath(job, inputDir);
  TextInputFormat inFormat = new TextInputFormat();
  inFormat.configure(job);
  InputSplit[] splits = inFormat.getSplits(job, 1);
  FileStatus fileStatus = fs.getFileStatus(new Path(inputDir, fileName));
  BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0,
      fileStatus.getLen());
  System.out.println("Made splits");
  // make sure that each split is a block and the locations match
  for (int i = 0; i < splits.length; ++i) {
    FileSplit fileSplit = (FileSplit) splits[i];
    System.out.println("File split: " + fileSplit);
    for (String h : fileSplit.getLocations()) {
      System.out.println("Location: " + h);
    }
    System.out.println("Block: " + locations[i]);
    assertEquals(locations[i].getOffset(), fileSplit.getStart());
    assertEquals(locations[i].getLength(), fileSplit.getLength());
    String[] blockLocs = locations[i].getHosts();
    String[] splitLocs = fileSplit.getLocations();
    assertEquals(2, blockLocs.length);
    assertEquals(2, splitLocs.length);
    assertTrue((blockLocs[0].equals(splitLocs[0])
        && blockLocs[1].equals(splitLocs[1]))
        || (blockLocs[1].equals(splitLocs[0])
        && blockLocs[0].equals(splitLocs[1])));
  }
  assertEquals("Expected value of " + FileInputFormat.NUM_INPUT_FILES, 1,
      job.getLong(FileInputFormat.NUM_INPUT_FILES, 0));
}
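The order-insensitive host comparison near the end of the loop could also be expressed with sets; a hypothetical helper, not part of the original test:
// Hypothetical helper: compare block hosts and split hosts ignoring order.
private static void assertSameHosts(String[] expected, String[] actual) {
  assertEquals(new HashSet<>(Arrays.asList(expected)),
      new HashSet<>(Arrays.asList(actual)));
}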
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class SwiftNativeFileSystem, method getFileBlockLocations.
/**
 * Return an array containing hostnames, offsets and sizes of
 * portions of the given file. For a nonexistent
 * file or region, null will be returned.
 * <p>
 * This call is most helpful with DFS, where it returns
 * hostnames of machines that contain the given file.
 * <p>
 * This FileSystem will simply return an element containing 'localhost'.
 */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
    throws IOException {
  // argument checks
  if (file == null) {
    return null;
  }
  if (start < 0 || len < 0) {
    throw new IllegalArgumentException("Negative start or len parameter"
        + " to getFileBlockLocations");
  }
  if (file.getLen() <= start) {
    return new BlockLocation[0];
  }
  // Check if the requested file in Swift is larger than 5 GB. In that case
  // each block has its own location, which may be determinable
  // from the Swift client API, depending on the remote server
  final FileStatus[] listOfFileBlocks = store.listSubPaths(file.getPath(), false, true);
  List<URI> locations = new ArrayList<URI>();
  if (listOfFileBlocks.length > 1) {
    for (FileStatus fileStatus : listOfFileBlocks) {
      if (SwiftObjectPath.fromPath(uri, fileStatus.getPath())
          .equals(SwiftObjectPath.fromPath(uri, file.getPath()))) {
        continue;
      }
      locations.addAll(store.getObjectLocation(fileStatus.getPath()));
    }
  } else {
    locations = store.getObjectLocation(file.getPath());
  }
  if (locations.isEmpty()) {
    LOG.debug("No locations returned for " + file.getPath());
    // no locations were returned for the object;
    // fall back to the superclass behavior
    String[] name = { SwiftProtocolConstants.BLOCK_LOCATION };
    String[] host = { "localhost" };
    String[] topology = { SwiftProtocolConstants.TOPOLOGY_PATH };
    return new BlockLocation[] {
        new BlockLocation(name, host, topology, 0, file.getLen()) };
  }
  final String[] names = new String[locations.size()];
  final String[] hosts = new String[locations.size()];
  int i = 0;
  for (URI location : locations) {
    hosts[i] = location.getHost();
    names[i] = location.getAuthority();
    i++;
  }
  return new BlockLocation[] {
      new BlockLocation(names, hosts, 0, file.getLen()) };
}
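A brief usage sketch illustrating the localhost fallback; the swiftFs handle and the object path are assumptions:
// Hypothetical caller: for a Swift object with no per-object location data,
// a single block spanning the whole file is returned.
FileStatus status = swiftFs.getFileStatus(new Path("/container/object"));
BlockLocation[] locs = swiftFs.getFileBlockLocations(status, 0, status.getLen());
// locs[0].getHosts() is then { "localhost" }, with offset 0 and the file's length.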