Use of org.apache.hadoop.mapred.lib.NLineInputFormat in the Apache project incubator-systemml.
Source: class RemoteParForColocatedFileSplit, method getLocations().
/**
* Get the list of hostnames where the input split is located.
*/
@Override
public String[] getLocations() throws IOException {
    //Timing time = new Timing();
    //time.start();

    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(key, value);
    }
    finally {
        IOUtilFunctions.closeSilently(reader);
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();
    if (t.getType() == TaskType.SET) {
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }
    else { //TaskType.RANGE
        //since this is a serial process, we use just the first and last iterations
        //as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}
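The helper methods countHosts and getTopHosts are not shown in the snippet above. The following is a minimal sketch of how such helpers could look, assuming countHosts tallies how often each host appears across block locations and getTopHosts returns the most frequent ones; the bodies are illustrative assumptions, not the SystemML implementation (assumed imports: java.util.ArrayList, java.util.HashMap, java.util.Map).

//Sketch only: assumed shapes of the two helpers called above.
private void countHosts(HashMap<String, Integer> hosts, String[] names) {
    //increment the occurrence count of every host reported for a block
    for (String name : names) {
        Integer cnt = hosts.get(name);
        hosts.put(name, (cnt != null) ? cnt + 1 : 1);
    }
}

private String[] getTopHosts(HashMap<String, Integer> hosts) {
    //return all hosts that reach the maximum occurrence count
    int max = 0;
    ArrayList<String> ret = new ArrayList<String>();
    for (Map.Entry<String, Integer> e : hosts.entrySet()) {
        if (e.getValue() > max) {
            max = e.getValue();
            ret.clear();
            ret.add(e.getKey());
        }
        else if (e.getValue() == max) {
            ret.add(e.getKey());
        }
    }
    return ret.toArray(new String[0]);
}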
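For background on the input format itself, below is a minimal, self-contained sketch (not SystemML code) of driving the old mapred NLineInputFormat directly: it splits a text file into splits of N lines each and reads them back with a RecordReader, which is why SystemML can map one task line to one colocated split above. The input path, the class name, and the lines-per-map setting are assumptions for illustration (the key name follows Hadoop 2.x; older releases used mapred.line.input.format.linespermap).

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.NLineInputFormat;

public class NLineExample { //hypothetical driver class
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path("/tmp/tasks.txt")); //example input path
        job.setInt("mapreduce.input.lineinputformat.linespermap", 1);   //1 line per split (assumed key name)

        NLineInputFormat informat = new NLineInputFormat();
        informat.configure(job); //reads the lines-per-map setting
        InputSplit[] splits = informat.getSplits(job, 1);

        LongWritable key = new LongWritable();
        Text value = new Text();
        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value))
                    System.out.println(split + ": " + value);
            } finally {
                reader.close();
            }
        }
    }
}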