Search in sources :

Example 1 with NLineInputFormat

Use of org.apache.hadoop.mapred.lib.NLineInputFormat in the incubator-systemml project by Apache.

Class RemoteParForColocatedFileSplit, method getLocations.

/**
 * Get the list of hostnames where the input split is located.
 *
 * <p>Reads the first record of this split through an {@code NLineInputFormat}
 * record reader, parses it as a compact task string, and counts the hosts of the
 * file blocks of every partition file (under {@code _fname}) that the task's
 * iterations map to. The accumulated per-host counts are then reduced by
 * {@code getTopHosts}.
 *
 * @return the hosts selected by {@code getTopHosts} from the per-host block counts
 * @throws IOException if no task record can be read from the split, or if a
 *         file-system lookup fails
 */
@Override
public String[] getLocations() throws IOException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = IOUtilFunctions.getFileSystem(getPath(), job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    boolean hasRecord;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        hasRecord = reader.next(key, value);
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    //fail early with a meaningful message instead of parsing an empty task string
    if (!hasRecord) {
        throw new IOException("Failed to read task string from input split: " + getPath());
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();
    if (t.getType() == TaskType.SET) {
        //every iteration of the set is probed
        for (IntObject val : t.getIterations()) {
            countBlockHosts(fs, val.getLongValue(), hosts);
        }
    } else { //TaskType.RANGE
        //since this is a serial process, only the first two iteration entries
        //(read as from/to) are probed as a heuristic for location information
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            countBlockHosts(fs, li, hosts);
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}

/**
 * Adds the hosts of all blocks of the partition file that contains the given
 * iteration value to the running host counts.
 *
 * @param fs file system used to resolve the partition file
 * @param iteration 1-based iteration value; mapped to its file via {@code _blen}
 * @param hosts per-host counters, updated in place through {@code countHosts}
 * @throws IOException if the file status or block locations cannot be obtained
 */
private void countBlockHosts(FileSystem fs, long iteration, HashMap<String, Integer> hosts) throws IOException {
    //file name is the 1-based index of the block of _blen iterations
    String fname = _fname + "/" + String.valueOf(((iteration - 1) / _blen + 1));
    FileStatus status = fs.getFileStatus(new Path(fname));
    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation bl : locations) {
        countHosts(hosts, bl.getHosts());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Text(org.apache.hadoop.io.Text) BlockLocation(org.apache.hadoop.fs.BlockLocation) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) FileSystem(org.apache.hadoop.fs.FileSystem) NLineInputFormat(org.apache.hadoop.mapred.lib.NLineInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

HashMap (java.util.HashMap)1 BlockLocation (org.apache.hadoop.fs.BlockLocation)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 LongWritable (org.apache.hadoop.io.LongWritable)1 Text (org.apache.hadoop.io.Text)1 JobConf (org.apache.hadoop.mapred.JobConf)1 NLineInputFormat (org.apache.hadoop.mapred.lib.NLineInputFormat)1 IntObject (org.apache.sysml.runtime.instructions.cp.IntObject)1