
Example 1 with TaskSplitIndex

A use of org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex in the Apache Tez project.

From the class MRInput, method initializeInternal. When split information is not delivered via events, the method reads this task's split meta info from disk, builds a TaskSplitIndex from the split file location and start offset, and uses that index to deserialize the actual InputSplit:

@Private
void initializeInternal() throws IOException {
    // Take the reader lock, primarily for memory visibility of mrReader.
    rrLock.lock();
    try {
        if (splitInfoViaEvents) {
            // Split details will arrive later via an event; construct the
            // reader without a split for now.
            if (useNewApi) {
                mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter,
                        getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(),
                        getContext().getApplicationId().getId(), getContext().getTaskIndex(),
                        getContext().getTaskAttemptNumber(), getContext());
            } else {
                mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter,
                        getContext());
            }
        } else {
            // Read the split meta info for all tasks and pick this task's entry.
            TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
            TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
            // The TaskSplitIndex locates the serialized split: the split file
            // plus the byte offset of this task's entry within it.
            TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(),
                    thisTaskMetaInfo.getStartOffset());
            long splitLength = -1;
            if (useNewApi) {
                org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils.getNewSplitDetailsFromDisk(
                        splitMetaInfo, jobConf,
                        getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
                try {
                    splitLength = newInputSplit.getLength();
                } catch (InterruptedException e) {
                    LOG.warn("Got interrupted while reading split length: ", e);
                }
                mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(),
                        inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
                        getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                        getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
            } else {
                org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils.getOldSplitDetailsFromDisk(
                        splitMetaInfo, jobConf,
                        getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
                splitLength = oldInputSplit.getLength();
                mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(),
                        inputRecordCounter, getContext());
            }
            // Record the split length, when it could be determined.
            if (splitLength != -1) {
                getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES)
                        .increment(splitLength);
            }
        }
    } finally {
        rrLock.unlock();
    }
    LOG.info("Initialized MRInput: " + getContext().getSourceVertexName());
}
Imports used: org.apache.hadoop.classification.InterfaceAudience.Private, org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex, org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo, org.apache.tez.mapreduce.lib.MRReaderMapReduce, org.apache.tez.mapreduce.lib.MRReaderMapred
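
To make the conversion in the method above concrete, here is a minimal, self-contained sketch of what a TaskSplitIndex carries and how one is derived from a task's TaskSplitMetaInfo. The staging path, host names, offset, and length are hypothetical values, and note that these JobSplit classes are Hadoop-internal (@InterfaceAudience.Private), so the sketch is illustrative rather than a supported API usage.

import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;

public class TaskSplitIndexSketch {
    public static void main(String[] args) {
        // Hypothetical per-task split meta info: where the serialized split
        // lives, which hosts hold the data, and the estimated input length.
        TaskSplitMetaInfo metaInfo = new TaskSplitMetaInfo(
                new TaskSplitIndex("hdfs:///staging/job_0001/job.split", 1024L),
                new String[] { "host1", "host2" },
                128L * 1024 * 1024);

        // The same conversion initializeInternal() performs: this task's
        // meta info -> the index used to deserialize the split from disk.
        TaskSplitIndex splitIndex =
                new TaskSplitIndex(metaInfo.getSplitLocation(), metaInfo.getStartOffset());

        System.out.println("split file:   " + splitIndex.getSplitLocation());
        System.out.println("start offset: " + splitIndex.getStartOffset());
    }
}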
