Search in sources :

Example 1 with MRReaderMapred

Use of org.apache.tez.mapreduce.lib.MRReaderMapred in the Apache Tez project.

In class MRInput, method initializeInternal:

/**
 * Creates the {@code mrReader} for this input while holding {@code rrLock}.
 *
 * <p>When {@code splitInfoViaEvents} is set, the reader is constructed without a split.
 * Otherwise the split meta info for this task index is read via
 * {@code MRInputUtils.readSplits}, the split itself is loaded from disk, the reader is
 * constructed around it, and the split length (when it is not {@code -1}) is added to the
 * {@code INPUT_SPLIT_LENGTH_BYTES} counter.
 *
 * <p>If the thread is interrupted while the new-API split length is being read, the
 * interruption is logged, the thread's interrupt status is restored, and initialization
 * continues with the length left at {@code -1} (so the counter is not incremented).
 *
 * @throws IOException propagated from the split-reading and reader-construction calls
 */
@Private
void initializeInternal() throws IOException {
    // Primarily for visibility
    rrLock.lock();
    try {
        if (splitInfoViaEvents) {
            // No split is available yet in this mode; the reader is created without one.
            if (useNewApi) {
                mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter,
                        getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(),
                        getContext().getApplicationId().getId(), getContext().getTaskIndex(),
                        getContext().getTaskAttemptNumber(), getContext());
            } else {
                mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter, getContext());
            }
        } else {
            TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf);
            TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()];
            TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(),
                    thisTaskMetaInfo.getStartOffset());
            // -1 is the "length unknown" sentinel; the counter is only updated for other values.
            long splitLength = -1;
            if (useNewApi) {
                org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils.getNewSplitDetailsFromDisk(
                        splitMetaInfo, jobConf, getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
                try {
                    splitLength = newInputSplit.getLength();
                } catch (InterruptedException e) {
                    LOG.warn("Got interrupted while reading split length: ", e);
                    // Catching InterruptedException clears the flag; restore it so callers
                    // further up the stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                }
                mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(),
                        inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(),
                        getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
                        getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext());
            } else {
                org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils.getOldSplitDetailsFromDisk(
                        splitMetaInfo, jobConf, getContext().getCounters().findCounter(TaskCounter.SPLIT_RAW_BYTES));
                splitLength = oldInputSplit.getLength();
                mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(),
                        inputRecordCounter, getContext());
            }
            if (splitLength != -1) {
                getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES).increment(splitLength);
            }
        }
    } finally {
        rrLock.unlock();
    }
    LOG.info("Initialized MRInput: " + getContext().getSourceVertexName());
}
Also used : MRReaderMapred(org.apache.tez.mapreduce.lib.MRReaderMapred) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MRReaderMapReduce(org.apache.tez.mapreduce.lib.MRReaderMapReduce) TaskSplitMetaInfo(org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo) TaskSplitIndex(org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex) Private(org.apache.hadoop.classification.InterfaceAudience.Private)

Example 2 with MRReaderMapred

Use of org.apache.tez.mapreduce.lib.MRReaderMapred in the Apache Tez project.

In class MultiMRInput, method initFromEvent:

/**
 * Builds an {@link MRReader} for the split carried in the given event.
 *
 * <p>The event's user payload is parsed as an {@code MRSplitProto}, the split is
 * reconstructed against a private copy of {@code jobConf}, and a new-API or old-API reader
 * is created depending on {@code useNewApi}. The split length (when it is not {@code -1})
 * is added to the {@code INPUT_SPLIT_LENGTH_BYTES} counter.
 *
 * <p>If the thread is interrupted while the new-API split length is being read, the
 * interruption is logged, the thread's interrupt status is restored, and the reader is
 * still created with the length left at {@code -1}.
 *
 * @param event the event whose user payload holds the serialized split; must not be null
 * @return the reader created for the split
 * @throws IOException propagated from payload parsing, split reconstruction, or reader
 *         construction
 * @throws IllegalStateException if {@code event} is null (raised by the precondition check)
 */
private MRReader initFromEvent(InputDataInformationEvent event) throws IOException {
    Preconditions.checkState(event != null, "Event must be specified");
    if (LOG.isDebugEnabled()) {
        LOG.debug(getContext().getSourceVertexName() + " initializing Reader: " + eventCount.get());
    }
    MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(event.getUserPayload()));
    MRReader reader = null;
    // Each reader gets its own copy of the configuration rather than the shared jobConf.
    JobConf localJobConf = new JobConf(jobConf);
    // -1 is the "length unknown" sentinel; the counter is only updated for other values.
    long splitLength = -1;
    if (useNewApi) {
        InputSplit split = MRInputUtils.getNewSplitDetailsFromEvent(splitProto, localJobConf);
        try {
            splitLength = split.getLength();
        } catch (InterruptedException e) {
            LOG.warn("Got interrupted while reading split length: ", e);
            // Catching InterruptedException clears the flag; restore it so callers
            // further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        reader = new MRReaderMapReduce(localJobConf, split, getContext().getCounters(), inputRecordCounter,
                getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(),
                getContext().getApplicationId().getId(), getContext().getTaskIndex(),
                getContext().getTaskAttemptNumber(), getContext());
        if (LOG.isDebugEnabled()) {
            LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength);
        }
    } else {
        org.apache.hadoop.mapred.InputSplit split = MRInputUtils.getOldSplitDetailsFromEvent(splitProto, localJobConf);
        splitLength = split.getLength();
        reader = new MRReaderMapred(localJobConf, split, getContext().getCounters(), inputRecordCounter, getContext());
        if (LOG.isDebugEnabled()) {
            LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength);
        }
    }
    if (splitLength != -1) {
        getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES).increment(splitLength);
    }
    LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event");
    return reader;
}
Also used : MRReaderMapred(org.apache.tez.mapreduce.lib.MRReaderMapred) MRReaderMapReduce(org.apache.tez.mapreduce.lib.MRReaderMapReduce) MRReader(org.apache.tez.mapreduce.lib.MRReader) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MRSplitProto(org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)

Aggregations

InputSplit (org.apache.hadoop.mapreduce.InputSplit)2 MRReaderMapReduce (org.apache.tez.mapreduce.lib.MRReaderMapReduce)2 MRReaderMapred (org.apache.tez.mapreduce.lib.MRReaderMapred)2 Private (org.apache.hadoop.classification.InterfaceAudience.Private)1 JobConf (org.apache.hadoop.mapred.JobConf)1 TaskSplitIndex (org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex)1 TaskSplitMetaInfo (org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo)1 MRReader (org.apache.tez.mapreduce.lib.MRReader)1 MRSplitProto (org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto)1