Search in sources :

Example 1 with MROutputLegacy

use of org.apache.tez.mapreduce.output.MROutputLegacy in project tez by apache.

the class ReduceProcessor method run.

/**
 * Runs the reduce task over exactly one logical input and one logical output.
 *
 * <p>The input is started and awaited via {@code processorContext} before the
 * output is started and the task is initialised. The reducer is then run through
 * either {@code runNewReducer} or {@code runOldReducer}, depending on
 * {@code useNewApi}, and finally {@code done()} is invoked.
 *
 * @param _inputs  the logical inputs for this task; must contain exactly one entry
 * @param _outputs the logical outputs for this task; must contain exactly one entry
 * @throws IOException if the number of inputs or outputs is not one, if the input is
 *         not an {@code OrderedGroupedInputLegacy}, if the output is neither an
 *         {@code MROutputLegacy} nor an {@code OrderedPartitionedKVOutput}, or if
 *         {@code runNewReducer} fails with a {@code ClassNotFoundException}
 * @throws Exception any other failure propagated from the input, output or reducer
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
    LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());
    // Exactly one output and one input are supported.
    if (_outputs.size() != 1) {
        throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
    }
    if (_inputs.size() != 1) {
        throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
    }
    LogicalInput in = _inputs.values().iterator().next();
    in.start();
    // Block until the single input reports ready before the output is started.
    List<Input> pendingInputs = new LinkedList<Input>();
    pendingInputs.add(in);
    processorContext.waitForAllInputsReady(pendingInputs);
    LOG.info("Input is ready for consumption. Starting Output");
    LogicalOutput out = _outputs.values().iterator().next();
    out.start();
    initTask(out);
    progressHelper.scheduleProgressTaskService(0, 100);
    this.statusUpdate();
    Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
    Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
    LOG.info("Using keyClass: " + keyClass);
    LOG.info("Using valueClass: " + valueClass);
    RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
    LOG.info("Using comparator: " + comparator);
    reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
    reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    // Sanity check
    if (!(in instanceof OrderedGroupedInputLegacy)) {
        throw new IOException("Illegal input to reduce: " + in.getClass());
    }
    OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
    KeyValuesReader kvReader = shuffleInput.getReader();
    KeyValueWriter kvWriter = null;
    if ((out instanceof MROutputLegacy)) {
        kvWriter = ((MROutputLegacy) out).getWriter();
    } else if ((out instanceof OrderedPartitionedKVOutput)) {
        kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {
        // Report the class of the offending output (previously reported the input's class).
        throw new IOException("Illegal output to reduce: " + out.getClass());
    }
    if (useNewApi) {
        try {
            runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
        } catch (ClassNotFoundException cnfe) {
            // Surface as IOException while preserving the cause.
            throw new IOException(cnfe);
        }
    } else {
        runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
    }
    done();
}
Also used : OrderedGroupedInputLegacy(org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy) ProgressHelper(org.apache.tez.common.ProgressHelper) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) LinkedList(java.util.LinkedList) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) RawComparator(org.apache.hadoop.io.RawComparator) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) LogicalInput(org.apache.tez.runtime.api.LogicalInput) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader)

Example 2 with MROutputLegacy

use of org.apache.tez.mapreduce.output.MROutputLegacy in project tez by apache.

the class MRTask method done.

/**
 * Finishes the task: commits the output when it is an {@code MROutputLegacy}
 * that requires a commit, marks the task as done and sends the last update.
 *
 * @throws IOException          propagated from the commit or the final update
 * @throws InterruptedException propagated from the commit or the final update
 */
public void done() throws IOException, InterruptedException {
    LOG.info("Task:" + taskAttemptId + " is done." + " And is in the process of committing");
    // TODO TEZ Interaction between Commit and OutputReady. Merge ?
    // TODO EVENTUALLY - Commit is not required for map tasks.
    // skip a couple of RPCs before exiting.
    final boolean needsCommit =
        output instanceof MROutputLegacy && ((MROutputLegacy) output).isCommitRequired();
    if (needsCommit) {
        // wait for commit approval and commit
        commit((MROutputLegacy) output);
    }
    taskDone.set(true);
    sendLastUpdate();
}
Also used : MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy)

Example 3 with MROutputLegacy

use of org.apache.tez.mapreduce.output.MROutputLegacy in project tez by apache.

the class MapProcessor method run.

/**
 * Runs the map task over exactly one logical input and one logical output.
 *
 * <p>All inputs and outputs are started, the task is initialised with the output,
 * configuration updates reported by the input are merged into {@code jobConf},
 * and the mapper is run through {@code runNewMapper} or {@code runOldMapper}
 * depending on {@code useNewApi}. {@code done()} is called at the end.
 *
 * @param _inputs  the logical inputs for this task; must contain exactly one entry
 * @param _outputs the logical outputs for this task; must contain exactly one entry
 * @throws IOException if there is not exactly one input and one output, if the input
 *         is not an {@code MRInputLegacy}, or if the output is neither an
 *         {@code MROutputLegacy} nor an {@code OrderedPartitionedKVOutput}
 * @throws Exception any other failure propagated from the input, output or mapper
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
    LOG.info("Running map: " + processorContext.getUniqueIdentifier());

    final boolean singleInputAndOutput = _inputs.size() == 1 && _outputs.size() == 1;
    if (!singleInputAndOutput) {
        throw new IOException("Cannot handle multiple _inputs or _outputs" + ", inputCount=" + _inputs.size() + ", outputCount=" + _outputs.size());
    }

    // Start every input first, then every output.
    for (LogicalInput logicalInput : _inputs.values()) {
        logicalInput.start();
    }
    for (LogicalOutput logicalOutput : _outputs.values()) {
        logicalOutput.start();
    }

    LogicalInput in = _inputs.values().iterator().next();
    LogicalOutput out = _outputs.values().iterator().next();
    initTask(out);
    progressHelper.scheduleProgressTaskService(0, 100);

    // Sanity check
    if (!(in instanceof MRInputLegacy)) {
        throw new IOException(new TezException("Only MRInputLegacy supported. Input: " + in.getClass()));
    }
    MRInputLegacy legacyInput = (MRInputLegacy) in;
    legacyInput.init();

    // Copy any configuration updates reported by the input into jobConf.
    Configuration confUpdates = legacyInput.getConfigUpdates();
    if (confUpdates != null) {
        for (Entry<String, String> update : confUpdates) {
            jobConf.set(update.getKey(), update.getValue());
        }
    }

    final KeyValueWriter kvWriter;
    if (out instanceof MROutputLegacy) {
        kvWriter = ((MROutputLegacy) out).getWriter();
    } else if (out instanceof OrderedPartitionedKVOutput) {
        kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {
        throw new IOException("Illegal output to map, outputClass=" + out.getClass());
    }

    if (useNewApi) {
        runNewMapper(jobConf, mrReporter, legacyInput, kvWriter);
    } else {
        runOldMapper(jobConf, mrReporter, legacyInput, kvWriter);
    }
    done();
}
Also used : TezException(org.apache.tez.dag.api.TezException) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) ProgressHelper(org.apache.tez.common.ProgressHelper) Configuration(org.apache.hadoop.conf.Configuration) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) LogicalInput(org.apache.tez.runtime.api.LogicalInput) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy)

Example 4 with MROutputLegacy

use of org.apache.tez.mapreduce.output.MROutputLegacy in project tez by apache.

the class MRTask method initTask.

/**
 * Initialises per-task state once the output is available.
 *
 * <p>Records the output, picks up the output committer when the output is an
 * {@code MROutputLegacy}, creates the reporter, job context and task attempt
 * context, and finally localizes {@code jobConf}.
 *
 * @param output the task's logical output; the caller is expected to have
 *               initialised it already
 * @throws IOException          declared by this method; source not visible here
 * @throws InterruptedException declared by this method; source not visible here
 */
public void initTask(LogicalOutput output) throws IOException, InterruptedException {
    // By this time output has been initialized
    this.output = output;
    if (output instanceof MROutputLegacy) {
        MROutputLegacy legacyOutput = (MROutputLegacy) output;
        committer = legacyOutput.getOutputCommitter();
    }
    this.mrReporter = new MRTaskReporter(processorContext);
    // NOTE(review): the new-mapper flag is read here regardless of which processor
    // calls initTask - confirm this is intended when invoked for a reduce task.
    this.useNewApi = jobConf.getUseNewMapper();
    // The job context is keyed by the DAG id derived from the MR task attempt id.
    this.jobContext = new JobContextImpl(
        jobConf,
        IDConverter.fromMRTaskAttemptId(taskAttemptId).getTaskID().getVertexID().getDAGId(),
        mrReporter);
    this.taskAttemptContext = new TaskAttemptContextImpl(jobConf, taskAttemptId, mrReporter);
    localizeConfiguration(jobConf);
}
Also used : JobContextImpl(org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) TaskAttemptContextImpl(org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl) TezDAGID(org.apache.tez.dag.records.TezDAGID)

Aggregations

MROutputLegacy (org.apache.tez.mapreduce.output.MROutputLegacy)4 IOException (java.io.IOException)2 ProgressHelper (org.apache.tez.common.ProgressHelper)2 LogicalInput (org.apache.tez.runtime.api.LogicalInput)2 LogicalOutput (org.apache.tez.runtime.api.LogicalOutput)2 KeyValueWriter (org.apache.tez.runtime.library.api.KeyValueWriter)2 OrderedPartitionedKVOutput (org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput)2 LinkedList (java.util.LinkedList)1 Configuration (org.apache.hadoop.conf.Configuration)1 RawComparator (org.apache.hadoop.io.RawComparator)1 TezException (org.apache.tez.dag.api.TezException)1 TezDAGID (org.apache.tez.dag.records.TezDAGID)1 TaskAttemptContextImpl (org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl)1 JobContextImpl (org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl)1 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)1 Input (org.apache.tez.runtime.api.Input)1 KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader)1 OrderedGroupedInputLegacy (org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy)1