Search in sources :

Example 1 with MROutput

use of org.apache.tez.mapreduce.output.MROutput in project tez by apache.

the class FilterByWordOutputProcessor method run.

/**
 * Copies every key/value pair from the single input to the single output.
 *
 * <p>The processor requires exactly one input, which must be an
 * {@link UnorderedKVInput}, and exactly one output, which must be an
 * {@link MROutput}. All inputs and outputs are started before their types are
 * checked. No filtering is applied in this method: each pair obtained from the
 * reader is written to the writer unchanged.
 *
 * @throws IllegalStateException if the number or type of inputs/outputs is not
 *         as described above
 * @throws Exception if starting, reading or writing fails
 */
@Override
public void run() throws Exception {
    if (inputs.size() != 1) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
    }
    if (outputs.size() != 1) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
    }
    for (LogicalInput input : inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : outputs.values()) {
        output.start();
    }
    LogicalInput li = inputs.values().iterator().next();
    if (!(li instanceof UnorderedKVInput)) {
        // The message names the type that is actually checked above.
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with UnorderedKVInput");
    }
    LogicalOutput lo = outputs.values().iterator().next();
    if (!(lo instanceof MROutput)) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
    }
    UnorderedKVInput kvInput = (UnorderedKVInput) li;
    MROutput mrOutput = (MROutput) lo;
    KeyValueReader kvReader = kvInput.getReader();
    KeyValueWriter kvWriter = mrOutput.getWriter();
    // Straight pass-through of every record.
    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object value = kvReader.getCurrentValue();
        kvWriter.write(key, value);
    }
}
Also used : KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) UnorderedKVInput(org.apache.tez.runtime.library.input.UnorderedKVInput) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) LogicalInput(org.apache.tez.runtime.api.LogicalInput) MROutput(org.apache.tez.mapreduce.output.MROutput)

Example 2 with MROutput

use of org.apache.tez.mapreduce.output.MROutput in project hive by apache.

the class TezProcessor method initializeAndRunProcessor.

/**
 * Initializes and runs {@code rproc}, then closes it and commits or aborts the
 * {@link MROutput}s found in {@code outputs}.
 *
 * <p>Sequence, as implemented below:
 * <ol>
 *   <li>{@code rproc.init(...)} and {@code rproc.run()}; any {@link Throwable}
 *       marks the processor as aborted and is remembered as the original
 *       failure.</li>
 *   <li>If that failure is an {@link Error} (directly or as root cause) it is
 *       reported as {@code TaskFailureType.FATAL} and rethrown wrapped in a
 *       {@link RuntimeException}; no close/commit/abort is attempted.</li>
 *   <li>Otherwise {@code rproc} is closed and every {@code MROutput} that
 *       requires a commit is committed. The first failure seen is kept.</li>
 *   <li>If any failure was recorded, every {@code MROutput} that requires a
 *       commit is aborted and the failure is rethrown.</li>
 * </ol>
 *
 * @param inputs  logical inputs handed to {@code rproc.init}
 * @param outputs logical outputs handed to {@code rproc.init}; also scanned for
 *                {@code MROutput} instances to commit/abort
 * @throws InterruptedException if the recorded failure is an
 *         {@code InterruptedException}
 * @throws RuntimeException wrapping any other recorded failure
 */
protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
    Throwable originalThrowable = null;
    try {
        MRTaskReporter mrReporter = new MRTaskReporter(getContext());
        // Init and run are both potentially long, and blocking operations. Synchronization
        // with the 'abort' operation will not work since if they end up blocking on a monitor
        // which does not belong to the lock, the abort will end up getting blocked.
        // Both of these method invocations need to handle the abort call on their own.
        rproc.init(mrReporter, inputs, outputs);
        rproc.run();
        perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
    } catch (Throwable t) {
        // Same null guard as the close() call below, so a missing processor
        // surfaces as the recorded failure instead of a second NPE from here.
        if (rproc != null) {
            rproc.setAborted(true);
        }
        originalThrowable = t;
    } finally {
        if (originalThrowable != null && (originalThrowable instanceof Error || Throwables.getRootCause(originalThrowable) instanceof Error)) {
            LOG.error("Cannot recover from this FATAL error", originalThrowable);
            getContext().reportFailure(TaskFailureType.FATAL, originalThrowable, "Cannot recover from this error");
            throw new RuntimeException(originalThrowable);
        }
        try {
            if (rproc != null) {
                rproc.close();
            }
        } catch (Throwable t) {
            // Keep the earliest failure; a close() error must not hide it.
            if (originalThrowable == null) {
                originalThrowable = t;
            }
        }
        // commit the output tasks
        // NOTE(review): the commit is attempted even when a failure was already
        // recorded above; confirm this is intended.
        try {
            for (LogicalOutput output : outputs.values()) {
                if (output instanceof MROutput) {
                    MROutput mrOutput = (MROutput) output;
                    if (mrOutput.isCommitRequired()) {
                        mrOutput.commit();
                    }
                }
            }
        } catch (Throwable t) {
            if (originalThrowable == null) {
                originalThrowable = t;
            }
        }
        if (originalThrowable != null) {
            LOG.error("Failed initializeAndRunProcessor", originalThrowable);
            // abort the output tasks
            for (LogicalOutput output : outputs.values()) {
                if (output instanceof MROutput) {
                    MROutput mrOutput = (MROutput) output;
                    try {
                        if (mrOutput.isCommitRequired()) {
                            mrOutput.abort();
                        }
                    } catch (Throwable abortFailure) {
                        // A failing abort must neither mask the original failure
                        // nor prevent the remaining outputs from being aborted.
                        LOG.error("Failed to abort output", abortFailure);
                        if (abortFailure != originalThrowable) {
                            originalThrowable.addSuppressed(abortFailure);
                        }
                    }
                }
            }
            if (originalThrowable instanceof InterruptedException) {
                throw (InterruptedException) originalThrowable;
            } else {
                throw new RuntimeException(originalThrowable);
            }
        }
    }
}
Also used : MRTaskReporter(org.apache.tez.mapreduce.processor.MRTaskReporter) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutput(org.apache.tez.mapreduce.output.MROutput)

Example 3 with MROutput

use of org.apache.tez.mapreduce.output.MROutput in project tez by apache.

the class SimpleMRProcessor method postOp.

/**
 * Flushes every {@link MROutput} among the outputs and commits those that
 * report a commit is required.
 *
 * <p>If a commit fails with an {@link IOException}, committing stops, every
 * output that was pending commit is aborted (abort failures are only logged),
 * and the commit failure is rethrown.
 *
 * @throws Exception the {@code IOException} raised by a failed commit, or any
 *         exception raised while flushing or waiting for permission to commit
 */
@Override
protected void postOp() throws Exception {
    if (getOutputs() == null) {
        // No post op
        return;
    }
    List<MROutput> pendingCommits = Lists.newLinkedList();
    for (LogicalOutput candidate : getOutputs().values()) {
        if (!(candidate instanceof MROutput)) {
            continue;
        }
        MROutput flushed = (MROutput) candidate;
        flushed.flush();
        if (flushed.isCommitRequired()) {
            pendingCommits.add(flushed);
        }
    }
    if (pendingCommits.isEmpty()) {
        return;
    }
    // Poll every 100 ms until the context reports that this task may commit.
    while (!getContext().canCommit()) {
        Thread.sleep(100);
    }
    Exception commitFailure = null;
    for (MROutput toCommit : pendingCommits) {
        try {
            toCommit.commit();
        } catch (IOException ioe) {
            LOG.warn("Error in committing output", ioe);
            commitFailure = ioe;
            // Stop at the first failed commit.
            break;
        }
    }
    if (commitFailure == null) {
        return;
    }
    // A commit failed: abort all pending outputs, then surface the failure.
    for (MROutput toAbort : pendingCommits) {
        try {
            toAbort.abort();
        } catch (IOException ioe) {
            LOG.warn("Error in aborting output", ioe);
        }
    }
    throw commitFailure;
}
Also used : LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutput(org.apache.tez.mapreduce.output.MROutput) IOException(java.io.IOException)

Aggregations

MROutput (org.apache.tez.mapreduce.output.MROutput): 3, LogicalOutput (org.apache.tez.runtime.api.LogicalOutput): 3, IOException (java.io.IOException): 1, MRTaskReporter (org.apache.tez.mapreduce.processor.MRTaskReporter): 1, LogicalInput (org.apache.tez.runtime.api.LogicalInput): 1, KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 1, KeyValueWriter (org.apache.tez.runtime.library.api.KeyValueWriter): 1, UnorderedKVInput (org.apache.tez.runtime.library.input.UnorderedKVInput): 1