Search in sources :

Example 1 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class ReduceProcessor method run.

/**
 * Runs the reduce phase over exactly one input and exactly one output.
 *
 * <p>The single input is started and awaited through the processor context before the single
 * output is started and handed to {@code initTask}. The input must be an
 * {@code OrderedGroupedInputLegacy}; the output must be either an {@code MROutputLegacy} or an
 * {@code OrderedPartitionedKVOutput}. Depending on {@code useNewApi} the work is delegated to
 * {@code runNewReducer} or {@code runOldReducer}, after which {@code done()} is invoked.
 *
 * @param _inputs the logical inputs of this task; must contain exactly one entry
 * @param _outputs the logical outputs of this task; must contain exactly one entry
 * @throws IOException if the number of inputs or outputs is not one, if the input or output is
 *     of an unsupported type, or if the new-API reducer raises a {@code ClassNotFoundException}
 * @throws Exception any other failure raised by the input, the output or the reducer
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
    LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());
    if (_outputs.size() != 1) {
        throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
    }
    if (_inputs.size() != 1) {
        throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
    }
    LogicalInput in = _inputs.values().iterator().next();
    in.start();
    // Block until the single input reports ready before touching the output.
    List<Input> pendingInputs = new LinkedList<Input>();
    pendingInputs.add(in);
    processorContext.waitForAllInputsReady(pendingInputs);
    LOG.info("Input is ready for consumption. Starting Output");
    LogicalOutput out = _outputs.values().iterator().next();
    out.start();
    initTask(out);
    progressHelper.scheduleProgressTaskService(0, 100);
    this.statusUpdate();
    Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
    Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
    LOG.info("Using keyClass: " + keyClass);
    LOG.info("Using valueClass: " + valueClass);
    RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
    LOG.info("Using comparator: " + comparator);
    reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
    reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    // Sanity check
    if (!(in instanceof OrderedGroupedInputLegacy)) {
        throw new IOException("Illegal input to reduce: " + in.getClass());
    }
    OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
    KeyValuesReader kvReader = shuffleInput.getReader();
    KeyValueWriter kvWriter = null;
    if ((out instanceof MROutputLegacy)) {
        kvWriter = ((MROutputLegacy) out).getWriter();
    } else if ((out instanceof OrderedPartitionedKVOutput)) {
        kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {
        // Report the offending output's class (the original reported the input's class here).
        throw new IOException("Illegal output to reduce: " + out.getClass());
    }
    if (useNewApi) {
        try {
            runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    } else {
        runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
    }
    done();
}
Also used : OrderedGroupedInputLegacy(org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy) ProgressHelper(org.apache.tez.common.ProgressHelper) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) LinkedList(java.util.LinkedList) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) RawComparator(org.apache.hadoop.io.RawComparator) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) LogicalInput(org.apache.tez.runtime.api.LogicalInput) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader)

Example 2 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class LogicalIOProcessorRuntimeTask method initialize.

/**
 * Initializes the processor, all inputs and outputs, and auto-starts the inputs that have not
 * been started yet.
 *
 * <p>Sequence, as implemented below:
 * <ol>
 *   <li>Require state {@code NEW} and move to {@code INITED}; create the processor context and
 *       the processor.</li>
 *   <li>Submit one initializer per input spec and per output spec to the completion service.
 *       The processor itself is initialized on the calling thread, either before the
 *       submissions (when {@code initializeProcessorFirst} or
 *       {@code initializeProcessorIOSerially} is set) or right after them.</li>
 *   <li>Wait for every submitted initializer, then initialize and register group inputs.</li>
 *   <li>Make the initial memory allocations, submit start tasks for inputs and input groups
 *       that are not already started, and wait for them.</li>
 *   <li>Populate the run-time input/output maps and start the router thread.</li>
 * </ol>
 *
 * @throws Exception if the task is not in state {@code NEW}, or if any initializer or start
 *     task fails. For an {@code ExecutionException} whose cause is an {@code Exception} the
 *     cause itself is rethrown; any other cause is wrapped in a new {@code Exception}.
 */
public void initialize() throws Exception {
    Preconditions.checkState(this.state.get() == State.NEW, "Already initialized");
    this.state.set(State.INITED);
    this.processorContext = createProcessorContext();
    this.processor = createProcessor(processorDescriptor.getClassName(), processorContext);
    if (initializeProcessorFirst || initializeProcessorIOSerially) {
        // Initialize processor in the current thread.
        initializeLogicalIOProcessor();
    }
    // numTasks counts every initializer submitted so the wait loop below knows how many
    // completions to take.
    int numTasks = 0;
    int inputIndex = 0;
    for (InputSpec inputSpec : taskSpec.getInputs()) {
        this.initializerCompletionService.submit(new InitializeInputCallable(inputSpec, inputIndex++));
        numTasks++;
    }
    int outputIndex = 0;
    for (OutputSpec outputSpec : taskSpec.getOutputs()) {
        this.initializerCompletionService.submit(new InitializeOutputCallable(outputSpec, outputIndex++));
        numTasks++;
    }
    if (!(initializeProcessorFirst || initializeProcessorIOSerially)) {
        // Initialize processor in the current thread.
        initializeLogicalIOProcessor();
    }
    // Wait for all input/output initializers; the first failure aborts initialization.
    int completedTasks = 0;
    while (completedTasks < numTasks) {
        LOG.info("Waiting for " + (numTasks - completedTasks) + " initializers to finish");
        Future<Void> future = initializerCompletionService.take();
        try {
            future.get();
            completedTasks++;
        } catch (ExecutionException e) {
            // Surface the underlying failure rather than the ExecutionException wrapper.
            if (e.getCause() instanceof Exception) {
                throw (Exception) e.getCause();
            } else {
                throw new Exception(e);
            }
        }
    }
    LOG.info("All initializers finished");
    // Group inputs depend on inputs being initialized, so this must be done afterwards.
    initializeGroupInputs();
    // Register the groups so that appropriate calls can be made.
    this.inputReadyTracker.setGroupedInputs(groupInputsMap == null ? null : groupInputsMap.values());
    // Grouped input start will be controlled by the start of the GroupedInput
    // Construct the set of groupedInputs up front so that start is not invoked on them.
    Set<String> groupInputs = Sets.newHashSet();
    // first add the group inputs
    if (groupInputSpecs != null && !groupInputSpecs.isEmpty()) {
        for (GroupInputSpec groupInputSpec : groupInputSpecs) {
            runInputMap.put(groupInputSpec.getGroupName(), groupInputsMap.get(groupInputSpec.getGroupName()));
            groupInputs.addAll(groupInputSpec.getGroupVertices());
        }
    }
    initialMemoryDistributor.makeInitialAllocations();
    LOG.info("Starting Inputs/Outputs");
    // numAutoStarts counts the start tasks submitted below, mirroring numTasks above.
    int numAutoStarts = 0;
    for (InputSpec inputSpec : inputSpecs) {
        // Members of a group are started through their group, not individually.
        if (groupInputs.contains(inputSpec.getSourceVertexName())) {
            LOG.info("Ignoring " + inputSpec.getSourceVertexName() + " for start, since it will be controlled via it's Group");
            continue;
        }
        if (!inputAlreadyStarted(taskSpec.getVertexName(), inputSpec.getSourceVertexName())) {
            // Record the start before submitting so later checks see this input as started.
            startedInputsMap.put(taskSpec.getVertexName(), inputSpec.getSourceVertexName());
            numAutoStarts++;
            this.initializerCompletionService.submit(new StartInputCallable(inputsMap.get(inputSpec.getSourceVertexName()), inputSpec.getSourceVertexName()));
            LOG.info("Input: " + inputSpec.getSourceVertexName() + " being auto started by the framework. Subsequent instances will not be auto-started");
        }
    }
    if (groupInputSpecs != null) {
        for (GroupInputSpec group : groupInputSpecs) {
            // NOTE(review): unlike the non-grouped path above, nothing is recorded in
            // startedInputsMap for the group here - confirm that this is intended.
            if (!inputAlreadyStarted(taskSpec.getVertexName(), group.getGroupName())) {
                numAutoStarts++;
                this.initializerCompletionService.submit(new StartInputCallable(groupInputsMap.get(group.getGroupName()), group.getGroupName()));
                LOG.info("InputGroup: " + group.getGroupName() + " being auto started by the framework. Subsequent instance will not be auto-started");
            }
        }
    }
    // Shutdown after all tasks complete.
    // (No further tasks are submitted after this point; presumably the already-submitted
    // start tasks still run to completion - verify against the executor type.)
    this.initializerExecutor.shutdown();
    completedTasks = 0;
    LOG.info("Num IOs determined for AutoStart: " + numAutoStarts);
    // Wait for every auto-start task, with the same failure unwrapping as for the initializers.
    while (completedTasks < numAutoStarts) {
        LOG.info("Waiting for " + (numAutoStarts - completedTasks) + " IOs to start");
        Future<Void> future = initializerCompletionService.take();
        try {
            future.get();
            completedTasks++;
        } catch (ExecutionException e) {
            if (e.getCause() instanceof Exception) {
                throw (Exception) e.getCause();
            } else {
                throw new Exception(e);
            }
        }
    }
    LOG.info("AutoStartComplete");
    // then add the non-grouped inputs
    for (InputSpec inputSpec : inputSpecs) {
        if (!groupInputs.contains(inputSpec.getSourceVertexName())) {
            LogicalInput input = inputsMap.get(inputSpec.getSourceVertexName());
            runInputMap.put(inputSpec.getSourceVertexName(), input);
        }
    }
    // Every output is exposed under its destination vertex name.
    for (OutputSpec outputSpec : outputSpecs) {
        LogicalOutput output = outputsMap.get(outputSpec.getDestinationVertexName());
        String outputName = outputSpec.getDestinationVertexName();
        runOutputMap.put(outputName, output);
    }
    // TODO Maybe close initialized inputs / outputs in case of failure to
    // initialize.
    startRouterThread();
}
Also used : LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) InputSpec(org.apache.tez.runtime.api.impl.InputSpec) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec) TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) ExecutionException(java.util.concurrent.ExecutionException) LogicalInput(org.apache.tez.runtime.api.LogicalInput) MergedLogicalInput(org.apache.tez.runtime.api.MergedLogicalInput) ExecutionException(java.util.concurrent.ExecutionException) OutputSpec(org.apache.tez.runtime.api.impl.OutputSpec) GroupInputSpec(org.apache.tez.runtime.api.impl.GroupInputSpec)

Example 3 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class FilterByWordInputProcessor method run.

/**
 * Reads key/value records from a single {@code MRInputLegacy} and writes every record whose
 * value text contains {@code filterWord} to a single {@code UnorderedKVOutput}.
 *
 * <p>The value written is a {@code TextLongPair} built from the source file name and the record
 * key cast to {@code LongWritable}. The file name is taken from
 * {@code MRJobConfig.MAP_INPUT_FILE} in the input's config updates when available, otherwise a
 * placeholder is used.
 *
 * @param _inputs the logical inputs of this task; must contain exactly one entry
 * @param _outputs the logical outputs of this task; must contain exactly one entry
 * @throws IllegalStateException if the number of inputs or outputs is not one, or if the input
 *     or output is not of the supported type
 * @throws Exception any failure raised while starting, reading from or writing to the IOs
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
    if (_inputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
    }
    if (_outputs.size() != 1) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
    }
    for (LogicalInput input : _inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : _outputs.values()) {
        output.start();
    }
    LogicalInput li = _inputs.values().iterator().next();
    // The input is cast to MRInputLegacy below (init() and getConfigUpdates() are called on it),
    // so check for that exact type here. The previous check against MRInput let a non-legacy
    // MRInput through and failed later with a ClassCastException.
    if (!(li instanceof MRInputLegacy)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInputLegacy");
    }
    LogicalOutput lo = _outputs.values().iterator().next();
    if (!(lo instanceof UnorderedKVOutput)) {
        throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
    }
    progressHelper.scheduleProgressTaskService(0, 100);
    MRInputLegacy mrInput = (MRInputLegacy) li;
    mrInput.init();
    UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;
    Configuration updatedConf = mrInput.getConfigUpdates();
    // Default file name used when the input does not report which file is being read.
    Text srcFile = new Text();
    srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
    if (updatedConf != null) {
        String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
        if (fileName != null) {
            LOG.info("Processing file: " + fileName);
            srcFile.set(fileName);
        }
    }
    KeyValueReader kvReader = mrInput.getReader();
    KeyValueWriter kvWriter = kvOutput.getWriter();
    while (kvReader.next()) {
        Object key = kvReader.getCurrentKey();
        Object val = kvReader.getCurrentValue();
        Text valText = (Text) val;
        String readVal = valText.toString();
        if (readVal.contains(filterWord)) {
            LongWritable lineNum = (LongWritable) key;
            TextLongPair outVal = new TextLongPair(srcFile, lineNum);
            kvWriter.write(valText, outVal);
        }
    }
}
Also used : MRInput(org.apache.tez.mapreduce.input.MRInput) ProgressHelper(org.apache.tez.common.ProgressHelper) Configuration(org.apache.hadoop.conf.Configuration) TextLongPair(org.apache.tez.mapreduce.examples.FilterLinesByWord.TextLongPair) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) Text(org.apache.hadoop.io.Text) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) UnorderedKVOutput(org.apache.tez.runtime.library.output.UnorderedKVOutput) LogicalInput(org.apache.tez.runtime.api.LogicalInput) LongWritable(org.apache.hadoop.io.LongWritable) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy)

Example 4 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class FilterByWordOutputProcessor method run.

/**
 * Copies every key/value pair from the single {@code UnorderedKVInput} to the single
 * {@code MROutput}.
 *
 * @throws IllegalStateException if there is not exactly one input and one output, or if either
 *     is not of the supported type
 * @throws Exception any failure raised while starting, reading from or writing to the IOs
 */
@Override
public void run() throws Exception {
    // Exactly one input and exactly one output are supported.
    if (inputs.size() != 1) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single input");
    }
    if (outputs.size() != 1) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with a single output");
    }

    // Start everything before inspecting the concrete types.
    for (LogicalInput each : inputs.values()) {
        each.start();
    }
    for (LogicalOutput each : outputs.values()) {
        each.start();
    }

    final LogicalInput soleInput = inputs.values().iterator().next();
    final boolean inputSupported = soleInput instanceof UnorderedKVInput;
    if (!inputSupported) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with ShuffledUnorderedKVInput");
    }
    final LogicalOutput soleOutput = outputs.values().iterator().next();
    final boolean outputSupported = soleOutput instanceof MROutput;
    if (!outputSupported) {
        throw new IllegalStateException("FilterByWordOutputProcessor processor can only work with MROutput");
    }

    final KeyValueReader reader = ((UnorderedKVInput) soleInput).getReader();
    final KeyValueWriter writer = ((MROutput) soleOutput).getWriter();

    // Pass every record through unchanged.
    while (reader.next()) {
        writer.write(reader.getCurrentKey(), reader.getCurrentValue());
    }
}
Also used : KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) UnorderedKVInput(org.apache.tez.runtime.library.input.UnorderedKVInput) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) LogicalInput(org.apache.tez.runtime.api.LogicalInput) MROutput(org.apache.tez.mapreduce.output.MROutput)

Example 5 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project hive by apache.

the class TezProcessor method initializeAndRunProcessor.

/**
 * Initializes and runs the record processor, then closes it and commits or aborts the
 * {@code MROutput} outputs depending on the outcome.
 *
 * <p>Any {@code Throwable} raised by init/run marks the processor as aborted and is remembered
 * as the original failure. If that failure is, or is rooted in, an {@code Error}, it is reported
 * as {@code TaskFailureType.FATAL} and rethrown immediately without closing the processor.
 * Otherwise the processor is closed; outputs that require a commit are committed only when
 * nothing has failed so far, and are aborted when any step failed.
 *
 * @param inputs the logical inputs passed to the record processor
 * @param outputs the logical outputs passed to the record processor and committed/aborted here
 * @throws InterruptedException if the first failure was an {@code InterruptedException}
 * @throws RuntimeException wrapping the first failure in every other failure case
 * @throws Exception declared for overriding implementations
 */
protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
    Throwable originalThrowable = null;
    try {
        MRTaskReporter mrReporter = new MRTaskReporter(getContext());
        // Init and run are both potentially long, and blocking operations. Synchronization
        // with the 'abort' operation will not work since if they end up blocking on a monitor
        // which does not belong to the lock, the abort will end up getting blocked.
        // Both of these method invocations need to handle the abort call on their own.
        rproc.init(mrReporter, inputs, outputs);
        rproc.run();
        perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
    } catch (Throwable t) {
        // Guard against a null processor, consistent with the close() call below; otherwise a
        // NullPointerException thrown from here would escape before the failure is recorded.
        if (rproc != null) {
            rproc.setAborted(true);
        }
        originalThrowable = t;
    } finally {
        if (originalThrowable != null && (originalThrowable instanceof Error || Throwables.getRootCause(originalThrowable) instanceof Error)) {
            LOG.error("Cannot recover from this FATAL error", originalThrowable);
            getContext().reportFailure(TaskFailureType.FATAL, originalThrowable, "Cannot recover from this error");
            throw new RuntimeException(originalThrowable);
        }
        try {
            if (rproc != null) {
                rproc.close();
            }
        } catch (Throwable t) {
            // Keep the first failure; a close() failure only matters if nothing failed before.
            if (originalThrowable == null) {
                originalThrowable = t;
            }
        }
        // commit the output tasks, but only when init/run/close all succeeded; committing the
        // output of a failed run and aborting it right afterwards is never wanted.
        if (originalThrowable == null) {
            try {
                for (LogicalOutput output : outputs.values()) {
                    if (output instanceof MROutput) {
                        MROutput mrOutput = (MROutput) output;
                        if (mrOutput.isCommitRequired()) {
                            mrOutput.commit();
                        }
                    }
                }
            } catch (Throwable t) {
                originalThrowable = t;
            }
        }
        if (originalThrowable != null) {
            LOG.error("Failed initializeAndRunProcessor", originalThrowable);
            // abort the output tasks
            for (LogicalOutput output : outputs.values()) {
                if (output instanceof MROutput) {
                    MROutput mrOutput = (MROutput) output;
                    if (mrOutput.isCommitRequired()) {
                        mrOutput.abort();
                    }
                }
            }
            if (originalThrowable instanceof InterruptedException) {
                throw (InterruptedException) originalThrowable;
            } else {
                throw new RuntimeException(originalThrowable);
            }
        }
    }
}
Also used : MRTaskReporter(org.apache.tez.mapreduce.processor.MRTaskReporter) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutput(org.apache.tez.mapreduce.output.MROutput)

Aggregations

LogicalOutput (org.apache.tez.runtime.api.LogicalOutput)14 LogicalInput (org.apache.tez.runtime.api.LogicalInput)8 IOException (java.io.IOException)4 Configuration (org.apache.hadoop.conf.Configuration)4 ProgressHelper (org.apache.tez.common.ProgressHelper)4 MROutput (org.apache.tez.mapreduce.output.MROutput)3 KeyValueWriter (org.apache.tez.runtime.library.api.KeyValueWriter)3 Map (java.util.Map)2 ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext)2 TezKVOutputCollector (org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector)2 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)2 JobConf (org.apache.hadoop.mapred.JobConf)2 TezException (org.apache.tez.dag.api.TezException)2 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)2 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)2 MROutputLegacy (org.apache.tez.mapreduce.output.MROutputLegacy)2 KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader)2 JarURLConnection (java.net.JarURLConnection)1 URL (java.net.URL)1 ArrayList (java.util.ArrayList)1