Search in sources :

Example 6 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project hive by apache.

the class RecordProcessor method createOutputMap.

/**
 * Populates {@code outMap} with one {@link TezKVOutputCollector} per entry of
 * {@code outputs}, stored under the same key as the wrapped output.
 *
 * <p>Fails the state precondition if {@code outMap} has already been created.
 */
protected void createOutputMap() {
    Preconditions.checkState(outMap == null, "Outputs should only be setup once");
    outMap = new HashMap<>();
    for (Entry<String, LogicalOutput> namedOutput : outputs.entrySet()) {
        String outputName = namedOutput.getKey();
        LogicalOutput logicalOutput = namedOutput.getValue();
        // Each logical output gets its own collector wrapper.
        outMap.put(outputName, new TezKVOutputCollector(logicalOutput));
    }
}
Also used : TezKVOutputCollector(org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput)

Example 7 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class LogicalIOProcessorRuntimeTask method createOutput.

/**
 * Reflectively instantiates the output class named by the spec's descriptor,
 * using its {@code (OutputContext, int)} constructor, and returns it as a
 * {@link LogicalOutput}.
 *
 * @param outputSpec supplies the output descriptor (class name) and the physical edge count
 * @param outputContext context handed to the new output's constructor
 * @return the newly created output
 * @throws TezException declared by this method's signature
 * @throws TezUncheckedException if the created instance is not a {@link LogicalOutput}
 */
private LogicalOutput createOutput(OutputSpec outputSpec, OutputContext outputContext) throws TezException {
    String outputClassName = outputSpec.getOutputDescriptor().getClassName();
    // Constructor signature and matching arguments: (OutputContext, int).
    Class<?>[] constructorTypes = { OutputContext.class, Integer.TYPE };
    Object[] constructorArgs = { outputContext, outputSpec.getPhysicalEdgeCount() };
    Output created = ReflectionUtils.createClazzInstance(outputClassName, constructorTypes, constructorArgs);
    if (created instanceof LogicalOutput) {
        return (LogicalOutput) created;
    }
    throw new TezUncheckedException(created.getClass().getName() + " is not a sub-type of LogicalOutput." + " Only LogicalOutput sub-types supported by LogicalIOProcessor.");
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) OutputDescriptor(org.apache.tez.dag.api.OutputDescriptor) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) Output(org.apache.tez.runtime.api.Output)

Example 8 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class TestProcessor method run.

/**
 * Runs the test processor: starts every input and output, sleeps for
 * {@code sleepMs}, optionally triggers a configured or random failure, then
 * sums the values read from all {@code TestInput}s (seeded with the task
 * attempt number + 1), writes that sum to every {@code TestOutput}, and
 * finally checks the sum against {@code verifyValue} for task indices listed
 * in {@code verifyTaskIndices}.
 *
 * @param inputs logical inputs keyed by name; entries that are not {@code TestInput} are logged and skipped
 * @param outputs logical outputs keyed by name; entries that are not {@code TestOutput} are logged and skipped
 * @throws Exception propagated from starting inputs/outputs, sleeping, reading, writing,
 *         or from {@code throwException} on a triggered failure or verification mismatch
 */
@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
    LOG.info("Sleeping ms: " + sleepMs);
    for (LogicalInput input : inputs.values()) {
        input.start();
    }
    for (LogicalOutput output : outputs.values()) {
        output.start();
    }
    Thread.sleep(sleepMs);
    if (!doRandomFail) {
        // not random fail
        if (doFail) {
            // Fail when this task index (or the failAll marker) is selected AND the
            // attempt number is still within the configured failing range.
            if ((failingTaskIndices.contains(failAll) || failingTaskIndices.contains(getContext().getTaskIndex())) && (failingTaskAttemptUpto == failAll.intValue() || failingTaskAttemptUpto >= getContext().getTaskAttemptNumber())) {
                String msg = "FailingProcessor: " + getContext().getUniqueIdentifier() + " dag: " + getContext().getDAGName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + getContext().getTaskAttemptNumber();
                LOG.info(msg);
                throwException(msg);
            }
        }
    } else {
        // random fail
        // If task attempt number is below limit, try to randomly fail the attempt.
        int taskAttemptNumber = getContext().getTaskAttemptNumber();
        int maxFailedAttempt = conf.getInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT);
        if (taskAttemptNumber < maxFailedAttempt - 1) {
            float rollNumber = (float) Math.random();
            String msg = "FailingProcessor random fail turned on." + " Do a roll: " + getContext().getUniqueIdentifier() + " dag: " + getContext().getDAGName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + taskAttemptNumber + " maxFailedAttempt: " + maxFailedAttempt + " rollNumber: " + rollNumber + " randomFailProbability " + randomFailProbability;
            LOG.info(msg);
            if (rollNumber < randomFailProbability) {
                // fail the attempt
                msg = "FailingProcessor: rollNumber < randomFailProbability. Do fail.";
                LOG.info(msg);
                throwException(msg);
            }
        }
    }
    if (!inputs.isEmpty()) {
        String msg = "Reading input of current FailingProcessor: " + getContext().getUniqueIdentifier() + " dag: " + getContext().getDAGName() + " vertex: " + getContext().getTaskVertexName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + getContext().getTaskAttemptNumber();
        LOG.info(msg);
    }
    // initialize sum to attempt number + 1
    int sum = getContext().getTaskAttemptNumber() + 1;
    LOG.info("initializing vertex= " + getContext().getTaskVertexName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + getContext().getTaskAttemptNumber() + " sum= " + sum);
    // sum = summation of input values
    for (Map.Entry<String, LogicalInput> entry : inputs.entrySet()) {
        if (!(entry.getValue() instanceof TestInput)) {
            LOG.info("Ignoring non TestInput: " + entry.getKey() + " inputClass= " + entry.getValue().getClass().getSimpleName());
            continue;
        }
        TestInput input = (TestInput) entry.getValue();
        int inputValue = input.doRead();
        LOG.info("Reading input: " + entry.getKey() + " inputValue= " + inputValue);
        sum += inputValue;
    }
    if (!outputs.isEmpty()) {
        String msg = "Writing output of current FailingProcessor: " + getContext().getUniqueIdentifier() + " dag: " + getContext().getDAGName() + " vertex: " + getContext().getTaskVertexName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + getContext().getTaskAttemptNumber();
        LOG.info(msg);
    }
    // Every TestOutput receives the same accumulated sum.
    for (Map.Entry<String, LogicalOutput> entry : outputs.entrySet()) {
        if (!(entry.getValue() instanceof TestOutput)) {
            LOG.info("Ignoring non TestOutput: " + entry.getKey() + " outputClass= " + entry.getValue().getClass().getSimpleName());
            continue;
        }
        LOG.info("Writing output: " + entry.getKey() + " sum= " + sum);
        TestOutput output = (TestOutput) entry.getValue();
        output.write(sum);
    }
    LOG.info("Output for DAG: " + getContext().getDAGName() + " vertex: " + getContext().getTaskVertexName() + " task: " + getContext().getTaskIndex() + " attempt: " + getContext().getTaskAttemptNumber() + " is: " + sum);
    // Integer.valueOf replaces the deprecated new Integer(int) boxing constructor.
    if (verifyTaskIndices.contains(Integer.valueOf(getContext().getTaskIndex()))) {
        // A verifyValue of -1 skips the comparison below.
        if (verifyValue != -1 && verifyValue != sum) {
            // expected output value set and not equal to observed value
            String msg = "Expected output mismatch of current FailingProcessor: " + getContext().getUniqueIdentifier() + " dag: " + getContext().getDAGName() + " vertex: " + getContext().getTaskVertexName() + " taskIndex: " + getContext().getTaskIndex() + " taskAttempt: " + getContext().getTaskAttemptNumber();
            msg += "\n" + "Expected output: " + verifyValue + " got: " + sum;
            throwException(msg);
        } else {
            LOG.info("Verified output for DAG: " + getContext().getDAGName() + " vertex: " + getContext().getTaskVertexName() + " task: " + getContext().getTaskIndex() + " attempt: " + getContext().getTaskAttemptNumber() + " is: " + sum);
        }
    }
}
Also used : LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Map(java.util.Map)

Example 9 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class SleepProcessor method run.

/**
 * Runs the sleep processor: records the supplied inputs and outputs, starts
 * them, schedules the progress helper, and then sleeps for
 * {@code timeToSleepMS} milliseconds.
 *
 * <p>If the sleep is interrupted the method returns early without throwing,
 * but restores the thread's interrupt status so that callers can still
 * observe the interruption.
 *
 * @param _inputs logical inputs keyed by name; each one is started
 * @param _outputs logical outputs keyed by name; each one is started
 * @throws Exception propagated from starting an input or output
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    inputs = _inputs;
    outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
    LOG.info("Running the Sleep Processor, sleeping for " + timeToSleepMS + " ms");
    for (LogicalInput input : _inputs.values()) {
        input.start();
    }
    progressHelper.scheduleProgressTaskService(0, 100);
    for (LogicalOutput output : _outputs.values()) {
        output.start();
    }
    try {
        Thread.sleep(timeToSleepMS);
    } catch (InterruptedException ie) {
        // Cutting the sleep short is fine, but do not lose the interrupt:
        // Thread.sleep clears the flag when it throws, so set it again.
        Thread.currentThread().interrupt();
    }
}
Also used : ProgressHelper(org.apache.tez.common.ProgressHelper) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) LogicalInput(org.apache.tez.runtime.api.LogicalInput)

Example 10 with LogicalOutput

use of org.apache.tez.runtime.api.LogicalOutput in project tez by apache.

the class MapProcessor method run.

/**
 * Runs a map task over exactly one input and one output.
 *
 * <p>Starts the input and output, initialises the task with the output,
 * schedules progress reporting, merges any configuration updates reported by
 * the input into {@code jobConf}, obtains a {@link KeyValueWriter} from the
 * output, and runs either the new-API or old-API mapper depending on
 * {@code useNewApi}.
 *
 * @param _inputs logical inputs keyed by name; must contain exactly one entry, an {@code MRInputLegacy}
 * @param _outputs logical outputs keyed by name; must contain exactly one entry, either an
 *        {@code MROutputLegacy} or an {@code OrderedPartitionedKVOutput}
 * @throws IOException if the input/output count is not exactly one each, the input is not an
 *         {@code MRInputLegacy}, or the output type is not one of the supported types
 * @throws Exception propagated from the inputs, outputs, or the mapper run
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
    LOG.info("Running map: " + processorContext.getUniqueIdentifier());

    final int inputCount = _inputs.size();
    final int outputCount = _outputs.size();
    if (!(inputCount == 1 && outputCount == 1)) {
        throw new IOException("Cannot handle multiple _inputs or _outputs" + ", inputCount=" + inputCount + ", outputCount=" + outputCount);
    }

    for (LogicalInput toStart : _inputs.values()) {
        toStart.start();
    }
    for (LogicalOutput toStart : _outputs.values()) {
        toStart.start();
    }

    // Exactly one of each is guaranteed by the size check above.
    final LogicalInput soleInput = _inputs.values().iterator().next();
    final LogicalOutput soleOutput = _outputs.values().iterator().next();
    initTask(soleOutput);
    progressHelper.scheduleProgressTaskService(0, 100);

    // Sanity check
    if (!(soleInput instanceof MRInputLegacy)) {
        throw new IOException(new TezException("Only MRInputLegacy supported. Input: " + soleInput.getClass()));
    }
    final MRInputLegacy legacyInput = (MRInputLegacy) soleInput;
    legacyInput.init();

    // Fold any configuration updates reported by the input into the job conf.
    final Configuration configUpdates = legacyInput.getConfigUpdates();
    if (configUpdates != null) {
        for (Entry<String, String> update : configUpdates) {
            jobConf.set(update.getKey(), update.getValue());
        }
    }

    // Only these two output types are accepted as a writer source.
    final KeyValueWriter writer;
    if (soleOutput instanceof MROutputLegacy) {
        writer = ((MROutputLegacy) soleOutput).getWriter();
    } else if (soleOutput instanceof OrderedPartitionedKVOutput) {
        writer = ((OrderedPartitionedKVOutput) soleOutput).getWriter();
    } else {
        throw new IOException("Illegal output to map, outputClass=" + soleOutput.getClass());
    }

    if (useNewApi) {
        runNewMapper(jobConf, mrReporter, legacyInput, writer);
    } else {
        runOldMapper(jobConf, mrReporter, legacyInput, writer);
    }
    done();
}
Also used : TezException(org.apache.tez.dag.api.TezException) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) ProgressHelper(org.apache.tez.common.ProgressHelper) Configuration(org.apache.hadoop.conf.Configuration) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) LogicalInput(org.apache.tez.runtime.api.LogicalInput) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) MRInputLegacy(org.apache.tez.mapreduce.input.MRInputLegacy)

Aggregations

LogicalOutput (org.apache.tez.runtime.api.LogicalOutput)14 LogicalInput (org.apache.tez.runtime.api.LogicalInput)8 IOException (java.io.IOException)4 Configuration (org.apache.hadoop.conf.Configuration)4 ProgressHelper (org.apache.tez.common.ProgressHelper)4 MROutput (org.apache.tez.mapreduce.output.MROutput)3 KeyValueWriter (org.apache.tez.runtime.library.api.KeyValueWriter)3 Map (java.util.Map)2 ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext)2 TezKVOutputCollector (org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector)2 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)2 JobConf (org.apache.hadoop.mapred.JobConf)2 TezException (org.apache.tez.dag.api.TezException)2 TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException)2 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)2 MROutputLegacy (org.apache.tez.mapreduce.output.MROutputLegacy)2 KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader)2 JarURLConnection (java.net.JarURLConnection)1 URL (java.net.URL)1 ArrayList (java.util.ArrayList)1