Search in sources :

Example 1 with MRTaskReporter

Use of org.apache.tez.mapreduce.processor.MRTaskReporter in the project hive by apache.

The method initializeAndRunProcessor of the class TezProcessor:

/**
 * Initializes the record processor with the given inputs/outputs, runs it, and then
 * performs cleanup: closing the processor, committing MR outputs on success, and
 * aborting them if anything failed along the way.
 *
 * @param inputs  logical inputs keyed by name, passed through to {@code rproc.init}
 * @param outputs logical outputs keyed by name; MROutput instances are committed or aborted here
 * @throws Exception the original failure (InterruptedException rethrown as-is,
 *                   anything else wrapped in RuntimeException); Errors are reported
 *                   as FATAL to the Tez framework before rethrowing
 */
protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
    Throwable originalThrowable = null;
    try {
        MRTaskReporter mrReporter = new MRTaskReporter(getContext());
        // Init and run are both potentially long, and blocking operations. Synchronization
        // with the 'abort' operation will not work since if they end up blocking on a monitor
        // which does not belong to the lock, the abort will end up getting blocked.
        // Both of these method invocations need to handle the abort call on their own.
        rproc.init(mrReporter, inputs, outputs);
        rproc.run();
        // Only reached on success; a failed run skips straight to the catch/finally.
        perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
    } catch (Throwable t) {
        // Signal the processor to stop any in-flight work, then remember the failure
        // so cleanup below can decide between commit and abort.
        rproc.setAborted(true);
        originalThrowable = t;
    } finally {
        // Errors (e.g. OOM) are considered unrecoverable: report FATAL to the framework
        // and bail out immediately, skipping commit/abort handling.
        if (originalThrowable != null && (originalThrowable instanceof Error || Throwables.getRootCause(originalThrowable) instanceof Error)) {
            LOG.error("Cannot recover from this FATAL error", originalThrowable);
            getContext().reportFailure(TaskFailureType.FATAL, originalThrowable, "Cannot recover from this error");
            throw new RuntimeException(originalThrowable);
        }
        try {
            if (rproc != null) {
                rproc.close();
            }
        } catch (Throwable t) {
            if (originalThrowable == null) {
                originalThrowable = t;
            } else {
                // Don't silently drop the secondary failure; keep it for diagnostics.
                originalThrowable.addSuppressed(t);
            }
        }
        // commit the output tasks
        try {
            for (LogicalOutput output : outputs.values()) {
                if (output instanceof MROutput) {
                    MROutput mrOutput = (MROutput) output;
                    if (mrOutput.isCommitRequired()) {
                        mrOutput.commit();
                    }
                }
            }
        } catch (Throwable t) {
            if (originalThrowable == null) {
                originalThrowable = t;
            } else {
                // A commit failure during cleanup of an earlier failure is attached, not lost.
                originalThrowable.addSuppressed(t);
            }
        }
        if (originalThrowable != null) {
            LOG.error("Failed initializeAndRunProcessor", originalThrowable);
            // abort the output tasks
            for (LogicalOutput output : outputs.values()) {
                if (output instanceof MROutput) {
                    MROutput mrOutput = (MROutput) output;
                    if (mrOutput.isCommitRequired()) {
                        mrOutput.abort();
                    }
                }
            }
            // Propagate interruption as-is so callers can react to cancellation;
            // everything else is wrapped to preserve the original cause.
            if (originalThrowable instanceof InterruptedException) {
                throw (InterruptedException) originalThrowable;
            } else {
                throw new RuntimeException(originalThrowable);
            }
        }
    }
}
Also used : MRTaskReporter(org.apache.tez.mapreduce.processor.MRTaskReporter) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutput(org.apache.tez.mapreduce.output.MROutput)

Example 2 with MRTaskReporter

Use of org.apache.tez.mapreduce.processor.MRTaskReporter in the project tez by apache.

The method initializeBase of the class MROutput:

/**
 * Common initialization for MROutput: builds the JobConf from the user payload,
 * fakes an MR TaskAttemptID for this Tez task, and instantiates the configured
 * OutputFormat and committer for whichever MR API (new/old) is in use.
 *
 * @return always {@code null}; no initialization events are generated
 * @throws IOException          if the user payload cannot be parsed or the
 *                              new-API OutputFormat class cannot be found
 * @throws InterruptedException declared for committer/context APIs that may block
 */
protected List<Event> initializeBase() throws IOException, InterruptedException {
    // mandatory call (use 'L' suffix: lowercase 'l' is easily misread as '1')
    getContext().requestInitialMemory(0L, null);
    // Format task numbers as e.g. "00005" (no grouping separators) for output file names.
    taskNumberFormat.setMinimumIntegerDigits(5);
    taskNumberFormat.setGroupingUsed(false);
    nonTaskNumberFormat.setMinimumIntegerDigits(3);
    nonTaskNumberFormat.setGroupingUsed(false);
    Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RW / OF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
    this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
    // Map vs reduce side decides which API flag (new mapper/new reducer) is authoritative.
    if (this.isMapperOutput) {
        this.useNewApi = this.jobConf.getUseNewMapper();
    } else {
        this.useNewApi = this.jobConf.getUseNewReducer();
    }
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
    // Synthesize an MR TaskAttemptID from the Tez task identity so MR-era
    // OutputFormats/committers see the ids they expect.
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl.createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
    jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
    jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
    String outputFormatClassName;
    outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
    if (useNewApi) {
        // set the output part name to have a unique prefix
        if (jobConf.get(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME) == null) {
            jobConf.set(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, getOutputFileNamePrefix());
        }
        newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
        try {
            newOutputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
            outputFormatClassName = newOutputFormat.getClass().getName();
        } catch (ClassNotFoundException cnfe) {
            // Preserve the cause so callers can see which OutputFormat class was missing.
            throw new IOException(cnfe);
        }
        initCommitter(jobConf, useNewApi);
    } else {
        oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(jobConf, taskAttemptId, new MRTaskReporter(getContext()));
        oldOutputFormat = jobConf.getOutputFormat();
        outputFormatClassName = oldOutputFormat.getClass().getName();
        initCommitter(jobConf, useNewApi);
    }
    LOG.info(getContext().getDestinationVertexName() + ": " + "outputFormat=" + outputFormatClassName + ", using newmapreduce API=" + useNewApi);
    return null;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) MRTaskReporter(org.apache.tez.mapreduce.processor.MRTaskReporter) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

MRTaskReporter (org.apache.tez.mapreduce.processor.MRTaskReporter)2 IOException (java.io.IOException)1 Configuration (org.apache.hadoop.conf.Configuration)1 JobConf (org.apache.hadoop.mapred.JobConf)1 TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID)1 MROutput (org.apache.tez.mapreduce.output.MROutput)1 LogicalOutput (org.apache.tez.runtime.api.LogicalOutput)1 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)1