Use of org.apache.tez.mapreduce.processor.MRTaskReporter in project hive by apache.
The class TezProcessor, method initializeAndRunProcessor:
protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {
  Throwable originalThrowable = null;
  try {
    MRTaskReporter mrReporter = new MRTaskReporter(getContext());
    // Init and run are both potentially long, and blocking operations. Synchronization
    // with the 'abort' operation will not work since if they end up blocking on a monitor
    // which does not belong to the lock, the abort will end up getting blocked.
    // Both of these method invocations need to handle the abort call on their own.
    rproc.init(mrReporter, inputs, outputs);
    rproc.run();
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR);
  } catch (Throwable t) {
    rproc.setAborted(true);
    originalThrowable = t;
  } finally {
    if (originalThrowable != null && (originalThrowable instanceof Error
        || Throwables.getRootCause(originalThrowable) instanceof Error)) {
      LOG.error("Cannot recover from this FATAL error", originalThrowable);
      getContext().reportFailure(TaskFailureType.FATAL, originalThrowable,
          "Cannot recover from this error");
      throw new RuntimeException(originalThrowable);
    }
    try {
      if (rproc != null) {
        rproc.close();
      }
    } catch (Throwable t) {
      if (originalThrowable == null) {
        originalThrowable = t;
      }
    }
    // commit the output tasks
    try {
      for (LogicalOutput output : outputs.values()) {
        if (output instanceof MROutput) {
          MROutput mrOutput = (MROutput) output;
          if (mrOutput.isCommitRequired()) {
            mrOutput.commit();
          }
        }
      }
    } catch (Throwable t) {
      if (originalThrowable == null) {
        originalThrowable = t;
      }
    }
    if (originalThrowable != null) {
      LOG.error("Failed initializeAndRunProcessor", originalThrowable);
      // abort the output tasks
      for (LogicalOutput output : outputs.values()) {
        if (output instanceof MROutput) {
          MROutput mrOutput = (MROutput) output;
          if (mrOutput.isCommitRequired()) {
            mrOutput.abort();
          }
        }
      }
      if (originalThrowable instanceof InterruptedException) {
        throw (InterruptedException) originalThrowable;
      } else {
        throw new RuntimeException(originalThrowable);
      }
    }
  }
}
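The pattern above uses MRTaskReporter as the bridge that lets MapReduce-era operator code report progress and status through a Tez ProcessorContext. A minimal sketch of that bridging, assuming only that MRTaskReporter implements the classic org.apache.hadoop.mapred.Reporter interface (which its use above suggests); the ReporterBridgeExample class and its wrap method are illustrative, not part of either project:

import org.apache.hadoop.mapred.Reporter;
import org.apache.tez.mapreduce.processor.MRTaskReporter;
import org.apache.tez.runtime.api.ProcessorContext;

public class ReporterBridgeExample {

  // Wrap the Tez ProcessorContext so legacy code that expects a mapred
  // Reporter can report status and liveness without knowing about Tez.
  public static Reporter wrap(ProcessorContext context) {
    MRTaskReporter reporter = new MRTaskReporter(context);
    reporter.setStatus("processor initialized"); // status surfaces in task diagnostics
    reporter.progress(); // heartbeat; long init/run loops should call this periodically
    return reporter;
  }
}

Handing the reporter to rproc.init, as Hive does, keeps the processor code agnostic of whether it runs under classic MapReduce or Tez.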
Use of org.apache.tez.mapreduce.processor.MRTaskReporter in project tez by apache.
The class MROutput, method initializeBase:
protected List<Event> initializeBase() throws IOException, InterruptedException {
  // mandatory call
  getContext().requestInitialMemory(0L, null);
  taskNumberFormat.setMinimumIntegerDigits(5);
  taskNumberFormat.setGroupingUsed(false);
  nonTaskNumberFormat.setMinimumIntegerDigits(3);
  nonTaskNumberFormat.setGroupingUsed(false);
  Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload());
  this.jobConf = new JobConf(conf);
  // Add tokens to the jobConf - in case they are accessed within the RW / OF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());
  this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  if (this.isMapperOutput) {
    this.useNewApi = this.jobConf.getUseNewMapper();
  } else {
    this.useNewApi = this.jobConf.getUseNewReducer();
  }
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
  TaskAttemptID taskAttemptId =
      org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl.createMockTaskAttemptID(
          getContext().getApplicationId().getClusterTimestamp(),
          getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(),
          getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), isMapperOutput);
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
  jobConf.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());
  String outputFormatClassName;
  outputRecordCounter = getContext().getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  if (useNewApi) {
    // set the output part name to have a unique prefix
    if (jobConf.get(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME) == null) {
      jobConf.set(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, getOutputFileNamePrefix());
    }
    newApiTaskAttemptContext = createTaskAttemptContext(taskAttemptId);
    try {
      newOutputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(
          newApiTaskAttemptContext.getOutputFormatClass(), jobConf);
      outputFormatClassName = newOutputFormat.getClass().getName();
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }
    initCommitter(jobConf, useNewApi);
  } else {
    oldApiTaskAttemptContext = new org.apache.tez.mapreduce.hadoop.mapred.TaskAttemptContextImpl(
        jobConf, taskAttemptId, new MRTaskReporter(getContext()));
    oldOutputFormat = jobConf.getOutputFormat();
    outputFormatClassName = oldOutputFormat.getClass().getName();
    initCommitter(jobConf, useNewApi);
  }
  LOG.info(getContext().getDestinationVertexName() + ": " + "outputFormat="
      + outputFormatClassName + ", using newmapreduce API=" + useNewApi);
  return null;
}
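In the old-API branch above, the MRTaskReporter passed to TaskAttemptContextImpl also works as the Progressable that classic mapred RecordWriters use to signal liveness during long writes, since the mapred Reporter interface extends Progressable. A hedged sketch of that downstream use, assuming the standard org.apache.hadoop.mapred.OutputFormat.getRecordWriter signature; OldApiWriterExample, openWriter, and the fileName parameter are illustrative, not MROutput's actual code:

import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.tez.mapreduce.processor.MRTaskReporter;
import org.apache.tez.runtime.api.OutputContext;

public class OldApiWriterExample {

  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static RecordWriter openWriter(OutputContext context, JobConf jobConf,
      String fileName) throws IOException {
    // The same reporter type MROutput builds in initializeBase; Reporter
    // extends Progressable, which is what getRecordWriter expects last.
    MRTaskReporter reporter = new MRTaskReporter(context);
    OutputFormat outputFormat = jobConf.getOutputFormat();
    // The FileSystem argument of the old API is ignored by FileOutputFormat
    // implementations, so passing null is conventional.
    return outputFormat.getRecordWriter(null, jobConf, fileName, reporter);
  }
}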