Use of org.apache.hadoop.mapreduce.lib.map.WrappedMapper in project hadoop by apache.
In the class MapTask, the method runNewMapper:
@SuppressWarnings("unchecked")
private <INKEY, INVALUE, OUTKEY, OUTVALUE>
void runNewMapper(final JobConf job,
                  final TaskSplitIndex splitIndex,
                  final TaskUmbilicalProtocol umbilical,
                  TaskReporter reporter)
    throws IOException, ClassNotFoundException, InterruptedException {
  // make a task context so we can get the classes
  org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
      new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job, getTaskID(), reporter);
  // make a mapper
  org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper =
      (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>)
          ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
  // make the input format
  org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE> inputFormat =
      (org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE>)
          ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  split = getSplitDetails(new Path(splitIndex.getSplitLocation()), splitIndex.getStartOffset());
  LOG.info("Processing split: " + split);
  org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input =
      new NewTrackingRecordReader<INKEY, INVALUE>(split, inputFormat, reporter, taskContext);
  job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
  org.apache.hadoop.mapreduce.RecordWriter output = null;
  // get an output object
  if (job.getNumReduceTasks() == 0) {
    output = new NewDirectOutputCollector(taskContext, job, umbilical, reporter);
  } else {
    output = new NewOutputCollector(taskContext, job, umbilical, reporter);
  }
  org.apache.hadoop.mapreduce.MapContext<INKEY, INVALUE, OUTKEY, OUTVALUE> mapContext =
      new MapContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(job, getTaskID(),
          input, output, committer, reporter, split);
  org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext =
      new WrappedMapper<INKEY, INVALUE, OUTKEY, OUTVALUE>().getMapContext(mapContext);
  try {
    input.initialize(split, mapperContext);
    mapper.run(mapperContext);
    mapPhase.complete();
    setPhase(TaskStatus.Phase.SORT);
    statusUpdate(umbilical);
    input.close();
    input = null;
    output.close(mapperContext);
    output = null;
  } finally {
    closeQuietly(input);
    closeQuietly(output, mapperContext);
  }
}
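The essential move above is the pair MapContextImpl -> WrappedMapper.getMapContext(): the framework builds a full MapContext over the reader, writer, committer and split, and WrappedMapper re-exposes it as the Mapper.Context type that user map() code is written against. Below is a minimal sketch of that pattern in isolation; the runMapper helper and its parameter list are illustrative, not part of Hadoop.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.task.MapContextImpl;

// Illustrative helper (not Hadoop code): runs a mapper over a single split.
static <K1, V1, K2, V2> void runMapper(Configuration conf, TaskAttemptID taskId,
                                       Mapper<K1, V1, K2, V2> mapper,
                                       RecordReader<K1, V1> reader,
                                       RecordWriter<K2, V2> writer,
                                       OutputCommitter committer,
                                       StatusReporter reporter,
                                       InputSplit split)
    throws IOException, InterruptedException {
  // Build the framework-level context over the task's plumbing...
  MapContext<K1, V1, K2, V2> mapContext =
      new MapContextImpl<K1, V1, K2, V2>(conf, taskId, reader, writer, committer, reporter, split);
  // ...and adapt it to the Mapper.Context type that user code receives.
  Mapper<K1, V1, K2, V2>.Context mapperContext =
      new WrappedMapper<K1, V1, K2, V2>().getMapContext(mapContext);
  reader.initialize(split, mapperContext);
  mapper.run(mapperContext);
  reader.close();
  writer.close(mapperContext);
}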
Use of org.apache.hadoop.mapreduce.lib.map.WrappedMapper in project cdap by caskdata.
In the class MapperWrapper, the method createAutoFlushingContext:
private WrappedMapper.Context createAutoFlushingContext(final Context context,
                                                         final BasicMapReduceTaskContext basicMapReduceContext) {
  // NOTE: we will change auto-flush to take into account size of buffered data,
  // so no need to do/test a lot with current approach
  final int flushFreq = context.getConfiguration().getInt("c.mapper.flush.freq", 10000);
  @SuppressWarnings("unchecked")
  WrappedMapper.Context flushingContext = new WrappedMapper().new Context(context) {
    private int processedRecords = 0;

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean result = super.nextKeyValue();
      if (++processedRecords > flushFreq) {
        try {
          LOG.trace("Flushing dataset operations...");
          basicMapReduceContext.flushOperations();
        } catch (Exception e) {
          LOG.error("Failed to persist changes", e);
          throw Throwables.propagate(e);
        }
        processedRecords = 0;
      }
      return result;
    }

    @Override
    public InputSplit getInputSplit() {
      InputSplit inputSplit = super.getInputSplit();
      if (inputSplit instanceof TaggedInputSplit) {
        // expose the delegate InputSplit to the user
        inputSplit = ((TaggedInputSplit) inputSplit).getInputSplit();
      }
      return inputSplit;
    }

    @Override
    public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
      InputSplit inputSplit = super.getInputSplit();
      if (inputSplit instanceof MultiInputTaggedSplit) {
        // expose the delegate InputFormat to the user
        return ((MultiInputTaggedSplit) inputSplit).getInputFormatClass();
      }
      return super.getInputFormatClass();
    }
  };
  return flushingContext;
}
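The CDAP snippet relies on WrappedMapper.Context being a plain delegating wrapper: every method forwards to the MapContext handed to its constructor, so an anonymous subclass can intercept individual calls (here nextKeyValue()) and bolt a side effect onto every record without touching the user's mapper. A stripped-down sketch of the same decoration follows; the withPeriodicCallback helper is hypothetical and not part of CDAP or Hadoop.

import java.io.IOException;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;

// Hypothetical helper: wraps an existing context so that `callback` runs after
// every `every` records, using the same WrappedMapper.Context delegation trick.
@SuppressWarnings({"rawtypes", "unchecked"})
static WrappedMapper.Context withPeriodicCallback(final Mapper.Context delegate,
                                                  final int every,
                                                  final Runnable callback) {
  return new WrappedMapper().new Context(delegate) {
    private int seen = 0;

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = super.nextKeyValue();
      if (++seen >= every) {
        callback.run();   // e.g. flush buffered writes
        seen = 0;
      }
      return hasNext;
    }
  };
}

The mapper then runs against the returned context exactly as it would against the original one.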
Use of org.apache.hadoop.mapreduce.lib.map.WrappedMapper in project ignite by apache.
In the class HadoopV2MapTask, the method run0:
/** {@inheritDoc} */
@SuppressWarnings({"ConstantConditions", "unchecked"})
@Override public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
  OutputFormat outputFormat = null;
  Exception err = null;
  JobContextImpl jobCtx = taskCtx.jobContext();
  if (taskCtx.taskInfo().hasMapperIndex())
    HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
  else
    HadoopMapperUtils.clearMapperIndex();
  try {
    HadoopV2Context hadoopCtx = hadoopContext();
    InputSplit nativeSplit = hadoopCtx.getInputSplit();
    if (nativeSplit == null)
      throw new IgniteCheckedException("Input split cannot be null.");
    InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
        hadoopCtx.getConfiguration());
    RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
    reader.initialize(nativeSplit, hadoopCtx);
    hadoopCtx.reader(reader);
    HadoopJobInfo jobInfo = taskCtx.job().info();
    outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);
    Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());
    try {
      mapper.run(new WrappedMapper().getMapContext(hadoopCtx));
      taskCtx.onMapperFinished();
    } finally {
      closeWriter();
    }
    commit(outputFormat);
  } catch (InterruptedException e) {
    err = e;
    Thread.currentThread().interrupt();
    throw new IgniteInterruptedCheckedException(e);
  } catch (Exception e) {
    err = e;
    throw new IgniteCheckedException(e);
  } finally {
    HadoopMapperUtils.clearMapperIndex();
    if (err != null)
      abort(outputFormat);
  }
}
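Here Ignite passes its own MapContext implementation (HadoopV2Context) to getMapContext, and that is enough because a stock Mapper only ever touches the context it is given. The default Mapper.run() in Hadoop is essentially the loop below (shown slightly simplified for reference), so any context that can answer nextKeyValue(), getCurrentKey() and getCurrentValue(), plus write() from within map(), can drive an unmodified mapper.

// The driving loop in org.apache.hadoop.mapreduce.Mapper, slightly simplified.
public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    while (context.nextKeyValue()) {
      map(context.getCurrentKey(), context.getCurrentValue(), context);
    }
  } finally {
    cleanup(context);
  }
}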