Search in sources :

Example 16 with InputFormat

Use of org.apache.hadoop.mapreduce.InputFormat in the Apache Ignite project.

Class HadoopV2MapTask, method run0.

/**
 * {@inheritDoc}
 * <p>
 * Executes the map step for the input split held by the Hadoop context: publishes (or clears)
 * the mapper index, instantiates the job's input format and mapper reflectively, attaches an
 * initialized record reader to the context, runs the mapper and finally calls
 * {@code commit(outputFormat)}. When an exception is caught, {@code abort(outputFormat)} is
 * invoked before the wrapping exception leaves this method.
 *
 * @param taskCtx Task context providing the job context, the task info and the job.
 * @throws IgniteCheckedException If the input split is {@code null} or any exception is raised
 *      while running the task; an interruption is rethrown as
 *      {@code IgniteInterruptedCheckedException} after the interrupt flag is restored.
 */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    // Stays null unless this task writes its output directly (see the guard below).
    OutputFormat outFormat = null;

    // Set in the catch blocks so the outer finally knows whether to abort.
    boolean failed = false;

    JobContextImpl jobContext = taskCtx.jobContext();

    if (taskCtx.taskInfo().hasMapperIndex()) {
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    } else {
        HadoopMapperUtils.clearMapperIndex();
    }

    try {
        HadoopV2Context ctx = hadoopContext();

        InputSplit split = ctx.getInputSplit();

        if (split == null) {
            throw new IgniteCheckedException("Input split cannot be null.");
        }

        InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), ctx.getConfiguration());

        RecordReader recordReader = inputFormat.createRecordReader(split, ctx);
        recordReader.initialize(split, ctx);
        ctx.reader(recordReader);

        HadoopJobInfo info = taskCtx.job().info();

        // A writer is prepared only when the job has neither a combiner nor a reducer.
        if (!info.hasCombiner() && !info.hasReducer()) {
            outFormat = prepareWriter(jobContext);
        }

        Mapper mapperInstance = ReflectionUtils.newInstance(jobContext.getMapperClass(), ctx.getConfiguration());

        try {
            mapperInstance.run(new WrappedMapper().getMapContext(ctx));

            taskCtx.onMapperFinished();
        } finally {
            // The writer is closed whether or not the mapper succeeded.
            closeWriter();
        }

        commit(outFormat);
    } catch (InterruptedException e) {
        failed = true;

        // Restore the interrupt flag before translating the exception.
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        failed = true;

        throw new IgniteCheckedException(e);
    } finally {
        HadoopMapperUtils.clearMapperIndex();

        if (failed) {
            abort(outFormat);
        }
    }
}
Also used : HadoopJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopJobInfo) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) RecordReader(org.apache.hadoop.mapreduce.RecordReader) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) Mapper(org.apache.hadoop.mapreduce.Mapper) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) InputFormat(org.apache.hadoop.mapreduce.InputFormat) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Aggregations

InputFormat (org.apache.hadoop.mapreduce.InputFormat)16 Path (org.apache.hadoop.fs.Path)9 Configuration (org.apache.hadoop.conf.Configuration)8 InputSplit (org.apache.hadoop.mapreduce.InputSplit)7 Job (org.apache.hadoop.mapreduce.Job)6 Test (org.junit.Test)6 RecordReader (org.apache.hadoop.mapreduce.RecordReader)5 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)5 HashMap (java.util.HashMap)3 Map (java.util.Map)3 Mapper (org.apache.hadoop.mapreduce.Mapper)3 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)3 FileInputFormat (org.apache.hadoop.mapreduce.lib.input.FileInputFormat)3 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 KeyValueTextInputFormat (org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat)2 Input (co.cask.cdap.api.data.batch.Input)1 InputFormatProvider (co.cask.cdap.api.data.batch.InputFormatProvider)1 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)1