Use of org.apache.hadoop.mapreduce.RecordReader in project asterixdb by apache.
From the class HDFSReadOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat =
                        ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read
                         * synchronize among simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }
                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
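
The pattern above is the standard way to drive a new-API RecordReader by hand: ask the InputFormat for a reader, call initialize() with the split and a TaskAttemptContext, loop on nextKeyValue(), and fetch each pair with getCurrentKey()/getCurrentValue(). The following is a minimal standalone sketch of that same sequence outside Hyracks, using TextInputFormat against a local file; the path file:///tmp/sample.txt and the single whole-file split are placeholders, and it assumes the Hadoop client libraries are on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("file:///tmp/sample.txt"); // placeholder input file
        long len = file.getFileSystem(conf).getFileStatus(file).getLen();

        // One split covering the whole file, read in the current process.
        FileSplit split = new FileSplit(file, 0, len, new String[0]);
        TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // Same sequence as in the operator above: create, initialize, iterate, close.
        TextInputFormat inputFormat = new TextInputFormat();
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
        try {
            reader.initialize(split, context);
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
        } finally {
            reader.close();
        }
    }
}
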
Use of org.apache.hadoop.mapreduce.RecordReader in project ignite by apache.
From the class HadoopV2MapTask, method run0:
/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        HadoopV2Context hadoopCtx = hadoopContext();

        InputSplit nativeSplit = hadoopCtx.getInputSplit();

        if (nativeSplit == null)
            throw new IgniteCheckedException("Input split cannot be null.");

        InputFormat inFormat =
            ReflectionUtils.newInstance(jobCtx.getInputFormatClass(), hadoopCtx.getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
        reader.initialize(nativeSplit, hadoopCtx);

        hadoopCtx.reader(reader);

        HadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopCtx));

            taskCtx.onMapperFinished();
        }
        finally {
            closeWriter();
        }

        commit(outputFormat);
    }
    catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
    catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    }
    finally {
        HadoopMapperUtils.clearMapperIndex();

        if (err != null)
            abort(outputFormat);
    }
}
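
In this example the RecordReader is not iterated directly: it is registered on the HadoopV2Context via hadoopCtx.reader(reader), and the record loop runs inside Mapper.run(Context), where context.nextKeyValue(), getCurrentKey(), and getCurrentValue() are forwarded to that reader. The sketch below is a hypothetical identity mapper (not Ignite or Hadoop source) whose overridden run() spells out the loop that the default Mapper.run essentially performs, to make that delegation visible.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: run() mirrors what the default Mapper.run(Context) does,
// so every nextKeyValue()/getCurrentKey()/getCurrentValue() call on the context
// ends up on the RecordReader that the task framework installed behind it.
public class PassThroughMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } finally {
            cleanup(context);
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(key, value); // identity: pass each record through unchanged
    }
}

Handing the reader to the context rather than looping over it in the task keeps the split-reading logic behind the standard Mapper contract, so user mapper classes run unchanged inside Ignite's execution engine.
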