
Example 1 with IKeyValueParser

Use of org.apache.hyracks.hdfs.api.IKeyValueParser in project asterixdb by apache.

From the class HDFSReadOperatorDescriptor (the mapred-API variant), method createPushRuntime:

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                try {
                    parser.open(writer);
                    InputFormat inputFormat = conf.getInputFormat();
                    for (int i = 0; i < inputSplits.length; i++) {
                        /**
                         * read all the partitions scheduled to the current node
                         */
                        if (scheduledLocations[i].equals(nodeName)) {
                            /**
                             * pick an unread split to read;
                             * synchronize among simultaneous partitions on the same machine
                             */
                            synchronized (executed) {
                                if (!executed[i]) {
                                    executed[i] = true;
                                } else {
                                    continue;
                                }
                            }
                            /**
                             * read the split
                             */
                            RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                            Object key = reader.createKey();
                            Object value = reader.createValue();
                            while (reader.next(key, value)) {
                                parser.parse(key, value, writer, inputSplits[i].toString());
                            }
                            // release the split's resources once it is fully consumed
                            reader.close();
                        }
                    }
                } finally {
                    parser.close(writer);
                }
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used:
AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable)
RecordReader (org.apache.hadoop.mapred.RecordReader)
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser)
InputFormat (org.apache.hadoop.mapred.InputFormat)
InputSplit (org.apache.hadoop.mapred.InputSplit)
JobConf (org.apache.hadoop.mapred.JobConf)
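The operator above delegates all per-record translation to whatever parser `tupleParserFactory` produces. As a rough sketch of how a custom factory could plug in, here is a hypothetical `UpperCaseTextParserFactory` modeled directly on TextKeyValueParserFactory (Example 3 below); the class name and the upper-casing step are invented for illustration, and the import paths for FrameUtils, IHyracksTaskContext, and IKeyValueParserFactory follow the usual hyracks package layout rather than anything shown on this page.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
import org.apache.hyracks.hdfs.api.IKeyValueParser;
import org.apache.hyracks.hdfs.api.IKeyValueParserFactory;

public class UpperCaseTextParserFactory implements IKeyValueParserFactory<LongWritable, Text> {

    private static final long serialVersionUID = 1L;

    @Override
    public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx)
            throws HyracksDataException {
        final ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
        final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
        return new IKeyValueParser<LongWritable, Text>() {

            @Override
            public void open(IFrameWriter writer) {
                // no setup needed; frames are flushed as they fill up
            }

            @Override
            public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
                    throws HyracksDataException {
                // transform the line, then emit it as a one-field tuple
                byte[] upper = value.toString().toUpperCase().getBytes(StandardCharsets.UTF_8);
                tb.reset();
                tb.addField(upper, 0, upper.length);
                FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0,
                        tb.getSize());
            }

            @Override
            public void close(IFrameWriter writer) throws HyracksDataException {
                // flush the final, possibly partial frame
                appender.write(writer, false);
            }
        };
    }
}

The operator guarantees the lifecycle this sketch relies on: open once before any split is read, parse once per record, close once after all splits.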

Example 2 with IKeyValueParser

Use of org.apache.hyracks.hdfs.api.IKeyValueParser in project asterixdb by apache.

From the class HDFSReadOperatorDescriptor (the mapreduce-API variant), method createPushRuntime:

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read;
                         * synchronize among simultaneous partitions on the same machine
                         */
                        synchronized (executed) {
                            if (!executed[i]) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }
                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue()) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer, inputSplits.get(i).toString());
                        }
                        // release the split's resources once it is fully consumed
                        reader.close();
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used:
AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable)
RecordReader (org.apache.hadoop.mapreduce.RecordReader)
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit)
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
ContextFactory (org.apache.hyracks.hdfs.ContextFactory)
IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser)
InputFormat (org.apache.hadoop.mapreduce.InputFormat)
Job (org.apache.hadoop.mapreduce.Job)
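Both variants coordinate through the same pattern: `scheduledLocations[i]` maps each split to a node, and the shared `executed` array, check-and-set under its monitor, ensures each split is read exactly once even when several partitions of the operator run on the same machine. A stand-alone sketch of just that claiming pattern, with invented names (`SplitClaimSketch`, `claimAndProcess`, `SPLITS`) and console output in place of actual split reading:

public class SplitClaimSketch {

    private static final int SPLITS = 8;
    private static final boolean[] executed = new boolean[SPLITS];

    static void claimAndProcess(String worker) {
        for (int i = 0; i < SPLITS; i++) {
            // check-and-set under the array's monitor, exactly as the
            // operators do, so two partitions never read the same split
            synchronized (executed) {
                if (executed[i]) {
                    continue; // another partition already took this split
                }
                executed[i] = true;
            }
            System.out.println(worker + " reads split " + i);
        }
    }

    public static void main(String[] args) throws InterruptedException {
        // two "partitions" on the same node racing over the same splits
        Thread a = new Thread(() -> claimAndProcess("partition-0"));
        Thread b = new Thread(() -> claimAndProcess("partition-1"));
        a.start();
        b.start();
        a.join();
        b.join();
    }
}

Running this prints each split number exactly once, split between the two workers; holding the monitor only for the check-and-set keeps the actual (slow) split reading outside the critical section.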

Example 3 with IKeyValueParser

Use of org.apache.hyracks.hdfs.api.IKeyValueParser in project asterixdb by apache.

From the class TextKeyValueParserFactory, method createKeyValueParser:

@Override
public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) throws HyracksDataException {
    final ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
    final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    return new IKeyValueParser<LongWritable, Text>() {

        @Override
        public void open(IFrameWriter writer) {
            // nothing to initialize; tuples are appended as parse() is called
        }

        @Override
        public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString) throws HyracksDataException {
            // emit each text line as a single-field tuple; the byte-offset key is not kept
            tb.reset();
            tb.addField(value.getBytes(), 0, value.getLength());
            FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
        }

        @Override
        public void close(IFrameWriter writer) throws HyracksDataException {
            // flush the last, possibly partially filled frame downstream
            appender.write(writer, false);
        }
    };
}
Also used:
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)
IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser)
FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender)
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)
Text (org.apache.hadoop.io.Text)
LongWritable (org.apache.hadoop.io.LongWritable)
VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)
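Driving this parser outside the read operators follows the same lifecycle contract they enforce: open once, parse per record, close once. A hypothetical test-style fragment, assuming a task context `ctx` and a downstream `writer` are supplied by the enclosing operator or test harness; the method name `feedLines`, the split tag "inline-test-split", and TextKeyValueParserFactory having a no-arg constructor are assumptions, and the imports follow the "Also used" list above plus java.util.List:

void feedLines(IHyracksTaskContext ctx, IFrameWriter writer, List<String> lines)
        throws HyracksDataException {
    IKeyValueParser<LongWritable, Text> parser =
            new TextKeyValueParserFactory().createKeyValueParser(ctx);
    parser.open(writer);
    long offset = 0;
    for (String line : lines) {
        // the running byte offset stands in for the key an HDFS
        // TextInputFormat record reader would supply
        parser.parse(new LongWritable(offset), new Text(line), writer, "inline-test-split");
        offset += line.length() + 1; // +1 for the newline
    }
    parser.close(writer); // flushes the last buffered frame
}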

Aggregations

IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser): 3
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 2
AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable): 2
LongWritable (org.apache.hadoop.io.LongWritable): 1
Text (org.apache.hadoop.io.Text): 1
InputFormat (org.apache.hadoop.mapred.InputFormat): 1
InputSplit (org.apache.hadoop.mapred.InputSplit): 1
JobConf (org.apache.hadoop.mapred.JobConf): 1
RecordReader (org.apache.hadoop.mapred.RecordReader): 1
InputFormat (org.apache.hadoop.mapreduce.InputFormat): 1
Job (org.apache.hadoop.mapreduce.Job): 1
RecordReader (org.apache.hadoop.mapreduce.RecordReader): 1
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 1
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 1
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter): 1
VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame): 1
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 1
FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender): 1
ContextFactory (org.apache.hyracks.hdfs.ContextFactory): 1