Search in sources:
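
Every example below follows the same shape: an anonymous subclass of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable overrides initialize(), produces data, and pushes it to the inherited writer while observing the IFrameWriter open/fail/close protocol. As a minimal sketch of that contract (the frame-producing step is a placeholder; the writer field and the failure handling mirror the examples below):

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider provider, int partition, int nPartitions) {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            try {
                writer.open();          // open the downstream frame writer first
                // produce frames here and push them with writer.nextFrame(...)
            } catch (Throwable th) {
                writer.fail();          // signal failure downstream before closing
                throw new HyracksDataException(th);
            } finally {
                writer.close();         // always close, even on failure
            }
        }
    };
}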

Example 1 with AbstractUnaryOutputSourceOperatorNodePushable

Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.

From the class HDFSReadOperatorDescriptor (the org.apache.hadoop.mapred variant), method createPushRuntime:

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                try {
                    parser.open(writer);
                    InputFormat inputFormat = conf.getInputFormat();
                    for (int i = 0; i < inputSplits.length; i++) {
                        // read all the partitions scheduled to the current node
                        if (scheduledLocations[i].equals(nodeName)) {
                            // pick an unread split to read; synchronize among
                            // simultaneous partitions on the same machine
                            synchronized (executed) {
                                if (!executed[i]) {
                                    executed[i] = true;
                                } else {
                                    continue;
                                }
                            }
                            // read the split, closing the reader even if parsing fails
                            RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                            try {
                                Object key = reader.createKey();
                                Object value = reader.createValue();
                                while (reader.next(key, value)) {
                                    parser.parse(key, value, writer, inputSplits[i].toString());
                                }
                            } finally {
                                reader.close();
                            }
                        }
                    }
                } finally {
                    parser.close(writer);
                }
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used: AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable), RecordReader (org.apache.hadoop.mapred.RecordReader), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser), InputFormat (org.apache.hadoop.mapred.InputFormat), InputSplit (org.apache.hadoop.mapred.InputSplit), JobConf (org.apache.hadoop.mapred.JobConf)
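
The synchronized block above is a simple claim-then-work pattern: whichever partition thread reaches an unread split first marks it as taken, and the others skip it. A standalone sketch of the same idea (tryClaim is a hypothetical helper; executed is the shared boolean array from the example):

// returns true only for the single thread that claims split i
static boolean tryClaim(boolean[] executed, int i) {
    synchronized (executed) {
        if (executed[i]) {
            return false;   // another partition already took this split
        }
        executed[i] = true; // claim it while holding the lock
        return true;
    }
}

An array of java.util.concurrent.atomic.AtomicBoolean with compareAndSet(false, true) would achieve the same claim without a shared monitor.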

Example 2 with AbstractUnaryOutputSourceOperatorNodePushable

Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.

From the class SortGroupbyTest, method initial:

@Override
protected void initial(final IHyracksTaskContext ctx, int tableSize, final int numFrames) throws HyracksDataException {
    builder = new ExternalSortGroupByRunGenerator(ctx, keyFields, inRecordDesc, numFrames, keyFields, normalizedKeyComputerFactory, comparatorFactories, partialAggrInState, outputRec, Algorithm.QUICK_SORT);
    mergerOperator = new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            List<GeneratedRunFileReader> runs = builder.getRuns(); // sorted runs spilled by the run generator
            ISorter sorter = builder.getSorter();
            IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
            for (int i = 0; i < comparatorFactories.length; ++i) {
                comparators[i] = comparatorFactories[i].createBinaryComparator();
            }
            INormalizedKeyComputer nmkComputer = normalizedKeyComputerFactory == null ? null
                    : normalizedKeyComputerFactory.createNormalizedKeyComputer();
            AbstractExternalSortRunMerger merger = new ExternalSortGroupByRunMerger(ctx, sorter, runs, keyFields,
                    inRecordDesc, outputRec, outputRec, numFrames, writer, keyFields, nmkComputer, comparators,
                    partialAggrInState, finalAggrInState, true);
            // merge the spilled runs and push the grouped result to the downstream writer
            merger.process();
        }
    };
}
Also used: INormalizedKeyComputer (org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer), AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable), AbstractExternalSortRunMerger (org.apache.hyracks.dataflow.std.sort.AbstractExternalSortRunMerger), List (java.util.List), ExternalSortGroupByRunMerger (org.apache.hyracks.dataflow.std.group.sort.ExternalSortGroupByRunMerger), ExternalSortGroupByRunGenerator (org.apache.hyracks.dataflow.std.group.sort.ExternalSortGroupByRunGenerator), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), ISorter (org.apache.hyracks.dataflow.std.sort.ISorter)
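
For context, run generators in Hyracks are themselves IFrameWriters, so a test drives this generator/merger pair in two phases: push the raw frames into builder, then let mergerOperator merge the spilled runs. A hedged sketch of that flow (inputFrames and resultWriter are hypothetical; setOutputFrameWriter is the standard IOperatorNodePushable wiring call):

// phase 1: feed raw frames to the run generator (sorts in memory, spills runs)
builder.open();
for (ByteBuffer frame : inputFrames) {
    builder.nextFrame(frame);
}
builder.close(); // flushes the final in-memory run

// phase 2: wire the merger's output, then run the merge defined in initialize()
mergerOperator.setOutputFrameWriter(0, resultWriter, outputRec);
mergerOperator.initialize();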

Example 3 with AbstractUnaryOutputSourceOperatorNodePushable

Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.

From the class DataGenOperatorDescriptor, method createPushRuntime:

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    final RecordDescriptor recDesc = outRecDescs[0];
    final ArrayTupleBuilder tb = new ArrayTupleBuilder(recDesc.getFields().length);
    final Random rnd = new Random(randomSeed);
    final int maxUniqueAttempts = 20;
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        // for quick & dirty exclusion of duplicates
        // WARNING: could contain numRecord entries and use a lot of memory
        HashSet<String> stringHs = new HashSet<String>();

        HashSet<Integer> intHs = new HashSet<Integer>();

        @Override
        public void initialize() throws HyracksDataException {
            try {
                writer.open();
                for (int i = 0; i < numRecords; i++) {
                    tb.reset();
                    for (int j = 0; j < recDesc.getFieldCount(); j++) {
                        genField(tb, j);
                    }
                    if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                        // frame full: flush it downstream, then retry; a tuple that still
                        // does not fit is larger than a single frame
                        appender.write(writer, true);
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
                        }
                    }
                }
                // flush the last, possibly partial, frame
                appender.write(writer, true);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
            }
        }

        private void genField(ArrayTupleBuilder tb, int fieldIndex) throws HyracksDataException {
            DataOutput dos = tb.getDataOutput();
            if (recDesc.getFields()[fieldIndex] instanceof IntegerSerializerDeserializer) {
                int val = -1;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        // rnd.nextInt(bound) avoids the overflow of Math.abs(rnd.nextInt())
                        // when nextInt() returns Integer.MIN_VALUE
                        int tmp = rnd.nextInt(intMaxVal - intMinVal) + intMinVal;
                        if (intHs.contains(tmp)) {
                            attempt++;
                        } else {
                            val = tmp;
                            intHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts) {
                        throw new HyracksDataException("Max unique attempts reached in datagen");
                    }
                } else {
                    val = rnd.nextInt(intMaxVal - intMinVal) + intMinVal;
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else if (recDesc.getFields()[fieldIndex] instanceof UTF8StringSerializerDeserializer) {
                String val = null;
                if (fieldIndex == uniqueField) {
                    int attempt = 0;
                    while (attempt < maxUniqueAttempts) {
                        String tmp = randomString(maxStrLen, rnd);
                        if (stringHs.contains(tmp)) {
                            attempt++;
                        } else {
                            val = tmp;
                            stringHs.add(val);
                            break;
                        }
                    }
                    if (attempt == maxUniqueAttempts) {
                        throw new HyracksDataException("Max unique attempts reached in datagen");
                    }
                } else {
                    val = randomString(maxStrLen, rnd);
                }
                recDesc.getFields()[fieldIndex].serialize(val, dos);
                tb.addFieldEndOffset();
            } else {
                throw new HyracksDataException("Type unsupported in data generator. Only integers and strings allowed.");
            }
        }

        private String randomString(int length, Random random) {
            // build a pool of hex characters, then sample up to 'length' of them at random
            String s = Long.toHexString(Double.doubleToLongBits(random.nextDouble()));
            StringBuilder strBuilder = new StringBuilder();
            for (int i = 0; i < s.length() && i < length; i++) {
                strBuilder.append(s.charAt(random.nextInt(s.length())));
            }
            return strBuilder.toString();
        }
    };
}
Also used: DataOutput (java.io.DataOutput), AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), IntegerSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer), Random (java.util.Random), FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender), HashSet (java.util.HashSet)
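
The uniqueness logic in genField is rejection sampling with a bounded retry budget: draw a candidate, keep it only if the HashSet has not seen it, and give up after maxUniqueAttempts rejections. A compact standalone sketch of the same idea (all names here are hypothetical):

import java.util.Random;
import java.util.Set;

final class UniqueIntGen {
    // draws a not-yet-seen int in [min, max), or throws after maxAttempts rejections
    static int next(Random rnd, Set<Integer> seen, int min, int max, int maxAttempts) {
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            int candidate = min + rnd.nextInt(max - min);
            if (seen.add(candidate)) { // Set.add returns false for duplicates
                return candidate;
            }
        }
        throw new IllegalStateException("exhausted unique-value attempts");
    }
}

As the WARNING comment in the example notes, the seen set grows with the number of records, so this trades memory for simplicity; for very large numRecords, a bit set over the value range would bound the memory.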

Example 4 with AbstractUnaryOutputSourceOperatorNodePushable

Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.

From the class AlgebricksMetaOperatorDescriptor, method createSourceInputPushRuntime:

private IOperatorNodePushable createSourceInputPushRuntime(final IHyracksTaskContext ctx) {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        @Override
        public void initialize() throws HyracksDataException {
            IFrameWriter startOfPipeline;
            RecordDescriptor pipelineOutputRecordDescriptor =
                    outputArity > 0 ? AlgebricksMetaOperatorDescriptor.this.outRecDescs[0] : null;
            PipelineAssembler pa =
                    new PipelineAssembler(pipeline, inputArity, outputArity, null, pipelineOutputRecordDescriptor);
            startOfPipeline = pa.assemblePipeline(writer, ctx);
            try {
                // for a source pipeline, open() triggers the root runtime to produce its output
                startOfPipeline.open();
            } catch (Exception e) {
                startOfPipeline.fail(); // per the IFrameWriter contract: fail, then close
                throw e;
            } finally {
                startOfPipeline.close();
            }
        }
    };
}
Also used: IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter), AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable), RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
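
Examples 1, 3, and 4 all lean on the same IFrameWriter lifecycle, which is worth spelling out once: open exactly once, push zero or more frames, call fail() before close() on error, and close unconditionally. A minimal sketch of the protocol (buf is a placeholder frame buffer):

writer.open();             // exactly once, before any frames
try {
    writer.nextFrame(buf); // zero or more frames
} catch (Exception e) {
    writer.fail();         // tell downstream the stream is aborted
    throw e;
} finally {
    writer.close();        // always runs, even after fail()
}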

Example 5 with AbstractUnaryOutputSourceOperatorNodePushable

Use of org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable in project asterixdb by apache.

From the class HDFSReadOperatorDescriptor (the org.apache.hadoop.mapreduce variant), method createPushRuntime:

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    // read all the partitions scheduled to the current node
                    if (scheduledLocations[i].equals(nodeName)) {
                        // pick an unread split to read; synchronize among
                        // simultaneous partitions on the same machine
                        synchronized (executed) {
                            if (!executed[i]) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }
                        // read the split, closing the reader even if parsing fails
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        try {
                            reader.initialize(inputSplits.get(i), context);
                            while (reader.nextKeyValue()) {
                                parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                        inputSplits.get(i).toString());
                            }
                        } finally {
                            reader.close();
                        }
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
Also used: AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit), HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException), ContextFactory (org.apache.hyracks.hdfs.ContextFactory), IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser), InputFormat (org.apache.hadoop.mapreduce.InputFormat), Job (org.apache.hadoop.mapreduce.Job)
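
Examples 1 and 5 are the same operator written against the two Hadoop APIs: Example 1 uses the older org.apache.hadoop.mapred interfaces (InputFormat.getRecordReader plus reader.next(key, value) with caller-allocated key/value objects), while Example 5 uses org.apache.hadoop.mapreduce (createRecordReader followed by an explicit initialize, then nextKeyValue()/getCurrentKey()/getCurrentValue()). A side-by-side sketch of the two read loops (process is a placeholder):

// old API (org.apache.hadoop.mapred): the caller allocates and reuses key/value
Object key = reader.createKey();
Object value = reader.createValue();
while (reader.next(key, value)) {
    process(key, value);
}

// new API (org.apache.hadoop.mapreduce): the reader owns the current pair
reader.initialize(split, context);
while (reader.nextKeyValue()) {
    process(reader.getCurrentKey(), reader.getCurrentValue());
}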

Aggregations

HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 7 uses
AbstractUnaryOutputSourceOperatorNodePushable (org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable): 7 uses
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 2 uses
IKeyValueParser (org.apache.hyracks.hdfs.api.IKeyValueParser): 2 uses
DataOutput (java.io.DataOutput): 1 use
File (java.io.File): 1 use
FileInputStream (java.io.FileInputStream): 1 use
FileNotFoundException (java.io.FileNotFoundException): 1 use
InputStream (java.io.InputStream): 1 use
HashSet (java.util.HashSet): 1 use
List (java.util.List): 1 use
Random (java.util.Random): 1 use
IDataSourceAdapter (org.apache.asterix.external.api.IDataSourceAdapter): 1 use
InputFormat (org.apache.hadoop.mapred.InputFormat): 1 use
InputSplit (org.apache.hadoop.mapred.InputSplit): 1 use
JobConf (org.apache.hadoop.mapred.JobConf): 1 use
RecordReader (org.apache.hadoop.mapred.RecordReader): 1 use
InputFormat (org.apache.hadoop.mapreduce.InputFormat): 1 use
Job (org.apache.hadoop.mapreduce.Job): 1 use
RecordReader (org.apache.hadoop.mapreduce.RecordReader): 1 use