Search in sources :

Example 36 with ArrayTupleBuilder

use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

The following example is taken from the bulkLoadInvIndex method of the LSMInvertedIndexTestUtils class.

/**
 * Bulk-loads the inverted index held by {@code testCtx} with {@code numDocs} documents
 * produced by {@code tupleGen}.
 *
 * The expected contents are first built one-by-one in a temporary sorted set (bulk loading
 * requires sorted input), then streamed into the index's bulk loader, and finally merged
 * into the test context's check tuples for later verification.
 *
 * @param testCtx    test context owning the index under test and its check tuples
 * @param tupleGen   source of generated document tuples
 * @param numDocs    number of documents to generate and load
 * @param appendOnly NOTE(review): currently unused in this method body — confirm whether it
 *                   should be forwarded to createBulkLoader
 * @throws HyracksDataException on index errors
 * @throws IOException          on tuple-generation errors
 */
public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs, boolean appendOnly) throws HyracksDataException, IOException {
    SortedSet<CheckTuple> tmpMemIndex = new TreeSet<>();
    // First generate the expected index by inserting the documents one-by-one.
    for (int i = 0; i < numDocs; i++) {
        ITupleReference tuple = tupleGen.next();
        testCtx.insertCheckTuples(tuple, tmpMemIndex);
    }
    ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
    // Use the expected index to bulk-load the actual index.
    IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs, true);
    // Reuse the serdes fetched above instead of calling testCtx.getFieldSerdes() a second time.
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldSerdes.length);
    ArrayTupleReference tuple = new ArrayTupleReference();
    // The TreeSet iterates in sorted order, which is what the bulk loader requires.
    for (CheckTuple checkTuple : tmpMemIndex) {
        OrderedIndexTestUtils.createTupleFromCheckTuple(checkTuple, tupleBuilder, tuple, fieldSerdes);
        bulkLoader.add(tuple);
    }
    bulkLoader.end();
    // Add all check tuples from the temp index to the test context.
    testCtx.getCheckTuples().addAll(tmpMemIndex);
}
Also used : CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) TreeSet(java.util.TreeSet) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Example 37 with ArrayTupleBuilder

use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

The following example is taken from the createAdapter method of the TestTypedAdapterFactory class.

/**
 * Creates a test adapter for the given task context and partition.
 *
 * Builds an anonymous {@link ITupleParserFactory} whose parser reads ADM records from an
 * input stream, wraps each record as a single-field tuple, and pushes it through a tuple
 * forwarder to the downstream frame writer.
 *
 * @param ctx       task context used to resolve the node id and application context
 * @param partition partition this adapter instance serves
 * @throws HyracksDataException if the adapter cannot be constructed
 */
@Override
public IDataSourceAdapter createAdapter(IHyracksTaskContext ctx, int partition) throws HyracksDataException {
    // Resolved once here and captured by the anonymous factory below.
    final String nodeId = ctx.getJobletContext().getServiceContext().getNodeId();
    final ITupleParserFactory tupleParserFactory = new ITupleParserFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public ITupleParser createTupleParser(IHyracksTaskContext ctx) throws HyracksDataException {
            ADMDataParser parser;
            ITupleForwarder forwarder;
            ArrayTupleBuilder tb;
            IApplicationContext appCtx = (IApplicationContext) ctx.getJobletContext().getServiceContext().getApplicationContext();
            // First cluster partition of this node — used to locate the feed's file splits.
            ClusterPartition nodePartition = appCtx.getMetadataProperties().getNodePartitions().get(nodeId)[0];
            parser = new ADMDataParser(outputType, true);
            forwarder = DataflowUtils.getTupleForwarder(configuration, FeedUtils.getFeedLogManager(ctx, FeedUtils.splitsForAdapter(ExternalDataUtils.getDataverse(configuration), ExternalDataUtils.getFeedName(configuration), nodeId, nodePartition)));
            // One field per tuple: each parsed ADM record becomes a single-field tuple.
            tb = new ArrayTupleBuilder(1);
            return new ITupleParser() {

                @Override
                public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
                    try {
                        parser.setInputStream(in);
                        forwarder.initialize(ctx, writer);
                        // Parse records until the parser reports end of input.
                        while (true) {
                            tb.reset();
                            if (!parser.parse(tb.getDataOutput())) {
                                break;
                            }
                            tb.addFieldEndOffset();
                            forwarder.addTuple(tb);
                        }
                        // NOTE(review): close() is skipped when parse/addTuple throws —
                        // confirm whether the forwarder needs closing in a finally block.
                        forwarder.close();
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
    try {
        return new TestTypedAdapter(tupleParserFactory, outputType, ctx, configuration, partition);
    } catch (IOException e) {
        throw new HyracksDataException(e);
    }
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) ITupleParser(org.apache.hyracks.dataflow.std.file.ITupleParser) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IApplicationContext(org.apache.asterix.common.api.IApplicationContext) IOException(java.io.IOException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) ADMDataParser(org.apache.asterix.external.parser.ADMDataParser) ITupleForwarder(org.apache.asterix.external.api.ITupleForwarder) ITupleParserFactory(org.apache.hyracks.dataflow.std.file.ITupleParserFactory) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition)

Example 38 with ArrayTupleBuilder

use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

The following example is taken from the createAggregator method of the NestedPlansAccumulatingAggregatorFactory class.

/**
 * Creates an accumulating aggregator that runs each nested subplan over the tuples of a
 * group and, on final output, copies the accumulated result (keys, decors, then the
 * subplans' aggregate values) into the caller's tuple builder.
 */
@Override
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDesc, RecordDescriptor outRecordDescriptor, int[] keys, int[] partialKeys) throws HyracksDataException {
    final AggregatorOutput outputWriter = new AggregatorOutput(subplans, keyFieldIdx.length, decorFieldIdx.length);
    final NestedTupleSourceRuntime[] pipelines = new NestedTupleSourceRuntime[subplans.length];
    for (int p = 0; p < subplans.length; p++) {
        pipelines[p] = (NestedTupleSourceRuntime) assemblePipeline(subplans[p], outputWriter, ctx);
    }
    return new IAggregatorDescriptor() {

        /** Starts a group: records keys/decors, opens the subplans, feeds them the first tuple. */
        @Override
        public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            ArrayTupleBuilder groupTb = outputWriter.getTupleBuilder();
            groupTb.reset();
            // Grouping keys first, then decoration fields, into the shared output builder.
            for (int k = 0; k < keyFieldIdx.length; k++) {
                groupTb.addField(accessor, tIndex, keyFieldIdx[k]);
            }
            for (int d = 0; d < decorFieldIdx.length; d++) {
                groupTb.addField(accessor, tIndex, decorFieldIdx[d]);
            }
            for (NestedTupleSourceRuntime pipeline : pipelines) {
                pipeline.open();
            }
            // aggregate the first tuple
            for (NestedTupleSourceRuntime pipeline : pipelines) {
                pipeline.writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        /** Pushes one more tuple of the current group through every subplan. */
        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor, int stateTupleIndex, AggregateState state) throws HyracksDataException {
            for (NestedTupleSourceRuntime pipeline : pipelines) {
                pipeline.writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        /** Closes the subplans (flushing their results into outputWriter) and copies all fields out. */
        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor stateAccessor, int tIndex, AggregateState state) throws HyracksDataException {
            for (int p = 0; p < pipelines.length; p++) {
                outputWriter.setInputIdx(p);
                pipelines[p].close();
            }
            // Copy every field of the accumulated tuple into the caller's builder.
            tupleBuilder.reset();
            ArrayTupleBuilder accumulated = outputWriter.getTupleBuilder();
            byte[] bytes = accumulated.getByteArray();
            int[] ends = accumulated.getFieldEndOffsets();
            int prevEnd = 0;
            for (int f = 0; f < ends.length; f++) {
                tupleBuilder.addField(bytes, prevEnd, ends[f] - prevEnd);
                prevEnd = ends[f];
            }
            return true;
        }

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        @Override
        public void reset() {
        }

        /** Partial output is unsupported: nested-plan aggregation only produces final results. */
        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            throw new IllegalStateException("this method should not be called");
        }

        @Override
        public void close() {
        }
    };
}
Also used : AggregateState(org.apache.hyracks.dataflow.std.group.AggregateState) NestedTupleSourceRuntime(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory.NestedTupleSourceRuntime) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) IAggregatorDescriptor(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)

Example 39 with ArrayTupleBuilder

use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

The following example is taken from the createAggregator method of the NestedPlansRunningAggregatorFactory class.

/* (non-Javadoc)
     * @see org.apache.hyracks.dataflow.std.group.IAggregatorDescriptorFactory#createAggregator(org.apache.hyracks.api.context.IHyracksTaskContext, org.apache.hyracks.api.dataflow.value.RecordDescriptor, org.apache.hyracks.api.dataflow.value.RecordDescriptor, int[], int[])
     */
/**
 * Creates a running aggregator: nested subplans emit their results directly through
 * {@code writer} via the shared {@link RunningAggregatorOutput}, so outputFinalResult
 * never produces a tuple of its own (it returns false).
 */
@Override
public IAggregatorDescriptor createAggregator(final IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults, final IFrameWriter writer) throws HyracksDataException {
    final RunningAggregatorOutput outputWriter = new RunningAggregatorOutput(ctx, subplans, keyFieldIdx.length, decorFieldIdx.length, writer);
    final NestedTupleSourceRuntime[] pipelines = new NestedTupleSourceRuntime[subplans.length];
    for (int p = 0; p < subplans.length; p++) {
        pipelines[p] = (NestedTupleSourceRuntime) assemblePipeline(subplans[p], outputWriter, ctx);
    }
    final ArrayTupleBuilder gbyTb = outputWriter.getGroupByTupleBuilder();
    return new IAggregatorDescriptor() {

        /** Starts a group: opens the subplans, records keys/decors, pushes the first tuple. */
        @Override
        public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            for (NestedTupleSourceRuntime pipeline : pipelines) {
                pipeline.open();
            }
            gbyTb.reset();
            // Grouping keys first, then decoration fields, into the group-by builder.
            for (int k = 0; k < keyFieldIdx.length; k++) {
                gbyTb.addField(accessor, tIndex, keyFieldIdx[k]);
            }
            for (int d = 0; d < decorFieldIdx.length; d++) {
                gbyTb.addField(accessor, tIndex, decorFieldIdx[d]);
            }
            // aggregate the first tuple
            for (int p = 0; p < pipelines.length; p++) {
                outputWriter.setInputIdx(p);
                pipelines[p].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        /** Pushes one more tuple of the group through every subplan, tagging the source index. */
        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor, int stateTupleIndex, AggregateState state) throws HyracksDataException {
            for (int p = 0; p < pipelines.length; p++) {
                outputWriter.setInputIdx(p);
                pipelines[p].writeTuple(accessor.getBuffer(), tIndex);
            }
        }

        /** Closes the subplans; results flow out through 'writer', so no tuple is returned here. */
        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            for (int p = 0; p < pipelines.length; p++) {
                outputWriter.setInputIdx(p);
                pipelines[p].close();
            }
            return false;
        }

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        @Override
        public void reset() {
        }

        /** Partial output is unsupported for nested-plan aggregation. */
        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            throw new IllegalStateException("this method should not be called");
        }

        @Override
        public void close() {
        }
    };
}
Also used : AggregateState(org.apache.hyracks.dataflow.std.group.AggregateState) NestedTupleSourceRuntime(org.apache.hyracks.algebricks.runtime.operators.std.NestedTupleSourceRuntimeFactory.NestedTupleSourceRuntime) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IAggregatorDescriptor(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptor)

Example 40 with ArrayTupleBuilder

use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

The following example is taken from the createAggregator method of the SerializableAggregatorDescriptorFactory class.

/**
 * Creates an aggregator whose per-group state is serialized inline in the group tuple:
 * the state fields sit immediately after the grouping keys, and each evaluator steps
 * directly on those serialized bytes.
 */
@Override
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, int[] keyFields, final int[] keyFieldsInPartialResults) throws HyracksDataException {
    final int[] keys = keyFields;
    /**
         * one IAggregatorDescriptor instance per Gby operator
         */
    return new IAggregatorDescriptor() {

        // Reusable reference pointing at the current input tuple.
        private FrameTupleReference ftr = new FrameTupleReference();

        // Lazily created evaluators, one per aggregate function.
        private ISerializedAggregateEvaluator[] aggs = new ISerializedAggregateEvaluator[aggFactories.length];

        // State fields start right after the grouping keys in the state tuple.
        private int offsetFieldIndex = keys.length;

        // Serialized byte length of each aggregate's state, recorded during init().
        // NOTE(review): assumes each state's length is fixed after init — confirm.
        private int[] stateFieldLength = new int[aggFactories.length];

        @Override
        public AggregateState createAggregateStates() {
            return new AggregateState();
        }

        /**
         * Writes each evaluator's initial state into {@code tb} (one field per aggregate,
         * recording its length), then performs the first aggregation step in place on
         * those freshly written state bytes.
         */
        @Override
        public void init(ArrayTupleBuilder tb, IFrameTupleAccessor accessor, int tIndex, AggregateState state) throws HyracksDataException {
            DataOutput output = tb.getDataOutput();
            ftr.reset(accessor, tIndex);
            for (int i = 0; i < aggs.length; i++) {
                int begin = tb.getSize();
                if (aggs[i] == null) {
                    aggs[i] = aggFactories[i].createAggregateEvaluator(ctx);
                }
                aggs[i].init(output);
                tb.addFieldEndOffset();
                stateFieldLength[i] = tb.getSize() - begin;
            }
            // doing initial aggregate
            ftr.reset(accessor, tIndex);
            for (int i = 0; i < aggs.length; i++) {
                byte[] data = tb.getByteArray();
                // Start of state field i = end offset of the preceding field
                // (the keys occupy fields 0..keys.length-1 before the states).
                int prevFieldPos = i + keys.length - 1;
                int start = prevFieldPos >= 0 ? tb.getFieldEndOffsets()[prevFieldPos] : 0;
                aggs[i].step(ftr, data, start, stateFieldLength[i]);
            }
        }

        /**
         * Steps every evaluator with the incoming tuple, mutating the serialized state
         * bytes inside the state accessor's frame buffer in place.
         */
        @Override
        public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor, int stateTupleIndex, AggregateState state) throws HyracksDataException {
            ftr.reset(accessor, tIndex);
            int stateTupleStart = stateAccessor.getTupleStartOffset(stateTupleIndex);
            int fieldSlotLength = stateAccessor.getFieldSlotsLength();
            for (int i = 0; i < aggs.length; i++) {
                byte[] data = stateAccessor.getBuffer().array();
                // Absolute offset of state field i within the frame buffer.
                int start = stateAccessor.getFieldStartOffset(stateTupleIndex, i + keys.length) + stateTupleStart + fieldSlotLength;
                aggs[i].step(ftr, data, start, stateFieldLength[i]);
            }
        }

        /**
         * Emits each aggregate's partial result into {@code tb}, reading consecutive
         * state fields starting at the first state field of the tuple.
         */
        @Override
        public boolean outputPartialResult(ArrayTupleBuilder tb, IFrameTupleAccessor stateAccessor, int tIndex, AggregateState state) throws HyracksDataException {
            byte[] data = stateAccessor.getBuffer().array();
            int startOffset = stateAccessor.getTupleStartOffset(tIndex);
            int aggFieldOffset = stateAccessor.getFieldStartOffset(tIndex, offsetFieldIndex);
            int refOffset = startOffset + stateAccessor.getFieldSlotsLength() + aggFieldOffset;
            int start = refOffset;
            for (int i = 0; i < aggs.length; i++) {
                aggs[i].finishPartial(data, start, stateFieldLength[i], tb.getDataOutput());
                // Advance by the recorded state length to reach the next state field.
                start += stateFieldLength[i];
                tb.addFieldEndOffset();
            }
            return true;
        }

        /**
         * Emits each aggregate's final result into {@code tb}; same layout walk as
         * outputPartialResult but calling finish() instead of finishPartial().
         */
        @Override
        public boolean outputFinalResult(ArrayTupleBuilder tb, IFrameTupleAccessor stateAccessor, int tIndex, AggregateState state) throws HyracksDataException {
            byte[] data = stateAccessor.getBuffer().array();
            int startOffset = stateAccessor.getTupleStartOffset(tIndex);
            int aggFieldOffset = stateAccessor.getFieldStartOffset(tIndex, offsetFieldIndex);
            int refOffset = startOffset + stateAccessor.getFieldSlotsLength() + aggFieldOffset;
            int start = refOffset;
            for (int i = 0; i < aggs.length; i++) {
                aggs[i].finish(data, start, stateFieldLength[i], tb.getDataOutput());
                start += stateFieldLength[i];
                tb.addFieldEndOffset();
            }
            return true;
        }

        @Override
        public void reset() {
        }

        @Override
        public void close() {
            reset();
        }
    };
}
Also used : DataOutput(java.io.DataOutput) AggregateState(org.apache.hyracks.dataflow.std.group.AggregateState) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) FrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference) IAggregatorDescriptor(org.apache.hyracks.dataflow.std.group.IAggregatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)

Aggregations

ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)99 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)45 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)42 Test (org.junit.Test)40 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)35 DataOutput (java.io.DataOutput)33 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)25 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)24 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)21 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)21 ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex)18 FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender)17 ConstantTupleSourceOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor)17 VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)16 JobSpecification (org.apache.hyracks.api.job.JobSpecification)16 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)16 IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)16 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)15 BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor)14 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)12