Search in sources :

Example 11 with FrameTupleAppender

use of org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender in project asterixdb by apache.

the class FramewriterTest method mockAppenders.

/**
 * Creates two mocked {@link FrameTupleAppender}s for writer tests.
 * <p>
 * Index 0: its {@code write(IFrameWriter, boolean)} pushes {@code EMPTY_BUFFER} to the supplied writer.
 * Index 1: its {@code write(IFrameWriter, boolean)} always throws a {@link HyracksDataException}.
 *
 * @return a two-element array holding the forwarding mock followed by the failing mock
 * @throws HyracksDataException declared because the stubbed {@code write} signature declares it
 */
public static FrameTupleAppender[] mockAppenders() throws HyracksDataException {
    // Answer that hands an empty frame to whichever writer was passed to write(...).
    final Answer<Object> forwardEmptyFrame = new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            IFrameWriter target = (IFrameWriter) invocation.getArguments()[0];
            target.nextFrame(EMPTY_BUFFER);
            return null;
        }
    };
    // Answer that simulates a flush failure; a fresh exception is created on every call.
    final Answer<Object> failToFlush = new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            throw new HyracksDataException("couldn't flush frame");
        }
    };
    FrameTupleAppender forwardingAppender = Mockito.mock(FrameTupleAppender.class);
    Mockito.doAnswer(forwardEmptyFrame).when(forwardingAppender).write(Matchers.any(IFrameWriter.class), Matchers.anyBoolean());
    FrameTupleAppender failingAppender = Mockito.mock(FrameTupleAppender.class);
    Mockito.doAnswer(failToFlush).when(failingAppender).write(Matchers.any(IFrameWriter.class), Matchers.anyBoolean());
    return new FrameTupleAppender[] { forwardingAppender, failingAppender };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) InvocationOnMock(org.mockito.invocation.InvocationOnMock) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) IFrameTupleAppender(org.apache.hyracks.api.comm.IFrameTupleAppender) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 12 with FrameTupleAppender

use of org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender in project asterixdb by apache.

the class HashSpillableTableFactory method buildSpillableTable.

/**
 * Builds a spillable, hash-based aggregation table.
 * <p>
 * The returned table keeps aggregated tuples in a partitioned data table and indexes them through a
 * hash table of tuple pointers; both draw their frames from one shared frame pool. The hash-table
 * entries are divided into {@code numPartitions} contiguous ranges of {@code entriesPerPartition}
 * entries each, so a hash entry maps to a partition by integer division.
 *
 * @param ctx task context used to size frames and create frames, the hash table and the aggregator
 * @param suggestTableSize number of entries of the hash table
 * @param inputDataBytesSize size of the input in bytes; used to pick the number of partitions
 * @param keyFields positions of the grouping keys in the input tuples
 * @param comparators comparators for the grouping keys
 * @param firstKeyNormalizerFactory not referenced by this implementation
 * @param aggregateFactory factory for the aggregator that initializes, updates and outputs states
 * @param inRecordDescriptor record descriptor of the input tuples
 * @param outRecordDescriptor record descriptor of the aggregated tuples
 * @param framesLimit number of frames given to the shared frame pool
 * @param seed seed passed to the hash partitioners
 * @return the spillable table
 * @throws HyracksDataException if {@code framesLimit} is below {@code MIN_FRAME_LIMT}, or if a
 *         collaborator fails while the table is being set up
 */
@Override
public ISpillableTable buildSpillableTable(final IHyracksTaskContext ctx, int suggestTableSize, long inputDataBytesSize, final int[] keyFields, final IBinaryComparator[] comparators, final INormalizedKeyComputer firstKeyNormalizerFactory, IAggregatorDescriptorFactory aggregateFactory, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, final int framesLimit, final int seed) throws HyracksDataException {
    final int tableSize = suggestTableSize;
    // For the output, we need to have at least one frame.
    if (framesLimit < MIN_FRAME_LIMT) {
        throw new HyracksDataException("The given frame limit is too small to partition the data.");
    }
    // In the aggregated (intermediate) tuples the keys occupy the leading fields 0..keyFields.length-1.
    final int[] intermediateResultKeys = new int[keyFields.length];
    for (int i = 0; i < keyFields.length; i++) {
        intermediateResultKeys[i] = i;
    }
    final FrameTuplePairComparator ftpcInputCompareToAggregate = new FrameTuplePairComparator(keyFields, intermediateResultKeys, comparators);
    final ITuplePartitionComputer tpc = new FieldHashPartitionComputerFamily(keyFields, hashFunctionFamilies).createPartitioner(seed);
    // For calculating hash value for the already aggregated tuples (not incoming tuples)
    // This computer is required to calculate the hash value of an aggregated tuple
    // while doing the garbage collection work on Hash Table.
    final ITuplePartitionComputer tpcIntermediate = new FieldHashPartitionComputerFamily(intermediateResultKeys, hashFunctionFamilies).createPartitioner(seed);
    final IAggregatorDescriptor aggregator = aggregateFactory.createAggregator(ctx, inRecordDescriptor, outRecordDescriptor, keyFields, intermediateResultKeys, null);
    final AggregateState aggregateState = aggregator.createAggregateStates();
    final ArrayTupleBuilder stateTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    //TODO(jf) research on the optimized partition size
    long memoryBudget = Math.max(MIN_DATA_TABLE_FRAME_LIMT + MIN_HASH_TABLE_FRAME_LIMT, framesLimit - OUTPUT_FRAME_LIMT - MIN_HASH_TABLE_FRAME_LIMT);
    final int numPartitions = getNumOfPartitions(inputDataBytesSize / ctx.getInitialFrameSize(), memoryBudget);
    // Rounded up so that numPartitions * entriesPerPartition covers every hash entry.
    final int entriesPerPartition = (int) Math.ceil(1.0 * tableSize / numPartitions);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("created hashtable, table size:" + tableSize + " file size:" + inputDataBytesSize + "  #partitions:" + numPartitions);
    }
    final ArrayTupleBuilder outputTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    return new ISpillableTable() {

        // Scratch pointer reused by insert and flushFrames.
        private final TuplePointer pointer = new TuplePointer();

        // Partitions that have been flushed through flushFrames().
        private final BitSet spilledSet = new BitSet(numPartitions);

        // This frame pool will be shared by both data table and hash table.
        private final IDeallocatableFramePool framePool = new DeallocatableFramePool(ctx, framesLimit * ctx.getInitialFrameSize());

        // buffer manager for hash table
        private final ISimpleFrameBufferManager bufferManagerForHashTable = new FramePoolBackedFrameBufferManager(framePool);

        private final ISerializableTable hashTableForTuplePointer = new SerializableHashTable(tableSize, ctx, bufferManagerForHashTable);

        // buffer manager for data table
        final IPartitionedTupleBufferManager bufferManager = new VPartitionTupleBufferManager(PreferToSpillFullyOccupiedFramePolicy.createAtMostOneFrameForSpilledPartitionConstrain(spilledSet), numPartitions, framePool);

        final ITuplePointerAccessor bufferAccessor = bufferManager.getTuplePointerAccessor(outRecordDescriptor);

        private final PreferToSpillFullyOccupiedFramePolicy spillPolicy = new PreferToSpillFullyOccupiedFramePolicy(bufferManager, spilledSet);

        private final FrameTupleAppender outputAppender = new FrameTupleAppender(new VSizeFrame(ctx));

        /**
         * Closes the hash table and the aggregator.
         */
        @Override
        public void close() throws HyracksDataException {
            // Close the aggregator even when closing the hash table throws, so it is never skipped.
            try {
                hashTableForTuplePointer.close();
            } finally {
                aggregator.close();
            }
        }

        /**
         * Removes every hash entry that belongs to the given partition, garbage-collects the hash
         * table when it reports that this is needed, and clears the partition in the data table.
         */
        @Override
        public void clear(int partition) throws HyracksDataException {
            for (int p = getFirstEntryInHashTable(partition); p < getLastEntryInHashTable(partition); p++) {
                hashTableForTuplePointer.delete(p);
            }
            // Checks whether the garbage collection is required and conducts a garbage collection if so.
            if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                int numberOfFramesReclaimed = hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Garbage Collection on Hash table is done. Deallocated frames:" + numberOfFramesReclaimed);
                }
            }
            bufferManager.clearPartition(partition);
        }

        // Maps a hash-table entry to the partition that owns it.
        private int getPartition(int entryInHashTable) {
            return entryInHashTable / entriesPerPartition;
        }

        // First hash-table entry (inclusive) of the given partition.
        private int getFirstEntryInHashTable(int partition) {
            return partition * entriesPerPartition;
        }

        // End of the partition's hash-entry range (exclusive), capped at the table size.
        private int getLastEntryInHashTable(int partition) {
            return Math.min(tableSize, (partition + 1) * entriesPerPartition);
        }

        /**
         * Aggregates the input tuple into a stored tuple with equal keys, or inserts a new entry.
         *
         * @return {@code true} if the tuple was aggregated or inserted; {@code false} if a new
         *         entry was needed but could not be inserted
         */
        @Override
        public boolean insert(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            // Scan the tuples chained on this hash entry for one whose keys compare equal.
            for (int i = 0; i < hashTableForTuplePointer.getTupleCount(entryInHashTable); i++) {
                hashTableForTuplePointer.getTuplePointer(entryInHashTable, i, pointer);
                bufferAccessor.reset(pointer);
                int c = ftpcInputCompareToAggregate.compare(accessor, tIndex, bufferAccessor);
                if (c == 0) {
                    aggregateExistingTuple(accessor, tIndex, bufferAccessor, pointer.getTupleIndex());
                    return true;
                }
            }
            return insertNewAggregateEntry(entryInHashTable, accessor, tIndex);
        }

        /**
         * Inserts a new aggregate entry into the data table and hash table.
         * This insertion must be an atomic operation. We cannot have a partial success or failure.
         * So, if an insertion succeeds on the data table and the same insertion on the hash table fails, then
         * we need to revert the effect of data table insertion.
         */
        private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            initStateTupleBuilder(accessor, tIndex);
            int pid = getPartition(entryInHashTable);
            // Insertion to the data table
            if (!bufferManager.insertTuple(pid, stateTupleBuilder.getByteArray(), stateTupleBuilder.getFieldEndOffsets(), 0, stateTupleBuilder.getSize(), pointer)) {
                return false;
            }
            // Insertion to the hash table
            if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
                // To preserve the atomicity of this method, we need to undo the effect
                // of the above bufferManager.insertTuple() call since the given insertion has failed.
                bufferManager.cancelInsertTuple(pid);
                return false;
            }
            return true;
        }

        // Fills stateTupleBuilder with the key fields of the input tuple followed by the initial aggregate state.
        private void initStateTupleBuilder(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            stateTupleBuilder.reset();
            for (int k = 0; k < keyFields.length; k++) {
                stateTupleBuilder.addField(accessor, tIndex, keyFields[k]);
            }
            aggregator.init(stateTupleBuilder, accessor, tIndex, aggregateState);
        }

        // Folds the input tuple into the already stored aggregate tuple.
        private void aggregateExistingTuple(IFrameTupleAccessor accessor, int tIndex, ITuplePointerAccessor bufferAccessor, int tupleIndex) throws HyracksDataException {
            aggregator.aggregate(accessor, tIndex, bufferAccessor, tupleIndex, aggregateState);
        }

        /**
         * Writes the partial or final result of every tuple stored in the given partition to the
         * writer and marks the partition as spilled.
         *
         * @return the number of tuples stored in the partition's hash entries, counted whether or
         *         not the aggregator produced output for them
         */
        @Override
        public int flushFrames(int partition, IFrameWriter writer, AggregateType type) throws HyracksDataException {
            int count = 0;
            for (int hashEntryPid = getFirstEntryInHashTable(partition); hashEntryPid < getLastEntryInHashTable(partition); hashEntryPid++) {
                count += hashTableForTuplePointer.getTupleCount(hashEntryPid);
                for (int tid = 0; tid < hashTableForTuplePointer.getTupleCount(hashEntryPid); tid++) {
                    hashTableForTuplePointer.getTuplePointer(hashEntryPid, tid, pointer);
                    bufferAccessor.reset(pointer);
                    outputTupleBuilder.reset();
                    // Copy the key fields first; the aggregator appends its result fields afterwards.
                    for (int k = 0; k < intermediateResultKeys.length; k++) {
                        outputTupleBuilder.addField(bufferAccessor.getBuffer().array(), bufferAccessor.getAbsFieldStartOffset(intermediateResultKeys[k]), bufferAccessor.getFieldLength(intermediateResultKeys[k]));
                    }
                    boolean hasOutput = false;
                    switch(type) {
                        case PARTIAL:
                            hasOutput = aggregator.outputPartialResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                        case FINAL:
                            hasOutput = aggregator.outputFinalResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                    }
                    // If the append fails, write out the current frame and retry once.
                    if (hasOutput && !outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                        outputAppender.write(writer, true);
                        if (!outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                            throw new HyracksDataException("The output item is too large to be fit into a frame.");
                        }
                    }
                }
            }
            // Write out whatever is still buffered in the appender.
            outputAppender.write(writer, true);
            spilledSet.set(partition);
            return count;
        }

        @Override
        public int getNumPartitions() {
            return bufferManager.getNumPartitions();
        }

        /**
         * Asks the spill policy for a partition to spill, passing the partition that the given
         * tuple hashes to.
         */
        @Override
        public int findVictimPartition(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            int partition = getPartition(entryInHashTable);
            return spillPolicy.selectVictimPartition(partition);
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) FramePoolBackedFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.FramePoolBackedFrameBufferManager) VPartitionTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.VPartitionTupleBufferManager) ITuplePointerAccessor(org.apache.hyracks.dataflow.std.buffermanager.ITuplePointerAccessor) TuplePointer(org.apache.hyracks.dataflow.std.structures.TuplePointer) ISimpleFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager) ITuplePartitionComputer(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputer) IDeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.IDeallocatableFramePool) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) SerializableHashTable(org.apache.hyracks.dataflow.std.structures.SerializableHashTable) FrameTuplePairComparator(org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator) BitSet(java.util.BitSet) FieldHashPartitionComputerFamily(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFamily) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) IDeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.IDeallocatableFramePool) DeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.DeallocatableFramePool) PreferToSpillFullyOccupiedFramePolicy(org.apache.hyracks.dataflow.std.buffermanager.PreferToSpillFullyOccupiedFramePolicy) ISerializableTable(org.apache.hyracks.dataflow.std.structures.ISerializableTable) IPartitionedTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.IPartitionedTupleBufferManager)

Example 13 with FrameTupleAppender

use of org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender in project asterixdb by apache.

the class ConstantTupleSourceOperatorNodePushable method initialize.

/**
 * Appends the configured constant tuple, if any, to a fresh frame and pushes it to the writer.
 * <p>
 * The tuple is appended only when {@code fieldSlots} and {@code tupleData} are non-null and
 * {@code tupleSize} is positive. The writer is always closed once {@code open()} has been
 * attempted, and {@code fail()} is signalled first when opening or writing throws.
 *
 * @throws HyracksDataException if creating the frame fails, or wrapping any failure raised while
 *         opening or writing to the writer
 */
@Override
public void initialize() throws HyracksDataException {
    FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    if (fieldSlots != null && tupleData != null && tupleSize > 0) {
        appender.append(fieldSlots, tupleData, 0, tupleSize);
    }
    try {
        // open() belongs inside the try: once it has been called, even unsuccessfully,
        // the writer must still be failed and closed.
        writer.open();
        appender.write(writer, false);
    } catch (Throwable th) {
        writer.fail();
        throw new HyracksDataException(th);
    } finally {
        writer.close();
    }
}
Also used : FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 14 with FrameTupleAppender

use of org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender in project asterixdb by apache.

the class LimitOperatorDescriptor method createPushRuntime.

/**
 * Creates the push runtime of the limit operator: frames are passed through until forwarding
 * another whole frame would exceed {@code outputLimit}; from that frame only the tuples that
 * still fit are copied, and every later frame is ignored.
 */
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {

        // Accessor over the frame currently being processed.
        private FrameTupleAccessor frameAccessor;

        // Number of tuples forwarded downstream so far.
        private int tuplesEmitted;

        // Set once the truncated frame has been written; later frames are dropped.
        private boolean limitReached;

        @Override
        public void open() throws HyracksDataException {
            frameAccessor = new FrameTupleAccessor(outRecDescs[0]);
            tuplesEmitted = 0;
            limitReached = false;
            writer.open();
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            if (limitReached) {
                return;
            }
            frameAccessor.reset(buffer);
            final int incoming = frameAccessor.getTupleCount();
            if (!((tuplesEmitted + incoming) > outputLimit)) {
                // The whole frame fits under the limit: forward it untouched.
                FrameUtils.flushFrame(buffer, writer);
                tuplesEmitted += incoming;
                return;
            }
            // Only part of this frame fits: copy the leading tuples up to the limit.
            FrameTupleAppender partialAppender = new FrameTupleAppender(new VSizeFrame(ctx));
            final int remaining = outputLimit - tuplesEmitted;
            for (int t = 0; t < remaining; t++) {
                FrameUtils.appendToWriter(writer, partialAppender, frameAccessor, t);
                tuplesEmitted++;
            }
            partialAppender.write(writer, false);
            limitReached = true;
        }

        @Override
        public void fail() throws HyracksDataException {
            writer.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            writer.close();
        }

        @Override
        public void flush() throws HyracksDataException {
            writer.flush();
        }
    };
}
Also used : FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) AbstractUnaryInputUnaryOutputOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable) ByteBuffer(java.nio.ByteBuffer) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) FrameTupleAccessor(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)

Example 15 with FrameTupleAppender

use of org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender in project asterixdb by apache.

the class DelimitedDataTupleParserFactory method createTupleParser.

/**
 * Creates a parser that reads delimited records from a stream, converts each field with the
 * value parser configured for its position, and appends the resulting tuples to the writer.
 */
@Override
public ITupleParser createTupleParser(final IHyracksTaskContext ctx) {
    return new ITupleParser() {

        @Override
        public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
            try {
                // One value parser per configured factory, in field order.
                IValueParser[] parsers = new IValueParser[valueParserFactories.length];
                for (int p = 0; p < valueParserFactories.length; ++p) {
                    parsers[p] = valueParserFactories[p].createValueParser();
                }
                IFrame outputFrame = new VSizeFrame(ctx);
                FrameTupleAppender appender = new FrameTupleAppender();
                appender.reset(outputFrame, true);
                ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(parsers.length);
                DataOutput fieldOutput = tupleBuilder.getDataOutput();
                FieldCursorForDelimitedDataParser fields = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote);
                while (fields.nextRecord()) {
                    tupleBuilder.reset();
                    // Stop at the first missing field; the tuple is appended with the fields read so far.
                    for (int f = 0; f < parsers.length && fields.nextField(); ++f) {
                        // Eliminate double quotes in the field that we are going to parse
                        if (fields.isDoubleQuoteIncludedInThisField) {
                            fields.eliminateDoubleQuote(fields.buffer, fields.fStart, fields.fEnd - fields.fStart);
                            fields.fEnd -= fields.doubleQuoteCount;
                            fields.isDoubleQuoteIncludedInThisField = false;
                        }
                        parsers[f].parse(fields.buffer, fields.fStart, fields.fEnd - fields.fStart, fieldOutput);
                        tupleBuilder.addFieldEndOffset();
                    }
                    FrameUtils.appendToWriter(writer, appender, tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0, tupleBuilder.getSize());
                }
                // Write out whatever is still buffered in the appender.
                appender.write(writer, true);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) DataOutput(java.io.DataOutput) InputStreamReader(java.io.InputStreamReader) IFrame(org.apache.hyracks.api.comm.IFrame) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IOException(java.io.IOException) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) IValueParser(org.apache.hyracks.dataflow.common.data.parsers.IValueParser)

Aggregations

FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender)42 VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame)32 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)17 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)15 FrameTupleAccessor (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)14 DataOutput (java.io.DataOutput)10 IFrame (org.apache.hyracks.api.comm.IFrame)8 ByteBuffer (java.nio.ByteBuffer)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 FixedSizeFrame (org.apache.hyracks.api.comm.FixedSizeFrame)6 IFrameTupleAppender (org.apache.hyracks.api.comm.IFrameTupleAppender)6 IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)6 FrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference)6 IFrameTupleAccessor (org.apache.hyracks.api.comm.IFrameTupleAccessor)5 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)5 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)5 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)4