
Example 16 with VSizeFrame

Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.

From the class RunMergingFrameReaderTest, method testRunFileReader.

@Test
public void testRunFileReader() throws HyracksDataException {
    int pageSize = 128;
    int numRuns = 4;
    int numFramesPerRun = 4;
    int minRecordSize = pageSize / 10;
    int maxRecordSize = pageSize / 2;
    IHyracksTaskContext ctx = testUtils.create(pageSize);
    ExternalSortRunGenerator runGenerator = new ExternalSortRunGenerator(ctx, SortFields, null, ComparatorFactories, RecordDesc, Algorithm.MERGE_SORT, numFramesPerRun);
    runGenerator.open();
    Map<Integer, String> keyValuePair = new HashMap<>();
    List<IFrame> frameList = new ArrayList<>();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    numFramesPerRun = 2;
    minRecordSize = pageSize;
    maxRecordSize = pageSize;
    frameList.clear();
    prepareData(ctx, frameList, pageSize * numFramesPerRun * numRuns, minRecordSize, maxRecordSize, null, keyValuePair);
    for (IFrame frame : frameList) {
        runGenerator.nextFrame(frame.getBuffer());
    }
    runGenerator.close();
    List<IFrame> inFrame = new ArrayList<>(runGenerator.getRuns().size());
    for (GeneratedRunFileReader max : runGenerator.getRuns()) {
        inFrame.add(new GroupVSizeFrame(ctx, max.getMaxFrameSize()));
    }
    // Prevent each run file reader from deleting its run file when it is read and closed.
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        PA.setValue(run, "deleteAfterClose", false);
    }
    matchResult(ctx, runGenerator.getRuns(), keyValuePair);
    List<IFrameReader> runs = new ArrayList<>();
    for (GeneratedRunFileReader run : runGenerator.getRuns()) {
        runs.add(run);
    }
    RunMergingFrameReader reader = new RunMergingFrameReader(ctx, runs, inFrame, SortFields, Comparators, null, RecordDesc);
    IFrame outFrame = new VSizeFrame(ctx);
    reader.open();
    while (reader.nextFrame(outFrame)) {
        assertFrameIsSorted(outFrame, Arrays.asList(keyValuePair));
    }
    reader.close();
    assertAllKeyValueIsConsumed(Arrays.asList(keyValuePair));
}
Also used: HashMap(java.util.HashMap), IFrame(org.apache.hyracks.api.comm.IFrame), ArrayList(java.util.ArrayList), GroupVSizeFrame(org.apache.hyracks.dataflow.std.sort.util.GroupVSizeFrame), VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame), RunMergingFrameReader(org.apache.hyracks.dataflow.std.sort.RunMergingFrameReader), IFrameReader(org.apache.hyracks.api.comm.IFrameReader), IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext), ExternalSortRunGenerator(org.apache.hyracks.dataflow.std.sort.ExternalSortRunGenerator), GeneratedRunFileReader(org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader), Test(org.junit.Test)
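
The test above exercises the standard IFrameReader protocol end to end: open the reader, repeatedly fill a growable output frame with nextFrame, and close it once nextFrame returns false. A minimal sketch of that protocol follows; the helper name drainReader and the consumer parameter are illustrative, not part of the project, and ctx is an IHyracksTaskContext as above.

static void drainReader(IHyracksTaskContext ctx, IFrameReader reader, IFrameWriter consumer)
        throws HyracksDataException {
    // A VSizeFrame starts at the initial frame size and grows when a larger frame is read into it.
    VSizeFrame buffer = new VSizeFrame(ctx);
    reader.open();
    try {
        while (reader.nextFrame(buffer)) {
            // Hand the filled frame to the consumer (in the test, this is where the assertions run).
            consumer.nextFrame(buffer.getBuffer());
        }
    } finally {
        reader.close();
    }
}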

Example 17 with VSizeFrame

Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.

From the class HashSpillableTableFactory, method buildSpillableTable.

@Override
public ISpillableTable buildSpillableTable(final IHyracksTaskContext ctx, int suggestTableSize, long inputDataBytesSize, final int[] keyFields, final IBinaryComparator[] comparators, final INormalizedKeyComputer firstKeyNormalizerFactory, IAggregatorDescriptorFactory aggregateFactory, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, final int framesLimit, final int seed) throws HyracksDataException {
    final int tableSize = suggestTableSize;
    // For the output, we need to have at least one frame.
    if (framesLimit < MIN_FRAME_LIMT) {
        throw new HyracksDataException("The given frame limit is too small to partition the data.");
    }
    final int[] intermediateResultKeys = new int[keyFields.length];
    for (int i = 0; i < keyFields.length; i++) {
        intermediateResultKeys[i] = i;
    }
    final FrameTuplePairComparator ftpcInputCompareToAggregate = new FrameTuplePairComparator(keyFields, intermediateResultKeys, comparators);
    final ITuplePartitionComputer tpc = new FieldHashPartitionComputerFamily(keyFields, hashFunctionFamilies).createPartitioner(seed);
    // For calculating the hash value of the already-aggregated tuples (not incoming tuples).
    // This computer is required to compute the hash value of an aggregated tuple
    // while doing the garbage collection work on the hash table.
    final ITuplePartitionComputer tpcIntermediate = new FieldHashPartitionComputerFamily(intermediateResultKeys, hashFunctionFamilies).createPartitioner(seed);
    final IAggregatorDescriptor aggregator = aggregateFactory.createAggregator(ctx, inRecordDescriptor, outRecordDescriptor, keyFields, intermediateResultKeys, null);
    final AggregateState aggregateState = aggregator.createAggregateStates();
    final ArrayTupleBuilder stateTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    //TODO(jf) research on the optimized partition size
    long memoryBudget = Math.max(MIN_DATA_TABLE_FRAME_LIMT + MIN_HASH_TABLE_FRAME_LIMT, framesLimit - OUTPUT_FRAME_LIMT - MIN_HASH_TABLE_FRAME_LIMT);
    final int numPartitions = getNumOfPartitions(inputDataBytesSize / ctx.getInitialFrameSize(), memoryBudget);
    final int entriesPerPartition = (int) Math.ceil(1.0 * tableSize / numPartitions);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("created hashtable, table size:" + tableSize + " file size:" + inputDataBytesSize + "  #partitions:" + numPartitions);
    }
    final ArrayTupleBuilder outputTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    return new ISpillableTable() {

        private final TuplePointer pointer = new TuplePointer();

        private final BitSet spilledSet = new BitSet(numPartitions);

        // This frame pool will be shared by both data table and hash table.
        private final IDeallocatableFramePool framePool = new DeallocatableFramePool(ctx, framesLimit * ctx.getInitialFrameSize());

        // buffer manager for hash table
        private final ISimpleFrameBufferManager bufferManagerForHashTable = new FramePoolBackedFrameBufferManager(framePool);

        private final ISerializableTable hashTableForTuplePointer = new SerializableHashTable(tableSize, ctx, bufferManagerForHashTable);

        // buffer manager for data table
        final IPartitionedTupleBufferManager bufferManager = new VPartitionTupleBufferManager(PreferToSpillFullyOccupiedFramePolicy.createAtMostOneFrameForSpilledPartitionConstrain(spilledSet), numPartitions, framePool);

        final ITuplePointerAccessor bufferAccessor = bufferManager.getTuplePointerAccessor(outRecordDescriptor);

        private final PreferToSpillFullyOccupiedFramePolicy spillPolicy = new PreferToSpillFullyOccupiedFramePolicy(bufferManager, spilledSet);

        private final FrameTupleAppender outputAppender = new FrameTupleAppender(new VSizeFrame(ctx));

        @Override
        public void close() throws HyracksDataException {
            hashTableForTuplePointer.close();
            aggregator.close();
        }

        @Override
        public void clear(int partition) throws HyracksDataException {
            for (int p = getFirstEntryInHashTable(partition); p < getLastEntryInHashTable(partition); p++) {
                hashTableForTuplePointer.delete(p);
            }
            // Checks whether the garbage collection is required and conducts a garbage collection if so.
            if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                int numberOfFramesReclaimed = hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Garbage Collection on Hash table is done. Deallocated frames:" + numberOfFramesReclaimed);
                }
            }
            bufferManager.clearPartition(partition);
        }

        private int getPartition(int entryInHashTable) {
            return entryInHashTable / entriesPerPartition;
        }

        private int getFirstEntryInHashTable(int partition) {
            return partition * entriesPerPartition;
        }

        private int getLastEntryInHashTable(int partition) {
            return Math.min(tableSize, (partition + 1) * entriesPerPartition);
        }

        @Override
        public boolean insert(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            for (int i = 0; i < hashTableForTuplePointer.getTupleCount(entryInHashTable); i++) {
                hashTableForTuplePointer.getTuplePointer(entryInHashTable, i, pointer);
                bufferAccessor.reset(pointer);
                int c = ftpcInputCompareToAggregate.compare(accessor, tIndex, bufferAccessor);
                if (c == 0) {
                    aggregateExistingTuple(accessor, tIndex, bufferAccessor, pointer.getTupleIndex());
                    return true;
                }
            }
            return insertNewAggregateEntry(entryInHashTable, accessor, tIndex);
        }

        /**
         * Inserts a new aggregate entry into the data table and hash table.
         * This insertion must be an atomic operation; we cannot have a partial success or failure.
         * So, if an insertion succeeds on the data table and the same insertion on the hash table fails,
         * we need to revert the effect of the data table insertion.
         */
        private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            initStateTupleBuilder(accessor, tIndex);
            int pid = getPartition(entryInHashTable);
            // Insertion to the data table
            if (!bufferManager.insertTuple(pid, stateTupleBuilder.getByteArray(), stateTupleBuilder.getFieldEndOffsets(), 0, stateTupleBuilder.getSize(), pointer)) {
                return false;
            }
            // Insertion to the hash table
            if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
                // To preserve the atomicity of this method, we need to undo the effect
                // of the above bufferManager.insertTuple() call since the given insertion has failed.
                bufferManager.cancelInsertTuple(pid);
                return false;
            }
            return true;
        }

        private void initStateTupleBuilder(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            stateTupleBuilder.reset();
            for (int k = 0; k < keyFields.length; k++) {
                stateTupleBuilder.addField(accessor, tIndex, keyFields[k]);
            }
            aggregator.init(stateTupleBuilder, accessor, tIndex, aggregateState);
        }

        private void aggregateExistingTuple(IFrameTupleAccessor accessor, int tIndex, ITuplePointerAccessor bufferAccessor, int tupleIndex) throws HyracksDataException {
            aggregator.aggregate(accessor, tIndex, bufferAccessor, tupleIndex, aggregateState);
        }

        @Override
        public int flushFrames(int partition, IFrameWriter writer, AggregateType type) throws HyracksDataException {
            int count = 0;
            for (int hashEntryPid = getFirstEntryInHashTable(partition); hashEntryPid < getLastEntryInHashTable(partition); hashEntryPid++) {
                count += hashTableForTuplePointer.getTupleCount(hashEntryPid);
                for (int tid = 0; tid < hashTableForTuplePointer.getTupleCount(hashEntryPid); tid++) {
                    hashTableForTuplePointer.getTuplePointer(hashEntryPid, tid, pointer);
                    bufferAccessor.reset(pointer);
                    outputTupleBuilder.reset();
                    for (int k = 0; k < intermediateResultKeys.length; k++) {
                        outputTupleBuilder.addField(bufferAccessor.getBuffer().array(), bufferAccessor.getAbsFieldStartOffset(intermediateResultKeys[k]), bufferAccessor.getFieldLength(intermediateResultKeys[k]));
                    }
                    boolean hasOutput = false;
                    switch(type) {
                        case PARTIAL:
                            hasOutput = aggregator.outputPartialResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                        case FINAL:
                            hasOutput = aggregator.outputFinalResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                    }
                    if (hasOutput && !outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                        outputAppender.write(writer, true);
                        if (!outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                            throw new HyracksDataException("The output item is too large to be fit into a frame.");
                        }
                    }
                }
            }
            outputAppender.write(writer, true);
            spilledSet.set(partition);
            return count;
        }

        @Override
        public int getNumPartitions() {
            return bufferManager.getNumPartitions();
        }

        @Override
        public int findVictimPartition(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            int partition = getPartition(entryInHashTable);
            return spillPolicy.selectVictimPartition(partition);
        }
    };
}
Also used: IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter), FramePoolBackedFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.FramePoolBackedFrameBufferManager), VPartitionTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.VPartitionTupleBufferManager), ITuplePointerAccessor(org.apache.hyracks.dataflow.std.buffermanager.ITuplePointerAccessor), TuplePointer(org.apache.hyracks.dataflow.std.structures.TuplePointer), ISimpleFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager), ITuplePartitionComputer(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputer), IDeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.IDeallocatableFramePool), FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender), IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor), SerializableHashTable(org.apache.hyracks.dataflow.std.structures.SerializableHashTable), FrameTuplePairComparator(org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator), BitSet(java.util.BitSet), FieldHashPartitionComputerFamily(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFamily), ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException), VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame), DeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.DeallocatableFramePool), PreferToSpillFullyOccupiedFramePolicy(org.apache.hyracks.dataflow.std.buffermanager.PreferToSpillFullyOccupiedFramePolicy), ISerializableTable(org.apache.hyracks.dataflow.std.structures.ISerializableTable), IPartitionedTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.IPartitionedTupleBufferManager)
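
The flushFrames body above relies on a common FrameTupleAppender idiom: try to append the built tuple; if the current frame is full, flush it to the writer and retry once; if the retry on a freshly cleared frame also fails, the tuple is simply too large for any frame. A stand-alone sketch of that idiom (the helper name appendOrFlush is illustrative, not part of the project):

static void appendOrFlush(FrameTupleAppender appender, IFrameWriter writer, ArrayTupleBuilder tb)
        throws HyracksDataException {
    if (!appender.appendSkipEmptyField(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
        // The current frame is full: push it downstream, clear it, then retry the append.
        appender.write(writer, true);
        if (!appender.appendSkipEmptyField(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
            // Even an empty frame cannot hold this tuple.
            throw new HyracksDataException("The output item is too large to be fit into a frame.");
        }
    }
}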

Example 18 with VSizeFrame

Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.

From the class ResultWriterOperatorDescriptor, method createPushRuntime.

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) throws HyracksDataException {
    final IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
    final IFrame frame = new VSizeFrame(ctx);
    final FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
    frameOutputStream.reset(frame, true);
    PrintStream printStream = new PrintStream(frameOutputStream);
    final RecordDescriptor outRecordDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
    final IResultSerializer resultSerializer = resultSerializerFactory.createResultSerializer(outRecordDesc, printStream);
    final FrameTupleAccessor frameTupleAccessor = new FrameTupleAccessor(outRecordDesc);
    return new AbstractUnaryInputSinkOperatorNodePushable() {

        private IFrameWriter datasetPartitionWriter;

        private boolean failed = false;

        @Override
        public void open() throws HyracksDataException {
            try {
                datasetPartitionWriter = dpm.createDatasetPartitionWriter(ctx, rsId, ordered, asyncMode, partition, nPartitions);
                datasetPartitionWriter.open();
                resultSerializer.init();
            } catch (HyracksException e) {
                throw HyracksDataException.create(e);
            }
        }

        @Override
        public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
            frameTupleAccessor.reset(buffer);
            for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
                resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                if (!frameOutputStream.appendTuple()) {
                    frameOutputStream.flush(datasetPartitionWriter);
                    resultSerializer.appendTuple(frameTupleAccessor, tIndex);
                    frameOutputStream.appendTuple();
                }
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            failed = true;
            datasetPartitionWriter.fail();
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                if (!failed && frameOutputStream.getTupleCount() > 0) {
                    frameOutputStream.flush(datasetPartitionWriter);
                }
            } catch (Exception e) {
                datasetPartitionWriter.fail();
                throw e;
            } finally {
                datasetPartitionWriter.close();
            }
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("{ ");
            sb.append("\"rsId\": \"").append(rsId).append("\", ");
            sb.append("\"ordered\": ").append(ordered).append(", ");
            sb.append("\"asyncMode\": ").append(asyncMode).append(" }");
            return sb.toString();
        }
    };
}
Also used: PrintStream(java.io.PrintStream), IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter), IFrame(org.apache.hyracks.api.comm.IFrame), IResultSerializer(org.apache.hyracks.api.dataflow.value.IResultSerializer), RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor), HyracksException(org.apache.hyracks.api.exceptions.HyracksException), ByteBuffer(java.nio.ByteBuffer), VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame), HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException), IOException(java.io.IOException), IDatasetPartitionManager(org.apache.hyracks.api.dataset.IDatasetPartitionManager), AbstractUnaryInputSinkOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable), FrameOutputStream(org.apache.hyracks.dataflow.common.comm.io.FrameOutputStream), FrameTupleAccessor(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor)
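
The VSizeFrame detail worth noting above is the stream wiring: the output frame backs a FrameOutputStream, which in turn backs the PrintStream handed to the result serializer, so serialized text lands directly in a Hyracks frame and frameOutputStream.appendTuple() commits it as a tuple. A minimal sketch of just that wiring, assuming a task context ctx as in the example:

IFrame frame = new VSizeFrame(ctx);
FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getInitialFrameSize());
// Back the stream with the frame, clearing any previous contents.
frameOutputStream.reset(frame, true);
PrintStream printStream = new PrintStream(frameOutputStream);
// A result serializer writes each tuple's text through printStream; a later
// frameOutputStream.appendTuple() call commits the bytes written so far as one tuple.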

Example 19 with VSizeFrame

Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.

From the class OptimizedHybridHashJoin, method loadSpilledPartitionToMem.

private boolean loadSpilledPartitionToMem(int pid, RunFileWriter wr) throws HyracksDataException {
    RunFileReader r = wr.createReader();
    try {
        r.open();
        if (reloadBuffer == null) {
            reloadBuffer = new VSizeFrame(ctx);
        }
        while (r.nextFrame(reloadBuffer)) {
            accessorBuild.reset(reloadBuffer.getBuffer());
            for (int tid = 0; tid < accessorBuild.getTupleCount(); tid++) {
                if (bufferManager.insertTuple(pid, accessorBuild, tid, tempPtr)) {
                    continue;
                }
                // If the insert failed for some reason (e.g. due to fragmentation),
                // we need to clear the occupied frames.
                bufferManager.clearPartition(pid);
                return false;
            }
        }
        // The run file has been fully loaded into memory, so delete it when the reader is closed.
        r.setDeleteAfterClose(true);
    } finally {
        r.close();
    }
    spilledStatus.set(pid, false);
    buildRFWriters[pid] = null;
    return true;
}
Also used: RunFileReader(org.apache.hyracks.dataflow.common.io.RunFileReader), VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame)
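
The method above follows the usual pattern for reading a spilled run back into memory: allocate a single VSizeFrame lazily, reuse it for every nextFrame call (it grows if a spilled frame is larger than the initial frame size), and mark the run for deletion once it has been fully consumed. A reduced sketch of just the read loop; processFrame is an illustrative callback, while ctx and reloadBuffer mirror the fields used above.

private void readRun(RunFileWriter wr) throws HyracksDataException {
    RunFileReader r = wr.createReader();
    try {
        r.open();
        if (reloadBuffer == null) {
            // Allocated once and reused across frames; it grows as needed.
            reloadBuffer = new VSizeFrame(ctx);
        }
        while (r.nextFrame(reloadBuffer)) {
            // Illustrative: consume the reloaded frame.
            processFrame(reloadBuffer.getBuffer());
        }
        // The on-disk run is no longer needed once it has been read back.
        r.setDeleteAfterClose(true);
    } finally {
        r.close();
    }
}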

Example 20 with VSizeFrame

Use of org.apache.hyracks.api.comm.VSizeFrame in project asterixdb by apache.

From the class ConstantTupleSourceOperatorNodePushable, method initialize.

@Override
public void initialize() throws HyracksDataException {
    FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
    if (fieldSlots != null && tupleData != null && tupleSize > 0) {
        appender.append(fieldSlots, tupleData, 0, tupleSize);
    }
    writer.open();
    try {
        appender.write(writer, false);
    } catch (Throwable th) {
        writer.fail();
        throw new HyracksDataException(th);
    } finally {
        writer.close();
    }
}
Also used: FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender), VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame), HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)
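
ConstantTupleSourceOperatorNodePushable receives its single tuple pre-serialized as fieldSlots and tupleData; when building such a tuple from scratch, the usual companion is ArrayTupleBuilder, whose field-end offsets and byte array feed the same append call. A hedged sketch under that assumption: ctx, writer, and the single integer field are illustrative, and IntegerSerializerDeserializer is the standard Hyracks marshaller rather than something used in this example.

ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
tb.reset();
// Serialize one illustrative field into the builder, then close the field.
IntegerSerializerDeserializer.INSTANCE.serialize(42, tb.getDataOutput());
tb.addFieldEndOffset();
FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
// getFieldEndOffsets() plays the role of fieldSlots; getByteArray()/getSize() hold the tuple bytes.
appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
appender.write(writer, true);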

Aggregations

VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame): 63
FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender): 32
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 22
FrameTupleAccessor (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor): 18
IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext): 17
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 16
Test (org.junit.Test): 16
IFrame (org.apache.hyracks.api.comm.IFrame): 13
ArrayList (java.util.ArrayList): 11
DataOutput (java.io.DataOutput): 10
IFrameTupleAccessor (org.apache.hyracks.api.comm.IFrameTupleAccessor): 9
FrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.FrameTupleReference): 9
IOException (java.io.IOException): 8
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter): 8
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 8
ConcurrentFramePool (org.apache.asterix.common.memory.ConcurrentFramePool): 7
FeedRuntimeInputHandler (org.apache.asterix.external.feed.dataflow.FeedRuntimeInputHandler): 7
FeedPolicyAccessor (org.apache.asterix.external.feed.policy.FeedPolicyAccessor): 7
TestFrameWriter (org.apache.hyracks.api.test.TestFrameWriter): 7
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 6