
Example 41 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class HashSpillableTableFactory, the method buildSpillableTable:

@Override
public ISpillableTable buildSpillableTable(final IHyracksTaskContext ctx, int suggestTableSize, long inputDataBytesSize, final int[] keyFields, final IBinaryComparator[] comparators, final INormalizedKeyComputer firstKeyNormalizerFactory, IAggregatorDescriptorFactory aggregateFactory, RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor, final int framesLimit, final int seed) throws HyracksDataException {
    final int tableSize = suggestTableSize;
    // For the output, we need to have at least one frame.
    if (framesLimit < MIN_FRAME_LIMT) {
        throw new HyracksDataException("The given frame limit is too small to partition the data.");
    }
    final int[] intermediateResultKeys = new int[keyFields.length];
    for (int i = 0; i < keyFields.length; i++) {
        intermediateResultKeys[i] = i;
    }
    final FrameTuplePairComparator ftpcInputCompareToAggregate = new FrameTuplePairComparator(keyFields, intermediateResultKeys, comparators);
    final ITuplePartitionComputer tpc = new FieldHashPartitionComputerFamily(keyFields, hashFunctionFamilies).createPartitioner(seed);
    // For calculating the hash value of already-aggregated tuples (not incoming tuples).
    // This computer is required to compute the hash value of an aggregated tuple
    // while doing garbage collection on the hash table.
    final ITuplePartitionComputer tpcIntermediate = new FieldHashPartitionComputerFamily(intermediateResultKeys, hashFunctionFamilies).createPartitioner(seed);
    final IAggregatorDescriptor aggregator = aggregateFactory.createAggregator(ctx, inRecordDescriptor, outRecordDescriptor, keyFields, intermediateResultKeys, null);
    final AggregateState aggregateState = aggregator.createAggregateStates();
    final ArrayTupleBuilder stateTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    //TODO(jf) research the optimal partition size
    long memoryBudget = Math.max(MIN_DATA_TABLE_FRAME_LIMT + MIN_HASH_TABLE_FRAME_LIMT, framesLimit - OUTPUT_FRAME_LIMT - MIN_HASH_TABLE_FRAME_LIMT);
    final int numPartitions = getNumOfPartitions(inputDataBytesSize / ctx.getInitialFrameSize(), memoryBudget);
    final int entriesPerPartition = (int) Math.ceil(1.0 * tableSize / numPartitions);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("created hashtable, table size:" + tableSize + " file size:" + inputDataBytesSize + "  #partitions:" + numPartitions);
    }
    final ArrayTupleBuilder outputTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
    return new ISpillableTable() {

        private final TuplePointer pointer = new TuplePointer();

        private final BitSet spilledSet = new BitSet(numPartitions);

        // This frame pool will be shared by both data table and hash table.
        private final IDeallocatableFramePool framePool = new DeallocatableFramePool(ctx, framesLimit * ctx.getInitialFrameSize());

        // buffer manager for hash table
        private final ISimpleFrameBufferManager bufferManagerForHashTable = new FramePoolBackedFrameBufferManager(framePool);

        private final ISerializableTable hashTableForTuplePointer = new SerializableHashTable(tableSize, ctx, bufferManagerForHashTable);

        // buffer manager for data table
        final IPartitionedTupleBufferManager bufferManager = new VPartitionTupleBufferManager(PreferToSpillFullyOccupiedFramePolicy.createAtMostOneFrameForSpilledPartitionConstrain(spilledSet), numPartitions, framePool);

        final ITuplePointerAccessor bufferAccessor = bufferManager.getTuplePointerAccessor(outRecordDescriptor);

        private final PreferToSpillFullyOccupiedFramePolicy spillPolicy = new PreferToSpillFullyOccupiedFramePolicy(bufferManager, spilledSet);

        private final FrameTupleAppender outputAppender = new FrameTupleAppender(new VSizeFrame(ctx));

        @Override
        public void close() throws HyracksDataException {
            hashTableForTuplePointer.close();
            aggregator.close();
        }

        @Override
        public void clear(int partition) throws HyracksDataException {
            for (int p = getFirstEntryInHashTable(partition); p < getLastEntryInHashTable(partition); p++) {
                hashTableForTuplePointer.delete(p);
            }
            // Checks whether the garbage collection is required and conducts a garbage collection if so.
            if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                int numberOfFramesReclaimed = hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Garbage Collection on Hash table is done. Deallocated frames:" + numberOfFramesReclaimed);
                }
            }
            bufferManager.clearPartition(partition);
        }

        private int getPartition(int entryInHashTable) {
            return entryInHashTable / entriesPerPartition;
        }

        private int getFirstEntryInHashTable(int partition) {
            return partition * entriesPerPartition;
        }

        private int getLastEntryInHashTable(int partition) {
            return Math.min(tableSize, (partition + 1) * entriesPerPartition);
        }

        @Override
        public boolean insert(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            for (int i = 0; i < hashTableForTuplePointer.getTupleCount(entryInHashTable); i++) {
                hashTableForTuplePointer.getTuplePointer(entryInHashTable, i, pointer);
                bufferAccessor.reset(pointer);
                int c = ftpcInputCompareToAggregate.compare(accessor, tIndex, bufferAccessor);
                if (c == 0) {
                    aggregateExistingTuple(accessor, tIndex, bufferAccessor, pointer.getTupleIndex());
                    return true;
                }
            }
            return insertNewAggregateEntry(entryInHashTable, accessor, tIndex);
        }

        /**
         * Inserts a new aggregate entry into the data table and the hash table.
         * This insertion must be an atomic operation; we cannot have a partial
         * success or failure. So, if an insertion succeeds on the data table and
         * the same insertion on the hash table fails, we need to revert the effect
         * of the data table insertion.
         */
        private boolean insertNewAggregateEntry(int entryInHashTable, IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            initStateTupleBuilder(accessor, tIndex);
            int pid = getPartition(entryInHashTable);
            // Insertion to the data table
            if (!bufferManager.insertTuple(pid, stateTupleBuilder.getByteArray(), stateTupleBuilder.getFieldEndOffsets(), 0, stateTupleBuilder.getSize(), pointer)) {
                return false;
            }
            // Insertion to the hash table
            if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
                // To preserve the atomicity of this method, we need to undo the effect
                // of the above bufferManager.insertTuple() call since the given insertion has failed.
                bufferManager.cancelInsertTuple(pid);
                return false;
            }
            return true;
        }

        private void initStateTupleBuilder(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            stateTupleBuilder.reset();
            for (int k = 0; k < keyFields.length; k++) {
                stateTupleBuilder.addField(accessor, tIndex, keyFields[k]);
            }
            aggregator.init(stateTupleBuilder, accessor, tIndex, aggregateState);
        }

        private void aggregateExistingTuple(IFrameTupleAccessor accessor, int tIndex, ITuplePointerAccessor bufferAccessor, int tupleIndex) throws HyracksDataException {
            aggregator.aggregate(accessor, tIndex, bufferAccessor, tupleIndex, aggregateState);
        }

        @Override
        public int flushFrames(int partition, IFrameWriter writer, AggregateType type) throws HyracksDataException {
            int count = 0;
            for (int hashEntryPid = getFirstEntryInHashTable(partition); hashEntryPid < getLastEntryInHashTable(partition); hashEntryPid++) {
                count += hashTableForTuplePointer.getTupleCount(hashEntryPid);
                for (int tid = 0; tid < hashTableForTuplePointer.getTupleCount(hashEntryPid); tid++) {
                    hashTableForTuplePointer.getTuplePointer(hashEntryPid, tid, pointer);
                    bufferAccessor.reset(pointer);
                    outputTupleBuilder.reset();
                    for (int k = 0; k < intermediateResultKeys.length; k++) {
                        outputTupleBuilder.addField(bufferAccessor.getBuffer().array(), bufferAccessor.getAbsFieldStartOffset(intermediateResultKeys[k]), bufferAccessor.getFieldLength(intermediateResultKeys[k]));
                    }
                    boolean hasOutput = false;
                    switch(type) {
                        case PARTIAL:
                            hasOutput = aggregator.outputPartialResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                        case FINAL:
                            hasOutput = aggregator.outputFinalResult(outputTupleBuilder, bufferAccessor, pointer.getTupleIndex(), aggregateState);
                            break;
                    }
                    if (hasOutput && !outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                        outputAppender.write(writer, true);
                        if (!outputAppender.appendSkipEmptyField(outputTupleBuilder.getFieldEndOffsets(), outputTupleBuilder.getByteArray(), 0, outputTupleBuilder.getSize())) {
                            throw new HyracksDataException("The output item is too large to be fit into a frame.");
                        }
                    }
                }
            }
            outputAppender.write(writer, true);
            spilledSet.set(partition);
            return count;
        }

        @Override
        public int getNumPartitions() {
            return bufferManager.getNumPartitions();
        }

        @Override
        public int findVictimPartition(IFrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
            int entryInHashTable = tpc.partition(accessor, tIndex, tableSize);
            int partition = getPartition(entryInHashTable);
            return spillPolicy.selectVictimPartition(partition);
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) FramePoolBackedFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.FramePoolBackedFrameBufferManager) VPartitionTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.VPartitionTupleBufferManager) ITuplePointerAccessor(org.apache.hyracks.dataflow.std.buffermanager.ITuplePointerAccessor) TuplePointer(org.apache.hyracks.dataflow.std.structures.TuplePointer) ISimpleFrameBufferManager(org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager) ITuplePartitionComputer(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputer) IDeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.IDeallocatableFramePool) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) SerializableHashTable(org.apache.hyracks.dataflow.std.structures.SerializableHashTable) FrameTuplePairComparator(org.apache.hyracks.dataflow.std.util.FrameTuplePairComparator) BitSet(java.util.BitSet) FieldHashPartitionComputerFamily(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFamily) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) IDeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.IDeallocatableFramePool) DeallocatableFramePool(org.apache.hyracks.dataflow.std.buffermanager.DeallocatableFramePool) PreferToSpillFullyOccupiedFramePolicy(org.apache.hyracks.dataflow.std.buffermanager.PreferToSpillFullyOccupiedFramePolicy) ISerializableTable(org.apache.hyracks.dataflow.std.structures.ISerializableTable) IPartitionedTupleBufferManager(org.apache.hyracks.dataflow.std.buffermanager.IPartitionedTupleBufferManager)
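
The two builders above (stateTupleBuilder and outputTupleBuilder) follow the same ArrayTupleBuilder protocol: reset, append each field, then hand the byte array, field-end offsets, and size to an appender. A minimal standalone sketch of that protocol, with made-up field values for illustration:

import java.io.DataOutput;

import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;

public class ArrayTupleBuilderSketch {
    public static void main(String[] args) throws Exception {
        // The constructor argument is the number of fields per tuple.
        ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
        DataOutput dos = tb.getDataOutput();

        tb.reset();                 // clear builder state before each new tuple
        dos.writeInt(42);           // field 0: raw bytes written through the DataOutput
        tb.addFieldEndOffset();     // close field 0
        dos.writeUTF("forty-two");  // field 1
        tb.addFieldEndOffset();     // close field 1

        // The triple that the appender calls in the examples above consume:
        byte[] data = tb.getByteArray();
        int[] fieldEnds = tb.getFieldEndOffsets();
        int size = tb.getSize();
        System.out.println("tuple: " + size + " bytes, " + fieldEnds.length + " fields");
    }
}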

Example 42 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class DelimitedDataTupleParserFactory, the method createTupleParser:

@Override
public ITupleParser createTupleParser(final IHyracksTaskContext ctx) {
    return new ITupleParser() {

        @Override
        public void parse(InputStream in, IFrameWriter writer) throws HyracksDataException {
            try {
                IValueParser[] valueParsers = new IValueParser[valueParserFactories.length];
                for (int i = 0; i < valueParserFactories.length; ++i) {
                    valueParsers[i] = valueParserFactories[i].createValueParser();
                }
                IFrame frame = new VSizeFrame(ctx);
                FrameTupleAppender appender = new FrameTupleAppender();
                appender.reset(frame, true);
                ArrayTupleBuilder tb = new ArrayTupleBuilder(valueParsers.length);
                DataOutput dos = tb.getDataOutput();
                FieldCursorForDelimitedDataParser cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote);
                while (cursor.nextRecord()) {
                    tb.reset();
                    for (int i = 0; i < valueParsers.length; ++i) {
                        if (!cursor.nextField()) {
                            break;
                        }
                        // Eliminate double quotes in the field that we are going to parse
                        if (cursor.isDoubleQuoteIncludedInThisField) {
                            cursor.eliminateDoubleQuote(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart);
                            cursor.fEnd -= cursor.doubleQuoteCount;
                            cursor.isDoubleQuoteIncludedInThisField = false;
                        }
                        valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos);
                        tb.addFieldEndOffset();
                    }
                    FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
                }
                appender.write(writer, true);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
Also used : IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) DataOutput(java.io.DataOutput) InputStreamReader(java.io.InputStreamReader) IFrame(org.apache.hyracks.api.comm.IFrame) InputStream(java.io.InputStream) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IOException(java.io.IOException) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) IValueParser(org.apache.hyracks.dataflow.common.data.parsers.IValueParser)
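
For context, the factory above is driven by one IValueParserFactory per output field plus the fieldDelimiter and quote characters that the field cursor consumes. A hedged wiring sketch: the three-argument constructor is assumed from those fields, and the parser factories are the standard Hyracks ones.

import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.IntegerParserFactory;
import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
import org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory;

public class DelimitedParserWiring {
    public static DelimitedDataTupleParserFactory pipeDelimited() {
        // One value parser per output field, in field order.
        IValueParserFactory[] fieldParsers =
                { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
        // Assumed constructor: (parsers, field delimiter, quote character),
        // matching the fieldDelimiter and quote fields referenced in parse() above.
        return new DelimitedDataTupleParserFactory(fieldParsers, '|', '"');
    }
}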

Example 43 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class TreeIndexDiskOrderScanOperatorNodePushable, the method initialize:

@Override
public void initialize() throws HyracksDataException {
    treeIndexHelper.open();
    ITreeIndex treeIndex = (ITreeIndex) treeIndexHelper.getIndexInstance();
    try {
        ITreeIndexFrame cursorFrame = treeIndex.getLeafFrameFactory().createFrame();
        ITreeIndexCursor cursor = new TreeIndexDiskOrderScanCursor(cursorFrame);
        LocalResource resource = treeIndexHelper.getResource();
        ISearchOperationCallback searchCallback = searchCallbackFactory.createSearchOperationCallback(resource.getId(), ctx, null);
        ITreeIndexAccessor indexAccessor = (ITreeIndexAccessor) treeIndex.createAccessor(NoOpOperationCallback.INSTANCE, searchCallback);
        try {
            writer.open();
            indexAccessor.diskOrderScan(cursor);
            int fieldCount = treeIndex.getFieldCount();
            FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
            ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
            DataOutput dos = tb.getDataOutput();
            while (cursor.hasNext()) {
                tb.reset();
                cursor.next();
                ITupleReference frameTuple = cursor.getTuple();
                for (int i = 0; i < frameTuple.getFieldCount(); i++) {
                    dos.write(frameTuple.getFieldData(i), frameTuple.getFieldStart(i), frameTuple.getFieldLength(i));
                    tb.addFieldEndOffset();
                }
                FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
            }
            appender.write(writer, true);
        } catch (Throwable th) {
            writer.fail();
            throw new HyracksDataException(th);
        } finally {
            try {
                cursor.close();
            } catch (Exception cursorCloseException) {
                throw new IllegalStateException(cursorCloseException);
            } finally {
                writer.close();
            }
        }
    } catch (Throwable th) {
        treeIndexHelper.close();
        throw new HyracksDataException(th);
    }
}
Also used : ITreeIndexCursor(org.apache.hyracks.storage.am.common.api.ITreeIndexCursor) DataOutput(java.io.DataOutput) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISearchOperationCallback(org.apache.hyracks.storage.common.ISearchOperationCallback) ITreeIndexAccessor(org.apache.hyracks.storage.am.common.api.ITreeIndexAccessor) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) LocalResource(org.apache.hyracks.storage.common.LocalResource) TreeIndexDiskOrderScanCursor(org.apache.hyracks.storage.am.common.impls.TreeIndexDiskOrderScanCursor) ITreeIndexFrame(org.apache.hyracks.storage.am.common.api.ITreeIndexFrame) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex)
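
A side note on the copy loop above: the dos.write(...) and tb.addFieldEndOffset() pair can be collapsed into the addField(byte[], int, int) overload that Example 41 already uses (see the outputTupleBuilder.addField call there). An equivalent sketch of the loop body:

import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;

public class TupleCopy {
    static void copyTuple(ITupleReference frameTuple, ArrayTupleBuilder tb) throws HyracksDataException {
        tb.reset();
        for (int i = 0; i < frameTuple.getFieldCount(); i++) {
            // addField copies the field bytes and records the field-end offset in one call.
            tb.addField(frameTuple.getFieldData(i), frameTuple.getFieldStart(i), frameTuple.getFieldLength(i));
        }
    }
}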

Example 44 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class TreeIndexStatsOperatorNodePushable, the method initialize:

@Override
public void initialize() throws HyracksDataException {
    treeIndexHelper.open();
    ITreeIndex treeIndex = (ITreeIndex) treeIndexHelper.getIndexInstance();
    try {
        writer.open();
        IBufferCache bufferCache = storageManager.getBufferCache(ctx.getJobletContext().getServiceContext());
        IFileMapProvider fileMapProvider = storageManager.getFileMapProvider(ctx.getJobletContext().getServiceContext());
        LocalResource resource = treeIndexHelper.getResource();
        IIOManager ioManager = ctx.getIoManager();
        FileReference fileRef = ioManager.resolve(resource.getPath());
        int indexFileId = fileMapProvider.lookupFileId(fileRef);
        TreeIndexStatsGatherer statsGatherer = new TreeIndexStatsGatherer(bufferCache, treeIndex.getPageManager(), indexFileId, treeIndex.getRootPageId());
        TreeIndexStats stats = statsGatherer.gatherStats(treeIndex.getLeafFrameFactory().createFrame(), treeIndex.getInteriorFrameFactory().createFrame(), treeIndex.getPageManager().createMetadataFrame());
        // Write the stats output as a single string field.
        FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
        ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
        DataOutput dos = tb.getDataOutput();
        tb.reset();
        utf8SerDer.serialize(stats.toString(), dos);
        tb.addFieldEndOffset();
        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
            throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
        }
        appender.write(writer, false);
    } catch (Exception e) {
        writer.fail();
        throw new HyracksDataException(e);
    } finally {
        try {
            writer.close();
        } finally {
            treeIndexHelper.close();
        }
    }
}
Also used : DataOutput(java.io.DataOutput) TreeIndexStatsGatherer(org.apache.hyracks.storage.am.common.util.TreeIndexStatsGatherer) TreeIndexStats(org.apache.hyracks.storage.am.common.util.TreeIndexStats) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IIOManager(org.apache.hyracks.api.io.IIOManager) VSizeFrame(org.apache.hyracks.api.comm.VSizeFrame) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) LocalResource(org.apache.hyracks.storage.common.LocalResource) IFileMapProvider(org.apache.hyracks.storage.common.file.IFileMapProvider) FrameTupleAppender(org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex) FileReference(org.apache.hyracks.api.io.FileReference) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache)
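
The single-field pattern above (serialize one string, close the field, append) is easy to isolate. A minimal sketch, assuming the utf8SerDer member above is a plain UTF8StringSerializerDeserializer instance:

import java.io.DataOutput;

import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class SingleStringField {
    public static ArrayTupleBuilder build(String text) throws Exception {
        ArrayTupleBuilder tb = new ArrayTupleBuilder(1); // exactly one output field
        DataOutput dos = tb.getDataOutput();
        tb.reset();
        new UTF8StringSerializerDeserializer().serialize(text, dos);
        tb.addFieldEndOffset();
        return tb; // getByteArray()/getFieldEndOffsets()/getSize() feed the appender
    }
}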

Example 45 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class LSMInvertedIndexTestUtils, the method compareActualAndExpectedIndexes:

/**
 * Compares the actual and expected indexes by comparing their inverted lists one by one.
 * Exercises the openInvertedListCursor() method of the inverted-index accessor.
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
    MultiComparator invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
    IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int tokenFieldCount = invIndex.getTokenTypeTraits().length;
    int invListFieldCount = invIndex.getInvListTypeTraits().length;
    // All tokens that were inserted into the indexes.
    Iterator<Comparable> tokensIter = testCtx.getAllTokens().iterator();
    // Search key for finding an inverted-list in the actual index.
    ArrayTupleBuilder searchKeyBuilder = new ArrayTupleBuilder(tokenFieldCount);
    ArrayTupleReference searchKey = new ArrayTupleReference();
    // Cursor over inverted list from actual index.
    IInvertedListCursor actualInvListCursor = invIndexAccessor.createInvertedListCursor();
    // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
    ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
    // Includes the token fields.
    ArrayTupleReference completeExpectedTuple = new ArrayTupleReference();
    // Field permutation and permuting tuple reference to strip away token fields from completeExpectedTuple.
    int[] fieldPermutation = new int[invListFieldCount];
    for (int i = 0; i < fieldPermutation.length; i++) {
        fieldPermutation[i] = tokenFieldCount + i;
    }
    PermutingTupleReference expectedTuple = new PermutingTupleReference(fieldPermutation);
    // Iterate over all tokens. Find the inverted lists in the actual and expected indexes and compare them.
    while (tokensIter.hasNext()) {
        Comparable token = tokensIter.next();
        // Position inverted-list iterator on expected index.
        CheckTuple checkLowKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
        checkLowKey.appendField(token);
        CheckTuple checkHighKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
        checkHighKey.appendField(token);
        SortedSet<CheckTuple> expectedInvList = OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), checkLowKey, checkHighKey);
        Iterator<CheckTuple> expectedInvListIter = expectedInvList.iterator();
        // Position inverted-list cursor in actual index.
        OrderedIndexTestUtils.createTupleFromCheckTuple(checkLowKey, searchKeyBuilder, searchKey, fieldSerdes);
        invIndexAccessor.openInvertedListCursor(actualInvListCursor, searchKey);
        if (actualInvListCursor.size() != expectedInvList.size()) {
            fail("Actual and expected inverted lists for token '" + token.toString() + "' have different sizes. Actual size: " + actualInvListCursor.size() + ". Expected size: " + expectedInvList.size() + ".");
        }
        // Compare inverted-list elements.
        int count = 0;
        actualInvListCursor.pinPages();
        try {
            while (actualInvListCursor.hasNext() && expectedInvListIter.hasNext()) {
                actualInvListCursor.next();
                ITupleReference actual = actualInvListCursor.getTuple();
                CheckTuple expected = expectedInvListIter.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, completeExpectedTuple, fieldSerdes);
                expectedTuple.reset(completeExpectedTuple);
                if (invListCmp.compare(actual, expectedTuple) != 0) {
                    fail("Inverted lists of token '" + token + "' differ at position " + count + ".");
                }
                count++;
            }
        } finally {
            actualInvListCursor.unpinPages();
        }
    }
}
Also used : MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)
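
The searchKeyBuilder/searchKey pairing above works because ArrayTupleReference is a zero-copy view over a builder's buffer; resetting the reference against the builder's output is what createTupleFromCheckTuple ultimately does with its builder argument. A minimal sketch of that bridge:

import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;

public class BuilderToReference {
    public static ArrayTupleReference asTuple(ArrayTupleBuilder tb) {
        ArrayTupleReference tuple = new ArrayTupleReference();
        // reset() points the reference at the builder's byte array and
        // field-end offsets; no bytes are copied.
        tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
        return tuple;
    }
}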

Aggregations

ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 99 usages
ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference): 45 usages
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 42 usages
Test (org.junit.Test): 40 usages
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 35 usages
DataOutput (java.io.DataOutput): 33 usages
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 25 usages
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 24 usages
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits): 21 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 21 usages
ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex): 18 usages
FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender): 17 usages
ConstantTupleSourceOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor): 17 usages
VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame): 16 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 16 usages
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 16 usages
IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor): 16 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 15 usages
BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor): 14 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 12 usages