Search in sources:

Example 1 with IInvertedListCursor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in the asterixdb project by Apache.

From the class OnDiskInvertedIndex, method validate.

/**
 * Validates the underlying btree, then scans it and checks, for the inverted list opened for
 * each btree tuple, that every element compares strictly greater than the element before it
 * under the inverted-list comparators.
 *
 * @throws HyracksDataException with the message "Index validation failed." when an element
 *         compares less than or equal to its predecessor
 */
@Override
public void validate() throws HyracksDataException {
    btree.validate();
    // Set up a scan over the btree; the range predicate is built with null for both keys.
    IIndexAccessor treeAccessor = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor treeCursor = treeAccessor.createSearchCursor(false);
    MultiComparator treeCmp = MultiComparator.create(btree.getComparatorFactories());
    RangePredicate scanPred = new RangePredicate(null, null, true, true, treeCmp, treeCmp);
    // Identity permutation over the first tokenTypeTraits.length fields of a btree tuple.
    int[] tokenFields = new int[tokenTypeTraits.length];
    int fieldIdx = 0;
    while (fieldIdx < tokenTypeTraits.length) {
        tokenFields[fieldIdx] = fieldIdx;
        fieldIdx++;
    }
    PermutingTupleReference tokenView = new PermutingTupleReference(tokenFields);
    IInvertedIndexAccessor indexAccessor = (IInvertedIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IInvertedListCursor listCursor = indexAccessor.createInvertedListCursor();
    MultiComparator elementCmp = MultiComparator.create(invListCmpFactories);
    try {
        // Builder and reference holding a copy of the previously visited inverted-list element.
        ArrayTupleBuilder lastBuilder = new ArrayTupleBuilder(invListTypeTraits.length);
        ArrayTupleReference lastElement = new ArrayTupleReference();
        treeAccessor.search(treeCursor, scanPred);
        while (treeCursor.hasNext()) {
            treeCursor.next();
            tokenView.reset(treeCursor.getTuple());
            // Open the inverted list for this token and check that it is totally ordered.
            indexAccessor.openInvertedListCursor(listCursor, tokenView);
            listCursor.pinPages();
            try {
                // The first element (if any) has no predecessor; it only seeds lastElement.
                if (listCursor.hasNext()) {
                    listCursor.next();
                    rememberElement(lastBuilder, lastElement, listCursor.getTuple());
                }
                while (listCursor.hasNext()) {
                    listCursor.next();
                    ITupleReference current = listCursor.getTuple();
                    // Equal or smaller than the predecessor means the list is not strictly ordered.
                    boolean strictlyGreater = elementCmp.compare(current, lastElement) > 0;
                    if (!strictlyGreater) {
                        throw new HyracksDataException("Index validation failed.");
                    }
                    rememberElement(lastBuilder, lastElement, current);
                }
            } finally {
                listCursor.unpinPages();
            }
        }
    } finally {
        treeCursor.close();
    }
}

/**
 * Copies all fields of {@code source} into {@code builder} and resets {@code target} to the
 * builder's offsets and byte array.
 * NOTE(review): presumably needed because the cursor's tuple is reused across next() calls - confirm.
 */
private static void rememberElement(ArrayTupleBuilder builder, ArrayTupleReference target, ITupleReference source) throws HyracksDataException {
    TupleUtils.copyTuple(builder, source, source.getFieldCount());
    target.reset(builder.getFieldEndOffsets(), builder.getByteArray());
}
Also used : RangePredicate(org.apache.hyracks.storage.am.btree.impls.RangePredicate) MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IIndexAccessor(org.apache.hyracks.storage.common.IIndexAccessor) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor)

Example 2 with IInvertedListCursor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in the asterixdb project by Apache.

From the class PartitionedOnDiskInvertedIndex, method openInvertedListPartitionCursors.

/**
 * Searches the btree between keys chosen from the searcher and, for every tuple found, resets a
 * cached inverted-list cursor on it and registers that cursor both in
 * {@code cursorsOrderedByTokens} and, keyed by the tuple's num-tokens field, in
 * {@code invListPartitions}.
 *
 * @param searcher cast to {@link PartitionedTOccurrenceSearcher}
 * @param ictx cast to {@link OnDiskInvertedIndexOpContext}
 * @param numTokensLowerBound a negative value selects the prefix comparator and prefix key for the low end
 * @param numTokensUpperBound a negative value selects the prefix comparator and prefix key for the high end
 * @param invListPartitions receives each opened cursor together with its token count
 * @param cursorsOrderedByTokens receives each opened cursor in btree scan order
 * @return {@code true} if the btree search produced at least one tuple
 */
@Override
public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx, short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions, List<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException {
    PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
    OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
    partSearcher.setNumTokensBoundsInSearchKeys(numTokensLowerBound, numTokensUpperBound);

    // Low end of the range: comparator first, then the matching key.
    final boolean lowUsesPrefix = numTokensLowerBound < 0;
    ctx.getBtreePred().setLowKeyComparator(lowUsesPrefix ? ctx.getPrefixSearchCmp() : ctx.getSearchCmp());
    final ITupleReference lowSearchKey = lowUsesPrefix ? partSearcher.getPrefixSearchKey() : partSearcher.getFullLowSearchKey();

    // High end of the range, chosen the same way.
    final boolean highUsesPrefix = numTokensUpperBound < 0;
    ctx.getBtreePred().setHighKeyComparator(highUsesPrefix ? ctx.getPrefixSearchCmp() : ctx.getSearchCmp());
    final ITupleReference highSearchKey = highUsesPrefix ? partSearcher.getPrefixSearchKey() : partSearcher.getFullHighSearchKey();

    ctx.getBtreePred().setLowKey(lowSearchKey, true);
    ctx.getBtreePred().setHighKey(highSearchKey, true);
    ctx.getBtreeAccessor().search(ctx.getBtreeCursor(), ctx.getBtreePred());

    boolean foundAny = false;
    try {
        while (ctx.getBtreeCursor().hasNext()) {
            ctx.getBtreeCursor().next();
            ITupleReference entry = ctx.getBtreeCursor().getTuple();
            // Read the token count stored in the partitioning field of the btree tuple.
            byte[] numTokensBytes = entry.getFieldData(PARTITIONING_NUM_TOKENS_FIELD);
            int numTokensOffset = entry.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD);
            short numTokens = ShortPointable.getShort(numTokensBytes, numTokensOffset);
            IInvertedListCursor listCursor = partSearcher.getCachedInvertedListCursor();
            resetInvertedListCursor(entry, listCursor);
            cursorsOrderedByTokens.add(listCursor);
            invListPartitions.addInvertedListCursor(listCursor, numTokens);
            foundAny = true;
        }
    } finally {
        // Close and then reset the btree cursor whether or not the scan completed.
        ctx.getBtreeCursor().close();
        ctx.getBtreeCursor().reset();
    }
    return foundAny;
}
Also used : ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) PartitionedTOccurrenceSearcher(org.apache.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)

Example 3 with IInvertedListCursor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in the asterixdb project by Apache.

From the class InvertedListMerger, method merge.

/**
 * Merges the given inverted lists one after another into {@code searchResult}.
 * <p>
 * The cursors are first sorted with {@link Collections#sort(java.util.List)}. Each list is then
 * merged with the result of the previous step: intermediate results alternate between the two
 * internal buffers ({@code prevSearchResult} / {@code newSearchResult}), and the last list is
 * merged directly into {@code searchResult}. The first {@code numPrefixLists} lists are merged
 * as prefix lists; the rest are merged as suffix lists, either by probing or by scanning.
 * <p>
 * Each cursor's pages are pinned only while its list is merged and are unpinned in a
 * {@code finally} block, so a failure during the merge does not leave pages pinned.
 *
 * @param invListCursors cursors of the inverted lists to merge; sorted in place
 * @param occurrenceThreshold passed through to the suffix-list merge routines
 * @param numPrefixLists number of leading lists (after sorting) merged as prefix lists
 * @param searchResult receives the output of merging the last list
 * @throws HyracksDataException if pinning, merging or unpinning fails
 */
public void merge(ArrayList<IInvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists, SearchResult searchResult) throws HyracksDataException {
    Collections.sort(invListCursors);
    int numInvLists = invListCursors.size();
    for (int i = 0; i < numInvLists; i++) {
        // Swap the two buffers: the previous output becomes this step's input.
        SearchResult swapTemp = prevSearchResult;
        prevSearchResult = newSearchResult;
        newSearchResult = swapTemp;
        newSearchResult.reset();
        // Intermediate steps write to the temporary buffer; the last step writes to the final result.
        boolean isLastList = i + 1 == numInvLists;
        SearchResult result = isLastList ? searchResult : newSearchResult;
        IInvertedListCursor invListCursor = invListCursors.get(i);
        invListCursor.pinPages();
        try {
            if (i < numPrefixLists) {
                // Merge prefix list.
                mergePrefixList(invListCursor, prevSearchResult, result);
            } else {
                // Merge suffix list.
                int numInvListElements = invListCursor.size();
                int currentNumResults = prevSearchResult.getNumResults();
                // Should we binary search the next list or should we sort-merge it?
                if (currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) {
                    mergeSuffixListProbe(invListCursor, prevSearchResult, result, i, numInvLists, occurrenceThreshold);
                } else {
                    mergeSuffixListScan(invListCursor, prevSearchResult, result, i, numInvLists, occurrenceThreshold);
                }
            }
        } finally {
            // Always release the pinned pages, even when the merge step throws.
            invListCursor.unpinPages();
        }
    }
}
Also used : IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)

Example 4 with IInvertedListCursor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in the asterixdb project by Apache.

From the class LSMInvertedIndexTestUtils, method compareActualAndExpectedIndexes.

/**
 * Checks the actual index against the expected one, token by token: for every token in the test
 * context the inverted list is opened through the accessor's openInvertedListCursor() and
 * compared, first by size and then element by element, with the expected check tuples.
 *
 * @param testCtx supplies the index, its accessor, the field serdes, all tokens and the check tuples
 * @throws HyracksDataException if an index or cursor operation fails
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex index = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();
    MultiComparator elementCmp = MultiComparator.create(index.getInvListCmpFactories());
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int numTokenFields = index.getTokenTypeTraits().length;
    int numInvListFields = index.getInvListTypeTraits().length;
    // Every token that was inserted into the indexes.
    Iterator<Comparable> tokenIt = testCtx.getAllTokens().iterator();
    // Serialized token used as the search key into the actual index.
    ArrayTupleBuilder keyBuilder = new ArrayTupleBuilder(numTokenFields);
    ArrayTupleReference key = new ArrayTupleReference();
    // Cursor over an inverted list of the actual index.
    IInvertedListCursor actualCursor = accessor.createInvertedListCursor();
    // Builder and reference for the serialized form of an expected CheckTuple (token fields included).
    ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(serdes.length);
    ArrayTupleReference expectedWithToken = new ArrayTupleReference();
    // Permutation selecting only the fields that follow the token fields.
    int[] elementFields = new int[numInvListFields];
    int pos = 0;
    while (pos < elementFields.length) {
        elementFields[pos] = numTokenFields + pos;
        pos++;
    }
    PermutingTupleReference expectedElement = new PermutingTupleReference(elementFields);
    // For each token, locate its inverted list in both indexes and compare the two lists.
    while (tokenIt.hasNext()) {
        Comparable token = tokenIt.next();
        // Expected side: low and high keys both consist of the token only.
        CheckTuple checkLowKey = new CheckTuple(numTokenFields, numTokenFields);
        checkLowKey.appendField(token);
        CheckTuple checkHighKey = new CheckTuple(numTokenFields, numTokenFields);
        checkHighKey.appendField(token);
        SortedSet<CheckTuple> expectedList = OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), checkLowKey, checkHighKey);
        Iterator<CheckTuple> expectedIt = expectedList.iterator();
        // Actual side: open the cursor on the serialized token.
        OrderedIndexTestUtils.createTupleFromCheckTuple(checkLowKey, keyBuilder, key, serdes);
        accessor.openInvertedListCursor(actualCursor, key);
        int actualSize = actualCursor.size();
        int expectedSize = expectedList.size();
        if (actualSize != expectedSize) {
            fail("Actual and expected inverted lists for token '" + token.toString() + "' have different sizes. Actual size: " + actualSize + ". Expected size: " + expectedSize + ".");
        }
        // Walk both lists in lockstep and compare element by element.
        int position = 0;
        actualCursor.pinPages();
        try {
            while (actualCursor.hasNext() && expectedIt.hasNext()) {
                actualCursor.next();
                ITupleReference actualElement = actualCursor.getTuple();
                CheckTuple expectedCheck = expectedIt.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheck, expectedBuilder, expectedWithToken, serdes);
                expectedElement.reset(expectedWithToken);
                boolean sameElement = elementCmp.compare(actualElement, expectedElement) == 0;
                if (!sameElement) {
                    fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
                }
                position++;
            }
        } finally {
            actualCursor.unpinPages();
        }
    }
}
Also used : MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)

Example 5 with IInvertedListCursor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in the asterixdb project by Apache.

From the class TOccurrenceSearcher, method search.

/**
 * Tokenizes the query, opens one inverted-list cursor per query token, merges the lists into
 * {@code searchResult} and finally opens {@code resultCursor}.
 *
 * @param resultCursor opened with a null initial state and {@code searchPred} once merging is done
 * @param searchPred provides the query to tokenize and the search modifier
 * @param ictx operation context passed on when opening each inverted-list cursor
 * @throws HyracksDataException created from {@code ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION}
 *         when the search modifier yields an occurrence threshold of zero or less
 */
@Override
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred, IIndexOperationContext ictx) throws HyracksDataException {
    tokenizeQuery(searchPred);
    final int numQueryTokens = queryTokenAppender.getTupleCount();

    // Start from an empty cursor list and a rewound cursor cache.
    invListCursors.clear();
    invListCursorCache.reset();

    // Open one inverted-list cursor per query token.
    int tokenIdx = 0;
    while (tokenIdx < numQueryTokens) {
        searchKey.reset(queryTokenAppender, tokenIdx);
        IInvertedListCursor tokenCursor = invListCursorCache.getNext();
        invIndex.openInvertedListCursor(tokenCursor, searchKey, ictx);
        invListCursors.add(tokenCursor);
        tokenIdx++;
    }

    IInvertedIndexSearchModifier modifier = searchPred.getSearchModifier();
    occurrenceThreshold = modifier.getOccurrenceThreshold(numQueryTokens);
    // A non-positive threshold is rejected before any merging happens.
    if (occurrenceThreshold <= 0) {
        throw HyracksDataException.create(ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION);
    }
    int prefixListCount = modifier.getNumPrefixLists(occurrenceThreshold, invListCursors.size());

    searchResult.reset();
    invListMerger.merge(invListCursors, occurrenceThreshold, prefixListCount, searchResult);
    resultCursor.open(null, searchPred);
}
Also used : IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)

Aggregations

IInvertedListCursor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor): 5, ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference): 3, ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 2, ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference): 2, PermutingTupleReference (org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference): 2, IInvertedIndexAccessor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor): 2, MultiComparator (org.apache.hyracks.storage.common.MultiComparator): 2, ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 1, HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 1, RangePredicate (org.apache.hyracks.storage.am.btree.impls.RangePredicate): 1, CheckTuple (org.apache.hyracks.storage.am.common.CheckTuple): 1, IInvertedIndex (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex): 1, IInvertedIndexSearchModifier (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier): 1, PartitionedTOccurrenceSearcher (org.apache.hyracks.storage.am.lsm.invertedindex.search.PartitionedTOccurrenceSearcher): 1, IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor): 1, IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor): 1