Search in sources :

Example 1 with PermutingTupleReference

use of org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference in project asterixdb by apache.

the class OnDiskInvertedIndex method validate.

/**
 * Validates the underlying btree, then scans every entry of that btree and checks that the
 * elements of the corresponding inverted list are in strictly increasing order according to
 * the inverted-list comparators.
 *
 * @throws HyracksDataException
 *             if an inverted-list element does not compare greater than its predecessor, or
 *             if any of the underlying index operations fails.
 */
@Override
public void validate() throws HyracksDataException {
    btree.validate();
    // Unbounded range scan (null low/high keys) over the btree.
    IIndexAccessor btreeAccessor = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor tokenCursor = btreeAccessor.createSearchCursor(false);
    MultiComparator btreeCmp = MultiComparator.create(btree.getComparatorFactories());
    RangePredicate fullRange = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
    // Identity permutation over the token fields: the permuting tuple exposes only the
    // leading token fields of each btree tuple.
    final int numTokenFields = tokenTypeTraits.length;
    int[] tokenFields = new int[numTokenFields];
    for (int f = 0; f < numTokenFields; f++) {
        tokenFields[f] = f;
    }
    PermutingTupleReference tokenTuple = new PermutingTupleReference(tokenFields);
    IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IInvertedListCursor listCursor = invIndexAccessor.createInvertedListCursor();
    MultiComparator elementCmp = MultiComparator.create(invListCmpFactories);
    try {
        // Buffer holding a copy of the most recently visited inverted-list element.
        ArrayTupleBuilder lastElementBuilder = new ArrayTupleBuilder(invListTypeTraits.length);
        ArrayTupleReference lastElement = new ArrayTupleReference();
        btreeAccessor.search(tokenCursor, fullRange);
        while (tokenCursor.hasNext()) {
            tokenCursor.next();
            tokenTuple.reset(tokenCursor.getTuple());
            // Open the inverted list for this token; its pages stay pinned while we walk it.
            invIndexAccessor.openInvertedListCursor(listCursor, tokenTuple);
            listCursor.pinPages();
            try {
                // The first element of the list has no predecessor; just remember it.
                if (listCursor.hasNext()) {
                    listCursor.next();
                    ITupleReference first = listCursor.getTuple();
                    TupleUtils.copyTuple(lastElementBuilder, first, first.getFieldCount());
                    lastElement.reset(lastElementBuilder.getFieldEndOffsets(), lastElementBuilder.getByteArray());
                }
                // Every following element must compare strictly greater than the remembered one.
                while (listCursor.hasNext()) {
                    listCursor.next();
                    ITupleReference current = listCursor.getTuple();
                    int order = elementCmp.compare(current, lastElement);
                    if (order <= 0) {
                        throw new HyracksDataException("Index validation failed.");
                    }
                    // The current element becomes the predecessor for the next iteration.
                    TupleUtils.copyTuple(lastElementBuilder, current, current.getFieldCount());
                    lastElement.reset(lastElementBuilder.getFieldEndOffsets(), lastElementBuilder.getByteArray());
                }
            } finally {
                listCursor.unpinPages();
            }
        }
    } finally {
        tokenCursor.close();
    }
}
Also used : RangePredicate(org.apache.hyracks.storage.am.btree.impls.RangePredicate) MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) IIndexAccessor(org.apache.hyracks.storage.common.IIndexAccessor) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor)

Example 2 with PermutingTupleReference

use of org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method compareActualAndExpectedIndexes.

/**
 * Checks the actual inverted index against the expected index, one inverted list per token.
 * The actual lists are read through the openInvertedListCursor() method of the inverted-index
 * accessor. Fails if a pair of lists differs in size or in any compared element.
 *
 * @param testCtx
 *            test context providing the index, its accessor, the field serdes, the inserted
 *            tokens and the expected check tuples.
 * @throws HyracksDataException
 *             if an index or cursor operation fails.
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
    MultiComparator elementCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int numTokenFields = invIndex.getTokenTypeTraits().length;
    int numInvListFields = invIndex.getInvListTypeTraits().length;
    // Iterator over the tokens recorded in the test context.
    Iterator<Comparable> tokens = testCtx.getAllTokens().iterator();
    // Serialized token used as the search key into the actual index.
    ArrayTupleBuilder searchKeyBuilder = new ArrayTupleBuilder(numTokenFields);
    ArrayTupleReference searchKey = new ArrayTupleReference();
    // Cursor over one inverted list of the actual index.
    IInvertedListCursor actualCursor = accessor.createInvertedListCursor();
    // Serialized form of an expected CheckTuple; this still contains the token fields.
    ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
    ArrayTupleReference expectedWithToken = new ArrayTupleReference();
    // View over expectedWithToken that skips the leading token fields, leaving only the
    // inverted-list element fields.
    int[] invListFields = new int[numInvListFields];
    for (int f = 0; f < invListFields.length; f++) {
        invListFields[f] = numTokenFields + f;
    }
    PermutingTupleReference expectedElement = new PermutingTupleReference(invListFields);
    // For each token: locate its inverted list in both indexes and compare them.
    while (tokens.hasNext()) {
        Comparable token = tokens.next();
        // Low and high keys both hold just the token, selecting the expected tuples for it.
        CheckTuple lowKey = new CheckTuple(numTokenFields, numTokenFields);
        lowKey.appendField(token);
        CheckTuple highKey = new CheckTuple(numTokenFields, numTokenFields);
        highKey.appendField(token);
        SortedSet<CheckTuple> expectedList = OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), lowKey, highKey);
        Iterator<CheckTuple> expectedIter = expectedList.iterator();
        // Open the matching inverted list in the actual index.
        OrderedIndexTestUtils.createTupleFromCheckTuple(lowKey, searchKeyBuilder, searchKey, fieldSerdes);
        accessor.openInvertedListCursor(actualCursor, searchKey);
        if (actualCursor.size() != expectedList.size()) {
            fail("Actual and expected inverted lists for token '" + token.toString() + "' have different sizes. Actual size: " + actualCursor.size() + ". Expected size: " + expectedList.size() + ".");
        }
        // Walk both lists in lock-step; pages stay pinned for the duration of the walk.
        actualCursor.pinPages();
        try {
            for (int position = 0; actualCursor.hasNext() && expectedIter.hasNext(); position++) {
                actualCursor.next();
                ITupleReference actualElement = actualCursor.getTuple();
                CheckTuple expectedCheck = expectedIter.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheck, expectedBuilder, expectedWithToken, fieldSerdes);
                expectedElement.reset(expectedWithToken);
                boolean same = elementCmp.compare(actualElement, expectedElement) == 0;
                if (!same) {
                    fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
                }
            }
        } finally {
            actualCursor.unpinPages();
        }
    }
}
Also used : MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)

Example 3 with PermutingTupleReference

use of org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference in project asterixdb by apache.

the class LSMInvertedIndex method createCursorInitialState.

/**
 * Builds the cursor initial state for a search over this index.
 *
 * An {@link InvertedIndexSearchPredicate} yields an LSMInvertedIndexSearchCursorInitialState;
 * any other predicate is treated as a range search (mostly used in merges) and yields an
 * LSMInvertedIndexRangeSearchCursorInitialState.
 *
 * @param pred
 *            the search predicate; its runtime type selects the kind of initial state.
 * @param ictx
 *            operation context, passed to the regular-search initial state only.
 * @param includeMutableComponent
 *            forwarded unchanged to the initial state.
 * @param indexAccessors
 *            forwarded unchanged to the initial state.
 * @param deletedKeysBTreeAccessors
 *            forwarded unchanged to the initial state.
 * @param operationalComponents
 *            forwarded unchanged to the initial state.
 * @return the initial state matching the predicate type.
 */
private ICursorInitialState createCursorInitialState(ISearchPredicate pred, IIndexOperationContext ictx, boolean includeMutableComponent, ArrayList<IIndexAccessor> indexAccessors, ArrayList<IIndexAccessor> deletedKeysBTreeAccessors, List<ILSMComponent> operationalComponents) {
    PermutingTupleReference keysOnlyTuple = createKeysOnlyTupleReference();
    MultiComparator keyCmp = MultiComparator.create(invListCmpFactories);
    // Resolve the current mutable component exactly once. Reading currentMutableComponentId
    // several times could observe different components if the id changes in between
    // (NOTE(review): presumably it can change concurrently -- confirm), which would mix a
    // comparator from one component with a leaf frame factory from another.
    LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) memoryComponents.get(currentMutableComponentId.get());
    // Distinguish between regular searches and range searches (mostly used in merges).
    if (pred instanceof InvertedIndexSearchPredicate) {
        return new LSMInvertedIndexSearchCursorInitialState(keyCmp, keysOnlyTuple, indexAccessors, deletedKeysBTreeAccessors, mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), ictx, includeMutableComponent, getLsmHarness(), operationalComponents);
    }
    // Range search: additionally needs the comparator of the in-memory index's btree.
    InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) mutableComponent.getInvIndex();
    MultiComparator tokensAndKeysCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
    return new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeysCmp, keyCmp, keysOnlyTuple, mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), includeMutableComponent, getLsmHarness(), indexAccessors, deletedKeysBTreeAccessors, pred, operationalComponents);
}
Also used : PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ICursorInitialState(org.apache.hyracks.storage.common.ICursorInitialState) MultiComparator(org.apache.hyracks.storage.common.MultiComparator) InvertedIndexSearchPredicate(org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate) InMemoryInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex)

Example 4 with PermutingTupleReference

use of org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method testIndexSearch.

/**
 * Issues {@code numDocQueries + numRandomQueries} searches against the index of the given
 * test context and compares each sorted result list with the results produced by
 * getExpectedResults(). Queries that raise OCCURRENCE_THRESHOLD_PANIC_EXCEPTION are skipped.
 *
 * @param testCtx
 *            test context supplying the index, tokenizer factory, document corpus, check
 *            tuples and field serdes.
 * @param tupleGen
 *            generator for random query tuples.
 * @param rnd
 *            random source used to pick corpus documents as queries.
 * @param numDocQueries
 *            number of queries taken from the document corpus.
 * @param numRandomQueries
 *            number of randomly generated queries.
 * @param searchModifier
 *            search modifier used for both the actual and the expected results.
 * @param scanCountArray
 *            passed through to getExpectedResults().
 * @throws IOException
 *             declared by the original signature.
 * @throws HyracksDataException
 *             if a search or cursor operation fails for a reason other than a panic.
 */
public static void testIndexSearch(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd, int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException, HyracksDataException {
    IInvertedIndex invIndex = testCtx.invIndex;
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
    InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
    List<ITupleReference> corpus = testCtx.getDocumentCorpus();
    // The query tuple exposes only field 0 of whatever tuple it is reset to.
    PermutingTupleReference queryTuple = new PermutingTupleReference(new int[] { 0 });
    final int totalQueries = numDocQueries + numRandomQueries;
    for (int q = 0; q < totalQueries; q++) {
        // Corpus-based queries come first; once they (or the corpus) run out, use random ones.
        if (q < numDocQueries && q < corpus.size()) {
            // Use a randomly chosen corpus document as the query.
            int pick = Math.abs(rnd.nextInt() % corpus.size());
            queryTuple.reset(corpus.get(pick));
        } else {
            ITupleReference generated = tupleGen.next();
            queryTuple.reset(generated);
        }
        // Point the search predicate at the query tuple.
        searchPred.setQueryTuple(queryTuple);
        searchPred.setQueryFieldIndex(0);
        IIndexCursor resultCursor = accessor.createSearchCursor(false);
        boolean panic = false;
        try {
            accessor.search(resultCursor, searchPred);
        } catch (HyracksDataException e) {
            // Only panic queries are tolerated; everything else propagates.
            if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                throw e;
            }
            panic = true;
        }
        try {
            if (panic) {
                // Nothing to verify for a panic query; the finally block still closes the cursor.
                continue;
            }
            // Drain the cursor and deserialize the results so they can be sorted. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
            ArrayList<Integer> actualResults = new ArrayList<>();
            try {
                while (resultCursor.hasNext()) {
                    resultCursor.next();
                    ITupleReference hit = resultCursor.getTuple();
                    int value = IntegerPointable.getInteger(hit.getFieldData(0), hit.getFieldStart(0));
                    actualResults.add(Integer.valueOf(value));
                }
            } catch (HyracksDataException e) {
                if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                    throw e;
                }
                // Panic while consuming results: skip this query.
                continue;
            }
            Collections.sort(actualResults);
            // Compute the expected results for the same query.
            List<Integer> expectedResults = new ArrayList<>();
            LSMInvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), queryTuple, tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults, testCtx.getInvertedIndexType());
            // Compare the common prefix element by element, then report any leftover on either side.
            int shared = Math.min(expectedResults.size(), actualResults.size());
            for (int k = 0; k < shared; k++) {
                int expected = expectedResults.get(k);
                int actual = actualResults.get(k);
                if (actual != expected) {
                    fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
                }
            }
            if (expectedResults.size() > shared) {
                fail("Query results do not match. Actual results missing.");
            }
            if (actualResults.size() > shared) {
                fail("Query results do not match. Actual contains too many results.");
            }
        } finally {
            resultCursor.close();
        }
    }
}
Also used : InvertedIndexSearchPredicate(org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate) ArrayList(java.util.ArrayList) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor) IBinaryTokenizer(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer) IInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)

Aggregations

PermutingTupleReference (org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference)4 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)3 IInvertedIndexAccessor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor)3 MultiComparator (org.apache.hyracks.storage.common.MultiComparator)3 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)2 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)2 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)2 IInvertedIndex (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)2 IInvertedListCursor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)2 InvertedIndexSearchPredicate (org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate)2 IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor)2 ArrayList (java.util.ArrayList)1 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)1 RangePredicate (org.apache.hyracks.storage.am.btree.impls.RangePredicate)1 CheckTuple (org.apache.hyracks.storage.am.common.CheckTuple)1 InMemoryInvertedIndex (org.apache.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex)1 IBinaryTokenizer (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer)1 ICursorInitialState (org.apache.hyracks.storage.common.ICursorInitialState)1 IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)1