Search in sources :

Example 6 with CheckTuple

use of org.apache.hyracks.storage.am.common.CheckTuple in project asterixdb by apache.

the class OrderedIndexTestUtils method checkPointSearches.

/**
 * Runs one point search per check tuple held by the test context and compares the tuple
 * returned by the index, if any, against the check tuple the search key was built from.
 * <p>
 * The search fails when the cursor produces more than one tuple. A search whose cursor
 * produces no tuple at all is not reported as a failure by this method.
 *
 * @param ictx test context; it is cast to {@code OrderedIndexTestContext}
 * @throws Exception propagated from the index accessor, the cursor, or the comparison helpers
 */
public void checkPointSearches(IIndexTestContext ictx) throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Testing Point Searches On All Expected Keys.");
    }
    OrderedIndexTestContext orderedCtx = (OrderedIndexTestContext) ictx;
    IIndexCursor cursor = orderedCtx.getIndexAccessor().createSearchCursor(false);

    // Builders and references are allocated once and refilled for every check tuple.
    ArrayTupleBuilder lowerBuilder = new ArrayTupleBuilder(orderedCtx.getKeyFieldCount());
    ArrayTupleReference lowerKey = new ArrayTupleReference();
    ArrayTupleBuilder upperBuilder = new ArrayTupleBuilder(orderedCtx.getKeyFieldCount());
    ArrayTupleReference upperKey = new ArrayTupleReference();
    RangePredicate pointPredicate = new RangePredicate(lowerKey, upperKey, true, true, null, null);

    for (CheckTuple expected : orderedCtx.getCheckTuples()) {
        // Low and high key are both built from the same check tuple, with both ends inclusive.
        createTupleFromCheckTuple(expected, lowerBuilder, lowerKey, orderedCtx.getFieldSerdes());
        createTupleFromCheckTuple(expected, upperBuilder, upperKey, orderedCtx.getFieldSerdes());
        MultiComparator lowerCmp =
                BTreeUtils.getSearchMultiComparator(orderedCtx.getComparatorFactories(), lowerKey);
        MultiComparator upperCmp =
                BTreeUtils.getSearchMultiComparator(orderedCtx.getComparatorFactories(), upperKey);
        pointPredicate.setLowKey(lowerKey, true);
        pointPredicate.setHighKey(upperKey, true);
        pointPredicate.setLowKeyComparator(lowerCmp);
        pointPredicate.setHighKeyComparator(upperCmp);

        orderedCtx.getIndexAccessor().search(cursor, pointPredicate);
        try {
            // Verify the first tuple found, when there is one.
            if (cursor.hasNext()) {
                cursor.next();
                compareActualAndExpected(cursor.getTuple(), expected, orderedCtx.getFieldSerdes());
            }
            // Anything beyond the first tuple is an error.
            if (cursor.hasNext()) {
                fail("Point search returned more than one answer.");
            }
        } finally {
            cursor.close();
        }
    }
}
Also used : RangePredicate(org.apache.hyracks.storage.am.btree.impls.RangePredicate) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)

Example 7 with CheckTuple

use of org.apache.hyracks.storage.am.common.CheckTuple in project asterixdb by apache.

the class RTreeTestUtils method bulkLoadDoubleTuples.

/**
 * Generates {@code numTuples} tuples of double fields, collects them as check tuples in a
 * temporary collection, bulk loads that collection via {@code bulkLoadCheckTuples}, and
 * finally inserts every generated check tuple into the context's own check tuples.
 *
 * @param ctx       test context supplying the field counts and receiving the check tuples
 * @param numTuples number of tuples to generate
 * @param rnd       random source handed to the key generator
 * @throws Exception propagated from the helpers invoked here
 */
@SuppressWarnings("unchecked")
public void bulkLoadDoubleTuples(IIndexTestContext ctx, int numTuples, Random rnd) throws Exception {
    int totalFields = ctx.getFieldCount();
    int keyFields = ctx.getKeyFieldCount();
    double[] values = new double[ctx.getFieldCount()];
    // Bound passed to the key generator: ceil(numTuples ^ (1 / keyFields)).
    double keyBound = Math.ceil(Math.pow(numTuples, 1.0 / keyFields));
    Collection<CheckTuple> pending = createCheckTuplesCollection();

    int generated = 0;
    while (generated < numTuples) {
        // Fill the key fields, then the payload fields, then record the expected tuple.
        setDoubleKeyFields(values, keyFields, keyBound, rnd);
        setDoublePayloadFields(values, keyFields, totalFields);
        ctx.insertCheckTuple(createDoubleCheckTuple(values, ctx.getKeyFieldCount()), pending);
        generated++;
    }

    bulkLoadCheckTuples(ctx, pending);

    // Make the loaded tuples part of the context's check tuples so later searches can be compared.
    for (CheckTuple loaded : pending) {
        ctx.insertCheckTuple(loaded, ctx.getCheckTuples());
    }
}
Also used : CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple)

Example 8 with CheckTuple

use of org.apache.hyracks.storage.am.common.CheckTuple in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method bulkLoadInvIndex.

/**
 * Builds an expected index from {@code numDocs} generated documents, bulk loads the index of
 * the test context from it, and then adds all expected check tuples to the context.
 *
 * @param testCtx    test context owning the index, the field serdes and the check tuples
 * @param tupleGen   source of the documents to index
 * @param numDocs    number of documents to draw from {@code tupleGen}; also passed to the
 *                   bulk loader on creation
 * @param appendOnly not read by this method
 * @throws HyracksDataException propagated from the index or bulk loader
 * @throws IOException          propagated from tuple generation or check-tuple insertion
 */
public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs, boolean appendOnly) throws HyracksDataException, IOException {
    // Expected index: a sorted set filled by inserting the generated documents one at a time.
    SortedSet<CheckTuple> expectedIndex = new TreeSet<>();
    int docsGenerated = 0;
    while (docsGenerated < numDocs) {
        ITupleReference document = tupleGen.next();
        testCtx.insertCheckTuples(document, expectedIndex);
        docsGenerated++;
    }

    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();

    // Feed the expected index, in its sorted iteration order, to the bulk loader of the actual index.
    IIndexBulkLoader loader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs, true);
    ArrayTupleBuilder builder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
    ArrayTupleReference serializedTuple = new ArrayTupleReference();
    for (CheckTuple expected : expectedIndex) {
        OrderedIndexTestUtils.createTupleFromCheckTuple(expected, builder, serializedTuple, serdes);
        loader.add(serializedTuple);
    }
    loader.end();

    // Record every check tuple of the temporary index in the test context.
    testCtx.getCheckTuples().addAll(expectedIndex);
}
Also used : CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) TreeSet(java.util.TreeSet) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Example 9 with CheckTuple

use of org.apache.hyracks.storage.am.common.CheckTuple in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method getExpectedResults.

/**
 * Computes the expected answer of an inverted-index search from the check tuples.
 * <p>
 * The search document's field 0 is tokenized; for every token the matching range of check
 * tuples is looked up and the element stored in each of them is counted in
 * {@code scanCountArray}. Every array index whose count reaches the search modifier's
 * occurrence threshold is then added to {@code expectedResults}.
 *
 * @param scanCountArray  scratch counters, zeroed on entry and indexed by the element value
 * @param checkTuples     expected index contents
 * @param searchDocument  tuple whose field 0 holds the query text
 * @param tokenizer       tokenizer applied to the query; it is reset and consumed twice
 * @param tokenSerde      deserializer turning a serialized token into a {@code Comparable}
 * @param searchModifier  supplies the occurrence threshold and, when {@code isPartitioned}
 *                        is set, the token-count bounds
 * @param expectedResults output list; cleared and then filled in ascending index order
 * @param isPartitioned   when true, token-count bounds are requested from the modifier and the
 *                        element is read from field 2 instead of field 1
 * @throws IOException propagated from tokenization or deserialization
 */
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples, ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde, IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned) throws IOException {
    // Start from zeroed counters and an empty result list.
    Arrays.fill(scanCountArray, 0);
    expectedResults.clear();
    GrowableArray tokenBytes = new GrowableArray();

    // First pass over the query: only count its tokens.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    int queryTokenCount = 0;
    for (; tokenizer.hasNext(); queryTokenCount++) {
        tokenizer.next();
    }

    // Negative bounds mean "no length filtering"; they stay negative unless the index is partitioned.
    short lowerBound = -1;
    short upperBound = -1;
    int elementField = 1;
    if (isPartitioned) {
        lowerBound = searchModifier.getNumTokensLowerBound((short) queryTokenCount);
        upperBound = searchModifier.getNumTokensUpperBound((short) queryTokenCount);
        elementField = 2;
    }
    int threshold = searchModifier.getOccurrenceThreshold(queryTokenCount);

    // Second pass over the query: look up each token's inverted list and count its elements.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken currentToken = tokenizer.getToken();
        tokenBytes.reset();
        currentToken.serializeToken(tokenBytes);
        ByteArrayInputStream rawTokenStream =
                new ByteArrayInputStream(tokenBytes.getByteArray(), 0, tokenBytes.getLength());
        DataInput tokenInput = new DataInputStream(rawTokenStream);
        Comparable tokenObj = (Comparable) tokenSerde.deserialize(tokenInput);

        // Low key: the token alone, or the token followed by the lower token-count bound.
        boolean filterLow = lowerBound >= 0;
        int lowWidth = filterLow ? 2 : 1;
        CheckTuple lowKey = new CheckTuple(lowWidth, lowWidth);
        lowKey.appendField(tokenObj);
        if (filterLow) {
            lowKey.appendField(Short.valueOf(lowerBound));
        }

        // High key: the token alone, or the token followed by the upper token-count bound.
        boolean filterHigh = upperBound >= 0;
        int highWidth = filterHigh ? 2 : 1;
        CheckTuple highKey = new CheckTuple(highWidth, highWidth);
        highKey.appendField(tokenObj);
        if (filterHigh) {
            highKey.appendField(Short.valueOf(upperBound));
        }

        // Check tuples between the two keys form the expected inverted list of this token.
        SortedSet<CheckTuple> invertedList =
                OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
        for (CheckTuple entry : invertedList) {
            Integer element = (Integer) entry.getField(elementField);
            scanCountArray[element]++;
        }
    }

    // Collect every index whose count reaches the threshold. The list is cleared a second
    // time here although nothing has been added to it since the clear at the top.
    expectedResults.clear();
    int candidate = 0;
    while (candidate < scanCountArray.length) {
        if (scanCountArray[candidate] >= threshold) {
            expectedResults.add(candidate);
        }
        candidate++;
    }
}
Also used : DataInput(java.io.DataInput) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) IToken(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken) ByteArrayInputStream(java.io.ByteArrayInputStream) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream)

Example 10 with CheckTuple

use of org.apache.hyracks.storage.am.common.CheckTuple in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method compareActualAndExpectedIndexes.

/**
 * Checks the actual inverted index against the expected one, token by token.
 * <p>
 * For every token known to the test context, the expected inverted list is taken from the
 * check tuples and the actual one is opened through the accessor's
 * {@code openInvertedListCursor()}. The two lists must have the same size, and their elements
 * are compared pairwise with the index's inverted-list comparator; the first mismatch fails
 * the test.
 *
 * @param testCtx test context providing the index, its accessor, the tokens and the check tuples
 * @throws HyracksDataException propagated from the index, accessor or cursor
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex invertedIndex = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();
    MultiComparator elementCmp = MultiComparator.create(invertedIndex.getInvListCmpFactories());
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int numTokenFields = invertedIndex.getTokenTypeTraits().length;
    int numInvListFields = invertedIndex.getInvListTypeTraits().length;

    // Iterator over every token the test context reports.
    Iterator<Comparable> tokens = testCtx.getAllTokens().iterator();

    // Reusable search key used to open an inverted list in the actual index.
    ArrayTupleBuilder keyBuilder = new ArrayTupleBuilder(numTokenFields);
    ArrayTupleReference key = new ArrayTupleReference();

    // Reusable cursor over an inverted list of the actual index.
    IInvertedListCursor actualCursor = accessor.createInvertedListCursor();

    // Reusable buffers for serializing an expected check tuple; the result still carries the token fields.
    ArrayTupleBuilder expectedTupleBuilder = new ArrayTupleBuilder(serdes.length);
    ArrayTupleReference expectedWithTokens = new ArrayTupleReference();

    // Permutation selecting only the fields that follow the token fields.
    int[] elementFields = new int[numInvListFields];
    for (int pos = 0; pos < elementFields.length; pos++) {
        elementFields[pos] = numTokenFields + pos;
    }
    PermutingTupleReference expectedElement = new PermutingTupleReference(elementFields);

    while (tokens.hasNext()) {
        Comparable token = tokens.next();

        // Expected side: the check tuples between a low and a high key that both hold only the token.
        CheckTuple expectedLow = new CheckTuple(numTokenFields, numTokenFields);
        expectedLow.appendField(token);
        CheckTuple expectedHigh = new CheckTuple(numTokenFields, numTokenFields);
        expectedHigh.appendField(token);
        SortedSet<CheckTuple> expectedList =
                OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), expectedLow, expectedHigh);
        Iterator<CheckTuple> expectedIter = expectedList.iterator();

        // Actual side: open the inverted list for the same token.
        OrderedIndexTestUtils.createTupleFromCheckTuple(expectedLow, keyBuilder, key, serdes);
        accessor.openInvertedListCursor(actualCursor, key);

        boolean sameSize = actualCursor.size() == expectedList.size();
        if (!sameSize) {
            fail("Actual and expected inverted lists for token '" + token.toString() + "' have different sizes. Actual size: " + actualCursor.size() + ". Expected size: " + expectedList.size() + ".");
        }

        // Walk both lists in lockstep; pages are pinned for the walk and always unpinned afterwards.
        int position = 0;
        actualCursor.pinPages();
        try {
            for (; actualCursor.hasNext() && expectedIter.hasNext(); position++) {
                actualCursor.next();
                ITupleReference actualElement = actualCursor.getTuple();
                CheckTuple expectedCheckTuple = expectedIter.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheckTuple, expectedTupleBuilder,
                        expectedWithTokens, serdes);
                expectedElement.reset(expectedWithTokens);
                if (elementCmp.compare(actualElement, expectedElement) != 0) {
                    fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
                }
            }
        } finally {
            actualCursor.unpinPages();
        }
    }
}
Also used : MultiComparator(org.apache.hyracks.storage.common.MultiComparator) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) PermutingTupleReference(org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)

Aggregations

CheckTuple (org.apache.hyracks.storage.am.common.CheckTuple)18 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)8 TreeSet (java.util.TreeSet)6 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)6 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)6 MultiComparator (org.apache.hyracks.storage.common.MultiComparator)4 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)3 RangePredicate (org.apache.hyracks.storage.am.btree.impls.RangePredicate)3 IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInput (java.io.DataInput)2 DataInputStream (java.io.DataInputStream)2 IInvertedIndex (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex)2 IInvertedIndexAccessor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor)2 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)1 GrowableArray (org.apache.hyracks.data.std.util.GrowableArray)1 PermutingTupleReference (org.apache.hyracks.storage.am.common.tuples.PermutingTupleReference)1 IInvertedListCursor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)1 IToken (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken)1 IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader)1