Search in sources :

Example 1 with IInvertedIndexSearchModifier

use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier in project asterixdb by apache.

the class AbstractInvertedIndexSearchTest method testNGramInvIndexIndex.

/**
 * Runs the shared search test against an n-gram inverted index.
 *
 * <p>Tuples come from {@code LSMInvertedIndexTestUtils.createPersonNamesTupleGen}. The
 * index is searched with a conjunctive modifier, Jaccard modifiers at thresholds
 * 1.0, 0.8 and 0.5, and edit-distance modifiers for distances 0 through 3, in that order.
 *
 * @param testCtx test context that is handed on to {@code runTest}
 * @throws IOException propagated from tuple-generator creation or {@code runTest}
 */
private void testNGramInvIndexIndex(LSMInvertedIndexTestContext testCtx) throws IOException {
    TupleGenerator personNameGen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom());
    final float[] jaccardThresholds = { 1.0f, 0.8f, 0.5f };
    final int maxEditDistance = 3;

    List<IInvertedIndexSearchModifier> modifiers = new ArrayList<>();
    modifiers.add(new ConjunctiveSearchModifier());
    for (float threshold : jaccardThresholds) {
        modifiers.add(new JaccardSearchModifier(threshold));
    }
    // Edit distances 0..maxEditDistance, all using the test gram length.
    for (int distance = 0; distance <= maxEditDistance; distance++) {
        modifiers.add(new EditDistanceSearchModifier(LSMInvertedIndexTestUtils.TEST_GRAM_LENGTH, distance));
    }
    runTest(testCtx, personNameGen, modifiers);
}
Also used : IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier) JaccardSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier) ArrayList(java.util.ArrayList) TupleGenerator(org.apache.hyracks.storage.am.common.datagen.TupleGenerator) EditDistanceSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.search.EditDistanceSearchModifier) ConjunctiveSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier)

Example 2 with IInvertedIndexSearchModifier

use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier in project asterixdb by apache.

the class AbstractInvertedIndexSearchTest method runTest.

/**
 * Populates the index obtained from {@code testCtx}, validates it, runs searches with
 * each supplied modifier, and finally deactivates and destroys the index.
 *
 * <p>The index is created and activated here unless it is an LSM-type index
 * ({@code LSM} or {@code PARTITIONED_LSM}) that is about to be bulk loaded. Data is
 * either bulk loaded or inserted, depending on the {@code bulkLoad} field.
 *
 * @param testCtx         test context providing the index under test
 * @param tupleGen        generator of the tuples to load and to query with
 * @param searchModifiers modifiers to run searches with, in iteration order
 * @throws IOException propagated from the index or test-utility calls
 */
protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, List<IInvertedIndexSearchModifier> searchModifiers) throws IOException {
    final IIndex index = testCtx.getIndex();
    final boolean lsmBased = invIndexType == InvertedIndexType.LSM || invIndexType == InvertedIndexType.PARTITIONED_LSM;

    // Only the combination "LSM-type index + bulk load" skips explicit create/activate.
    if (!(lsmBased && bulkLoad)) {
        index.create();
        index.activate();
    }

    if (!bulkLoad) {
        LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
    } else {
        // The last argument is true exactly for the LSM-type indexes.
        LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT, lsmBased);
    }
    index.validate();

    for (IInvertedIndexSearchModifier modifier : searchModifiers) {
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Running searches with: " + modifier.toString());
        }
        LSMInvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_DOC_QUERIES, NUM_RANDOM_QUERIES, modifier, SCAN_COUNT_ARRAY);
    }

    index.deactivate();
    index.destroy();
}
Also used : IIndex(org.apache.hyracks.storage.common.IIndex) IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier)

Example 3 with IInvertedIndexSearchModifier

use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier in project asterixdb by apache.

the class AbstractInvertedIndexSearchTest method testWordInvIndexIndex.

/**
 * Runs the shared search test against a word-based inverted index.
 *
 * <p>Tuples come from {@code LSMInvertedIndexTestUtils.createStringDocumentTupleGen}. The
 * index is searched with a conjunctive modifier followed by Jaccard modifiers at
 * thresholds 1.0, 0.8 and 0.5.
 *
 * @param testCtx test context that is handed on to {@code runTest}
 * @throws IOException propagated from tuple-generator creation or {@code runTest}
 */
private void testWordInvIndexIndex(LSMInvertedIndexTestContext testCtx) throws IOException {
    TupleGenerator documentGen = LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
    final float[] jaccardThresholds = { 1.0f, 0.8f, 0.5f };

    List<IInvertedIndexSearchModifier> modifiers = new ArrayList<>();
    modifiers.add(new ConjunctiveSearchModifier());
    for (float threshold : jaccardThresholds) {
        modifiers.add(new JaccardSearchModifier(threshold));
    }
    runTest(testCtx, documentGen, modifiers);
}
Also used : IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier) JaccardSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier) ArrayList(java.util.ArrayList) TupleGenerator(org.apache.hyracks.storage.am.common.datagen.TupleGenerator) ConjunctiveSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier)

Example 4 with IInvertedIndexSearchModifier

use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier in project asterixdb by apache.

the class TOccurrenceSearcher method search.

/**
 * Tokenizes the query in {@code searchPred}, opens one inverted-list cursor per query
 * token, merges those lists into {@code searchResult}, and opens {@code resultCursor}.
 *
 * @param resultCursor cursor opened at the end with {@code searchPred}
 * @param searchPred   predicate supplying the query and the search modifier
 * @param ictx         operation context passed through when opening inverted-list cursors
 * @throws HyracksDataException with {@code ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION}
 *         when the modifier yields an occurrence threshold that is zero or negative
 */
@Override
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred, IIndexOperationContext ictx) throws HyracksDataException {
    tokenizeQuery(searchPred);
    final int tokenCount = queryTokenAppender.getTupleCount();

    // Start from an empty cursor list and a rewound cursor cache, then open one cursor per token.
    invListCursors.clear();
    invListCursorCache.reset();
    int tokenIdx = 0;
    while (tokenIdx < tokenCount) {
        searchKey.reset(queryTokenAppender, tokenIdx);
        IInvertedListCursor cursor = invListCursorCache.getNext();
        invIndex.openInvertedListCursor(cursor, searchKey, ictx);
        invListCursors.add(cursor);
        tokenIdx++;
    }

    final IInvertedIndexSearchModifier modifier = searchPred.getSearchModifier();
    occurrenceThreshold = modifier.getOccurrenceThreshold(tokenCount);
    if (occurrenceThreshold <= 0) {
        throw HyracksDataException.create(ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION);
    }

    final int prefixListCount = modifier.getNumPrefixLists(occurrenceThreshold, invListCursors.size());
    searchResult.reset();
    invListMerger.merge(invListCursors, occurrenceThreshold, prefixListCount, searchResult);
    resultCursor.open(null, searchPred);
}
Also used : IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier) IInvertedListCursor(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor)

Example 5 with IInvertedIndexSearchModifier

use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier in project asterixdb by apache.

the class PartitionedTOccurrenceSearcher method search.

/**
 * Searches a partitioned inverted index: tokenizes the query, opens inverted-list
 * cursors per partition, prunes partitions that cannot reach the occurrence threshold,
 * merges the remaining partitions into {@code searchResult}, and opens {@code resultCursor}.
 *
 * <p>Returns early, without opening {@code resultCursor}, when the index reports it is
 * empty or when too few query tokens can still match to reach the occurrence threshold.
 *
 * @param resultCursor cursor opened at the end with {@code searchPred}
 * @param searchPred   predicate supplying the query and the search modifier
 * @param ictx         operation context passed through when opening partition cursors
 * @throws HyracksDataException with {@code ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION}
 *         when the modifier yields an occurrence threshold that is zero or negative
 */
@Override
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred, IIndexOperationContext ictx) throws HyracksDataException {
    IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
    searchResult.reset();
    if (partInvIndex.isEmpty()) {
        return;
    }
    tokenizeQuery(searchPred);
    // NOTE(review): the token count is narrowed to short; assumes it always fits — confirm.
    short numQueryTokens = (short) queryTokenAppender.getTupleCount();
    IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
    short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
    short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);
    occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
    if (occurrenceThreshold <= 0) {
        throw HyracksDataException.create(ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION);
    }
    // Upper bound on how many query tokens can still match; decremented for each token
    // whose partition cursors could not be opened.
    short maxCountPossible = numQueryTokens;
    invListCursorCache.reset();
    partitions.reset(numTokensLowerBound, numTokensUpperBound);
    cursorsOrderedByTokens.clear();
    for (int i = 0; i < numQueryTokens; i++) {
        searchKey.reset(queryTokenAppender, i);
        if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound, partitions, cursorsOrderedByTokens)) {
            maxCountPossible--;
            // No results possible.
            if (maxCountPossible < occurrenceThreshold) {
                return;
            }
        }
    }
    ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions();
    short start = partitions.getMinValidPartitionIndex();
    short end = partitions.getMaxValidPartitionIndex();
    // NOTE(review): the next comment line is a fragment whose beginning is missing from this
    // copy; presumably it warns about blocking while pinning pages — confirm against upstream.
    // if we'd have to wait for a page to get evicted.
    if (!cursorsOrderedByTokens.isEmpty()) {
        for (int i = start; i <= end; i++) {
            if (partitionCursors[i] == null) {
                continue;
            }
            // Prune partition because no element in it can satisfy the occurrence threshold.
            if (partitionCursors[i].size() < occurrenceThreshold) {
                cursorsOrderedByTokens.removeAll(partitionCursors[i]);
            }
        }
        // Pin all the cursors in the order of tokens.
        int numCursors = cursorsOrderedByTokens.size();
        for (int i = 0; i < numCursors; i++) {
            cursorsOrderedByTokens.get(i).pinPages();
        }
    }
    // Process the partitions one-by-one.
    for (int i = start; i <= end; i++) {
        if (partitionCursors[i] == null) {
            continue;
        }
        // Prune partition because no element in it can satisfy the occurrence threshold.
        if (partitionCursors[i].size() < occurrenceThreshold) {
            continue;
        }
        // Merge inverted lists of current partition.
        int numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[i].size());
        invListMerger.reset();
        invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, searchResult);
    }
    resultCursor.open(null, searchPred);
}
Also used : IInvertedIndexSearchModifier(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier) ArrayList(java.util.ArrayList) IPartitionedInvertedIndex(org.apache.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex)

Aggregations

IInvertedIndexSearchModifier (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier): 5; ArrayList (java.util.ArrayList): 3; TupleGenerator (org.apache.hyracks.storage.am.common.datagen.TupleGenerator): 2; ConjunctiveSearchModifier (org.apache.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier): 2; JaccardSearchModifier (org.apache.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier): 2; IInvertedListCursor (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor): 1; IPartitionedInvertedIndex (org.apache.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex): 1; EditDistanceSearchModifier (org.apache.hyracks.storage.am.lsm.invertedindex.search.EditDistanceSearchModifier): 1; IIndex (org.apache.hyracks.storage.common.IIndex): 1