Examples with IBinaryTokenizerFactory - org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory

Example 6 with IBinaryTokenizerFactory

use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method createNGramInvIndexTestContext.

public static LSMInvertedIndexTestContext createNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType) throws IOException, HyracksDataException {
    ISerializerDeserializer[] fieldSerdes = getNonHashedIndexFieldSerdes(invIndexType);
    ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
    IBinaryTokenizerFactory tokenizerFactory = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH, true, true, false, tokenFactory);
    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
    return testCtx;
}

Also used : UTF8NGramTokenFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory) HashedUTF8NGramTokenFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) ITokenFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) NGramUTF8StringBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizerFactory)

Example 7 with IBinaryTokenizerFactory

use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method createHashedWordInvIndexTestContext.

public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType) throws IOException, HyracksDataException {
    ISerializerDeserializer[] fieldSerdes = getHashedIndexFieldSerdes(invIndexType);
    ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
    IBinaryTokenizerFactory tokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false, tokenFactory);
    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
    return testCtx;
}

Also used : DelimitedUTF8StringBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory) HashedUTF8WordTokenFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) ITokenFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Example 8 with IBinaryTokenizerFactory

use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.

the class LSMInvertedIndexTestWorker method performOp.

@Override
public void performOp(ITupleReference tuple, TestOperation op) throws HyracksDataException {
    LSMInvertedIndexAccessor accessor = (LSMInvertedIndexAccessor) indexAccessor;
    IIndexCursor searchCursor = accessor.createSearchCursor(false);
    IIndexCursor rangeSearchCursor = accessor.createRangeSearchCursor();
    RangePredicate rangePred = new RangePredicate(null, null, true, true, null, null);
    IBinaryTokenizerFactory tokenizerFactory = invIndex.getTokenizerFactory();
    int searchModifierIndex = Math.abs(rnd.nextInt()) % TEST_SEARCH_MODIFIERS.length;
    InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizerFactory.createTokenizer(), TEST_SEARCH_MODIFIERS[searchModifierIndex]);
    switch(op) {
        case INSERT:
            {
                insert(accessor, tuple);
                break;
            }
        case DELETE:
            {
                // Randomly pick a document from the corpus to delete.
                if (!documentCorpus.isEmpty()) {
                    int docIndex = Math.abs(rnd.nextInt()) % documentCorpus.size();
                    ITupleReference deleteTuple = documentCorpus.get(docIndex);
                    accessor.delete(deleteTuple);
                    // Swap tupleIndex with last element.
                    documentCorpus.set(docIndex, documentCorpus.get(documentCorpus.size() - 1));
                    documentCorpus.remove(documentCorpus.size() - 1);
                } else {
                    // No existing documents to delete, treat this case as an insert.
                    insert(accessor, tuple);
                }
                break;
            }
        case POINT_SEARCH:
            {
                searchCursor.reset();
                searchPred.setQueryTuple(tuple);
                searchPred.setQueryFieldIndex(0);
                try {
                    accessor.search(searchCursor, searchPred);
                    consumeCursorTuples(searchCursor);
                } catch (HyracksDataException e) {
                    // Ignore.
                    if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                        throw e;
                    }
                }
                break;
            }
        case SCAN:
            {
                rangeSearchCursor.reset();
                accessor.rangeSearch(rangeSearchCursor, rangePred);
                consumeCursorTuples(rangeSearchCursor);
                break;
            }
        case MERGE:
            {
                accessor.scheduleMerge(NoOpIOOperationCallbackFactory.INSTANCE.createIoOpCallback(), invIndex.getImmutableComponents());
                break;
            }
        default:
            throw new HyracksDataException("Op " + op.toString() + " not supported.");
    }
}

Also used : RangePredicate(org.apache.hyracks.storage.am.btree.impls.RangePredicate) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) InvertedIndexSearchPredicate(org.apache.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor) LSMInvertedIndexAccessor(org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexAccessor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Aggregations

IBinaryTokenizerFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory)8 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)5 ITokenFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory)4 List (java.util.List)2 AsterixException (org.apache.asterix.common.exceptions.AsterixException)2 CompilationException (org.apache.asterix.common.exceptions.CompilationException)2 MetadataException (org.apache.asterix.metadata.MetadataException)2 Index (org.apache.asterix.metadata.entities.Index)2 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)2 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)2 Pair (org.apache.hyracks.algebricks.common.utils.Pair)2 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)2 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)2 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)2 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)2 DelimitedUTF8StringBinaryTokenizerFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory)2 HashedUTF8NGramTokenFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory)2 HashedUTF8WordTokenFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory)2 NGramUTF8StringBinaryTokenizerFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizerFactory)2 IOException (java.io.IOException)1