Use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.
The class LSMInvertedIndexTestUtils, method createNGramInvIndexTestContext:
public static LSMInvertedIndexTestContext createNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness,
        InvertedIndexType invIndexType) throws IOException, HyracksDataException {
    ISerializerDeserializer[] fieldSerdes = getNonHashedIndexFieldSerdes(invIndexType);
    ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
    IBinaryTokenizerFactory tokenizerFactory =
            new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH, true, true, false, tokenFactory);
    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes,
            fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
    return testCtx;
}
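The factory built here is what the inverted index later uses to split field values into n-grams. A minimal sketch of how such a tokenizer could be driven directly, assuming the standard IBinaryTokenizer contract (reset/hasNext/next/getToken); serializeUtf8 is a hypothetical helper standing in for Hyracks' serialized UTF-8 string format and is not part of the snippet above:

    // Sketch only: iterate the n-grams of one serialized UTF-8 string.
    IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
    byte[] data = serializeUtf8("database"); // hypothetical helper
    tokenizer.reset(data, 0, data.length);
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken token = tokenizer.getToken(); // one n-gram token per iteration
        // ... consume the token, e.g. serialize it into a growable array
    }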
Use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.
The class LSMInvertedIndexTestUtils, method createHashedWordInvIndexTestContext:
public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
        InvertedIndexType invIndexType) throws IOException, HyracksDataException {
    ISerializerDeserializer[] fieldSerdes = getHashedIndexFieldSerdes(invIndexType);
    ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
    IBinaryTokenizerFactory tokenizerFactory =
            new DelimitedUTF8StringBinaryTokenizerFactory(true, false, tokenFactory);
    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes,
            fieldSerdes.length - 1, tokenizerFactory, invIndexType, null, null, null, null, null, null);
    return testCtx;
}
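The only differences from the n-gram context above are the field serdes and the tokenizer/token factory pair: this context tokenizes on word boundaries and stores hashed tokens. As a hedged reading of the constructor arguments in the two factory calls (parameter names as they appear in the tokenizer constructors; treat them as an assumption if your Hyracks version differs):

    // N-gram variant (previous snippet):
    // (gramLength, usePrePost, ignoreTokenCount, sourceHasTypeTag, tokenFactory)
    new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH, true, true, false, tokenFactory);

    // Word variant (this snippet):
    // (ignoreTokenCount, sourceHasTypeTag, tokenFactory)
    // HashedUTF8WordTokenFactory makes the index store word hashes rather than the words themselves.
    new DelimitedUTF8StringBinaryTokenizerFactory(true, false, tokenFactory);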
Use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory in project asterixdb by apache.
The class LSMInvertedIndexTestWorker, method performOp:
@Override
public void performOp(ITupleReference tuple, TestOperation op) throws HyracksDataException {
    LSMInvertedIndexAccessor accessor = (LSMInvertedIndexAccessor) indexAccessor;
    IIndexCursor searchCursor = accessor.createSearchCursor(false);
    IIndexCursor rangeSearchCursor = accessor.createRangeSearchCursor();
    RangePredicate rangePred = new RangePredicate(null, null, true, true, null, null);
    IBinaryTokenizerFactory tokenizerFactory = invIndex.getTokenizerFactory();
    int searchModifierIndex = Math.abs(rnd.nextInt()) % TEST_SEARCH_MODIFIERS.length;
    InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizerFactory.createTokenizer(),
            TEST_SEARCH_MODIFIERS[searchModifierIndex]);
    switch (op) {
        case INSERT: {
            insert(accessor, tuple);
            break;
        }
        case DELETE: {
            // Randomly pick a document from the corpus to delete.
            if (!documentCorpus.isEmpty()) {
                int docIndex = Math.abs(rnd.nextInt()) % documentCorpus.size();
                ITupleReference deleteTuple = documentCorpus.get(docIndex);
                accessor.delete(deleteTuple);
                // Swap the deleted document with the last element, then shrink the corpus.
                documentCorpus.set(docIndex, documentCorpus.get(documentCorpus.size() - 1));
                documentCorpus.remove(documentCorpus.size() - 1);
            } else {
                // No existing documents to delete; treat this case as an insert.
                insert(accessor, tuple);
            }
            break;
        }
        case POINT_SEARCH: {
            searchCursor.reset();
            searchPred.setQueryTuple(tuple);
            searchPred.setQueryFieldIndex(0);
            try {
                accessor.search(searchCursor, searchPred);
                consumeCursorTuples(searchCursor);
            } catch (HyracksDataException e) {
                // Ignore occurrence-threshold panics; rethrow everything else.
                if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                    throw e;
                }
            }
            break;
        }
        case SCAN: {
            rangeSearchCursor.reset();
            accessor.rangeSearch(rangeSearchCursor, rangePred);
            consumeCursorTuples(rangeSearchCursor);
            break;
        }
        case MERGE: {
            accessor.scheduleMerge(NoOpIOOperationCallbackFactory.INSTANCE.createIoOpCallback(),
                    invIndex.getImmutableComponents());
            break;
        }
        default:
            throw new HyracksDataException("Op " + op.toString() + " not supported.");
    }
}
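Both search branches drain their cursors through consumeCursorTuples, which this snippet does not show. A minimal sketch under the usual IIndexCursor contract (hasNext/next/close); the actual helper in LSMInvertedIndexTestWorker may differ:

    private void consumeCursorTuples(IIndexCursor cursor) throws HyracksDataException {
        try {
            // Drain the cursor; the worker only needs the operation to complete,
            // not the tuples themselves.
            while (cursor.hasNext()) {
                cursor.next();
            }
        } finally {
            cursor.close();
        }
    }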