use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in project asterixdb by apache.
the class OnDiskInvertedIndex method validate.
/**
 * Validates this on-disk inverted index.
 * <p>
 * First delegates to the B-tree's own {@code validate()}. Then scans every entry of the B-tree,
 * opens the inverted list belonging to that entry's token fields, and checks that each element
 * of the list compares strictly greater (per the inverted-list comparators) than the element
 * before it.
 *
 * @throws HyracksDataException
 *             with the message "Index validation failed." when an inverted-list element does not
 *             compare strictly greater than its predecessor, or when any of the underlying index
 *             operations fails.
 */
@Override
public void validate() throws HyracksDataException {
    btree.validate();

    // Identity permutation over the token fields; used to view a B-tree tuple as a token key.
    final int numTokenFields = tokenTypeTraits.length;
    int[] tokenFieldPermutation = new int[numTokenFields];
    for (int field = 0; field < numTokenFields; field++) {
        tokenFieldPermutation[field] = field;
    }
    PermutingTupleReference tokenKey = new PermutingTupleReference(tokenFieldPermutation);

    // Unbounded, inclusive range scan over the whole B-tree.
    IIndexAccessor treeAccessor =
            btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor treeCursor = treeAccessor.createSearchCursor(false);
    MultiComparator treeCmp = MultiComparator.create(btree.getComparatorFactories());
    RangePredicate fullScan = new RangePredicate(null, null, true, true, treeCmp, treeCmp);

    IInvertedIndexAccessor listAccessor = (IInvertedIndexAccessor) createAccessor(
            NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IInvertedListCursor listCursor = listAccessor.createInvertedListCursor();
    MultiComparator elementCmp = MultiComparator.create(invListCmpFactories);

    try {
        // Holds a private copy of the most recently visited inverted-list element.
        ArrayTupleBuilder previousBuilder = new ArrayTupleBuilder(invListTypeTraits.length);
        ArrayTupleReference previous = new ArrayTupleReference();

        treeAccessor.search(treeCursor, fullScan);
        while (treeCursor.hasNext()) {
            treeCursor.next();
            tokenKey.reset(treeCursor.getTuple());

            listAccessor.openInvertedListCursor(listCursor, tokenKey);
            listCursor.pinPages();
            try {
                // Seed 'previous' with the first element of the list, if there is one.
                if (listCursor.hasNext()) {
                    listCursor.next();
                    ITupleReference first = listCursor.getTuple();
                    TupleUtils.copyTuple(previousBuilder, first, first.getFieldCount());
                    previous.reset(previousBuilder.getFieldEndOffsets(), previousBuilder.getByteArray());
                }
                // Every remaining element must come strictly after the one before it.
                while (listCursor.hasNext()) {
                    listCursor.next();
                    ITupleReference current = listCursor.getTuple();
                    boolean strictlyAfterPrevious = elementCmp.compare(current, previous) > 0;
                    if (!strictlyAfterPrevious) {
                        throw new HyracksDataException("Index validation failed.");
                    }
                    TupleUtils.copyTuple(previousBuilder, current, current.getFieldCount());
                    previous.reset(previousBuilder.getFieldEndOffsets(), previousBuilder.getByteArray());
                }
            } finally {
                listCursor.unpinPages();
            }
        }
    } finally {
        treeCursor.close();
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in project asterixdb by apache.
the class PartitionedOnDiskInvertedIndex method openInvertedListPartitionCursors.
/**
 * Searches the B-tree for the entries selected by the searcher's keys and token-count bounds and,
 * for every entry found, obtains a cached inverted-list cursor from the searcher, resets it on
 * that entry, and registers it both in {@code cursorsOrderedByTokens} and, keyed by the entry's
 * token count, in {@code invListPartitions}.
 * <p>
 * A negative bound selects the prefix comparator together with the searcher's prefix search key
 * for that side of the range; a non-negative bound selects the full comparator together with the
 * corresponding full low/high search key. Both ends of the range are inclusive.
 *
 * @param searcher
 *            cast to {@link PartitionedTOccurrenceSearcher}; supplies search keys and cursors
 * @param ictx
 *            cast to {@link OnDiskInvertedIndexOpContext}; supplies the B-tree accessor, cursor,
 *            predicate and comparators
 * @param numTokensLowerBound
 *            lower token-count bound; negative selects prefix search on the low side
 * @param numTokensUpperBound
 *            upper token-count bound; negative selects prefix search on the high side
 * @param invListPartitions
 *            receives each opened cursor together with its token count
 * @param cursorsOrderedByTokens
 *            receives each opened cursor, in B-tree scan order
 * @return {@code true} if at least one B-tree entry was found, {@code false} otherwise
 * @throws HyracksDataException
 *             if the B-tree search or a cursor operation fails
 */
@Override
public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
        short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions,
        List<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException {
    PartitionedTOccurrenceSearcher partitionedSearcher = (PartitionedTOccurrenceSearcher) searcher;
    OnDiskInvertedIndexOpContext opCtx = (OnDiskInvertedIndexOpContext) ictx;
    partitionedSearcher.setNumTokensBoundsInSearchKeys(numTokensLowerBound, numTokensUpperBound);

    // Low side of the range.
    final ITupleReference lowKey;
    if (numTokensLowerBound >= 0) {
        opCtx.getBtreePred().setLowKeyComparator(opCtx.getSearchCmp());
        lowKey = partitionedSearcher.getFullLowSearchKey();
    } else {
        opCtx.getBtreePred().setLowKeyComparator(opCtx.getPrefixSearchCmp());
        lowKey = partitionedSearcher.getPrefixSearchKey();
    }

    // High side of the range.
    final ITupleReference highKey;
    if (numTokensUpperBound >= 0) {
        opCtx.getBtreePred().setHighKeyComparator(opCtx.getSearchCmp());
        highKey = partitionedSearcher.getFullHighSearchKey();
    } else {
        opCtx.getBtreePred().setHighKeyComparator(opCtx.getPrefixSearchCmp());
        highKey = partitionedSearcher.getPrefixSearchKey();
    }

    opCtx.getBtreePred().setLowKey(lowKey, true);
    opCtx.getBtreePred().setHighKey(highKey, true);
    opCtx.getBtreeAccessor().search(opCtx.getBtreeCursor(), opCtx.getBtreePred());

    boolean foundAny = false;
    try {
        while (opCtx.getBtreeCursor().hasNext()) {
            opCtx.getBtreeCursor().next();
            ITupleReference entry = opCtx.getBtreeCursor().getTuple();
            // The token count is stored as a short in the partitioning field of the B-tree tuple.
            byte[] numTokensBytes = entry.getFieldData(PARTITIONING_NUM_TOKENS_FIELD);
            int numTokensOffset = entry.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD);
            short entryNumTokens = ShortPointable.getShort(numTokensBytes, numTokensOffset);

            IInvertedListCursor listCursor = partitionedSearcher.getCachedInvertedListCursor();
            resetInvertedListCursor(entry, listCursor);
            cursorsOrderedByTokens.add(listCursor);
            invListPartitions.addInvertedListCursor(listCursor, entryNumTokens);
            foundAny = true;
        }
    } finally {
        opCtx.getBtreeCursor().close();
        opCtx.getBtreeCursor().reset();
    }
    return foundAny;
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in project asterixdb by apache.
the class InvertedListMerger method merge.
/**
 * Merges the given inverted lists one after another into {@code searchResult}.
 * <p>
 * The cursors are first sorted by their natural ordering. Each list is then merged with the
 * results accumulated so far: intermediate results alternate between the two internal buffers
 * ({@code prevSearchResult} / {@code newSearchResult}), and only the merge of the last list
 * writes into the caller-supplied {@code searchResult}. Lists at positions below
 * {@code numPrefixLists} are merged as prefix lists; the remaining ones are merged as suffix
 * lists, either by probing or by scanning, whichever the cost estimate below favours.
 * <p>
 * The pages of each list stay pinned only for the duration of its merge step and are unpinned
 * in a {@code finally} block, so they are released even when the merge step throws.
 *
 * @param invListCursors
 *            cursors over the inverted lists to merge; sorted in place
 * @param occurrenceThreshold
 *            occurrence threshold forwarded to the suffix-list merge routines
 * @param numPrefixLists
 *            number of leading (post-sort) lists to merge as prefix lists
 * @param searchResult
 *            receives the output of merging the last list; left untouched if the list of
 *            cursors is empty
 * @throws HyracksDataException
 *             if pinning, merging or unpinning fails
 */
public void merge(ArrayList<IInvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists,
        SearchResult searchResult) throws HyracksDataException {
    Collections.sort(invListCursors);
    int numInvLists = invListCursors.size();
    for (int i = 0; i < numInvLists; i++) {
        // Swap the two intermediate buffers: the previous output becomes this round's input.
        SearchResult swapTemp = prevSearchResult;
        prevSearchResult = newSearchResult;
        newSearchResult = swapTemp;
        newSearchResult.reset();

        // Intermediate rounds write to the temporary buffer; the last round writes to the
        // final search result.
        boolean isLastList = i + 1 == numInvLists;
        SearchResult result = isLastList ? searchResult : newSearchResult;

        IInvertedListCursor invListCursor = invListCursors.get(i);
        invListCursor.pinPages();
        try {
            if (i < numPrefixLists) {
                // Merge prefix list.
                mergePrefixList(invListCursor, prevSearchResult, result);
            } else {
                // Merge suffix list.
                int numInvListElements = invListCursor.size();
                int currentNumResults = prevSearchResult.getNumResults();
                // Should we binary search the next list or should we sort-merge it?
                if (currentNumResults * Math.log(numInvListElements) < currentNumResults
                        + numInvListElements) {
                    mergeSuffixListProbe(invListCursor, prevSearchResult, result, i, numInvLists,
                            occurrenceThreshold);
                } else {
                    mergeSuffixListScan(invListCursor, prevSearchResult, result, i, numInvLists,
                            occurrenceThreshold);
                }
            }
        } finally {
            // Always release the pinned pages, even if the merge step above failed.
            invListCursor.unpinPages();
        }
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method compareActualAndExpectedIndexes.
/**
 * Checks the actual index against the expected one, one inverted list at a time.
 * <p>
 * For every token recorded in the test context, the expected inverted list is taken from the
 * context's check tuples and the actual inverted list is opened through the accessor's
 * openInvertedListCursor() method. The test fails if the two lists differ in size or if any pair
 * of elements at the same position does not compare as equal.
 *
 * @param testCtx
 *            test context providing the index, its accessor, the field serdes, all inserted
 *            tokens and the expected check tuples
 * @throws HyracksDataException
 *             if an operation on the actual index fails
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx)
        throws HyracksDataException {
    IInvertedIndex index = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();
    MultiComparator elementCmp = MultiComparator.create(index.getInvListCmpFactories());
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int numTokenFields = index.getTokenTypeTraits().length;
    int numInvListFields = index.getInvListTypeTraits().length;

    // Every token that was inserted into the indexes.
    Iterator<Comparable> allTokens = testCtx.getAllTokens().iterator();

    // Serialized token key used to look up an inverted list in the actual index.
    ArrayTupleBuilder tokenKeyBuilder = new ArrayTupleBuilder(numTokenFields);
    ArrayTupleReference tokenKey = new ArrayTupleReference();

    // Cursor over an inverted list of the actual index.
    IInvertedListCursor actualCursor = accessor.createInvertedListCursor();

    // Serialized form of an expected CheckTuple; this still includes the token fields.
    ArrayTupleBuilder expectedFullBuilder = new ArrayTupleBuilder(serdes.length);
    ArrayTupleReference expectedFull = new ArrayTupleReference();

    // View on 'expectedFull' that skips the leading token fields and exposes only the
    // inverted-list element fields.
    int[] elementFields = new int[numInvListFields];
    for (int f = 0; f < elementFields.length; f++) {
        elementFields[f] = numTokenFields + f;
    }
    PermutingTupleReference expectedElement = new PermutingTupleReference(elementFields);

    while (allTokens.hasNext()) {
        Comparable token = allTokens.next();

        // Expected side: all check tuples whose prefix equals the token.
        CheckTuple lowKey = new CheckTuple(numTokenFields, numTokenFields);
        lowKey.appendField(token);
        CheckTuple highKey = new CheckTuple(numTokenFields, numTokenFields);
        highKey.appendField(token);
        SortedSet<CheckTuple> expectedList =
                OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), lowKey, highKey);
        Iterator<CheckTuple> expectedIter = expectedList.iterator();

        // Actual side: open the inverted list for the same token.
        OrderedIndexTestUtils.createTupleFromCheckTuple(lowKey, tokenKeyBuilder, tokenKey, serdes);
        accessor.openInvertedListCursor(actualCursor, tokenKey);

        boolean sameSize = actualCursor.size() == expectedList.size();
        if (!sameSize) {
            fail("Actual and expected inverted lists for token '" + token.toString()
                    + "' have different sizes. Actual size: " + actualCursor.size() + ". Expected size: "
                    + expectedList.size() + ".");
        }

        // Walk both lists in lock-step and compare the elements pairwise.
        actualCursor.pinPages();
        try {
            for (int position = 0; actualCursor.hasNext() && expectedIter.hasNext(); position++) {
                actualCursor.next();
                ITupleReference actualElement = actualCursor.getTuple();
                CheckTuple expectedCheck = expectedIter.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheck, expectedFullBuilder,
                        expectedFull, serdes);
                expectedElement.reset(expectedFull);
                if (elementCmp.compare(actualElement, expectedElement) != 0) {
                    fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
                }
            }
        } finally {
            actualCursor.unpinPages();
        }
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor in project asterixdb by apache.
the class TOccurrenceSearcher method search.
/**
 * Executes a search for the given predicate.
 * <p>
 * The query is tokenized, one inverted-list cursor is opened per query token, the occurrence
 * threshold and the number of prefix lists are obtained from the predicate's search modifier,
 * and the opened lists are merged into {@code searchResult}. Finally {@code resultCursor} is
 * opened on the predicate with a {@code null} first argument.
 *
 * @param resultCursor
 *            cursor opened once the merge has completed
 * @param searchPred
 *            predicate carrying the query and the search modifier
 * @param ictx
 *            operation context passed through when opening each inverted-list cursor
 * @throws HyracksDataException
 *             with {@code ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION} when the occurrence
 *             threshold computed by the search modifier is not positive, or when an underlying
 *             index operation fails
 */
@Override
public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
        IIndexOperationContext ictx) throws HyracksDataException {
    tokenizeQuery(searchPred);
    final int tokenCount = queryTokenAppender.getTupleCount();

    // Open one inverted-list cursor per query token, reusing cursors from the cache.
    invListCursors.clear();
    invListCursorCache.reset();
    int tokenIdx = 0;
    while (tokenIdx < tokenCount) {
        searchKey.reset(queryTokenAppender, tokenIdx);
        IInvertedListCursor listCursor = invListCursorCache.getNext();
        invIndex.openInvertedListCursor(listCursor, searchKey, ictx);
        invListCursors.add(listCursor);
        tokenIdx++;
    }

    IInvertedIndexSearchModifier modifier = searchPred.getSearchModifier();
    occurrenceThreshold = modifier.getOccurrenceThreshold(tokenCount);
    // A non-positive threshold is rejected rather than searched with.
    if (occurrenceThreshold < 1) {
        throw HyracksDataException.create(ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION);
    }

    int prefixListCount = modifier.getNumPrefixLists(occurrenceThreshold, invListCursors.size());
    searchResult.reset();
    invListMerger.merge(invListCursors, occurrenceThreshold, prefixListCount, searchResult);
    resultCursor.open(null, searchPred);
}
Aggregations