use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex in project asterixdb by apache.
the class LSMInvertedIndexTestContext method getComparatorFactories.
/**
 * Returns the comparator factories for a complete index entry: the token comparator factories
 * followed by the inverted-list comparator factories. The combined array is built on the first
 * call and the stored array is returned on later calls.
 */
@Override
public IBinaryComparatorFactory[] getComparatorFactories() {
    if (allCmpFactories != null) {
        return allCmpFactories;
    }
    IInvertedIndex invertedIndex = (IInvertedIndex) index;
    IBinaryComparatorFactory[] tokenPart = invertedIndex.getTokenCmpFactories();
    IBinaryComparatorFactory[] invListPart = invertedIndex.getInvListCmpFactories();
    // Token comparators occupy the front of the array, inverted-list comparators the tail.
    allCmpFactories = new IBinaryComparatorFactory[tokenPart.length + invListPart.length];
    System.arraycopy(tokenPart, 0, allCmpFactories, 0, tokenPart.length);
    System.arraycopy(invListPart, 0, allCmpFactories, tokenPart.length, invListPart.length);
    return allCmpFactories;
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method compareActualAndExpectedIndexes.
/**
 * Verifies the actual index against the expected index one inverted list at a time, going through the
 * openInvertedListCursor() method of the inverted-index accessor.
 *
 * @param testCtx test context supplying the index, its accessor, the inserted tokens and the expected check tuples
 * @throws HyracksDataException if an index or cursor operation fails
 */
@SuppressWarnings("unchecked")
public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex invertedIndex = (IInvertedIndex) testCtx.getIndex();
    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();
    MultiComparator elementCmp = MultiComparator.create(invertedIndex.getInvListCmpFactories());
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
    int numTokenFields = invertedIndex.getTokenTypeTraits().length;
    int numInvListFields = invertedIndex.getInvListTypeTraits().length;

    // Reusable search key used to look up an inverted list in the actual index.
    ArrayTupleBuilder keyBuilder = new ArrayTupleBuilder(numTokenFields);
    ArrayTupleReference key = new ArrayTupleReference();
    // Reusable cursor over an inverted list of the actual index.
    IInvertedListCursor actualCursor = accessor.createInvertedListCursor();
    // Reusable builder/reference for serializing a CheckTuple of the expected index (token fields included).
    ArrayTupleBuilder expectedTupleBuilder = new ArrayTupleBuilder(serdes.length);
    ArrayTupleReference fullExpectedTuple = new ArrayTupleReference();
    // Permutation selecting only the inverted-list fields, i.e. skipping the leading token fields.
    int[] invListFieldsOnly = new int[numInvListFields];
    for (int f = 0; f < invListFieldsOnly.length; f++) {
        invListFieldsOnly[f] = numTokenFields + f;
    }
    PermutingTupleReference expectedElement = new PermutingTupleReference(invListFieldsOnly);

    // For every inserted token, locate its inverted list in both indexes and compare the two lists.
    for (Iterator<Comparable> tokens = testCtx.getAllTokens().iterator(); tokens.hasNext();) {
        Comparable token = tokens.next();

        // Select the expected inverted list: all check tuples whose prefix equals the token.
        CheckTuple lowKey = new CheckTuple(numTokenFields, numTokenFields);
        lowKey.appendField(token);
        CheckTuple highKey = new CheckTuple(numTokenFields, numTokenFields);
        highKey.appendField(token);
        SortedSet<CheckTuple> expectedList =
                OrderedIndexTestUtils.getPrefixExpectedSubset(testCtx.getCheckTuples(), lowKey, highKey);
        Iterator<CheckTuple> expectedElements = expectedList.iterator();

        // Open the cursor on the same token's inverted list in the actual index.
        OrderedIndexTestUtils.createTupleFromCheckTuple(lowKey, keyBuilder, key, serdes);
        accessor.openInvertedListCursor(actualCursor, key);
        if (actualCursor.size() != expectedList.size()) {
            fail("Actual and expected inverted lists for token '" + token.toString()
                    + "' have different sizes. Actual size: " + actualCursor.size() + ". Expected size: "
                    + expectedList.size() + ".");
        }

        // Walk both lists in lock-step; the pages stay pinned only for the duration of the walk.
        actualCursor.pinPages();
        try {
            for (int position = 0; actualCursor.hasNext() && expectedElements.hasNext(); position++) {
                actualCursor.next();
                ITupleReference actualElement = actualCursor.getTuple();
                CheckTuple expectedCheckTuple = expectedElements.next();
                OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheckTuple, expectedTupleBuilder,
                        fullExpectedTuple, serdes);
                expectedElement.reset(fullExpectedTuple);
                if (elementCmp.compare(actualElement, expectedElement) != 0) {
                    fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
                }
            }
        } finally {
            actualCursor.unpinPages();
        }
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method compareActualAndExpectedIndexesRangeSearch.
/**
 * Verifies the actual index against the expected index with a single unbounded scan issued through the
 * rangeSearch() method of the inverted-index accessor.
 *
 * @param testCtx test context supplying the index, the field serdes and the expected check tuples
 * @throws HyracksDataException if an index or cursor operation fails
 */
public static void compareActualAndExpectedIndexesRangeSearch(LSMInvertedIndexTestContext testCtx) throws HyracksDataException {
    IInvertedIndex invertedIndex = (IInvertedIndex) testCtx.getIndex();
    int numTokenFields = invertedIndex.getTokenTypeTraits().length;
    int numInvListFields = invertedIndex.getInvListTypeTraits().length;
    IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invertedIndex
            .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor scanCursor = accessor.createRangeSearchCursor();

    // Comparator over the token fields only (for the predicate) and over the whole entry (for the check).
    IBinaryComparatorFactory[] tokenFactories = invertedIndex.getTokenCmpFactories();
    MultiComparator tokenCmp = MultiComparator.create(tokenFactories);
    IBinaryComparatorFactory[] entryFactories = new IBinaryComparatorFactory[numTokenFields + numInvListFields];
    System.arraycopy(tokenFactories, 0, entryFactories, 0, numTokenFields);
    System.arraycopy(invertedIndex.getInvListCmpFactories(), 0, entryFactories, numTokenFields, numInvListFields);
    MultiComparator entryCmp = MultiComparator.create(entryFactories);

    // Null low and high keys: the predicate places no bound on either end of the range.
    RangePredicate unboundedPred = new RangePredicate(null, null, true, true, tokenCmp, tokenCmp);
    accessor.rangeSearch(scanCursor, unboundedPred);

    // Reusable builder/reference for serializing a CheckTuple of the expected index.
    ISerializerDeserializer[] serdes = testCtx.getFieldSerdes();
    ArrayTupleBuilder expectedTupleBuilder = new ArrayTupleBuilder(serdes.length);
    ArrayTupleReference expectedEntry = new ArrayTupleReference();
    Iterator<CheckTuple> expectedEntries = testCtx.getCheckTuples().iterator();

    try {
        // Walk both sequences in lock-step until one of them is exhausted.
        while (scanCursor.hasNext() && expectedEntries.hasNext()) {
            scanCursor.next();
            ITupleReference actualEntry = scanCursor.getTuple();
            CheckTuple expectedCheckTuple = expectedEntries.next();
            OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheckTuple, expectedTupleBuilder,
                    expectedEntry, serdes);
            if (entryCmp.compare(actualEntry, expectedEntry) != 0) {
                fail("Index entries differ for token '" + expectedCheckTuple.getField(0) + "'.");
            }
        }
        // Any leftover on either side means the two indexes hold a different number of entries.
        if (expectedEntries.hasNext()) {
            fail("Indexes do not match. Actual index is missing entries.");
        }
        if (scanCursor.hasNext()) {
            fail("Indexes do not match. Actual index contains too many entries.");
        }
    } finally {
        scanCursor.close();
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method testIndexSearch.
/**
 * Runs numDocQueries + numRandomQueries searches against the index and checks each result set against
 * the results computed from the expected index. Queries that raise the occurrence-threshold panic error
 * are skipped.
 *
 * @param testCtx test context holding the index, document corpus, check tuples and tokenizer factory
 * @param tupleGen generator for random query tuples
 * @param rnd source of randomness for picking corpus documents
 * @param numDocQueries number of queries drawn from the document corpus
 * @param numRandomQueries number of queries taken from tupleGen
 * @param searchModifier search modifier applied to every query
 * @param scanCountArray array handed to getExpectedResults() when computing the expected results
 * @throws IOException declared by the original signature
 * @throws HyracksDataException if a search fails with anything other than the panic error
 */
public static void testIndexSearch(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd, int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException, HyracksDataException {
    IInvertedIndex invertedIndex = testCtx.invIndex;
    IInvertedIndexAccessor indexAccessor = (IInvertedIndexAccessor) invertedIndex
            .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
    InvertedIndexSearchPredicate predicate = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
    List<ITupleReference> corpus = testCtx.getDocumentCorpus();
    // Keep only field 0 of each query tuple, dropping the primary-key field.
    PermutingTupleReference query = new PermutingTupleReference(new int[] { 0 });

    int totalQueries = numDocQueries + numRandomQueries;
    for (int q = 0; q < totalQueries; q++) {
        // Corpus documents are used while both the doc-query budget and the corpus size allow it;
        // every remaining query is a randomly generated one.
        boolean fromCorpus = q < numDocQueries && q < corpus.size();
        if (fromCorpus) {
            int docIndex = Math.abs(rnd.nextInt() % corpus.size());
            query.reset(corpus.get(docIndex));
        } else {
            ITupleReference generated = tupleGen.next();
            query.reset(generated);
        }

        predicate.setQueryTuple(query);
        predicate.setQueryFieldIndex(0);
        IIndexCursor results = indexAccessor.createSearchCursor(false);

        boolean panicked;
        try {
            indexAccessor.search(results, predicate);
            panicked = false;
        } catch (HyracksDataException e) {
            if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                throw e;
            }
            // Panic queries are ignored.
            panicked = true;
        }

        try {
            if (panicked) {
                // The finally clause below still closes the cursor.
                continue;
            }

            // Drain the cursor and deserialize the results so they can be sorted; some search cursors
            // may not deliver the result sorted (e.g., LSM search cursor).
            List<Integer> actualResults = new ArrayList<>();
            try {
                while (results.hasNext()) {
                    results.next();
                    ITupleReference hit = results.getTuple();
                    actualResults.add(IntegerPointable.getInteger(hit.getFieldData(0), hit.getFieldStart(0)));
                }
            } catch (HyracksDataException e) {
                if (e.getErrorCode() != ErrorCode.OCCURRENCE_THRESHOLD_PANIC_EXCEPTION) {
                    throw e;
                }
                // Panic while consuming results: ignore this query as well.
                continue;
            }
            Collections.sort(actualResults);

            // Compute the expected results for the same query.
            List<Integer> expectedResults = new ArrayList<>();
            LSMInvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), query,
                    tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults,
                    testCtx.getInvertedIndexType());

            // Compare pairwise, then make sure neither side has leftovers.
            Iterator<Integer> expectedIt = expectedResults.iterator();
            Iterator<Integer> actualIt = actualResults.iterator();
            while (expectedIt.hasNext() && actualIt.hasNext()) {
                int expected = expectedIt.next();
                int actual = actualIt.next();
                if (actual != expected) {
                    fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
                }
            }
            if (expectedIt.hasNext()) {
                fail("Query results do not match. Actual results missing.");
            }
            if (actualIt.hasNext()) {
                fail("Query results do not match. Actual contains too many results.");
            }
        } finally {
            results.close();
        }
    }
}
use of org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex in project asterixdb by apache.
the class LSMInvertedIndexTestContext method create.
/**
 * Builds an inverted index of the requested type together with a matching tokenizing tuple iterator and
 * wraps both in a new test context.
 *
 * The first tokenFieldCount entries of fieldSerdes describe the token fields; the remaining entries
 * describe the inverted-list element fields. The filter and field-mapping arguments are passed only to
 * the LSM and PARTITIONED_LSM index factories.
 *
 * @return a test context for the newly created index
 * @throws HyracksDataException if invIndexType is not one of the handled types, or if index creation fails
 */
public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness, ISerializerDeserializer[] fieldSerdes, int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType, int[] invertedIndexFields, ITypeTraits[] filterTypeTraits, IBinaryComparatorFactory[] filterCmpFactories, int[] filterFields, int[] filterFieldsForNonBulkLoadOps, int[] invertedIndexFieldsForNonBulkLoadOps) throws HyracksDataException {
    ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
    IOManager ioManager = harness.getIOManager();
    IBinaryComparatorFactory[] allCmpFactories =
            SerdeUtils.serdesToComparatorFactories(fieldSerdes, fieldSerdes.length);

    // Leading fields: token type traits and comparators.
    ITypeTraits[] tokenTypeTraits = new ITypeTraits[tokenFieldCount];
    IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[tokenFieldCount];
    System.arraycopy(allTypeTraits, 0, tokenTypeTraits, 0, tokenFieldCount);
    System.arraycopy(allCmpFactories, 0, tokenCmpFactories, 0, tokenFieldCount);

    // Trailing fields: inverted-list element type traits and comparators.
    int invListFieldCount = fieldSerdes.length - tokenFieldCount;
    ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFieldCount];
    IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListFieldCount];
    System.arraycopy(allTypeTraits, tokenFieldCount, invListTypeTraits, 0, invListFieldCount);
    System.arraycopy(allCmpFactories, tokenFieldCount, invListCmpFactories, 0, invListFieldCount);

    assert harness.getVirtualBufferCaches().size() > 0;

    // Instantiate the index variant that was asked for.
    IInvertedIndex invIndex;
    switch (invIndexType) {
        case INMEMORY:
            invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(
                    harness.getVirtualBufferCaches().get(0),
                    new VirtualFreePageManager(harness.getVirtualBufferCaches().get(0)),
                    invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
                    tokenizerFactory, ioManager.resolveAbsolutePath(harness.getOnDiskDir()));
            break;
        case PARTITIONED_INMEMORY:
            invIndex = InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(
                    harness.getVirtualBufferCaches().get(0),
                    new VirtualFreePageManager(harness.getVirtualBufferCaches().get(0)),
                    invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
                    tokenizerFactory, ioManager.resolveAbsolutePath(harness.getOnDiskDir()));
            break;
        case ONDISK:
            invIndex = InvertedIndexUtils.createOnDiskInvertedIndex(ioManager, harness.getDiskBufferCache(),
                    harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories,
                    tokenTypeTraits, tokenCmpFactories, harness.getInvListsFileRef(),
                    harness.getMetadataPageManagerFactory());
            break;
        case PARTITIONED_ONDISK:
            invIndex = InvertedIndexUtils.createPartitionedOnDiskInvertedIndex(ioManager,
                    harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, harness.getInvListsFileRef(),
                    harness.getMetadataPageManagerFactory());
            break;
        case LSM:
            invIndex = InvertedIndexUtils.createLSMInvertedIndex(ioManager, harness.getVirtualBufferCaches(),
                    harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories,
                    tokenTypeTraits, tokenCmpFactories, tokenizerFactory, harness.getDiskBufferCache(),
                    harness.getOnDiskDir(), harness.getBoomFilterFalsePositiveRate(),
                    harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
                    harness.getIOOperationCallback(), invertedIndexFields, filterTypeTraits,
                    filterCmpFactories, filterFields, filterFieldsForNonBulkLoadOps,
                    invertedIndexFieldsForNonBulkLoadOps, true, harness.getMetadataPageManagerFactory());
            break;
        case PARTITIONED_LSM:
            invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(ioManager,
                    harness.getVirtualBufferCaches(), harness.getDiskFileMapProvider(), invListTypeTraits,
                    invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                    harness.getDiskBufferCache(), harness.getOnDiskDir(),
                    harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
                    harness.getOperationTracker(), harness.getIOScheduler(),
                    harness.getIOOperationCallback(), invertedIndexFields, filterTypeTraits,
                    filterCmpFactories, filterFields, filterFieldsForNonBulkLoadOps,
                    invertedIndexFieldsForNonBulkLoadOps, true, harness.getMetadataPageManagerFactory());
            break;
        default:
            throw HyracksDataException.create(ErrorCode.UNKNOWN_INVERTED_INDEX_TYPE, invIndexType);
    }

    // Partitioned index types get the partitioned tuple iterator; the others get the plain one.
    int indexTokenFields = invIndex.getTokenTypeTraits().length;
    int indexInvListFields = invIndex.getInvListTypeTraits().length;
    InvertedIndexTokenizingTupleIterator indexTupleIter;
    switch (invIndexType) {
        case INMEMORY:
        case ONDISK:
        case LSM:
            indexTupleIter = new InvertedIndexTokenizingTupleIterator(indexTokenFields, indexInvListFields,
                    tokenizerFactory.createTokenizer());
            break;
        case PARTITIONED_INMEMORY:
        case PARTITIONED_ONDISK:
        case PARTITIONED_LSM:
            indexTupleIter = new PartitionedInvertedIndexTokenizingTupleIterator(indexTokenFields,
                    indexInvListFields, tokenizerFactory.createTokenizer());
            break;
        default:
            throw HyracksDataException.create(ErrorCode.UNKNOWN_INVERTED_INDEX_TYPE, invIndexType);
    }

    return new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory, invIndexType,
            indexTupleIter);
}
Aggregations