Use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.
Class LSMInvertedIndex, method flush.
@Override
public ILSMDiskComponent flush(ILSMIOOperation operation) throws HyracksDataException {
    LSMInvertedIndexFlushOperation flushOp = (LSMInvertedIndexFlushOperation) operation;
    // Create an inverted index instance to be bulk loaded.
    LSMInvertedIndexDiskComponent component = createDiskInvIndexComponent(componentFactory, flushOp.getTarget(),
            flushOp.getDeletedKeysBTreeTarget(), flushOp.getBloomFilterTarget(), true);
    // Create a scan cursor on the BTree underlying the in-memory inverted index.
    LSMInvertedIndexMemoryComponent flushingComponent =
            (LSMInvertedIndexMemoryComponent) flushOp.getFlushingComponent();
    RangePredicate nullPred = new RangePredicate(null, null, true, true, null, null);
    // Search the deleted keys BTree to calculate the number of elements for BloomFilter
    IIndexAccessor deletedKeysBTreeAccessor = flushingComponent.getDeletedKeysBTree()
            .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor btreeCountingCursor = ((BTreeAccessor) deletedKeysBTreeAccessor).createCountingSearchCursor();
    deletedKeysBTreeAccessor.search(btreeCountingCursor, nullPred);
    long numBTreeTuples = 0L;
    try {
        while (btreeCountingCursor.hasNext()) {
            btreeCountingCursor.next();
            ITupleReference countTuple = btreeCountingCursor.getTuple();
            numBTreeTuples = IntegerPointable.getInteger(countTuple.getFieldData(0), countTuple.getFieldStart(0));
        }
    } finally {
        btreeCountingCursor.close();
    }
    ILSMDiskComponentBulkLoader componentBulkLoader =
            createComponentBulkLoader(component, 1.0f, false, numBTreeTuples, false, false);
    // Create a scan cursor on the deleted keys BTree underlying the in-memory inverted index.
    IIndexCursor deletedKeysScanCursor = deletedKeysBTreeAccessor.createSearchCursor(false);
    deletedKeysBTreeAccessor.search(deletedKeysScanCursor, nullPred);
    try {
        while (deletedKeysScanCursor.hasNext()) {
            deletedKeysScanCursor.next();
            ((LSMInvertedIndexDiskComponentBulkLoader) componentBulkLoader).delete(deletedKeysScanCursor.getTuple());
        }
    } finally {
        deletedKeysScanCursor.close();
    }
    // Scan the in-memory inverted index
    InMemoryInvertedIndexAccessor memInvIndexAccessor = (InMemoryInvertedIndexAccessor) flushingComponent
            .getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    BTreeAccessor memBTreeAccessor = memInvIndexAccessor.getBTreeAccessor();
    IIndexCursor scanCursor = memBTreeAccessor.createSearchCursor(false);
    memBTreeAccessor.search(scanCursor, nullPred);
    // Bulk load the disk inverted index from the in-memory inverted index.
    try {
        while (scanCursor.hasNext()) {
            scanCursor.next();
            componentBulkLoader.add(scanCursor.getTuple());
        }
    } finally {
        scanCursor.close();
    }
    if (component.getLSMComponentFilter() != null) {
        List<ITupleReference> filterTuples = new ArrayList<>();
        filterTuples.add(flushingComponent.getLSMComponentFilter().getMinTuple());
        filterTuples.add(flushingComponent.getLSMComponentFilter().getMaxTuple());
        filterManager.updateFilter(component.getLSMComponentFilter(), filterTuples);
        filterManager.writeFilter(component.getLSMComponentFilter(),
                ((OnDiskInvertedIndex) component.getInvIndex()).getBTree());
    }
    flushingComponent.getMetadata().copy(component.getMetadata());
    componentBulkLoader.end();
    return component;
}
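The flush above drains three cursors in sequence (the counting cursor over the deleted-keys BTree, the deleted-keys scan, and the in-memory BTree scan), and each drain follows the same cursor protocol: search, then hasNext()/next()/getTuple() inside a try block, with close() in finally. The sketch below isolates that protocol; SimpleCursor and drain are hypothetical stand-ins for illustration, not the actual Hyracks IIndexCursor API.

import java.util.function.Consumer;

final class CursorDrainSketch {

    // Hypothetical minimal cursor: positioned by next(), exposes the current tuple.
    interface SimpleCursor<T> {
        boolean hasNext();
        void next();
        T getTuple();
        void close();
    }

    // Consume every tuple, guaranteeing the cursor is closed even if the consumer throws,
    // mirroring the try { while (hasNext()) { next(); use(getTuple()); } } finally { close(); } shape above.
    static <T> void drain(SimpleCursor<T> cursor, Consumer<? super T> consumer) {
        try {
            while (cursor.hasNext()) {
                cursor.next();
                consumer.accept(cursor.getTuple());
            }
        } finally {
            cursor.close();
        }
    }
}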
Use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.
Class InMemoryInvertedIndex, method delete.
public void delete(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOperationContext ictx)
        throws HyracksDataException {
    InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
    ctx.getTupleIter().reset(tuple);
    while (ctx.getTupleIter().hasNext()) {
        ctx.getTupleIter().next();
        ITupleReference deleteTuple = ctx.getTupleIter().getTuple();
        try {
            btreeAccessor.delete(deleteTuple);
        } catch (HyracksDataException e) {
            if (e.getErrorCode() != ErrorCode.UPDATE_OR_DELETE_NON_EXISTENT_KEY) {
                throw e;
            }
            // Otherwise ignore the exception: a document may contain duplicate tokens,
            // so the same key may already have been deleted.
        }
    }
}
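The delete path tolerates attempts to remove keys that are already gone: tokenizing a document can produce the same token more than once, so a later delete of the same key hits a non-existent entry. The sketch below shows the same "ignore one specific error code, rethrow everything else" idea in isolation; IndexException, KeyIndex, and the error-code value are hypothetical stand-ins, not Hyracks types.

final class TolerantDeleteSketch {

    static final int NON_EXISTENT_KEY = 43; // hypothetical error code

    static class IndexException extends RuntimeException {
        final int errorCode;
        IndexException(int errorCode, String msg) {
            super(msg);
            this.errorCode = errorCode;
        }
    }

    interface KeyIndex {
        void delete(String key) throws IndexException;
    }

    // Delete a key, treating "key does not exist" as success; any other failure propagates.
    static void deleteIgnoringMissing(KeyIndex index, String key) {
        try {
            index.delete(key);
        } catch (IndexException e) {
            if (e.errorCode != NON_EXISTENT_KEY) {
                throw e; // unexpected failure: propagate
            }
            // Key was already absent (e.g., a duplicate token for the same document): ignore.
        }
    }
}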
Use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.
Class InMemoryInvertedListCursor, method printInvList.
@SuppressWarnings("rawtypes")
@Override
public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
    StringBuilder strBuilder = new StringBuilder();
    try {
        while (btreeCursor.hasNext()) {
            btreeCursor.next();
            ITupleReference tuple = btreeCursor.getTuple();
            ByteArrayInputStream inStream =
                    new ByteArrayInputStream(tuple.getFieldData(1), tuple.getFieldStart(1), tuple.getFieldLength(1));
            DataInput dataIn = new DataInputStream(inStream);
            Object o = serdes[0].deserialize(dataIn);
            strBuilder.append(o.toString() + " ");
        }
    } finally {
        btreeCursor.close();
        btreeCursor.reset();
    }
    btreeAccessor.search(btreeCursor, btreePred);
    return strBuilder.toString();
}
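printInvList deserializes field 1 of each BTree tuple by wrapping the tuple's backing byte array (offset plus length) in a ByteArrayInputStream and handing the resulting DataInput to a serde. The self-contained sketch below demonstrates that slice-and-deserialize idiom with plain JDK streams; the two-field buffer layout is made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

final class FieldDeserializationSketch {
    public static void main(String[] args) throws IOException {
        // Build a buffer that contains two "fields": a short followed by an int.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);
        out.writeShort(7);   // field 0
        out.writeInt(42);    // field 1
        byte[] buffer = bos.toByteArray();

        // Deserialize field 1 directly from its offset inside the shared buffer,
        // just as the cursor code wraps (fieldData, fieldStart, fieldLength).
        int field1Start = Short.BYTES;
        int field1Length = Integer.BYTES;
        DataInput dataIn = new DataInputStream(
                new ByteArrayInputStream(buffer, field1Start, field1Length));
        System.out.println("field 1 = " + dataIn.readInt()); // prints 42
    }
}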
Use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.
Class OnDiskInvertedIndex, method validate.
@Override
public void validate() throws HyracksDataException {
    btree.validate();
    // Scan the btree and validate the order of elements in each inverted-list.
    IIndexAccessor btreeAccessor =
            btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IIndexCursor btreeCursor = btreeAccessor.createSearchCursor(false);
    MultiComparator btreeCmp = MultiComparator.create(btree.getComparatorFactories());
    RangePredicate rangePred = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
    int[] fieldPermutation = new int[tokenTypeTraits.length];
    for (int i = 0; i < tokenTypeTraits.length; i++) {
        fieldPermutation[i] = i;
    }
    PermutingTupleReference tokenTuple = new PermutingTupleReference(fieldPermutation);
    IInvertedIndexAccessor invIndexAccessor =
            (IInvertedIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
    IInvertedListCursor invListCursor = invIndexAccessor.createInvertedListCursor();
    MultiComparator invListCmp = MultiComparator.create(invListCmpFactories);
    try {
        // Search key for finding an inverted-list in the actual index.
        ArrayTupleBuilder prevBuilder = new ArrayTupleBuilder(invListTypeTraits.length);
        ArrayTupleReference prevTuple = new ArrayTupleReference();
        btreeAccessor.search(btreeCursor, rangePred);
        while (btreeCursor.hasNext()) {
            btreeCursor.next();
            tokenTuple.reset(btreeCursor.getTuple());
            // Validate inverted list by checking that the elements are totally ordered.
            invIndexAccessor.openInvertedListCursor(invListCursor, tokenTuple);
            invListCursor.pinPages();
            try {
                if (invListCursor.hasNext()) {
                    invListCursor.next();
                    ITupleReference invListElement = invListCursor.getTuple();
                    // Initialize prev tuple.
                    TupleUtils.copyTuple(prevBuilder, invListElement, invListElement.getFieldCount());
                    prevTuple.reset(prevBuilder.getFieldEndOffsets(), prevBuilder.getByteArray());
                }
                while (invListCursor.hasNext()) {
                    invListCursor.next();
                    ITupleReference invListElement = invListCursor.getTuple();
                    // Compare with previous element.
                    if (invListCmp.compare(invListElement, prevTuple) <= 0) {
                        throw new HyracksDataException("Index validation failed.");
                    }
                    // Set new prevTuple.
                    TupleUtils.copyTuple(prevBuilder, invListElement, invListElement.getFieldCount());
                    prevTuple.reset(prevBuilder.getFieldEndOffsets(), prevBuilder.getByteArray());
                }
            } finally {
                invListCursor.unpinPages();
            }
        }
    } finally {
        btreeCursor.close();
    }
}
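The core of validate() is a strict-ordering check over each inverted list: every element must compare strictly greater than its predecessor, and the predecessor is copied into prevBuilder because the cursor reuses its tuple buffer between calls. The generic sketch below expresses the same check with java.util.Comparator; since it works on ordinary object references rather than reused buffers, no copying is needed.

import java.util.Comparator;
import java.util.Iterator;

final class OrderValidationSketch {

    // Returns true iff the sequence is strictly increasing under cmp.
    static <T> boolean isStrictlyOrdered(Iterable<T> elements, Comparator<? super T> cmp) {
        Iterator<T> it = elements.iterator();
        if (!it.hasNext()) {
            return true; // an empty list is trivially ordered
        }
        T prev = it.next(); // initialize "prev", as validate() does with prevTuple
        while (it.hasNext()) {
            T current = it.next();
            if (cmp.compare(current, prev) <= 0) {
                return false; // duplicate or out-of-order element
            }
            prev = current;
        }
        return true;
    }
}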
Use of org.apache.hyracks.dataflow.common.data.accessors.ITupleReference in project asterixdb by apache.
Class PartitionedOnDiskInvertedIndex, method openInvertedListPartitionCursors.
@Override
public boolean openInvertedListPartitionCursors(IInvertedIndexSearcher searcher, IIndexOperationContext ictx,
        short numTokensLowerBound, short numTokensUpperBound, InvertedListPartitions invListPartitions,
        List<IInvertedListCursor> cursorsOrderedByTokens) throws HyracksDataException {
    PartitionedTOccurrenceSearcher partSearcher = (PartitionedTOccurrenceSearcher) searcher;
    OnDiskInvertedIndexOpContext ctx = (OnDiskInvertedIndexOpContext) ictx;
    ITupleReference lowSearchKey = null;
    ITupleReference highSearchKey = null;
    partSearcher.setNumTokensBoundsInSearchKeys(numTokensLowerBound, numTokensUpperBound);
    if (numTokensLowerBound < 0) {
        ctx.getBtreePred().setLowKeyComparator(ctx.getPrefixSearchCmp());
        lowSearchKey = partSearcher.getPrefixSearchKey();
    } else {
        ctx.getBtreePred().setLowKeyComparator(ctx.getSearchCmp());
        lowSearchKey = partSearcher.getFullLowSearchKey();
    }
    if (numTokensUpperBound < 0) {
        ctx.getBtreePred().setHighKeyComparator(ctx.getPrefixSearchCmp());
        highSearchKey = partSearcher.getPrefixSearchKey();
    } else {
        ctx.getBtreePred().setHighKeyComparator(ctx.getSearchCmp());
        highSearchKey = partSearcher.getFullHighSearchKey();
    }
    ctx.getBtreePred().setLowKey(lowSearchKey, true);
    ctx.getBtreePred().setHighKey(highSearchKey, true);
    ctx.getBtreeAccessor().search(ctx.getBtreeCursor(), ctx.getBtreePred());
    boolean tokenExists = false;
    try {
        while (ctx.getBtreeCursor().hasNext()) {
            ctx.getBtreeCursor().next();
            ITupleReference btreeTuple = ctx.getBtreeCursor().getTuple();
            short numTokens = ShortPointable.getShort(btreeTuple.getFieldData(PARTITIONING_NUM_TOKENS_FIELD),
                    btreeTuple.getFieldStart(PARTITIONING_NUM_TOKENS_FIELD));
            IInvertedListCursor invListCursor = partSearcher.getCachedInvertedListCursor();
            resetInvertedListCursor(btreeTuple, invListCursor);
            cursorsOrderedByTokens.add(invListCursor);
            invListPartitions.addInvertedListCursor(invListCursor, numTokens);
            tokenExists = true;
        }
    } finally {
        ctx.getBtreeCursor().close();
        ctx.getBtreeCursor().reset();
    }
    return tokenExists;
}
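The bound-selection logic above treats a negative token-count bound as "unbounded on that side": it selects the prefix comparator and prefix search key, while a non-negative bound selects the full comparator and the full low or high search key. The sketch below mirrors that decision in isolation; RangeBound and the key strings are hypothetical placeholders, not the Hyracks predicate API.

final class PartitionBoundsSketch {

    static final class RangeBound {
        final String key;
        final boolean prefixMatch;
        RangeBound(String key, boolean prefixMatch) {
            this.key = key;
            this.prefixMatch = prefixMatch;
        }
    }

    // Mirrors the if/else above: numTokensBound < 0 selects the prefix search key.
    static RangeBound chooseBound(short numTokensBound, String prefixKey, String fullKey) {
        if (numTokensBound < 0) {
            return new RangeBound(prefixKey, true);   // unbounded: match on the token prefix only
        }
        return new RangeBound(fullKey, false);        // bounded: match on the full search key
    }

    public static void main(String[] args) {
        RangeBound low = chooseBound((short) -1, "tokenOnly", "tokenAndCount");
        RangeBound high = chooseBound((short) 3, "tokenOnly", "tokenAndCount");
        System.out.println(low.key + " / " + high.key); // prints: tokenOnly / tokenAndCount
    }
}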