use of com.apple.foundationdb.record.RecordCursor in project fdb-record-layer by FoundationDB.
the class IndexingBase method iterateRangeOnly.
/**
* iterate cursor's items and index them.
*
* @param store the record store.
* @param cursor iteration items.
* @param getRecordToIndex function to convert cursor's item to a record that should be indexed (or null, if inapplicable)
* @param nextResultCont when return, if hasMore is true, holds the last cursored result - unprocessed - as a
* continuation item.
* @param hasMore when return, true if the cursor's source is not exhausted (not more items in range).
* @param recordsScanned when return, number of scanned records.
* @param isIdempotent are all the built indexes idempotent
* @param <T> cursor result's type.
*
* @return hasMore, nextResultCont, and recordsScanned.
*/
protected <T> CompletableFuture<Void> iterateRangeOnly(@Nonnull FDBRecordStore store, @Nonnull RecordCursor<T> cursor, @Nonnull BiFunction<FDBRecordStore, RecordCursorResult<T>, CompletableFuture<FDBStoredRecord<Message>>> getRecordToIndex, @Nonnull AtomicReference<RecordCursorResult<T>> nextResultCont, @Nonnull AtomicBoolean hasMore, @Nullable AtomicLong recordsScanned, final boolean isIdempotent) {
final FDBStoreTimer timer = getRunner().getTimer();
final FDBRecordContext context = store.getContext();
// Need to do this each transaction because other index enabled state might have changed. Could cache based on that.
// Copying the state also guards against changes made by other online building from check version.
AtomicLong recordsScannedCounter = new AtomicLong();
final AtomicReference<RecordCursorResult<T>> nextResult = new AtomicReference<>(null);
return AsyncUtil.whileTrue(() -> cursor.onNext().thenCompose(result -> {
RecordCursorResult<T> currResult;
final boolean isExhausted;
if (result.hasNext()) {
// has next, process one previous item (if exists)
currResult = nextResult.get();
nextResult.set(result);
if (currResult == null) {
// that was the first item, nothing to process
return AsyncUtil.READY_TRUE;
}
isExhausted = false;
} else {
// end of the cursor list
timerIncrement(timer, FDBStoreTimer.Counts.ONLINE_INDEX_BUILDER_RANGES_BY_COUNT);
if (!result.getNoNextReason().isSourceExhausted()) {
nextResultCont.set(nextResult.get());
hasMore.set(true);
return AsyncUtil.READY_FALSE;
}
// source is exhausted, fall down to handle the last item and return with hasMore=false
currResult = nextResult.get();
if (currResult == null) {
// there was no data
hasMore.set(false);
return AsyncUtil.READY_FALSE;
}
// here, process the last item and return
nextResult.set(null);
isExhausted = true;
}
// here: currResult must have value
timerIncrement(timer, FDBStoreTimer.Counts.ONLINE_INDEX_BUILDER_RECORDS_SCANNED);
recordsScannedCounter.incrementAndGet();
return getRecordToIndex.apply(store, currResult).thenCompose(rec -> {
if (null == rec) {
if (isExhausted) {
hasMore.set(false);
return AsyncUtil.READY_FALSE;
}
return AsyncUtil.READY_TRUE;
}
// This record should be indexed. Add it to the transaction.
if (isIdempotent) {
store.addRecordReadConflict(rec.getPrimaryKey());
}
timerIncrement(timer, FDBStoreTimer.Counts.ONLINE_INDEX_BUILDER_RECORDS_INDEXED);
final CompletableFuture<Void> updateMaintainer = updateMaintainerBuilder(store, rec);
if (isExhausted) {
// we've just processed the last item
hasMore.set(false);
return updateMaintainer.thenApply(vignore -> false);
}
return updateMaintainer.thenCompose(vignore -> context.getApproximateTransactionSize().thenApply(size -> {
if (size >= common.config.getMaxWriteLimitBytes()) {
// the transaction becomes too big - stop iterating
timerIncrement(timer, FDBStoreTimer.Counts.ONLINE_INDEX_BUILDER_RANGES_BY_SIZE);
nextResultCont.set(nextResult.get());
hasMore.set(true);
return false;
}
return true;
}));
});
}), cursor.getExecutor()).thenApply(vignore -> {
long recordsScannedInTransaction = recordsScannedCounter.get();
if (recordsScanned != null) {
recordsScanned.addAndGet(recordsScannedInTransaction);
}
if (common.isTrackProgress()) {
for (Index index : common.getTargetIndexes()) {
final Subspace scannedRecordsSubspace = indexBuildScannedRecordsSubspace(store, index);
store.context.ensureActive().mutate(MutationType.ADD, scannedRecordsSubspace.getKey(), FDBRecordStore.encodeRecordCount(recordsScannedInTransaction));
}
}
return null;
});
}
use of com.apple.foundationdb.record.RecordCursor in project fdb-record-layer by FoundationDB.
the class IndexingByIndex method buildRangeOnly.
@Nonnull
private CompletableFuture<Boolean> buildRangeOnly(@Nonnull FDBRecordStore store, byte[] startBytes, byte[] endBytes, @Nonnull AtomicLong recordsScanned) {
// return false when done
validateSameMetadataOrThrow(store);
final Index index = common.getIndex();
final IndexMaintainer maintainer = store.getIndexMaintainer(index);
// idempotence - We could have verified it at the first iteration only, but the repeating checks seem harmless
validateOrThrowEx(maintainer.isIdempotent(), "target index is not idempotent");
// readability - This method shouldn't block if one has already opened the record store (as we did)
Index srcIndex = getSourceIndex(store.getRecordMetaData());
validateOrThrowEx(store.isIndexReadable(srcIndex), "source index is not readable");
RangeSet rangeSet = new RangeSet(store.indexRangeSubspace(index));
AsyncIterator<Range> ranges = rangeSet.missingRanges(store.ensureContextActive(), startBytes, endBytes).iterator();
final ExecuteProperties.Builder executeProperties = ExecuteProperties.newBuilder().setIsolationLevel(IsolationLevel.SNAPSHOT).setReturnedRowLimit(// respect limit in this path; +1 allows a continuation item
getLimit() + 1);
final ScanProperties scanProperties = new ScanProperties(executeProperties.build());
return ranges.onHasNext().thenCompose(hasNext -> {
if (Boolean.FALSE.equals(hasNext)) {
// no more missing ranges - all done
return AsyncUtil.READY_FALSE;
}
final Range range = ranges.next();
final Tuple rangeStart = RangeSet.isFirstKey(range.begin) ? null : Tuple.fromBytes(range.begin);
final Tuple rangeEnd = RangeSet.isFinalKey(range.end) ? null : Tuple.fromBytes(range.end);
final TupleRange tupleRange = TupleRange.between(rangeStart, rangeEnd);
RecordCursor<FDBIndexedRecord<Message>> cursor = store.scanIndexRecords(srcIndex.getName(), IndexScanType.BY_VALUE, tupleRange, null, scanProperties);
final AtomicReference<RecordCursorResult<FDBIndexedRecord<Message>>> lastResult = new AtomicReference<>(RecordCursorResult.exhausted());
final AtomicBoolean hasMore = new AtomicBoolean(true);
// Note that currently indexing by index is online implemented for idempotent indexes
final boolean isIdempotent = true;
return iterateRangeOnly(store, cursor, this::getRecordIfTypeMatch, lastResult, hasMore, recordsScanned, isIdempotent).thenApply(vignore -> hasMore.get() ? lastResult.get().get().getIndexEntry().getKey() : rangeEnd).thenCompose(cont -> rangeSet.insertRange(store.ensureContextActive(), packOrNull(rangeStart), packOrNull(cont), true).thenApply(ignore -> !Objects.equals(cont, rangeEnd)));
});
}
use of com.apple.foundationdb.record.RecordCursor in project fdb-record-layer by FoundationDB.
the class IndexingMultiTargetByRecords method buildRangeOnly.
@Nonnull
private CompletableFuture<Boolean> buildRangeOnly(@Nonnull FDBRecordStore store, byte[] startBytes, byte[] endBytes, @Nonnull AtomicLong recordsScanned) {
// return false when done
/* Multi target consistency:
* 1. Identify missing ranges from only the first index
* 2. Update all indexes' range sets as the indexes are built - each inserted range is validated as empty.
* 3. While each index as readable, we validate that its range is completely built.
*/
validateSameMetadataOrThrow(store);
RangeSet rangeSet = new RangeSet(store.indexRangeSubspace(common.getPrimaryIndex()));
AsyncIterator<Range> ranges = rangeSet.missingRanges(store.ensureContextActive(), startBytes, endBytes).iterator();
final List<Index> targetIndexes = common.getTargetIndexes();
final List<RangeSet> targetRangeSets = targetIndexes.stream().map(targetIndex -> new RangeSet(store.indexRangeSubspace(targetIndex))).collect(Collectors.toList());
final boolean isIdempotent = areTheyAllIdempotent(store, targetIndexes);
final IsolationLevel isolationLevel = isIdempotent ? IsolationLevel.SNAPSHOT : IsolationLevel.SERIALIZABLE;
final ExecuteProperties.Builder executeProperties = ExecuteProperties.newBuilder().setIsolationLevel(isolationLevel).setReturnedRowLimit(// always respect limit in this path; +1 allows a continuation item
getLimit() + 1);
final ScanProperties scanProperties = new ScanProperties(executeProperties.build());
return ranges.onHasNext().thenCompose(hasNext -> {
if (Boolean.FALSE.equals(hasNext)) {
// no more missing ranges - all done
return AsyncUtil.READY_FALSE;
}
final Range range = ranges.next();
final Tuple rangeStart = RangeSet.isFirstKey(range.begin) ? null : Tuple.fromBytes(range.begin);
final Tuple rangeEnd = RangeSet.isFinalKey(range.end) ? null : Tuple.fromBytes(range.end);
final TupleRange tupleRange = TupleRange.between(rangeStart, rangeEnd);
RecordCursor<FDBStoredRecord<Message>> cursor = store.scanRecords(tupleRange, null, scanProperties);
final AtomicReference<RecordCursorResult<FDBStoredRecord<Message>>> lastResult = new AtomicReference<>(RecordCursorResult.exhausted());
final AtomicBoolean hasMore = new AtomicBoolean(true);
return iterateRangeOnly(store, cursor, this::getRecordIfTypeMatch, lastResult, hasMore, recordsScanned, isIdempotent).thenApply(vignore -> hasMore.get() ? lastResult.get().get().getPrimaryKey() : rangeEnd).thenCompose(cont -> insertRanges(store.ensureContextActive(), targetRangeSets, packOrNull(rangeStart), packOrNull(cont)).thenApply(ignore -> !Objects.equals(cont, rangeEnd)));
});
}
use of com.apple.foundationdb.record.RecordCursor in project fdb-record-layer by FoundationDB.
the class IndexingScrubDangling method scrubIndexRangeOnly.
@Nonnull
private CompletableFuture<Boolean> scrubIndexRangeOnly(@Nonnull FDBRecordStore store, byte[] startBytes, byte[] endBytes, @Nonnull AtomicLong recordsScanned) {
// return false when done
Index index = common.getIndex();
final RecordMetaData metaData = store.getRecordMetaData();
final RecordMetaDataProvider recordMetaDataProvider = common.getRecordStoreBuilder().getMetaDataProvider();
if (recordMetaDataProvider == null || !metaData.equals(recordMetaDataProvider.getRecordMetaData())) {
throw new MetaDataException("Store does not have the same metadata");
}
final IndexMaintainer maintainer = store.getIndexMaintainer(index);
// scrubbing only readable, VALUE, idempotence indexes (at least for now)
validateOrThrowEx(maintainer.isIdempotent(), "scrubbed index is not idempotent");
validateOrThrowEx(index.getType().equals(IndexTypes.VALUE) || scrubbingPolicy.ignoreIndexTypeCheck(), "scrubbed index is not a VALUE index");
validateOrThrowEx(store.getIndexState(index) == IndexState.READABLE, "scrubbed index is not readable");
RangeSet rangeSet = new RangeSet(indexScrubIndexRangeSubspace(store, index));
AsyncIterator<Range> ranges = rangeSet.missingRanges(store.ensureContextActive(), startBytes, endBytes).iterator();
final ExecuteProperties.Builder executeProperties = ExecuteProperties.newBuilder().setIsolationLevel(IsolationLevel.SNAPSHOT).setReturnedRowLimit(// always respectLimit in this path; +1 allows a continuation item
getLimit() + 1);
final ScanProperties scanProperties = new ScanProperties(executeProperties.build());
return ranges.onHasNext().thenCompose(hasNext -> {
if (Boolean.FALSE.equals(hasNext)) {
// Here: no more missing ranges - all done
// To avoid stale metadata, we'll keep the scrubbed-ranges indicator empty until the next scrub call.
Transaction tr = store.getContext().ensureActive();
tr.clear(indexScrubIndexRangeSubspace(store, index).range());
return AsyncUtil.READY_FALSE;
}
final Range range = ranges.next();
final Tuple rangeStart = RangeSet.isFirstKey(range.begin) ? null : Tuple.fromBytes(range.begin);
final Tuple rangeEnd = RangeSet.isFinalKey(range.end) ? null : Tuple.fromBytes(range.end);
final TupleRange tupleRange = TupleRange.between(rangeStart, rangeEnd);
RecordCursor<FDBIndexedRecord<Message>> cursor = store.scanIndexRecords(index.getName(), IndexScanType.BY_VALUE, tupleRange, null, IndexOrphanBehavior.RETURN, scanProperties);
final AtomicBoolean hasMore = new AtomicBoolean(true);
final AtomicReference<RecordCursorResult<FDBIndexedRecord<Message>>> lastResult = new AtomicReference<>(RecordCursorResult.exhausted());
final long scanLimit = scrubbingPolicy.getEntriesScanLimit();
return iterateRangeOnly(store, cursor, this::deleteIndexIfDangling, lastResult, hasMore, recordsScanned, true).thenApply(vignore -> hasMore.get() ? lastResult.get().get().getIndexEntry().getKey() : rangeEnd).thenCompose(cont -> rangeSet.insertRange(store.ensureContextActive(), packOrNull(rangeStart), packOrNull(cont), true).thenApply(ignore -> {
if (scanLimit > 0) {
scanCounter += recordsScanned.get();
if (scanLimit <= scanCounter) {
return false;
}
}
return !Objects.equals(cont, rangeEnd);
}));
});
}
use of com.apple.foundationdb.record.RecordCursor in project fdb-record-layer by FoundationDB.
the class TextScan method scan.
@Nonnull
// try-with-resources - the two cursors returned cannot be closed because they are wrapped and returned
@SuppressWarnings("squid:S2095")
private <M extends Message> RecordCursor<IndexEntry> scan(@Nonnull FDBRecordStoreBase<M> store, @Nonnull EvaluationContext context, @Nullable Tuple prefix, @Nullable TupleRange suffix, @Nonnull Index index, @Nonnull List<String> tokenList, @Nullable byte[] continuation, @Nonnull ScanProperties scanProperties) {
if (tokenList.isEmpty()) {
return RecordCursor.empty();
}
final int prefixEntries = 1 + (prefix != null ? prefix.size() : 0);
final Comparisons.Type comparisonType = textComparison.getType();
if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_PREFIX) || (tokenList.size() == 1 && (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ALL_PREFIXES) || comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ANY_PREFIX)))) {
if (tokenList.size() != 1) {
throw new RecordCoreException("text prefix comparison included " + tokenList.size() + " comparands instead of one");
}
return scanTokenPrefix(store, tokenList.get(0), prefix, suffix, index, scanProperties).apply(continuation);
} else if (tokenList.size() == 1) {
// is necessary, not just nice to have.
return scanToken(store, tokenList.get(0), prefix, suffix, index, scanProperties).apply(continuation);
} else if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ALL)) {
// Take the intersection of all children. Note that to handle skip and the returned row limit correctly,
// the skip and limit are both removed and then applied later.
final ScanProperties childScanProperties = scanProperties.with(ExecuteProperties::clearSkipAndLimit);
List<Function<byte[], RecordCursor<IndexEntry>>> intersectionChildren = tokenList.stream().map(token -> scanToken(store, token, prefix, suffix, index, childScanProperties)).collect(Collectors.toList());
return IntersectionCursor.create(suffixComparisonKeyFunction(prefixEntries), scanProperties.isReverse(), intersectionChildren, continuation, store.getTimer()).skip(scanProperties.getExecuteProperties().getSkip()).limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
} else if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ALL_PREFIXES)) {
final Comparisons.TextContainsAllPrefixesComparison allPrefixesComparison = (Comparisons.TextContainsAllPrefixesComparison) textComparison;
final ScanProperties childScanProperties = scanProperties.with(ExecuteProperties::clearSkipAndLimit);
List<Function<byte[], RecordCursor<IndexEntry>>> intersectionChildren = tokenList.stream().map(token -> scanTokenPrefix(store, token, prefix, suffix, index, childScanProperties)).collect(Collectors.toList());
return ProbableIntersectionCursor.create(suffixComparisonKeyFunction(prefixEntries), intersectionChildren, allPrefixesComparison.getExpectedRecords(), allPrefixesComparison.getFalsePositivePercentage(), continuation, store.getTimer()).skip(scanProperties.getExecuteProperties().getSkip()).limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
} else if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ANY)) {
// Take the union of all children. Note that to handle skip and the returned row limit correctly,
// the skip is removed from the children and applied to the returned cursor. Also, the limit
// is adjusted upwards and then must be applied again to returned union.
final ScanProperties childScanProperties = scanProperties.with(ExecuteProperties::clearSkipAndAdjustLimit);
List<Function<byte[], RecordCursor<IndexEntry>>> unionChildren = tokenList.stream().map(token -> scanToken(store, token, prefix, suffix, index, childScanProperties)).collect(Collectors.toList());
return UnionCursor.create(suffixComparisonKeyFunction(prefixEntries), scanProperties.isReverse(), unionChildren, continuation, store.getTimer()).skip(scanProperties.getExecuteProperties().getSkip()).limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
} else if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ANY_PREFIX)) {
final ScanProperties childScanProperties = scanProperties.with(ExecuteProperties::clearSkipAndAdjustLimit);
List<Function<byte[], RecordCursor<IndexEntry>>> unionChildren = tokenList.stream().map(token -> scanTokenPrefix(store, token, prefix, suffix, index, childScanProperties)).collect(Collectors.toList());
return UnorderedUnionCursor.create(unionChildren, continuation, store.getTimer()).skip(scanProperties.getExecuteProperties().getSkip()).limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
} else {
// Apply the filter based on the position lists
final Function<List<IndexEntry>, Boolean> predicate;
if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_ALL_WITHIN) && textComparison instanceof Comparisons.TextWithMaxDistanceComparison) {
int maxDistance = ((Comparisons.TextWithMaxDistanceComparison) textComparison).getMaxDistance();
predicate = entries -> entriesContainAllWithin(entries, maxDistance);
} else if (comparisonType.equals(Comparisons.Type.TEXT_CONTAINS_PHRASE)) {
List<String> tokensWithStopWords = getTokenList(store, context, false);
predicate = entries -> entriesContainPhrase(entries, tokensWithStopWords);
} else {
throw new RecordCoreException("unsupported comparison type for text query: " + comparisonType);
}
// It's either TEXT_CONTAINS_ALL_WITHIN_DISTANCE or TEXT_CONTAINS_PHRASE. In any case, we need to scan
// all tokens, intersect, and then apply a filter on the returned list.
final ScanProperties childScanProperties = scanProperties.with(ExecuteProperties::clearSkipAndLimit);
List<Function<byte[], RecordCursor<IndexEntry>>> intersectionChildren = tokenList.stream().map(token -> scanToken(store, token, prefix, suffix, index, childScanProperties)).collect(Collectors.toList());
final RecordCursor<List<IndexEntry>> intersectionCursor = IntersectionMultiCursor.create(suffixComparisonKeyFunction(prefixEntries), scanProperties.isReverse(), intersectionChildren, continuation, store.getTimer());
return intersectionCursor.filterInstrumented(predicate, store.getTimer(), inCounts, duringEvents, successCounts, failureCounts).map(indexEntries -> indexEntries.get(0)).skip(scanProperties.getExecuteProperties().getSkip()).limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
}
}
Aggregations