use of com.apple.foundationdb.record.IndexEntry in project fdb-record-layer by FoundationDB.
the class TextIndexMaintainer method updateOneKeyAsync.
/**
 * Tokenizes the text column of a single index entry and applies each unique token to the index,
 * either writing or removing one bunched-map entry per token.
 *
 * @param savedRecord the record whose primary key is combined with the entry key
 * @param remove {@code true} to remove the token entries, {@code false} to write them
 * @param entry the index entry whose key contains the text to tokenize
 * @param textPosition the column of the combined entry key that holds the text
 * @param recordTokenizerVersion the tokenizer version handed to the tokenizer
 * @param <M> the message type of the record
 * @return a future that completes once every token has been processed
 */
@Nonnull
private <M extends Message> CompletableFuture<Void> updateOneKeyAsync(@Nonnull FDBIndexableRecord<M> savedRecord, final boolean remove, @Nonnull IndexEntry entry, int textPosition, int recordTokenizerVersion) {
    final long startTime = System.nanoTime();
    final Tuple fullKey = indexEntryKey(entry.getKey(), savedRecord.getPrimaryKey());
    final String text = fullKey.getString(textPosition);
    if (text == null || text.isEmpty()) {
        // Unset or empty text yields no tokens, so there is nothing to index.
        return AsyncUtil.DONE;
    }

    // Columns before the text form the grouping key; columns after it form the grouped key.
    final Tuple groupingKey;
    if (textPosition == 0) {
        groupingKey = null;
    } else {
        groupingKey = TupleHelpers.subTuple(fullKey, 0, textPosition);
    }
    final Tuple groupedKey = TupleHelpers.subTuple(fullKey, textPosition + 1, fullKey.size());
    final Map<String, List<Integer>> positionMap = tokenizer.tokenizeToMap(text, recordTokenizerVersion, TextTokenizer.TokenizerMode.INDEX);
    final StoreTimer.Event indexUpdateEvent = remove ? FDBStoreTimer.Events.DELETE_INDEX_ENTRY : FDBStoreTimer.Events.SAVE_INDEX_ENTRY;

    if (LOGGER.isDebugEnabled()) {
        final Pair<Integer, Integer> estimatedSize = estimateSize(groupingKey, positionMap, groupedKey);
        final int uniqueTokens = positionMap.size();
        final double avgTokenSize = positionMap.keySet().stream().mapToInt(String::length).sum() * 1.0 / uniqueTokens;
        final int maxTokenSize = positionMap.keySet().stream().mapToInt(String::length).max().orElse(0);
        final double avgPositions = positionMap.values().stream().mapToInt(List::size).sum() * 1.0 / uniqueTokens;
        final int maxPositions = positionMap.values().stream().mapToInt(List::size).max().orElse(0);
        final KeyValueLogMessage msg = KeyValueLogMessage.build("performed text tokenization",
                LogMessageKeys.REMOVE, remove,
                LogMessageKeys.TEXT_SIZE, text.length(),
                LogMessageKeys.UNIQUE_TOKENS, uniqueTokens,
                LogMessageKeys.AVG_TOKEN_SIZE, avgTokenSize,
                LogMessageKeys.MAX_TOKEN_SIZE, maxTokenSize,
                LogMessageKeys.AVG_POSITIONS, avgPositions,
                LogMessageKeys.MAX_POSITIONS, maxPositions,
                LogMessageKeys.TEXT_KEY_SIZE, estimatedSize.getKey(),
                LogMessageKeys.TEXT_VALUE_SIZE, estimatedSize.getValue(),
                LogMessageKeys.TEXT_INDEX_SIZE_AMORTIZED, estimatedSize.getKey() / 10 + estimatedSize.getValue(),
                IndexOptions.TEXT_TOKENIZER_NAME_OPTION, tokenizer.getName(),
                IndexOptions.TEXT_TOKENIZER_VERSION_OPTION, recordTokenizerVersion,
                IndexOptions.TEXT_ADD_AGGRESSIVE_CONFLICT_RANGES_OPTION, addAggressiveConflictRanges,
                LogMessageKeys.PRIMARY_KEY, savedRecord.getPrimaryKey(),
                LogMessageKeys.SUBSPACE, ByteArrayUtil2.loggable(state.store.getSubspace().getKey()),
                LogMessageKeys.INDEX_SUBSPACE, ByteArrayUtil2.loggable(state.indexSubspace.getKey()),
                LogMessageKeys.WROTE_INDEX, true);
        LOGGER.debug(msg.toString());
    }

    if (positionMap.isEmpty()) {
        // The tokenizer produced no tokens: record the elapsed time (if timing is on) and finish.
        if (state.store.getTimer() != null) {
            state.store.getTimer().recordSinceNanoTime(indexUpdateEvent, startTime);
        }
        return AsyncUtil.DONE;
    }

    if (addAggressiveConflictRanges) {
        // Cover the whole index (or the whole group) with one read and one write conflict range
        // so that fewer mutations are sent to the resolver. Two records with the same grouping
        // key arriving together will now always conflict, but each token insert already performs
        // its own read, so the chance of a conflict was high to begin with.
        final Range conflictRange;
        if (groupingKey == null) {
            conflictRange = state.indexSubspace.range();
        } else {
            conflictRange = state.indexSubspace.range(groupingKey);
        }
        state.context.ensureActive().addReadConflictRange(conflictRange.begin, conflictRange.end);
        state.context.ensureActive().addWriteConflictRange(conflictRange.begin, conflictRange.end);
    }

    final BunchedMap<Tuple, List<Integer>> bunchedMap = getBunchedMap(state.context);
    final CompletableFuture<Void> tokenInsertFuture = RecordCursor.fromIterator(state.context.getExecutor(), positionMap.entrySet().iterator())
            .forEachAsync((Map.Entry<String, List<Integer>> tokenEntry) -> {
                // Each token gets its own subspace, nested under the grouping key when there is one.
                final Tuple tokenTuple = (groupingKey == null)
                        ? Tuple.from(tokenEntry.getKey())
                        : groupingKey.add(tokenEntry.getKey());
                final Subspace tokenSubspace = state.indexSubspace.subspace(tokenTuple);
                if (!remove) {
                    final List<Integer> positions = omitPositionLists ? Collections.emptyList() : tokenEntry.getValue();
                    return bunchedMap.put(state.transaction, tokenSubspace, groupedKey, positions).thenAccept(ignore -> {
                    });
                }
                return bunchedMap.remove(state.transaction, tokenSubspace, groupedKey).thenAccept(ignore -> {
                });
            }, state.store.getPipelineSize(PipelineOperation.TEXT_INDEX_UPDATE));

    if (state.store.getTimer() == null) {
        return tokenInsertFuture;
    }
    return state.store.getTimer().instrument(indexUpdateEvent, tokenInsertFuture, state.context.getExecutor(), startTime);
}
use of com.apple.foundationdb.record.IndexEntry in project fdb-record-layer by FoundationDB.
the class TextIndexMaintainer method scan.
/**
 * Scan this index over a range of text tokens. A text index can only be scanned by token;
 * apart from that restriction the range may lie between any two entries, and a prefix scan
 * is requested by passing a <code>range</code> whose two endpoints both have type
 * {@link com.apple.foundationdb.record.EndpointType#PREFIX_STRING PREFIX_STRING}.
 * Each returned index entry carries the token that was found in the index, placed in the
 * key column that corresponds to the text field of the index's root expression. The value
 * of each entry is a tuple whose first element is the position list for that token within
 * the associated record's field.
 *
 * @param scanType the {@link IndexScanType type} of scan to perform
 * @param range the range to scan
 * @param continuation any continuation from a previous scan invocation
 * @param scanProperties skip, limit and other properties of the scan
 * @return a cursor over all index entries in <code>range</code>
 * @throws RecordCoreException if <code>scanType</code> is not {@link IndexScanType#BY_TEXT_TOKEN}
 * @see TextCursor
 */
@Nonnull
@Override
// not closing the returned cursor
@SuppressWarnings("squid:S2095")
public RecordCursor<IndexEntry> scan(@Nonnull IndexScanType scanType, @Nonnull TupleRange range, @Nullable byte[] continuation, @Nonnull ScanProperties scanProperties) {
    if (scanType != IndexScanType.BY_TEXT_TOKEN) {
        throw new RecordCoreException("Can only scan text index by text token.");
    }

    final int textColumn = textFieldPosition(state.index.getRootExpression());
    final TextSubspaceSplitter splitter = new TextSubspaceSplitter(state.indexSubspace, textColumn + 1);
    final Range keyRange = range.toRange();

    // The skip is applied on the resulting cursor below, so the underlying scan runs with the
    // skip cleared and the limit adjusted accordingly.
    final ScanProperties withAdjustedLimit = scanProperties.with(ExecuteProperties::clearSkipAndAdjustLimit);
    final ExecuteProperties adjustedExecuteProperties = withAdjustedLimit.getExecuteProperties();

    // Report the size of every key-value pair that is read to the byte scan limiter.
    final ByteScanLimiter limiter = adjustedExecuteProperties.getState().getByteScanLimiter();
    final Consumer<KeyValue> onKeyValueRead = kv -> limiter.registerScannedBytes(kv.getKey().length + kv.getValue().length);

    final BunchedMapMultiIterator<Tuple, List<Integer>, Tuple> entries = getBunchedMap(state.context).scanMulti(
            state.context.readTransaction(adjustedExecuteProperties.getIsolationLevel().isSnapshot()),
            state.indexSubspace,
            splitter,
            keyRange.begin,
            keyRange.end,
            continuation,
            adjustedExecuteProperties.getReturnedRowLimit(),
            onKeyValueRead,
            scanProperties.isReverse());
    final RecordCursor<IndexEntry> textCursor = new TextCursor(entries, state.store.getExecutor(), state.context, withAdjustedLimit, state.index);

    final int skip = scanProperties.getExecuteProperties().getSkip();
    return skip == 0 ? textCursor : textCursor.skip(skip);
}
use of com.apple.foundationdb.record.IndexEntry in project fdb-record-layer by FoundationDB.
the class RankIndexMaintainer method updateIndexKeys.
/**
 * Applies a batch of index entries for one record: every entry is written to (or removed from)
 * the ordinary index via {@code updateOneKeyAsync} and to the ranked set of its group.
 *
 * @param savedRecord the record the entries belong to
 * @param remove {@code true} to remove the entries, {@code false} to add them
 * @param indexEntries the entries to apply
 * @param <M> the message type of the record
 * @return a future that completes when all ordinary-index and ranked-set updates are done
 */
@Override
protected <M extends Message> CompletableFuture<Void> updateIndexKeys(@Nonnull final FDBIndexableRecord<M> savedRecord, final boolean remove, @Nonnull final List<IndexEntry> indexEntries) {
    final int groupingCount = getGroupingCount();
    final Subspace rankedSetRoot = getSecondarySubspace();
    final List<CompletableFuture<Void>> pendingOrdinaryUpdates = new ArrayList<>(indexEntries.size());
    final Map<Subspace, CompletableFuture<Void>> rankChains = Maps.newHashMapWithExpectedSize(indexEntries.size());

    for (IndexEntry indexEntry : indexEntries) {
        // Ordinary B-tree index by score; only futures that have not already completed
        // normally need to be waited on.
        final CompletableFuture<Void> ordinaryUpdate = updateOneKeyAsync(savedRecord, remove, indexEntry);
        if (!MoreAsyncUtil.isCompletedNormally(ordinaryUpdate)) {
            pendingOrdinaryUpdates.add(ordinaryUpdate);
        }

        // Split the entry key into its grouping prefix (selects the ranked set) and its score.
        final Subspace rankSubspace;
        final Tuple scoreKey;
        if (groupingCount <= 0) {
            rankSubspace = rankedSetRoot;
            scoreKey = indexEntry.getKey();
        } else {
            final List<Object> items = indexEntry.getKey().getItems();
            rankSubspace = rankedSetRoot.subspace(Tuple.fromList(items.subList(0, groupingCount)));
            scoreKey = Tuple.fromList(items.subList(groupingCount, items.size()));
        }

        // Two concurrent updates to the same ranked set are unsafe, so updates that share a
        // grouping key are chained one after another instead of being started together.
        final Function<Void, CompletableFuture<Void>> rankedSetUpdate =
                unused -> RankedSetIndexHelper.updateRankedSet(state, rankSubspace, config, indexEntry.getKey(), scoreKey, remove);
        final CompletableFuture<Void> previous = rankChains.get(rankSubspace);
        final CompletableFuture<Void> chained = (previous == null)
                ? rankedSetUpdate.apply(null)
                : previous.thenCompose(rankedSetUpdate);
        rankChains.put(rankSubspace, chained);
    }

    return CompletableFuture.allOf(AsyncUtil.whenAll(pendingOrdinaryUpdates), AsyncUtil.whenAll(rankChains.values()));
}
use of com.apple.foundationdb.record.IndexEntry in project fdb-record-layer by FoundationDB.
the class BitmapValueIndexMaintainer method scan.
/**
 * Scans the bitmap index by group. The element of each bound that follows the grouping
 * columns is read as a bit position. A bound that does not fall on a multiple of
 * {@code entrySize} is widened to the enclosing multiple for the underlying scan, and any
 * returned entry that extends past the requested positions is trimmed back to them.
 * Entries that end up with no positions inside the requested window are dropped.
 *
 * @param scanType the type of scan; must be {@link IndexScanType#BY_GROUP}
 * @param range the range to scan; a missing or null position element leaves that side unbounded
 * @param continuation any continuation from a previous scan invocation
 * @param scanProperties properties of the scan
 * @return a cursor over the (possibly trimmed) bitmap entries in the range
 * @throws RecordCoreException if {@code scanType} is not {@link IndexScanType#BY_GROUP}
 */
@Nonnull
@Override
public RecordCursor<IndexEntry> scan(@Nonnull IndexScanType scanType, @Nonnull TupleRange range, @Nullable byte[] continuation, @Nonnull ScanProperties scanProperties) {
    if (scanType != IndexScanType.BY_GROUP) {
        throw new RecordCoreException("Can only scan bitmap index by group.");
    }
    final int groupPrefixSize = getGroupingCount();

    // Lower bound: first requested position, or unbounded when no position is given.
    final long startPosition;
    final Tuple low = range.getLow();
    if (low == null || low.size() <= groupPrefixSize || low.get(groupPrefixSize) == null) {
        startPosition = Long.MIN_VALUE;
    } else {
        final long lowValue = low.getLong(groupPrefixSize);
        startPosition = (range.getLowEndpoint() == EndpointType.RANGE_EXCLUSIVE) ? lowValue + 1 : lowValue;
        if (startPosition % entrySize != 0) {
            // Round down to a multiple of entrySize so the entry containing the start is scanned.
            final long alignedStart = startPosition - Math.floorMod(startPosition, (long) entrySize);
            range = new TupleRange(low.popBack().add(alignedStart), range.getHigh(), EndpointType.RANGE_INCLUSIVE, range.getHighEndpoint());
        }
    }

    // Upper bound: one past the last requested position, or unbounded when no position is given.
    final long endPosition;
    final Tuple high = range.getHigh();
    if (high == null || high.size() <= groupPrefixSize || high.get(groupPrefixSize) == null) {
        endPosition = Long.MAX_VALUE;
    } else {
        final long highValue = high.getLong(groupPrefixSize);
        endPosition = (range.getHighEndpoint() == EndpointType.RANGE_INCLUSIVE) ? highValue + 1 : highValue;
        if (endPosition % entrySize != 0) {
            // Round up to a multiple of entrySize so the entry containing the end is scanned.
            final long alignedEnd = endPosition + Math.floorMod(entrySize - endPosition, (long) entrySize);
            range = new TupleRange(range.getLow(), high.popBack().add(alignedEnd), range.getLowEndpoint(), EndpointType.RANGE_INCLUSIVE);
        }
    }

    return scan(range, continuation, scanProperties).map(indexEntry -> {
        final long entryStart = indexEntry.getKey().getLong(groupPrefixSize);
        final byte[] entryBitmap = indexEntry.getValue().getBytes(0);
        final long entryEnd = entryStart + entryBitmap.length * 8;
        if (entryStart >= startPosition && entryEnd <= endPosition) {
            // Entirely inside the requested window: pass through untouched.
            return Optional.of(indexEntry);
        }

        final long windowStart = Math.max(entryStart, startPosition);
        final long windowEnd = Math.min(entryEnd, endPosition);
        if (windowStart >= windowEnd) {
            // Nothing of this entry lies inside the requested window.
            return Optional.<IndexEntry>empty();
        }

        // Copy the bits that fall inside the window into a new bitmap that starts at windowStart.
        final byte[] windowBitmap = new byte[((int) (windowEnd - windowStart) + 7) / 8];
        for (long position = windowStart; position < windowEnd; position++) {
            final int sourceBit = (int) (position - entryStart);
            if ((entryBitmap[sourceBit / 8] & (byte) (1 << (sourceBit % 8))) == 0) {
                continue;
            }
            final int targetBit = (int) (position - windowStart);
            windowBitmap[targetBit / 8] |= (byte) (1 << (targetBit % 8));
        }
        final Tuple windowKey = indexEntry.getKey().popBack().add(windowStart);
        return Optional.of(new IndexEntry(indexEntry.getIndex(), windowKey, Tuple.from(windowBitmap)));
    }).filter(Optional::isPresent).map(Optional::get);
}
use of com.apple.foundationdb.record.IndexEntry in project fdb-record-layer by FoundationDB.
the class BitmapValueIndexMaintainer method evaluateAggregateFunction.
/**
 * Evaluates the bitmap aggregate function over a range of this index by scanning the range
 * by group and folding every returned entry into a single {@code BitmapAggregator}.
 *
 * <p>The element of each bound that follows the grouping columns is read as a bit position.
 * A bound whose position element is missing or {@code null} is treated as "not specified",
 * which is how this class's {@code scan} interprets such bounds: the start position then
 * stays at 0 and the size is not narrowed below {@code entrySize}.</p>
 *
 * @param function the aggregate function to evaluate; its name must equal {@code AGGREGATE_FUNCTION_NAME}
 * @param range the range of the index to aggregate over
 * @param isolationveLevel the isolation level to use for the underlying scan
 * @return a future holding a one-element tuple that contains the combined bitmap as a byte array
 * @throws MetaDataException if {@code function} is not the aggregate function supported by this index
 */
@Override
@Nonnull
public CompletableFuture<Tuple> evaluateAggregateFunction(@Nonnull IndexAggregateFunction function, @Nonnull TupleRange range, @Nonnull IsolationLevel isolationveLevel) {
    if (!function.getName().equals(AGGREGATE_FUNCTION_NAME)) {
        throw new MetaDataException("this index does not support aggregate function: " + function);
    }
    final RecordCursor<IndexEntry> cursor = scan(IndexScanType.BY_GROUP, range, null, new ScanProperties(ExecuteProperties.newBuilder().setIsolationLevel(isolationveLevel).build()));
    final int groupPrefixSize = getGroupingCount();

    // Only read a position from a bound when the element is actually present and non-null;
    // calling getLong on a null element would fail even though scan accepts such a bound.
    long startPosition = 0;
    final Tuple low = range.getLow();
    if (low != null && low.size() > groupPrefixSize && low.get(groupPrefixSize) != null) {
        startPosition = low.getLong(groupPrefixSize);
    }
    int size = entrySize;
    final Tuple high = range.getHigh();
    if (high != null && high.size() > groupPrefixSize && high.get(groupPrefixSize) != null) {
        final long endPosition = high.getLong(groupPrefixSize);
        if (size > endPosition - startPosition) {
            // Narrow size to what can actually be passed through from scan.
            size = (int) (endPosition - startPosition);
        }
    }
    return cursor.reduce(new BitmapAggregator(startPosition, size), (combined, kv) -> combined.append(kv.getKey().getLong(kv.getKeySize() - 1), kv.getValue().getBytes(0))).thenApply(combined -> Tuple.from(combined.asByteArray()));
}
Aggregations