Use of com.apple.foundationdb.record.provider.common.text.TextTokenizer in the project fdb-record-layer by FoundationDB:
the class TextScan, method getTokenList.
// Resolve the comparand into a list of token strings, tokenizing the raw
// query string with the index's tokenizer when the comparison has not
// already produced a token list. When removeStopWords is set, stop words
// (represented by the empty string) are stripped from the result.
private List<String> getTokenList(@Nonnull FDBRecordStoreBase<?> store, @Nonnull EvaluationContext context, boolean removeStopWords) {
    final Object comparand = textComparison.getComparand(store, context);
    final List<String> tokens;
    if (comparand instanceof String) {
        // A raw string: tokenize it here using the index's configured tokenizer and version.
        final TextTokenizer tokenizer = TextIndexMaintainer.getTokenizer(index);
        final int tokenizerVersion = TextIndexMaintainer.getIndexTokenizerVersion(index);
        tokens = tokenizer.tokenizeToList((String) comparand, tokenizerVersion, TextTokenizer.TokenizerMode.QUERY);
    } else if (comparand instanceof List<?>) {
        // Already a token list: just normalize each element to a string.
        tokens = ((List<?>) comparand).stream().map(Object::toString).collect(Collectors.toList());
    } else {
        throw new RecordCoreException("Comparand for text query of incompatible type: " + (comparand == null ? "null" : comparand.getClass()));
    }
    if (!removeStopWords || !tokens.contains("")) {
        return tokens;
    }
    // The empty string marks a stop word; drop every occurrence.
    return tokens.stream().filter(token -> !token.isEmpty()).collect(Collectors.toList());
}
Use of com.apple.foundationdb.record.provider.common.text.TextTokenizer in the project fdb-record-layer by FoundationDB:
the class TextIndexTest, method queryScanEquivalence.
/**
 * Generate random documents and then make sure that querying them using the index
 * produces the same result as performing a full scan of all records.
 *
 * @param index the text index (with its tokenizer options) under test
 * @throws Exception if saving records or executing either scan fails
 */
@MethodSource("indexArguments")
@ParameterizedTest
public void queryScanEquivalence(@Nonnull Index index) throws Exception {
    final Random r = new Random(0xba5eba1L + index.getName().hashCode());
    final int recordCount = 100;
    final int recordBatch = 25;
    final int queryCount = 25;
    final List<String> lexicon = getStandardLexicon();
    TextTokenizerRegistryImpl.instance().register(FILTERING_TOKENIZER);
    final TextTokenizer tokenizer = TextIndexMaintainer.getTokenizer(index);
    final RecordMetaDataHook hook = metaDataBuilder -> {
        metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
        metaDataBuilder.addIndex(SIMPLE_DOC, index);
    };
    // Log the seed so a failing run can be reproduced deterministically.
    long seed = r.nextLong();
    LOGGER.info(KeyValueLogMessage.of("initializing random number generator", TestLogMessageKeys.SEED, seed));
    r.setSeed(seed);
    // Save the random documents in batches, one transaction per batch.
    for (int i = 0; i < recordCount; i += recordBatch) {
        List<SimpleDocument> records = getRandomRecords(r, recordBatch, lexicon);
        LOGGER.info(KeyValueLogMessage.of("creating and saving random records", TestLogMessageKeys.BATCH_SIZE, recordBatch));
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context, hook);
            records.forEach(recordStore::saveRecord);
            commit(context);
        }
    }
    double[] proportions = getZipfProportions(lexicon);
    long totalScanningTime = 0;
    long totalQueryingTime = 0;
    long totalResults = 0;
    for (int i = 0; i < queryCount; i++) {
        // Generate a random text query, choosing the predicate type at random.
        List<String> tokens = getRandomWords(r, lexicon, proportions, 6, 3);
        String tokenString = String.join(" ", tokens);
        double filterChoice = r.nextDouble();
        final QueryComponent filter;
        if (filterChoice < 0.2) {
            filter = Query.field("text").text(tokenizer.getName()).containsAll(tokenString);
        } else if (filterChoice < 0.4) {
            filter = Query.field("text").text(tokenizer.getName()).containsAny(tokenString);
        } else if (filterChoice < 0.6) {
            filter = Query.field("text").text(tokenizer.getName()).containsPhrase(tokenString);
        } else if (filterChoice < 0.8) {
            int maxDistance = r.nextInt(10) + tokens.size();
            filter = Query.field("text").text(tokenizer.getName()).containsAll(tokenString, maxDistance);
        } else if (filterChoice < 0.9) {
            filter = Query.field("text").text(tokenizer.getName()).containsAnyPrefix(tokenString);
        } else if (filterChoice < 0.95) {
            filter = Query.field("text").text(tokenizer.getName()).containsAllPrefixes(tokenString);
        } else {
            if (tokens.isEmpty()) {
                continue;
            }
            // Choose the first non-empty token from the iterator
            Iterator<? extends CharSequence> tokenIterator = tokenizer.tokenize(tokenString, tokenizer.getMaxVersion(), TextTokenizer.TokenizerMode.QUERY);
            String firstToken = null;
            while (tokenIterator.hasNext()) {
                String nextToken = tokenIterator.next().toString();
                if (!nextToken.isEmpty()) {
                    firstToken = nextToken;
                    break;
                }
            }
            if (firstToken == null) {
                continue;
            }
            // Use a random non-empty prefix of the token for a containsPrefix query.
            int prefixEnd;
            if (firstToken.length() > 1) {
                prefixEnd = r.nextInt(firstToken.length() - 1) + 1;
            } else {
                prefixEnd = 1;
            }
            filter = Query.field("text").text(tokenizer.getName()).containsPrefix(firstToken.substring(0, prefixEnd));
        }
        LOGGER.info(KeyValueLogMessage.of("generated random filter", TestLogMessageKeys.ITERATION, i, LogMessageKeys.FILTER, filter));
        // Manual scan all of the records
        long startTime = System.nanoTime();
        final Set<Long> manualRecordIds = performQueryWithRecordStoreScan(hook, filter);
        long endTime = System.nanoTime();
        LOGGER.info(KeyValueLogMessage.of("manual scan completed", TestLogMessageKeys.SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS)));
        totalScanningTime += endTime - startTime;
        // Generate a query and use the index
        startTime = System.nanoTime();
        final Set<Long> queryRecordIds = performQueryWithIndexScan(hook, index, filter);
        endTime = System.nanoTime();
        LOGGER.info(KeyValueLogMessage.of("query completed", TestLogMessageKeys.SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS)));
        totalQueryingTime += endTime - startTime;
        if (!manualRecordIds.equals(queryRecordIds)) {
            // Log the symmetric difference before the assertion below fails.
            Set<Long> onlyManual = new HashSet<>(manualRecordIds);
            onlyManual.removeAll(queryRecordIds);
            Set<Long> onlyQuery = new HashSet<>(queryRecordIds);
            // BUGFIX: was "onlyManual.removeAll(manualRecordIds)", which emptied
            // onlyManual and left onlyQuery unfiltered, so the log counts were wrong.
            onlyQuery.removeAll(manualRecordIds);
            LOGGER.warn(KeyValueLogMessage.of("results did not match", LogMessageKeys.FILTER, filter, TestLogMessageKeys.MANUAL_RESULT_COUNT, manualRecordIds.size(), TestLogMessageKeys.QUERY_RESULT_COUNT, queryRecordIds.size(), TestLogMessageKeys.ONLY_MANUAL_COUNT, onlyManual.size(), TestLogMessageKeys.ONLY_QUERY_COUNT, onlyQuery.size()));
        }
        assertEquals(manualRecordIds, queryRecordIds);
        LOGGER.info(KeyValueLogMessage.of("results matched", LogMessageKeys.FILTER, filter, TestLogMessageKeys.RESULT_COUNT, manualRecordIds.size()));
        totalResults += queryRecordIds.size();
    }
    LOGGER.info(KeyValueLogMessage.of("test completed", TestLogMessageKeys.TOTAL_SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(totalScanningTime, TimeUnit.NANOSECONDS), TestLogMessageKeys.TOTAL_QUERY_MILLIS, TimeUnit.MILLISECONDS.convert(totalQueryingTime, TimeUnit.NANOSECONDS), TestLogMessageKeys.TOTAL_RESULT_COUNT, totalResults));
}
Use of com.apple.foundationdb.record.provider.common.text.TextTokenizer in the project fdb-record-layer by FoundationDB:
the class TextIndexMaintainerFactory, method getIndexValidator.
/**
 * Validates that the index provided is valid for text indexes. This means that
 * the index must:
 *
 * <ul>
 *     <li>Not be a unique index.</li>
 *     <li>Not include a {@link com.apple.foundationdb.record.metadata.expressions.VersionKeyExpression#VERSION} expression in its root expression.</li>
 *     <li>Have a key expression whose first column is of type <code>string</code> (possibly with grouping columns
 *         before the tokenized text column) and is not repeated.</li> <!--Maybe we should support FanType.Concatenate?-->
 *     <li>Specify a valid tokenizer and tokenizer version through the index options (possibly using the defaults).</li>
 *     <li>Not define a value expression.</li>
 * </ul>
 *
 * @param index the index to validate
 * @return a validator to run against the index
 * @throws KeyExpression.InvalidExpressionException if the expression does not contain a string as its first ungrouped column
 * @throws com.apple.foundationdb.record.metadata.MetaDataException if the tokenizer is not defined, if the tokenizer version
 *                                                                  is out of range, or if the index is marked as unique
 */
@Nonnull
@Override
public IndexValidator getIndexValidator(Index index) {
    return new IndexValidator(index) {
        @Override
        public void validate(@Nonnull MetaDataValidator metaDataValidator) {
            super.validate(metaDataValidator);
            validateNotVersion();
            validateNotUnique();
            // TODO: allow value expressions for covering text indexes
            validateNoValue();
            // Validate that the tokenizer exists and that the version is in a valid range.
            TextTokenizer tokenizer = TextIndexMaintainer.getTokenizer(index);
            int tokenizerVersion = TextIndexMaintainer.getIndexTokenizerVersion(index);
            tokenizer.validateVersion(tokenizerVersion);
        }

        @Override
        public void validateIndexForRecordType(@Nonnull RecordType recordType, @Nonnull MetaDataValidator metaDataValidator) {
            final List<Descriptors.FieldDescriptor> fields = metaDataValidator.validateIndexForRecordType(index, recordType);
            int textFieldPosition = TextIndexMaintainer.textFieldPosition(index.getRootExpression());
            // BUGFIX: was "textFieldPosition > fields.size()"; when the position equals
            // the list size, fields.get(textFieldPosition) would throw a raw
            // IndexOutOfBoundsException instead of the intended validation error.
            if (textFieldPosition >= fields.size()) {
                throw new KeyExpression.InvalidExpressionException("text index does not have text field after grouped fields");
            } else {
                Descriptors.FieldDescriptor textFieldDescriptor = fields.get(textFieldPosition);
                if (!textFieldDescriptor.getType().equals(Descriptors.FieldDescriptor.Type.STRING)) {
                    throw new KeyExpression.InvalidExpressionException(String.format("text index has non-string type %s as text field", textFieldDescriptor.getLiteJavaType()));
                }
                if (textFieldDescriptor.isRepeated()) {
                    throw new KeyExpression.InvalidExpressionException("text index does not allow a repeated field for text body");
                }
            }
        }

        /**
         * Validate any options that have changed. There are several options unique to text indexes which
         * may change without requiring the index be rebuilt. They are:
         *
         * <ul>
         *     <li>{@link IndexOptions#TEXT_TOKENIZER_VERSION_OPTION} which can be increased (but not decreased)</li>
         *     <li>{@link IndexOptions#TEXT_ADD_AGGRESSIVE_CONFLICT_RANGES_OPTION} which only affects what conflict ranges
         *         are added at index update time and thus has no impact on the on-disk representation</li>
         *     <li>{@link IndexOptions#TEXT_OMIT_POSITIONS_OPTION} which changes whether the position lists are included
         *         in index entries</li>
         * </ul>
         *
         * <p>
         * Note that the {@link IndexOptions#TEXT_TOKENIZER_NAME_OPTION} is <em>not</em> allowed to change
         * (without rebuilding the index).
         * </p>
         *
         * @param oldIndex an older version of this index
         * @param changedOptions the set of changed options
         */
        @Override
        protected void validateChangedOptions(@Nonnull Index oldIndex, @Nonnull Set<String> changedOptions) {
            for (String changedOption : changedOptions) {
                switch (changedOption) {
                    case IndexOptions.TEXT_ADD_AGGRESSIVE_CONFLICT_RANGES_OPTION:
                    case IndexOptions.TEXT_OMIT_POSITIONS_OPTION:
                        // These options may change freely without breaking compatibility.
                        break;
                    case IndexOptions.TEXT_TOKENIZER_NAME_OPTION:
                        // The tokenizer itself may never change without a rebuild.
                        String oldTokenizerName = TextIndexMaintainer.getTokenizer(oldIndex).getName();
                        String newTokenizerName = TextIndexMaintainer.getTokenizer(index).getName();
                        if (!oldTokenizerName.equals(newTokenizerName)) {
                            throw new MetaDataException("text tokenizer changed", LogMessageKeys.INDEX_NAME, index.getName());
                        }
                        break;
                    case IndexOptions.TEXT_TOKENIZER_VERSION_OPTION:
                        // The tokenizer version should always go up.
                        int oldTokenizerVersion = TextIndexMaintainer.getIndexTokenizerVersion(oldIndex);
                        int newTokenizerVersion = TextIndexMaintainer.getIndexTokenizerVersion(index);
                        if (oldTokenizerVersion > newTokenizerVersion) {
                            throw new MetaDataException("text tokenizer version downgraded", LogMessageKeys.INDEX_NAME, index.getName(), LogMessageKeys.OLD_VERSION, oldTokenizerVersion, LogMessageKeys.NEW_VERSION, newTokenizerVersion);
                        }
                        break;
                    default:
                        // Changed options that are not text options will be handled by super class
                        if (TEXT_OPTIONS.contains(changedOption)) {
                            throw new MetaDataException("index option changed", LogMessageKeys.INDEX_NAME, index.getName(), LogMessageKeys.INDEX_OPTION, changedOption, LogMessageKeys.OLD_OPTION, oldIndex.getOption(changedOption), LogMessageKeys.NEW_OPTION, index.getOption(changedOption));
                        }
                }
            }
            // Hand any remaining (non-text) changed options to the base validator.
            changedOptions.removeAll(TEXT_OPTIONS);
            super.validateChangedOptions(oldIndex, changedOptions);
        }
    };
}
Aggregations