Search in sources :

Example 11 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method saveSimpleDocumentsWithNoPositions.

@Test
public void saveSimpleDocumentsWithNoPositions() throws Exception {
    final SimpleDocument shakespeareDocument = SimpleDocument.newBuilder().setDocId(1623L).setText(TextSamples.ROMEO_AND_JULIET_PROLOGUE).build();
    final SimpleDocument germanDocument = SimpleDocument.newBuilder().setDocId(1066L).setText(TextSamples.GERMAN).build();
    // Save with positions
    final int shakespeareValueBytes;
    final int germanValueBytes;
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        recordStore.saveRecord(shakespeareDocument);
        shakespeareValueBytes = getSaveIndexValueBytes(recordStore);
        recordStore.saveRecord(germanDocument);
        germanValueBytes = getSaveIndexValueBytes(recordStore) - shakespeareValueBytes;
        commit(context);
    }
    // Save without positions
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_NO_POSITIONS);
        });
        recordStore.deleteAllRecords();
        recordStore.saveRecord(shakespeareDocument);
        assertThat(getSaveIndexValueBytes(recordStore), lessThan(shakespeareValueBytes));
        final int newShakespeareBytes = getSaveIndexValueBytes(recordStore);
        recordStore.saveRecord(germanDocument);
        assertThat(getSaveIndexValueBytes(recordStore) - newShakespeareBytes, lessThan(germanValueBytes));
        List<Map.Entry<Tuple, List<Integer>>> entryList = scanMapEntries(recordStore, SIMPLE_TEXT_NO_POSITIONS, Tuple.from("gewonnen"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1066L), Collections.emptyList())), entryList);
        entryList = scanMapEntries(recordStore, SIMPLE_TEXT_NO_POSITIONS, Tuple.from("dignity"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Collections.emptyList())), entryList);
        commit(context);
    }
}
Also used : BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) IndexEntry(com.apple.foundationdb.record.IndexEntry) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 12 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method textIndexPerf1000SerialInsertNoBatching.

@Tag(Tags.Performance)
@Test
public void textIndexPerf1000SerialInsertNoBatching() throws Exception {
    // Create 1000 records
    Random r = new Random();
    List<SimpleDocument> records = getRandomRecords(r, 1000);
    long startTime = System.nanoTime();
    for (int i = 0; i < records.size(); i++) {
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context);
            recordStore.saveRecord(records.get(i));
            commit(context);
        }
    }
    long endTime = System.nanoTime();
    LOGGER.info("performed 1000 serial insertions in {} seconds.", (endTime - startTime) * 1e-9);
    printUsage();
}
Also used : Random(java.util.Random) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test) Tag(org.junit.jupiter.api.Tag)

Example 13 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method queryScanEquivalence.

/**
 * Generate random documents and then make sure that querying them using the index
 * produces the same result as performing a full scan of all records.
 */
@MethodSource("indexArguments")
@ParameterizedTest
public void queryScanEquivalence(@Nonnull Index index) throws Exception {
    final Random r = new Random(0xba5eba1L + index.getName().hashCode());
    final int recordCount = 100;
    final int recordBatch = 25;
    final int queryCount = 25;
    final List<String> lexicon = getStandardLexicon();
    TextTokenizerRegistryImpl.instance().register(FILTERING_TOKENIZER);
    final TextTokenizer tokenizer = TextIndexMaintainer.getTokenizer(index);
    final RecordMetaDataHook hook = metaDataBuilder -> {
        metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
        metaDataBuilder.addIndex(SIMPLE_DOC, index);
    };
    long seed = r.nextLong();
    LOGGER.info(KeyValueLogMessage.of("initializing random number generator", TestLogMessageKeys.SEED, seed));
    r.setSeed(seed);
    for (int i = 0; i < recordCount; i += recordBatch) {
        List<SimpleDocument> records = getRandomRecords(r, recordBatch, lexicon);
        LOGGER.info(KeyValueLogMessage.of("creating and saving random records", TestLogMessageKeys.BATCH_SIZE, recordBatch));
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context, hook);
            records.forEach(recordStore::saveRecord);
            commit(context);
        }
    }
    double[] proportions = getZipfProportions(lexicon);
    long totalScanningTime = 0;
    long totalQueryingTime = 0;
    long totalResults = 0;
    for (int i = 0; i < queryCount; i++) {
        // Generate a random text query
        List<String> tokens = getRandomWords(r, lexicon, proportions, 6, 3);
        String tokenString = String.join(" ", tokens);
        double filterChoice = r.nextDouble();
        final QueryComponent filter;
        if (filterChoice < 0.2) {
            filter = Query.field("text").text(tokenizer.getName()).containsAll(tokenString);
        } else if (filterChoice < 0.4) {
            filter = Query.field("text").text(tokenizer.getName()).containsAny(tokenString);
        } else if (filterChoice < 0.6) {
            filter = Query.field("text").text(tokenizer.getName()).containsPhrase(tokenString);
        } else if (filterChoice < 0.8) {
            int maxDistance = r.nextInt(10) + tokens.size();
            filter = Query.field("text").text(tokenizer.getName()).containsAll(tokenString, maxDistance);
        } else if (filterChoice < 0.9) {
            filter = Query.field("text").text(tokenizer.getName()).containsAnyPrefix(tokenString);
        } else if (filterChoice < 0.95) {
            filter = Query.field("text").text(tokenizer.getName()).containsAllPrefixes(tokenString);
        } else {
            if (tokens.isEmpty()) {
                continue;
            }
            // Choose the first non-empty token from the iterator
            Iterator<? extends CharSequence> tokenIterator = tokenizer.tokenize(tokenString, tokenizer.getMaxVersion(), TextTokenizer.TokenizerMode.QUERY);
            String firstToken = null;
            while (tokenIterator.hasNext()) {
                String nextToken = tokenIterator.next().toString();
                if (!nextToken.isEmpty()) {
                    firstToken = nextToken;
                    break;
                }
            }
            if (firstToken == null) {
                continue;
            }
            int prefixEnd;
            if (firstToken.length() > 1) {
                prefixEnd = r.nextInt(firstToken.length() - 1) + 1;
            } else {
                prefixEnd = 1;
            }
            filter = Query.field("text").text(tokenizer.getName()).containsPrefix(firstToken.substring(0, prefixEnd));
        }
        LOGGER.info(KeyValueLogMessage.of("generated random filter", TestLogMessageKeys.ITERATION, i, LogMessageKeys.FILTER, filter));
        // Manual scan all of the records
        long startTime = System.nanoTime();
        final Set<Long> manualRecordIds = performQueryWithRecordStoreScan(hook, filter);
        long endTime = System.nanoTime();
        LOGGER.info(KeyValueLogMessage.of("manual scan completed", TestLogMessageKeys.SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS)));
        totalScanningTime += endTime - startTime;
        // Generate a query and use the index
        startTime = System.nanoTime();
        final Set<Long> queryRecordIds = performQueryWithIndexScan(hook, index, filter);
        endTime = System.nanoTime();
        LOGGER.info(KeyValueLogMessage.of("query completed", TestLogMessageKeys.SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS)));
        totalQueryingTime += endTime - startTime;
        if (!manualRecordIds.equals(queryRecordIds)) {
            Set<Long> onlyManual = new HashSet<>(manualRecordIds);
            onlyManual.removeAll(queryRecordIds);
            Set<Long> onlyQuery = new HashSet<>(queryRecordIds);
            onlyManual.removeAll(manualRecordIds);
            LOGGER.warn(KeyValueLogMessage.of("results did not match", LogMessageKeys.FILTER, filter, TestLogMessageKeys.MANUAL_RESULT_COUNT, manualRecordIds.size(), TestLogMessageKeys.QUERY_RESULT_COUNT, queryRecordIds.size(), TestLogMessageKeys.ONLY_MANUAL_COUNT, onlyManual.size(), TestLogMessageKeys.ONLY_QUERY_COUNT, onlyQuery.size()));
        }
        assertEquals(manualRecordIds, queryRecordIds);
        LOGGER.info(KeyValueLogMessage.of("results matched", LogMessageKeys.FILTER, filter, TestLogMessageKeys.RESULT_COUNT, manualRecordIds.size()));
        totalResults += queryRecordIds.size();
    }
    LOGGER.info(KeyValueLogMessage.of("test completed", TestLogMessageKeys.TOTAL_SCAN_MILLIS, TimeUnit.MILLISECONDS.convert(totalScanningTime, TimeUnit.NANOSECONDS), TestLogMessageKeys.TOTAL_QUERY_MILLIS, TimeUnit.MILLISECONDS.convert(totalQueryingTime, TimeUnit.NANOSECONDS), TestLogMessageKeys.TOTAL_RESULT_COUNT, totalResults));
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) LogMessageKeys(com.apple.foundationdb.record.logging.LogMessageKeys) BY_GROUP(com.apple.foundationdb.record.IndexScanType.BY_GROUP) Matchers.not(org.hamcrest.Matchers.not) MetaDataException(com.apple.foundationdb.record.metadata.MetaDataException) TextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.TextTokenizerFactory) ComplexDocument(com.apple.foundationdb.record.TestRecordsTextProto.ComplexDocument) Subspace(com.apple.foundationdb.subspace.Subspace) TextSamples(com.apple.foundationdb.record.provider.common.text.TextSamples) RecordCursorResult(com.apple.foundationdb.record.RecordCursorResult) Pair(org.apache.commons.lang3.tuple.Pair) RecordCoreException(com.apple.foundationdb.record.RecordCoreException) COMPLEX_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.COMPLEX_DOC) VersionKeyExpression(com.apple.foundationdb.record.metadata.expressions.VersionKeyExpression) Map(java.util.Map) Expressions.concat(com.apple.foundationdb.record.metadata.Key.Expressions.concat) GroupingKeyExpression(com.apple.foundationdb.record.metadata.expressions.GroupingKeyExpression) Tag(org.junit.jupiter.api.Tag) Query(com.apple.foundationdb.record.query.expressions.Query) KeyExpression(com.apple.foundationdb.record.metadata.expressions.KeyExpression) IndexOptions(com.apple.foundationdb.record.metadata.IndexOptions) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Matchers.allOf(org.hamcrest.Matchers.allOf) Set(java.util.Set) PlanMatchers.textComparison(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textComparison) FanType(com.apple.foundationdb.record.metadata.expressions.KeyExpression.FanType) Arguments(org.junit.jupiter.params.provider.Arguments) BY_VALUE(com.apple.foundationdb.record.IndexScanType.BY_VALUE) TupleRange(com.apple.foundationdb.record.TupleRange) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestRecordsTextProto(com.apple.foundationdb.record.TestRecordsTextProto) Stream(java.util.stream.Stream) PlanMatchers.indexName(com.apple.foundationdb.record.query.plan.match.PlanMatchers.indexName) Matchers.anything(org.hamcrest.Matchers.anything) Matchers.contains(org.hamcrest.Matchers.contains) TupleHelpers(com.apple.foundationdb.tuple.TupleHelpers) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) PlanMatchers.typeFilter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.typeFilter) Matchers.containsString(org.hamcrest.Matchers.containsString) FDBIndexedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBIndexedRecord) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) RecordMetaData(com.apple.foundationdb.record.RecordMetaData) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) PlanMatchers.fetch(com.apple.foundationdb.record.query.plan.match.PlanMatchers.fetch) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) AsyncUtil(com.apple.foundationdb.async.AsyncUtil) RecordQuery(com.apple.foundationdb.record.query.RecordQuery) ComponentWithComparison(com.apple.foundationdb.record.query.expressions.ComponentWithComparison) RecordQueryPlan(com.apple.foundationdb.record.query.plan.plans.RecordQueryPlan) ArrayList(java.util.ArrayList) PlanMatchers(com.apple.foundationdb.record.query.plan.match.PlanMatchers) BunchedMap(com.apple.foundationdb.map.BunchedMap) TestLogMessageKeys(com.apple.foundationdb.record.logging.TestLogMessageKeys) LoggableException(com.apple.foundationdb.util.LoggableException) Matchers.lessThan(org.hamcrest.Matchers.lessThan) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Nullable(javax.annotation.Nullable) FDBStoredRecord(com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord) FieldWithComparison(com.apple.foundationdb.record.query.expressions.FieldWithComparison) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) SOURCE_EXHAUSTED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SOURCE_EXHAUSTED) Tags(com.apple.test.Tags) SCAN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SCAN_LIMIT_REACHED) OrComponent(com.apple.foundationdb.record.query.expressions.OrComponent) BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) FDBRecordStoreTestBase(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase) ExecutionException(java.util.concurrent.ExecutionException) AndOrComponent(com.apple.foundationdb.record.query.expressions.AndOrComponent) Comparisons(com.apple.foundationdb.record.query.expressions.Comparisons) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Index(com.apple.foundationdb.record.metadata.Index) Matcher(org.hamcrest.Matcher) PlanMatchers.unorderedUnion(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unorderedUnion) TextIndexBunchedSerializerTest.entryOf(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexBunchedSerializerTest.entryOf) IndexEntry(com.apple.foundationdb.record.IndexEntry) PlanMatchers.groupingBounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.groupingBounds) StoreTimer(com.apple.foundationdb.record.provider.common.StoreTimer) LoggerFactory(org.slf4j.LoggerFactory) BY_RANK(com.apple.foundationdb.record.IndexScanType.BY_RANK) PrefixTextTokenizer(com.apple.foundationdb.record.provider.common.text.PrefixTextTokenizer) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) Random(java.util.Random) SubspaceSplitter(com.apple.foundationdb.map.SubspaceSplitter) PlanMatchers.bounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.bounds) RecordQueryPlanner(com.apple.foundationdb.record.query.plan.RecordQueryPlanner) Tuple(com.apple.foundationdb.tuple.Tuple) KeyValueLogMessage(com.apple.foundationdb.record.logging.KeyValueLogMessage) PlanMatchers.textIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textIndexScan) TextTokenizerRegistryImpl(com.apple.foundationdb.record.provider.common.text.TextTokenizerRegistryImpl) Expressions.concatenateFields(com.apple.foundationdb.record.metadata.Key.Expressions.concatenateFields) RETURN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.RETURN_LIMIT_REACHED) FDBExceptions(com.apple.foundationdb.record.provider.foundationdb.FDBExceptions) MethodSource(org.junit.jupiter.params.provider.MethodSource) PlanMatchers.coveringIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.coveringIndexScan) ImmutableSet(com.google.common.collect.ImmutableSet) KeyValue(com.apple.foundationdb.KeyValue) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) FDBStoreTimer(com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer) ImmutableMap(com.google.common.collect.ImmutableMap) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) RecordCoreArgumentException(com.apple.foundationdb.record.RecordCoreArgumentException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) TextTokenizer(com.apple.foundationdb.record.provider.common.text.TextTokenizer) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) List(java.util.List) EvaluationContext(com.apple.foundationdb.record.EvaluationContext) FDBQueriedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBQueriedRecord) Matchers.equalTo(org.hamcrest.Matchers.equalTo) MapDocument(com.apple.foundationdb.record.TestRecordsTextProto.MapDocument) IndexTypes(com.apple.foundationdb.record.metadata.IndexTypes) Matchers.anyOf(org.hamcrest.Matchers.anyOf) IntStream(java.util.stream.IntStream) PlanMatchers.primaryKeyDistinct(com.apple.foundationdb.record.query.plan.match.PlanMatchers.primaryKeyDistinct) Descriptors(com.google.protobuf.Descriptors) CompletableFuture(java.util.concurrent.CompletableFuture) BooleanNormalizer(com.apple.foundationdb.record.query.plan.planning.BooleanNormalizer) PlanHashable(com.apple.foundationdb.record.PlanHashable) PlanMatchers.filter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.filter) HashSet(java.util.HashSet) ExecuteProperties(com.apple.foundationdb.record.ExecuteProperties) FDBRecordStore(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore) ScanProperties(com.apple.foundationdb.record.ScanProperties) RecordCursorIterator(com.apple.foundationdb.record.RecordCursorIterator) DefaultTextTokenizer(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizer) BY_TEXT_TOKEN(com.apple.foundationdb.record.IndexScanType.BY_TEXT_TOKEN) BunchedMapMultiIterator(com.apple.foundationdb.map.BunchedMapMultiIterator) Nonnull(javax.annotation.Nonnull) Expressions.field(com.apple.foundationdb.record.metadata.Key.Expressions.field) EmptyKeyExpression(com.apple.foundationdb.record.metadata.expressions.EmptyKeyExpression) SIMPLE_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.SIMPLE_DOC) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) RecordMetaDataBuilder(com.apple.foundationdb.record.RecordMetaDataBuilder) RecordTypeBuilder(com.apple.foundationdb.record.metadata.RecordTypeBuilder) BY_TIME_WINDOW(com.apple.foundationdb.record.IndexScanType.BY_TIME_WINDOW) FilteringTextTokenizer(com.apple.foundationdb.record.provider.common.text.FilteringTextTokenizer) ReadTransaction(com.apple.foundationdb.ReadTransaction) Normalizer(java.text.Normalizer) Matchers.any(org.hamcrest.Matchers.any) TimeUnit(java.util.concurrent.TimeUnit) DefaultTextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizerFactory) PlanMatchers.unbounded(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unbounded) FDBDatabaseFactory(com.apple.foundationdb.record.provider.foundationdb.FDBDatabaseFactory) Message(com.google.protobuf.Message) RecordCursor(com.apple.foundationdb.record.RecordCursor) QueryComponent(com.apple.foundationdb.record.query.expressions.QueryComponent) PlanMatchers.descendant(com.apple.foundationdb.record.query.plan.match.PlanMatchers.descendant) Comparator(java.util.Comparator) Collections(java.util.Collections) AllSuffixesTextTokenizer(com.apple.foundationdb.record.provider.common.text.AllSuffixesTextTokenizer) QueryComponent(com.apple.foundationdb.record.query.expressions.QueryComponent) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) Matchers.containsString(org.hamcrest.Matchers.containsString) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) PrefixTextTokenizer(com.apple.foundationdb.record.provider.common.text.PrefixTextTokenizer) TextTokenizer(com.apple.foundationdb.record.provider.common.text.TextTokenizer) DefaultTextTokenizer(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizer) FilteringTextTokenizer(com.apple.foundationdb.record.provider.common.text.FilteringTextTokenizer) AllSuffixesTextTokenizer(com.apple.foundationdb.record.provider.common.text.AllSuffixesTextTokenizer) Random(java.util.Random) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) RecordCursorIterator(com.apple.foundationdb.record.RecordCursorIterator) BunchedMapMultiIterator(com.apple.foundationdb.map.BunchedMapMultiIterator) Iterator(java.util.Iterator) HashSet(java.util.HashSet) MethodSource(org.junit.jupiter.params.provider.MethodSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 14 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method scan.

@Test
public void scan() throws Exception {
    final Random r = new Random(0x5ca1ab1e);
    List<SimpleDocument> records = getRandomRecords(r, 50);
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        records.forEach(recordStore::saveRecord);
        commit(context);
    }
    final Index index = recordStore.getRecordMetaData().getIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
    // existing token
    scanWithZeroScanRecordLimit(index, "angstrom", false);
    // existing token
    scanWithZeroScanRecordLimit(index, "angstrom", true);
    // non-existent token
    scanWithZeroScanRecordLimit(index, "asdfasdf", false);
    // non-existent token
    scanWithZeroScanRecordLimit(index, "asdfasdf", true);
    final List<Integer> limits = Arrays.asList(0, 1, 2, Integer.MAX_VALUE);
    final List<Integer> skips = Arrays.asList(0, 1, 10, 1000);
    final List<String> tokens = Arrays.asList("angstrom", "the", "not_a_token_in_the_lexicon", "שפראך");
    for (int limit : limits) {
        scanMultipleWithScanRecordLimits(index, tokens, limit, false);
        scanMultipleWithScanRecordLimits(index, tokens, limit, true);
        scanWithContinuations(index, "достопримечательности", limit, false);
        scanWithContinuations(index, "достопримечательности", limit, false);
        for (int skip : skips) {
            scanWithSkip(index, "toil", skip, limit, false);
            scanWithSkip(index, "toil", skip, limit, true);
        }
    }
}
Also used : Random(java.util.Random) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) Index(com.apple.foundationdb.record.metadata.Index) Matchers.containsString(org.hamcrest.Matchers.containsString) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 15 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method saveSimpleDocumentsWithFilter.

@Test
public void saveSimpleDocumentsWithFilter() throws Exception {
    final SimpleDocument russianDocument = SimpleDocument.newBuilder().setDocId(1547L).setText(TextSamples.RUSSIAN).build();
    try (FDBRecordContext context = openContext()) {
        // Because missing tokenizer
        assertThrows(MetaDataException.class, () -> openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_FILTERING);
        }));
    }
    TextTokenizerRegistryImpl.instance().register(FILTERING_TOKENIZER);
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_FILTERING);
        });
        recordStore.saveRecord(russianDocument);
        // Note that достопримечательности has been filtered out, so it's probably a
        // lot less interesting to visit.
        assertEquals(4, getSaveIndexKeyCount(recordStore));
        List<Map.Entry<Tuple, List<Integer>>> entryList = scanMapEntries(recordStore, SIMPLE_TEXT_FILTERING, Tuple.from("достопримечательности"));
        assertEquals(Collections.emptyList(), entryList);
        entryList = scanMapEntries(recordStore, SIMPLE_TEXT_FILTERING, Tuple.from("москвы"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1547L), Collections.singletonList(4))), entryList);
        commit(context);
    }
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) LogMessageKeys(com.apple.foundationdb.record.logging.LogMessageKeys) BY_GROUP(com.apple.foundationdb.record.IndexScanType.BY_GROUP) Matchers.not(org.hamcrest.Matchers.not) MetaDataException(com.apple.foundationdb.record.metadata.MetaDataException) TextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.TextTokenizerFactory) ComplexDocument(com.apple.foundationdb.record.TestRecordsTextProto.ComplexDocument) Subspace(com.apple.foundationdb.subspace.Subspace) TextSamples(com.apple.foundationdb.record.provider.common.text.TextSamples) RecordCursorResult(com.apple.foundationdb.record.RecordCursorResult) Pair(org.apache.commons.lang3.tuple.Pair) RecordCoreException(com.apple.foundationdb.record.RecordCoreException) COMPLEX_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.COMPLEX_DOC) VersionKeyExpression(com.apple.foundationdb.record.metadata.expressions.VersionKeyExpression) Map(java.util.Map) Expressions.concat(com.apple.foundationdb.record.metadata.Key.Expressions.concat) GroupingKeyExpression(com.apple.foundationdb.record.metadata.expressions.GroupingKeyExpression) Tag(org.junit.jupiter.api.Tag) Query(com.apple.foundationdb.record.query.expressions.Query) KeyExpression(com.apple.foundationdb.record.metadata.expressions.KeyExpression) IndexOptions(com.apple.foundationdb.record.metadata.IndexOptions) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Matchers.allOf(org.hamcrest.Matchers.allOf) Set(java.util.Set) PlanMatchers.textComparison(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textComparison) FanType(com.apple.foundationdb.record.metadata.expressions.KeyExpression.FanType) Arguments(org.junit.jupiter.params.provider.Arguments) BY_VALUE(com.apple.foundationdb.record.IndexScanType.BY_VALUE) TupleRange(com.apple.foundationdb.record.TupleRange) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestRecordsTextProto(com.apple.foundationdb.record.TestRecordsTextProto) Stream(java.util.stream.Stream) PlanMatchers.indexName(com.apple.foundationdb.record.query.plan.match.PlanMatchers.indexName) Matchers.anything(org.hamcrest.Matchers.anything) Matchers.contains(org.hamcrest.Matchers.contains) TupleHelpers(com.apple.foundationdb.tuple.TupleHelpers) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) PlanMatchers.typeFilter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.typeFilter) Matchers.containsString(org.hamcrest.Matchers.containsString) FDBIndexedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBIndexedRecord) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) RecordMetaData(com.apple.foundationdb.record.RecordMetaData) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) PlanMatchers.fetch(com.apple.foundationdb.record.query.plan.match.PlanMatchers.fetch) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) AsyncUtil(com.apple.foundationdb.async.AsyncUtil) RecordQuery(com.apple.foundationdb.record.query.RecordQuery) ComponentWithComparison(com.apple.foundationdb.record.query.expressions.ComponentWithComparison) RecordQueryPlan(com.apple.foundationdb.record.query.plan.plans.RecordQueryPlan) ArrayList(java.util.ArrayList) PlanMatchers(com.apple.foundationdb.record.query.plan.match.PlanMatchers) BunchedMap(com.apple.foundationdb.map.BunchedMap) TestLogMessageKeys(com.apple.foundationdb.record.logging.TestLogMessageKeys) LoggableException(com.apple.foundationdb.util.LoggableException) Matchers.lessThan(org.hamcrest.Matchers.lessThan) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Nullable(javax.annotation.Nullable) FDBStoredRecord(com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord) FieldWithComparison(com.apple.foundationdb.record.query.expressions.FieldWithComparison) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) SOURCE_EXHAUSTED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SOURCE_EXHAUSTED) Tags(com.apple.test.Tags) SCAN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SCAN_LIMIT_REACHED) OrComponent(com.apple.foundationdb.record.query.expressions.OrComponent) BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) FDBRecordStoreTestBase(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase) ExecutionException(java.util.concurrent.ExecutionException) AndOrComponent(com.apple.foundationdb.record.query.expressions.AndOrComponent) Comparisons(com.apple.foundationdb.record.query.expressions.Comparisons) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Index(com.apple.foundationdb.record.metadata.Index) Matcher(org.hamcrest.Matcher) PlanMatchers.unorderedUnion(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unorderedUnion) TextIndexBunchedSerializerTest.entryOf(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexBunchedSerializerTest.entryOf) IndexEntry(com.apple.foundationdb.record.IndexEntry) PlanMatchers.groupingBounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.groupingBounds) StoreTimer(com.apple.foundationdb.record.provider.common.StoreTimer) LoggerFactory(org.slf4j.LoggerFactory) BY_RANK(com.apple.foundationdb.record.IndexScanType.BY_RANK) PrefixTextTokenizer(com.apple.foundationdb.record.provider.common.text.PrefixTextTokenizer) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) Random(java.util.Random) SubspaceSplitter(com.apple.foundationdb.map.SubspaceSplitter) PlanMatchers.bounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.bounds) RecordQueryPlanner(com.apple.foundationdb.record.query.plan.RecordQueryPlanner) Tuple(com.apple.foundationdb.tuple.Tuple) KeyValueLogMessage(com.apple.foundationdb.record.logging.KeyValueLogMessage) PlanMatchers.textIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textIndexScan) TextTokenizerRegistryImpl(com.apple.foundationdb.record.provider.common.text.TextTokenizerRegistryImpl) Expressions.concatenateFields(com.apple.foundationdb.record.metadata.Key.Expressions.concatenateFields) RETURN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.RETURN_LIMIT_REACHED) FDBExceptions(com.apple.foundationdb.record.provider.foundationdb.FDBExceptions) MethodSource(org.junit.jupiter.params.provider.MethodSource) PlanMatchers.coveringIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.coveringIndexScan) ImmutableSet(com.google.common.collect.ImmutableSet) KeyValue(com.apple.foundationdb.KeyValue) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) FDBStoreTimer(com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer) ImmutableMap(com.google.common.collect.ImmutableMap) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) RecordCoreArgumentException(com.apple.foundationdb.record.RecordCoreArgumentException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) TextTokenizer(com.apple.foundationdb.record.provider.common.text.TextTokenizer) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) List(java.util.List) EvaluationContext(com.apple.foundationdb.record.EvaluationContext) FDBQueriedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBQueriedRecord) Matchers.equalTo(org.hamcrest.Matchers.equalTo) MapDocument(com.apple.foundationdb.record.TestRecordsTextProto.MapDocument) IndexTypes(com.apple.foundationdb.record.metadata.IndexTypes) Matchers.anyOf(org.hamcrest.Matchers.anyOf) IntStream(java.util.stream.IntStream) PlanMatchers.primaryKeyDistinct(com.apple.foundationdb.record.query.plan.match.PlanMatchers.primaryKeyDistinct) Descriptors(com.google.protobuf.Descriptors) CompletableFuture(java.util.concurrent.CompletableFuture) BooleanNormalizer(com.apple.foundationdb.record.query.plan.planning.BooleanNormalizer) PlanHashable(com.apple.foundationdb.record.PlanHashable) PlanMatchers.filter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.filter) HashSet(java.util.HashSet) ExecuteProperties(com.apple.foundationdb.record.ExecuteProperties) FDBRecordStore(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore) ScanProperties(com.apple.foundationdb.record.ScanProperties) RecordCursorIterator(com.apple.foundationdb.record.RecordCursorIterator) DefaultTextTokenizer(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizer) BY_TEXT_TOKEN(com.apple.foundationdb.record.IndexScanType.BY_TEXT_TOKEN) BunchedMapMultiIterator(com.apple.foundationdb.map.BunchedMapMultiIterator) Nonnull(javax.annotation.Nonnull) Expressions.field(com.apple.foundationdb.record.metadata.Key.Expressions.field) EmptyKeyExpression(com.apple.foundationdb.record.metadata.expressions.EmptyKeyExpression) SIMPLE_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.SIMPLE_DOC) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) RecordMetaDataBuilder(com.apple.foundationdb.record.RecordMetaDataBuilder) RecordTypeBuilder(com.apple.foundationdb.record.metadata.RecordTypeBuilder) BY_TIME_WINDOW(com.apple.foundationdb.record.IndexScanType.BY_TIME_WINDOW) FilteringTextTokenizer(com.apple.foundationdb.record.provider.common.text.FilteringTextTokenizer) ReadTransaction(com.apple.foundationdb.ReadTransaction) Normalizer(java.text.Normalizer) Matchers.any(org.hamcrest.Matchers.any) TimeUnit(java.util.concurrent.TimeUnit) DefaultTextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizerFactory) PlanMatchers.unbounded(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unbounded) FDBDatabaseFactory(com.apple.foundationdb.record.provider.foundationdb.FDBDatabaseFactory) Message(com.google.protobuf.Message) RecordCursor(com.apple.foundationdb.record.RecordCursor) QueryComponent(com.apple.foundationdb.record.query.expressions.QueryComponent) PlanMatchers.descendant(com.apple.foundationdb.record.query.plan.match.PlanMatchers.descendant) Comparator(java.util.Comparator) Collections(java.util.Collections) AllSuffixesTextTokenizer(com.apple.foundationdb.record.provider.common.text.AllSuffixesTextTokenizer) BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) IndexEntry(com.apple.foundationdb.record.IndexEntry) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Aggregations

SimpleDocument (com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument)23 FDBRecordContext (com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext)22 Test (org.junit.jupiter.api.Test)22 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)22 BunchedMapScanEntry (com.apple.foundationdb.map.BunchedMapScanEntry)12 IndexEntry (com.apple.foundationdb.record.IndexEntry)12 Random (java.util.Random)11 PlanMatchers.hasTupleString (com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString)9 Matchers.containsString (org.hamcrest.Matchers.containsString)9 Tag (org.junit.jupiter.api.Tag)9 Index (com.apple.foundationdb.record.metadata.Index)8 ExecuteProperties (com.apple.foundationdb.record.ExecuteProperties)6 RecordCoreArgumentException (com.apple.foundationdb.record.RecordCoreArgumentException)6 RecordCoreException (com.apple.foundationdb.record.RecordCoreException)6 KeyValue (com.apple.foundationdb.KeyValue)5 ReadTransaction (com.apple.foundationdb.ReadTransaction)5 AsyncUtil (com.apple.foundationdb.async.AsyncUtil)5 BunchedMap (com.apple.foundationdb.map.BunchedMap)5 BunchedMapMultiIterator (com.apple.foundationdb.map.BunchedMapMultiIterator)5 SubspaceSplitter (com.apple.foundationdb.map.SubspaceSplitter)5