Search in sources :

Example 16 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method querySimpleDocumentsMaybeCovering.

/**
 * Checks which text-index plans the planner produces for {@code SimpleDocument} queries depending on
 * the required results, and that executing those plans returns the expected records.
 *
 * <p>Four sample documents are saved, then a phrase filter and a prefix filter on {@code text} are each
 * planned in four configurations: no required results, only {@code doc_id} required, {@code doc_id}
 * required together with an extra filter on {@code group}, and only {@code text} required. For every
 * plan the test asserts the plan shape (via matchers), the three plan hash kinds, and the query results.
 *
 * @throws Exception if planning, query execution, or the commit fails
 */
@Test
public void querySimpleDocumentsMaybeCovering() throws Exception {
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(TextSamples.ANGSTROM, TextSamples.AETHELRED, TextSamples.ROMEO_AND_JULIET_PROLOGUE, TextSamples.FRENCH));
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        documents.forEach(recordStore::saveRecord);
        // Each filter is paired with the comparison object the planned text index scan is expected to carry.
        final QueryComponent filter1 = Query.field("text").text().containsPhrase("civil blood makes civil hands unclean");
        final Comparisons.Comparison comparison1 = new Comparisons.TextComparison(Comparisons.Type.TEXT_CONTAINS_PHRASE, "civil blood makes civil hands unclean", null, DefaultTextTokenizer.NAME);
        final QueryComponent filter2 = Query.field("text").text().containsPrefix("th");
        final Comparisons.Comparison comparison2 = new Comparisons.TextComparison(Comparisons.Type.TEXT_CONTAINS_PREFIX, Collections.singletonList("th"), null, DefaultTextTokenizer.NAME);

        // Query for full records
        RecordQuery query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setFilter(filter1).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null)
        RecordQueryPlan plan = planner.plan(query);
        assertThat(plan, textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1)))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(1101247748, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1215587201, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        List<Long> primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setFilter(filter2).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) | UnorderedPrimaryKeyDistinct()
        plan = planner.plan(query);
        assertThat(plan, primaryKeyDistinct(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2))))));
        assertEquals(1032989149, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-1513880131, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1570861632, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 1L, 2L, 3L), primaryKeys);

        // Query for just primary key
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(filter1).build();
        // Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null) -> [doc_id: KEY[1]])
        plan = planner.plan(query);
        assertThat(plan, coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1))))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-786467136, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(1191665211, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(filter2).build();
        // Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct()
        plan = planner.plan(query);
        assertThat(plan, primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2)))))));
        assertEquals(1032989149, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(893372281, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(836390780, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 1L, 2L, 3L), primaryKeys);

        // Query for primary key but also have a filter on something outside the index
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(Query.and(filter1, Query.field("group").equalsValue(0L))).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null) | group EQUALS 0
        plan = planner.plan(query);
        assertThat(plan, filter(Query.field("group").equalsValue(0L), textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1))))));
        assertEquals(-1328921799, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(390154904, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-611539723, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(Query.and(filter2, Query.field("group").equalsValue(0L))).build();
        // Fetch(Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct()) | group EQUALS 0
        plan = planner.plan(query);
        assertThat(plan, filter(Query.field("group").equalsValue(0L), fetch(primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2)))))))));
        assertEquals(792432470, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-879354804, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-545069279, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 2L), primaryKeys);

        // Query for the text field, which produces the first token that matches
        // Arguably, this should produce an error, but that requires a more sophisticated
        // check when trying to determine if the index covers the query
        final Descriptors.FieldDescriptor docIdDescriptor = SimpleDocument.getDescriptor().findFieldByNumber(SimpleDocument.DOC_ID_FIELD_NUMBER);
        final Descriptors.FieldDescriptor textDescriptor = SimpleDocument.getDescriptor().findFieldByNumber(SimpleDocument.TEXT_FIELD_NUMBER);
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("text"))).setFilter(filter1).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null)
        plan = planner.plan(query);
        assertThat(plan, textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1)))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(1101247748, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1215587201, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        List<Tuple> idTextTuples = recordStore.executeQuery(plan).map(record -> {
            final Object docId = record.getRecord().getField(docIdDescriptor);
            final Object text = record.getRecord().getField(textDescriptor);
            return Tuple.from(docId, text);
        }).asList().get();
        assertEquals(Collections.singletonList(Tuple.from(2L, TextSamples.ROMEO_AND_JULIET_PROLOGUE)), idTextTuples);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("text"))).setFilter(filter2).build();
        // Fetch(Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct())
        plan = planner.plan(query);
        assertThat(plan, fetch(primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2))))))));
        assertEquals(-1359010536, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-1017914160, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1074895661, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        idTextTuples = recordStore.executeQuery(plan).map(record -> {
            final Object docId = record.getRecord().getField(docIdDescriptor);
            final Object text = record.getRecord().getField(textDescriptor);
            return Tuple.from(docId, text);
        }).asList().get();
        assertEquals(Arrays.asList(Tuple.from(0L, TextSamples.ANGSTROM), Tuple.from(1L, TextSamples.AETHELRED), Tuple.from(2L, TextSamples.ROMEO_AND_JULIET_PROLOGUE), Tuple.from(3L, TextSamples.FRENCH)), idTextTuples);
        commit(context);
    }
}
Also used : RecordQueryPlan(com.apple.foundationdb.record.query.plan.plans.RecordQueryPlan) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) LogMessageKeys(com.apple.foundationdb.record.logging.LogMessageKeys) BY_GROUP(com.apple.foundationdb.record.IndexScanType.BY_GROUP) Matchers.not(org.hamcrest.Matchers.not) MetaDataException(com.apple.foundationdb.record.metadata.MetaDataException) TextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.TextTokenizerFactory) ComplexDocument(com.apple.foundationdb.record.TestRecordsTextProto.ComplexDocument) Subspace(com.apple.foundationdb.subspace.Subspace) TextSamples(com.apple.foundationdb.record.provider.common.text.TextSamples) RecordCursorResult(com.apple.foundationdb.record.RecordCursorResult) Pair(org.apache.commons.lang3.tuple.Pair) RecordCoreException(com.apple.foundationdb.record.RecordCoreException) COMPLEX_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.COMPLEX_DOC) VersionKeyExpression(com.apple.foundationdb.record.metadata.expressions.VersionKeyExpression) Map(java.util.Map) Expressions.concat(com.apple.foundationdb.record.metadata.Key.Expressions.concat) GroupingKeyExpression(com.apple.foundationdb.record.metadata.expressions.GroupingKeyExpression) Tag(org.junit.jupiter.api.Tag) Query(com.apple.foundationdb.record.query.expressions.Query) KeyExpression(com.apple.foundationdb.record.metadata.expressions.KeyExpression) IndexOptions(com.apple.foundationdb.record.metadata.IndexOptions) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Matchers.allOf(org.hamcrest.Matchers.allOf) Set(java.util.Set) PlanMatchers.textComparison(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textComparison) FanType(com.apple.foundationdb.record.metadata.expressions.KeyExpression.FanType) Arguments(org.junit.jupiter.params.provider.Arguments) BY_VALUE(com.apple.foundationdb.record.IndexScanType.BY_VALUE) 
TupleRange(com.apple.foundationdb.record.TupleRange) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestRecordsTextProto(com.apple.foundationdb.record.TestRecordsTextProto) Stream(java.util.stream.Stream) PlanMatchers.indexName(com.apple.foundationdb.record.query.plan.match.PlanMatchers.indexName) Matchers.anything(org.hamcrest.Matchers.anything) Matchers.contains(org.hamcrest.Matchers.contains) TupleHelpers(com.apple.foundationdb.tuple.TupleHelpers) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) PlanMatchers.typeFilter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.typeFilter) Matchers.containsString(org.hamcrest.Matchers.containsString) FDBIndexedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBIndexedRecord) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) RecordMetaData(com.apple.foundationdb.record.RecordMetaData) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) PlanMatchers.fetch(com.apple.foundationdb.record.query.plan.match.PlanMatchers.fetch) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) AsyncUtil(com.apple.foundationdb.async.AsyncUtil) RecordQuery(com.apple.foundationdb.record.query.RecordQuery) ComponentWithComparison(com.apple.foundationdb.record.query.expressions.ComponentWithComparison) RecordQueryPlan(com.apple.foundationdb.record.query.plan.plans.RecordQueryPlan) ArrayList(java.util.ArrayList) PlanMatchers(com.apple.foundationdb.record.query.plan.match.PlanMatchers) BunchedMap(com.apple.foundationdb.map.BunchedMap) TestLogMessageKeys(com.apple.foundationdb.record.logging.TestLogMessageKeys) LoggableException(com.apple.foundationdb.util.LoggableException) Matchers.lessThan(org.hamcrest.Matchers.lessThan) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Nullable(javax.annotation.Nullable) 
FDBStoredRecord(com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord) FieldWithComparison(com.apple.foundationdb.record.query.expressions.FieldWithComparison) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) SOURCE_EXHAUSTED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SOURCE_EXHAUSTED) Tags(com.apple.test.Tags) SCAN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.SCAN_LIMIT_REACHED) OrComponent(com.apple.foundationdb.record.query.expressions.OrComponent) BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) FDBRecordStoreTestBase(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase) ExecutionException(java.util.concurrent.ExecutionException) AndOrComponent(com.apple.foundationdb.record.query.expressions.AndOrComponent) Comparisons(com.apple.foundationdb.record.query.expressions.Comparisons) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Index(com.apple.foundationdb.record.metadata.Index) Matcher(org.hamcrest.Matcher) PlanMatchers.unorderedUnion(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unorderedUnion) TextIndexBunchedSerializerTest.entryOf(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexBunchedSerializerTest.entryOf) IndexEntry(com.apple.foundationdb.record.IndexEntry) PlanMatchers.groupingBounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.groupingBounds) StoreTimer(com.apple.foundationdb.record.provider.common.StoreTimer) LoggerFactory(org.slf4j.LoggerFactory) BY_RANK(com.apple.foundationdb.record.IndexScanType.BY_RANK) PrefixTextTokenizer(com.apple.foundationdb.record.provider.common.text.PrefixTextTokenizer) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) Random(java.util.Random) SubspaceSplitter(com.apple.foundationdb.map.SubspaceSplitter) PlanMatchers.bounds(com.apple.foundationdb.record.query.plan.match.PlanMatchers.bounds) 
RecordQueryPlanner(com.apple.foundationdb.record.query.plan.RecordQueryPlanner) Tuple(com.apple.foundationdb.tuple.Tuple) KeyValueLogMessage(com.apple.foundationdb.record.logging.KeyValueLogMessage) PlanMatchers.textIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.textIndexScan) TextTokenizerRegistryImpl(com.apple.foundationdb.record.provider.common.text.TextTokenizerRegistryImpl) Expressions.concatenateFields(com.apple.foundationdb.record.metadata.Key.Expressions.concatenateFields) RETURN_LIMIT_REACHED(com.apple.foundationdb.record.RecordCursor.NoNextReason.RETURN_LIMIT_REACHED) FDBExceptions(com.apple.foundationdb.record.provider.foundationdb.FDBExceptions) MethodSource(org.junit.jupiter.params.provider.MethodSource) PlanMatchers.coveringIndexScan(com.apple.foundationdb.record.query.plan.match.PlanMatchers.coveringIndexScan) ImmutableSet(com.google.common.collect.ImmutableSet) KeyValue(com.apple.foundationdb.KeyValue) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) FDBStoreTimer(com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer) ImmutableMap(com.google.common.collect.ImmutableMap) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) RecordCoreArgumentException(com.apple.foundationdb.record.RecordCoreArgumentException) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) TextTokenizer(com.apple.foundationdb.record.provider.common.text.TextTokenizer) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) List(java.util.List) EvaluationContext(com.apple.foundationdb.record.EvaluationContext) FDBQueriedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBQueriedRecord) Matchers.equalTo(org.hamcrest.Matchers.equalTo) MapDocument(com.apple.foundationdb.record.TestRecordsTextProto.MapDocument) IndexTypes(com.apple.foundationdb.record.metadata.IndexTypes) Matchers.anyOf(org.hamcrest.Matchers.anyOf) 
IntStream(java.util.stream.IntStream) PlanMatchers.primaryKeyDistinct(com.apple.foundationdb.record.query.plan.match.PlanMatchers.primaryKeyDistinct) Descriptors(com.google.protobuf.Descriptors) CompletableFuture(java.util.concurrent.CompletableFuture) BooleanNormalizer(com.apple.foundationdb.record.query.plan.planning.BooleanNormalizer) PlanHashable(com.apple.foundationdb.record.PlanHashable) PlanMatchers.filter(com.apple.foundationdb.record.query.plan.match.PlanMatchers.filter) HashSet(java.util.HashSet) ExecuteProperties(com.apple.foundationdb.record.ExecuteProperties) FDBRecordStore(com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore) ScanProperties(com.apple.foundationdb.record.ScanProperties) RecordCursorIterator(com.apple.foundationdb.record.RecordCursorIterator) DefaultTextTokenizer(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizer) BY_TEXT_TOKEN(com.apple.foundationdb.record.IndexScanType.BY_TEXT_TOKEN) BunchedMapMultiIterator(com.apple.foundationdb.map.BunchedMapMultiIterator) Nonnull(javax.annotation.Nonnull) Expressions.field(com.apple.foundationdb.record.metadata.Key.Expressions.field) EmptyKeyExpression(com.apple.foundationdb.record.metadata.expressions.EmptyKeyExpression) SIMPLE_DOC(com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.SIMPLE_DOC) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) RecordMetaDataBuilder(com.apple.foundationdb.record.RecordMetaDataBuilder) RecordTypeBuilder(com.apple.foundationdb.record.metadata.RecordTypeBuilder) BY_TIME_WINDOW(com.apple.foundationdb.record.IndexScanType.BY_TIME_WINDOW) FilteringTextTokenizer(com.apple.foundationdb.record.provider.common.text.FilteringTextTokenizer) ReadTransaction(com.apple.foundationdb.ReadTransaction) Normalizer(java.text.Normalizer) Matchers.any(org.hamcrest.Matchers.any) TimeUnit(java.util.concurrent.TimeUnit) DefaultTextTokenizerFactory(com.apple.foundationdb.record.provider.common.text.DefaultTextTokenizerFactory) 
PlanMatchers.unbounded(com.apple.foundationdb.record.query.plan.match.PlanMatchers.unbounded) FDBDatabaseFactory(com.apple.foundationdb.record.provider.foundationdb.FDBDatabaseFactory) Message(com.google.protobuf.Message) RecordCursor(com.apple.foundationdb.record.RecordCursor) QueryComponent(com.apple.foundationdb.record.query.expressions.QueryComponent) PlanMatchers.descendant(com.apple.foundationdb.record.query.plan.match.PlanMatchers.descendant) Comparator(java.util.Comparator) Collections(java.util.Collections) AllSuffixesTextTokenizer(com.apple.foundationdb.record.provider.common.text.AllSuffixesTextTokenizer) QueryComponent(com.apple.foundationdb.record.query.expressions.QueryComponent) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) FDBQueriedRecord(com.apple.foundationdb.record.provider.foundationdb.FDBQueriedRecord) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) Comparisons(com.apple.foundationdb.record.query.expressions.Comparisons) Descriptors(com.google.protobuf.Descriptors) RecordQuery(com.apple.foundationdb.record.query.RecordQuery) Tuple(com.apple.foundationdb.tuple.Tuple) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 17 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method textIndexPerf1000SerialInsert.

/**
 * Performance test that saves 1000 randomly generated documents serially, ten per transaction,
 * then logs the elapsed wall-clock time and prints usage information.
 *
 * @throws Exception if opening the store, saving a record, or committing fails
 */
@Tag(Tags.Performance)
@Test
public void textIndexPerf1000SerialInsert() throws Exception {
    // Create 1000 records
    final Random random = new Random();
    final List<SimpleDocument> docs = getRandomRecords(random, 1000);
    final int batchSize = 10;

    final long begin = System.nanoTime();
    int offset = 0;
    while (offset < docs.size()) {
        // One transaction per batch of ten documents.
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context);
            final List<SimpleDocument> batch = docs.subList(offset, offset + batchSize);
            for (SimpleDocument doc : batch) {
                recordStore.saveRecord(doc);
            }
            commit(context);
        }
        offset += batchSize;
    }
    final long finish = System.nanoTime();

    // Nanoseconds are converted to seconds for the log line.
    LOGGER.info("performed 1000 serial insertions in {} seconds.", (finish - begin) * 1e-9);
    printUsage();
}
Also used : Random(java.util.Random) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test) Tag(org.junit.jupiter.api.Tag)

Example 18 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method saveSimpleDocumentsWithSuffixes.

/**
 * Saves a German and a Russian document into a store whose default text index has been swapped for
 * {@code SIMPLE_TEXT_SUFFIXES}, then checks the number of index keys written and the position lists
 * stored under a few suffix tokens.
 *
 * @throws Exception if opening the store, saving, scanning, or committing fails
 */
@Test
public void saveSimpleDocumentsWithSuffixes() throws Exception {
    final SimpleDocument german = SimpleDocument.newBuilder()
            .setDocId(1623L)
            .setText(TextSamples.GERMAN)
            .setGroup(2)
            .build();
    final SimpleDocument russian = SimpleDocument.newBuilder()
            .setDocId(1547L)
            .setText(TextSamples.RUSSIAN)
            .build();

    try (FDBRecordContext context = openContext()) {
        // Replace the default text index with the suffixes index.
        openRecordStore(context, builder -> {
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_SUFFIXES);
        });

        // The expected key count accumulates across the two saves.
        recordStore.saveRecord(german);
        assertEquals(82, getSaveIndexKeyCount(recordStore));
        recordStore.saveRecord(russian);
        assertEquals(82 + 45, getSaveIndexKeyCount(recordStore));

        // A full token from the German text.
        final List<Map.Entry<Tuple, List<Integer>>> mannschaftEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("mannschaft"));
        assertEquals(
                Collections.singletonList(entryOf(Tuple.from(1623L), Collections.singletonList(11))),
                mannschaftEntries);

        // A suffix that is recorded at two positions in the German text.
        final List<Map.Entry<Tuple, List<Integer>>> schaftEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("schaft"));
        assertEquals(
                Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(15, 38))),
                schaftEntries);

        // A suffix from the Russian text.
        final List<Map.Entry<Tuple, List<Integer>>> russianSuffixEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("ности"));
        assertEquals(
                Collections.singletonList(entryOf(Tuple.from(1547L), Collections.singletonList(34))),
                russianSuffixEntries);

        commit(context);
    }
}
Also used : BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) IndexEntry(com.apple.foundationdb.record.IndexEntry) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 19 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method saveSimpleDocumentsWithPrefix.

/**
 * Saves the same document first with {@code SIMPLE_TEXT_PREFIX_LEGACY} and then, after deleting the
 * store, with {@code SIMPLE_TEXT_PREFIX}, asserting the index key count and the entries found under
 * the tokens {@code "par"} and {@code "pare"} for each index.
 *
 * @throws Exception if opening the store, saving, scanning, deleting, or committing fails
 */
@Test
public void saveSimpleDocumentsWithPrefix() throws Exception {
    final SimpleDocument shakespeare = SimpleDocument.newBuilder()
            .setDocId(1623L)
            .setText(TextSamples.ROMEO_AND_JULIET_PROLOGUE)
            .setGroup(2)
            .build();

    // Phase 1: legacy prefix index.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_PREFIX_LEGACY);
        });
        recordStore.saveRecord(shakespeare);
        assertEquals(74, getSaveIndexKeyCount(recordStore));

        final List<Map.Entry<Tuple, List<Integer>>> legacyParEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX_LEGACY, Tuple.from("par"));
        assertEquals(
                Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(57, 72))),
                legacyParEntries);
        commit(context);
    }

    // Phase 2: wipe the store so the second index starts from empty data.
    try (FDBRecordContext context = openContext()) {
        FDBRecordStore.deleteStore(context, recordStore.getSubspace());
        commit(context);
    }

    // Phase 3: current prefix index; "par" is expected to be empty and "pare" to hold the positions.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_PREFIX);
        });
        recordStore.saveRecord(shakespeare);
        assertEquals(79, getSaveIndexKeyCount(recordStore));

        final List<Map.Entry<Tuple, List<Integer>>> parEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX, Tuple.from("par"));
        assertEquals(Collections.emptyList(), parEntries);

        final List<Map.Entry<Tuple, List<Integer>>> pareEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX, Tuple.from("pare"));
        assertEquals(
                Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(57, 72))),
                pareEntries);
        commit(context);
    }
}
Also used : BunchedMapScanEntry(com.apple.foundationdb.map.BunchedMapScanEntry) IndexEntry(com.apple.foundationdb.record.IndexEntry) FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 20 with SimpleDocument

use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.

the class TextIndexTest method querySimpleDocumentsWithDifferentTokenizers.

/**
 * Runs the same kinds of text queries against indexes built with different tokenizers (default,
 * filtering, prefix, and all-suffixes) and asserts, for each query, the matching document ids
 * together with the index name and plan hash handed to {@code querySimpleDocumentsWithIndex}.
 *
 * @throws Exception if opening the store, saving, querying, or committing fails
 */
@Test
public void querySimpleDocumentsWithDifferentTokenizers() throws Exception {
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(TextSamples.ROMEO_AND_JULIET_PROLOGUE, TextSamples.RUSSIAN, TextSamples.GERMAN, TextSamples.KOREAN));
    TextTokenizerRegistryImpl.instance().register(FILTERING_TOKENIZER);
    try (FDBRecordContext context = openContext()) {
        // Add the three extra text indexes alongside the default one.
        openRecordStore(context, builder -> {
            final RecordTypeBuilder simpleDocType = builder.getRecordType(SIMPLE_DOC);
            builder.addIndex(simpleDocType, SIMPLE_TEXT_PREFIX);
            builder.addIndex(simpleDocType, SIMPLE_TEXT_FILTERING);
            builder.addIndex(simpleDocType, SIMPLE_TEXT_SUFFIXES);
        });
        documents.forEach(recordStore::saveRecord);

        // Tokenizer names used to build the queries.
        final String defaultTokenizer = DefaultTextTokenizer.NAME;
        final String filteringTokenizer = FILTERING_TOKENIZER.getName();
        final String prefixTokenizer = PrefixTextTokenizer.NAME;
        final String suffixesTokenizer = AllSuffixesTextTokenizer.NAME;

        // Names of the indexes each query is expected to use.
        final String defaultIndex = TextIndexTestUtils.SIMPLE_DEFAULT_NAME;
        final String filteringIndex = SIMPLE_TEXT_FILTERING.getName();
        final String prefixIndex = SIMPLE_TEXT_PREFIX.getName();
        final String suffixesIndex = SIMPLE_TEXT_SUFFIXES.getName();

        final List<Long> noMatches = Collections.emptyList();

        // Filtering tokenizer
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).contains("weltmeisterschaft"),
                        defaultIndex, -1172646540, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(filteringTokenizer).contains("weltmeisterschaft"),
                        filteringIndex, 835135314, true));
        assertEquals(Collections.singletonList(1L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).contains("достопримечательности"),
                        defaultIndex, -1291535616, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(filteringTokenizer).contains("достопримечательности"),
                        filteringIndex, 716246238, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(filteringTokenizer).containsAll("Weltmeisterschaft gewonnen"),
                        filteringIndex, 696188882, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(filteringTokenizer).containsAll(Arrays.asList("weltmeisterschaft", "gewonnen")),
                        filteringIndex, 1945779923, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsAll("Weltmeisterschaft Nationalmannschaft Friedrichstraße"),
                        defaultIndex, 625333664, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(filteringTokenizer).containsAll("Weltmeisterschaft Nationalmannschaft Friedrichstraße"),
                        filteringIndex, -1661851778, true));

        // Prefix tokenizer
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsAny("civic lover"),
                        defaultIndex, 1358697044, true));
        assertEquals(Collections.singletonList(0L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(prefixTokenizer).containsAll("civic lover"),
                        prefixIndex, 2070491434, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsAll("못핵"),
                        defaultIndex, -1414597326, true));
        assertEquals(Collections.singletonList(3L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(prefixTokenizer).containsAll("못핵"),
                        prefixIndex, 1444383389, true));

        // Suffixes tokenizer
        // Note that prefix scans using the suffixes tokenizer are equivalent to infix searches on the original tokens
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsPrefix("meister"),
                        defaultIndex, -2049073113, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsPrefix("meister"),
                        suffixesIndex, -628393471, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsAnyPrefix("meister ivi"),
                        defaultIndex, 279029713, true));
        // Compared as a set, so the order of the returned ids is not checked here.
        assertEquals(ImmutableSet.of(0L, 2L),
                new HashSet<>(querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsAnyPrefix("meister ivi"),
                        suffixesIndex, 1699709355, true)));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(defaultTokenizer).containsAllPrefixes("meister won", false),
                        defaultIndex, 993745490, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsAllPrefixes("meister won", false),
                        suffixesIndex, -1880542164, true));
        assertEquals(Arrays.asList(0L, 2L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsAny("y e"),
                        suffixesIndex, -1665999070, true));
        assertEquals(noMatches,
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsAny("bloody civilize"),
                        suffixesIndex, 1290016358, true));
        assertEquals(Collections.singletonList(0L),
                querySimpleDocumentsWithIndex(
                        Query.field("text").text(suffixesTokenizer).containsAll("ood ivil nds"),
                        suffixesIndex, -1619880168, true));

        commit(context);
    }
}
Also used : FDBRecordContext(com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext) SimpleDocument(com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument) Matchers.containsString(org.hamcrest.Matchers.containsString) PlanMatchers.hasTupleString(com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString) RecordTypeBuilder(com.apple.foundationdb.record.metadata.RecordTypeBuilder) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Aggregations

SimpleDocument (com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument)23 FDBRecordContext (com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext)22 Test (org.junit.jupiter.api.Test)22 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)22 BunchedMapScanEntry (com.apple.foundationdb.map.BunchedMapScanEntry)12 IndexEntry (com.apple.foundationdb.record.IndexEntry)12 Random (java.util.Random)11 PlanMatchers.hasTupleString (com.apple.foundationdb.record.query.plan.match.PlanMatchers.hasTupleString)9 Matchers.containsString (org.hamcrest.Matchers.containsString)9 Tag (org.junit.jupiter.api.Tag)9 Index (com.apple.foundationdb.record.metadata.Index)8 ExecuteProperties (com.apple.foundationdb.record.ExecuteProperties)6 RecordCoreArgumentException (com.apple.foundationdb.record.RecordCoreArgumentException)6 RecordCoreException (com.apple.foundationdb.record.RecordCoreException)6 KeyValue (com.apple.foundationdb.KeyValue)5 ReadTransaction (com.apple.foundationdb.ReadTransaction)5 AsyncUtil (com.apple.foundationdb.async.AsyncUtil)5 BunchedMap (com.apple.foundationdb.map.BunchedMap)5 BunchedMapMultiIterator (com.apple.foundationdb.map.BunchedMapMultiIterator)5 SubspaceSplitter (com.apple.foundationdb.map.SubspaceSplitter)5