Usage of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
Shown here: the class TextIndexTest, method querySimpleDocumentsMaybeCovering.
/**
 * Verify that the planner uses a covering scan of the default text index when the query only
 * needs the primary key, falls back to a full index scan (or fetch + residual filter) when it
 * needs fields outside the index, and that each plan returns the expected documents.
 *
 * <p>Fix: removed leftover debug {@code System.out.println} calls that dumped plan hashes
 * before the assertions pinning them, and corrected plan-shape comments that had drifted
 * from the plans actually asserted (copy-paste from neighboring cases).
 */
@Test
public void querySimpleDocumentsMaybeCovering() throws Exception {
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(
            TextSamples.ANGSTROM, TextSamples.AETHELRED, TextSamples.ROMEO_AND_JULIET_PROLOGUE, TextSamples.FRENCH));
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        documents.forEach(recordStore::saveRecord);
        // Phrase predicate (matches only the Romeo & Juliet prologue, doc 2) and its expected comparison.
        final QueryComponent filter1 = Query.field("text").text().containsPhrase("civil blood makes civil hands unclean");
        final Comparisons.Comparison comparison1 = new Comparisons.TextComparison(Comparisons.Type.TEXT_CONTAINS_PHRASE, "civil blood makes civil hands unclean", null, DefaultTextTokenizer.NAME);
        // Prefix predicate (matches all four documents) and its expected comparison.
        final QueryComponent filter2 = Query.field("text").text().containsPrefix("th");
        final Comparisons.Comparison comparison2 = new Comparisons.TextComparison(Comparisons.Type.TEXT_CONTAINS_PREFIX, Collections.singletonList("th"), null, DefaultTextTokenizer.NAME);

        // Query for full records
        RecordQuery query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setFilter(filter1).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null)
        RecordQueryPlan plan = planner.plan(query);
        assertThat(plan, textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1)))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(1101247748, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1215587201, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        List<Long> primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setFilter(filter2).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) | UnorderedPrimaryKeyDistinct()
        plan = planner.plan(query);
        assertThat(plan, primaryKeyDistinct(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2))))));
        assertEquals(1032989149, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-1513880131, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1570861632, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 1L, 2L, 3L), primaryKeys);

        // Query for just primary key
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(filter1).build();
        // Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null) -> [doc_id: KEY[1]])
        plan = planner.plan(query);
        assertThat(plan, coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1))))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-786467136, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(1191665211, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(filter2).build();
        // Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct()
        plan = planner.plan(query);
        assertThat(plan, primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2)))))));
        assertEquals(1032989149, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(893372281, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(836390780, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 1L, 2L, 3L), primaryKeys);

        // Query for primary key but also have a filter on something outside the index
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(Query.and(filter1, Query.field("group").equalsValue(0L))).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null) | group EQUALS 0
        plan = planner.plan(query);
        assertThat(plan, filter(Query.field("group").equalsValue(0L), textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1))))));
        assertEquals(-1328921799, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(390154904, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-611539723, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Collections.singletonList(2L), primaryKeys);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("doc_id"))).setFilter(Query.and(filter2, Query.field("group").equalsValue(0L))).build();
        // Fetch(Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct()) | group EQUALS 0
        plan = planner.plan(query);
        assertThat(plan, filter(Query.field("group").equalsValue(0L), fetch(primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2)))))))));
        assertEquals(792432470, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-879354804, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-545069279, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        primaryKeys = recordStore.executeQuery(plan).map(FDBQueriedRecord::getPrimaryKey).map(t -> t.getLong(0)).asList().get();
        assertEquals(Arrays.asList(0L, 2L), primaryKeys);

        // Query for the text field, which produces the first token that matches
        // Arguably, this should produce an error, but that requires a more sophisticated
        // check when trying to determine if the index covers the query
        final Descriptors.FieldDescriptor docIdDescriptor = SimpleDocument.getDescriptor().findFieldByNumber(SimpleDocument.DOC_ID_FIELD_NUMBER);
        final Descriptors.FieldDescriptor textDescriptor = SimpleDocument.getDescriptor().findFieldByNumber(SimpleDocument.TEXT_FIELD_NUMBER);
        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("text"))).setFilter(filter1).build();
        // TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PHRASE civil blood makes civil hands unclean, null)
        plan = planner.plan(query);
        assertThat(plan, textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison1)))));
        assertEquals(814602491, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(1101247748, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1215587201, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        List<Tuple> idTextTuples = recordStore.executeQuery(plan).map(record -> {
            final Object docId = record.getRecord().getField(docIdDescriptor);
            final Object text = record.getRecord().getField(textDescriptor);
            return Tuple.from(docId, text);
        }).asList().get();
        assertEquals(Collections.singletonList(Tuple.from(2L, TextSamples.ROMEO_AND_JULIET_PROLOGUE)), idTextTuples);

        query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setRequiredResults(Collections.singletonList(field("text"))).setFilter(filter2).build();
        // Fetch(Covering(TextIndex(SimpleDocument$text null, TEXT_CONTAINS_PREFIX [th], null) -> [doc_id: KEY[1]]) | UnorderedPrimaryKeyDistinct())
        plan = planner.plan(query);
        assertThat(plan, fetch(primaryKeyDistinct(coveringIndexScan(textIndexScan(allOf(indexName(TextIndexTestUtils.SIMPLE_DEFAULT_NAME), textComparison(equalTo(comparison2))))))));
        assertEquals(-1359010536, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
        assertEquals(-1017914160, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
        assertEquals(-1074895661, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));
        idTextTuples = recordStore.executeQuery(plan).map(record -> {
            final Object docId = record.getRecord().getField(docIdDescriptor);
            final Object text = record.getRecord().getField(textDescriptor);
            return Tuple.from(docId, text);
        }).asList().get();
        assertEquals(Arrays.asList(Tuple.from(0L, TextSamples.ANGSTROM), Tuple.from(1L, TextSamples.AETHELRED), Tuple.from(2L, TextSamples.ROMEO_AND_JULIET_PROLOGUE), Tuple.from(3L, TextSamples.FRENCH)), idTextTuples);
        commit(context);
    }
}
Usage of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
Shown here: the class TextIndexTest, method textIndexPerf1000SerialInsert.
/**
 * Performance smoke test: insert 1000 randomly generated documents serially, committing in
 * batches of 10 per transaction, then log the elapsed wall-clock time and resource usage.
 */
@Tag(Tags.Performance)
@Test
public void textIndexPerf1000SerialInsert() throws Exception {
    // Create 1000 records
    final Random random = new Random();
    final List<SimpleDocument> documents = getRandomRecords(random, 1000);
    final int batchSize = 10;
    final long start = System.nanoTime();
    for (int offset = 0; offset < documents.size(); offset += batchSize) {
        // One transaction per batch of 10 documents.
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context);
            documents.subList(offset, offset + batchSize).forEach(recordStore::saveRecord);
            commit(context);
        }
    }
    final long stop = System.nanoTime();
    LOGGER.info("performed 1000 serial insertions in {} seconds.", (stop - start) * 1e-9);
    printUsage();
}
Usage of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
Shown here: the class TextIndexTest, method saveSimpleDocumentsWithSuffixes.
/**
 * Save documents under the all-suffixes text index and verify both the number of index keys
 * written per save and the token-to-position entries for several suffix tokens.
 */
@Test
public void saveSimpleDocumentsWithSuffixes() throws Exception {
    final SimpleDocument german = SimpleDocument.newBuilder().setDocId(1623L).setText(TextSamples.GERMAN).setGroup(2).build();
    final SimpleDocument russian = SimpleDocument.newBuilder().setDocId(1547L).setText(TextSamples.RUSSIAN).build();
    try (FDBRecordContext context = openContext()) {
        // Replace the default text index with the suffixes-tokenized one.
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_SUFFIXES);
        });
        // Each save should write a known number of index keys (one per distinct suffix token).
        recordStore.saveRecord(german);
        assertEquals(82, getSaveIndexKeyCount(recordStore));
        recordStore.saveRecord(russian);
        assertEquals(82 + 45, getSaveIndexKeyCount(recordStore));
        // Suffix tokens map back to the positions where they occur in the source text.
        List<Map.Entry<Tuple, List<Integer>>> entries = scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("mannschaft"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Collections.singletonList(11))), entries);
        entries = scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("schaft"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(15, 38))), entries);
        entries = scanMapEntries(recordStore, SIMPLE_TEXT_SUFFIXES, Tuple.from("ности"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1547L), Collections.singletonList(34))), entries);
        commit(context);
    }
}
Usage of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
Shown here: the class TextIndexTest, method saveSimpleDocumentsWithPrefix.
/**
 * Save the same document under the legacy and the current prefix-tokenized text indexes and
 * verify that they write different key counts and index tokens of different prefix lengths
 * (the legacy tokenizer indexes 3-character prefixes, the current one longer prefixes).
 */
@Test
public void saveSimpleDocumentsWithPrefix() throws Exception {
    final SimpleDocument prologue = SimpleDocument.newBuilder().setDocId(1623L).setText(TextSamples.ROMEO_AND_JULIET_PROLOGUE).setGroup(2).build();
    // First: the legacy prefix tokenizer, which indexes under the token "par".
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_PREFIX_LEGACY);
        });
        recordStore.saveRecord(prologue);
        assertEquals(74, getSaveIndexKeyCount(recordStore));
        List<Map.Entry<Tuple, List<Integer>>> entries = scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX_LEGACY, Tuple.from("par"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(57, 72))), entries);
        commit(context);
    }
    // Wipe the store so the second index starts from scratch.
    try (FDBRecordContext context = openContext()) {
        FDBRecordStore.deleteStore(context, recordStore.getSubspace());
        commit(context);
    }
    // Second: the current prefix tokenizer, which indexes under "pare" rather than "par".
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_PREFIX);
        });
        recordStore.saveRecord(prologue);
        assertEquals(79, getSaveIndexKeyCount(recordStore));
        List<Map.Entry<Tuple, List<Integer>>> entries = scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX, Tuple.from("par"));
        assertEquals(Collections.emptyList(), entries);
        entries = scanMapEntries(recordStore, SIMPLE_TEXT_PREFIX, Tuple.from("pare"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(57, 72))), entries);
        commit(context);
    }
}
Usage of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
Shown here: the class TextIndexTest, method querySimpleDocumentsWithDifferentTokenizers.
/**
 * Verify that a text predicate is planned against the index whose tokenizer matches the
 * predicate's tokenizer, and that different tokenizers produce different match sets over
 * the same four documents (prologue, Russian, German, Korean samples).
 *
 * <p>Each {@code querySimpleDocumentsWithIndex} call asserts both the index chosen by the
 * planner and the returned primary keys; the integer argument is an expected plan hash and
 * the trailing boolean presumably toggles a plan/legacy-planner option — TODO confirm
 * against the helper's declaration (not visible in this chunk).
 */
@Test
public void querySimpleDocumentsWithDifferentTokenizers() throws Exception {
final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(TextSamples.ROMEO_AND_JULIET_PROLOGUE, TextSamples.RUSSIAN, TextSamples.GERMAN, TextSamples.KOREAN));
// Register the custom filtering tokenizer so the meta-data below can reference it.
TextTokenizerRegistryImpl.instance().register(FILTERING_TOKENIZER);
try (FDBRecordContext context = openContext()) {
// Add one index per tokenizer alongside the default index on the same record type.
openRecordStore(context, metaDataBuilder -> {
final RecordTypeBuilder simpleDocRecordType = metaDataBuilder.getRecordType(SIMPLE_DOC);
metaDataBuilder.addIndex(simpleDocRecordType, SIMPLE_TEXT_PREFIX);
metaDataBuilder.addIndex(simpleDocRecordType, SIMPLE_TEXT_FILTERING);
metaDataBuilder.addIndex(simpleDocRecordType, SIMPLE_TEXT_SUFFIXES);
});
documents.forEach(recordStore::saveRecord);
// Filtering tokenizer: long tokens that the default tokenizer keeps are filtered out,
// so the same query matches under the default index but not the filtering index.
final String filteringTokenizerName = FILTERING_TOKENIZER.getName();
assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).contains("weltmeisterschaft"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, -1172646540, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(filteringTokenizerName).contains("weltmeisterschaft"), SIMPLE_TEXT_FILTERING.getName(), 835135314, true));
assertEquals(Collections.singletonList(1L), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).contains("достопримечательности"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, -1291535616, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(filteringTokenizerName).contains("достопримечательности"), SIMPLE_TEXT_FILTERING.getName(), 716246238, true));
// Mixed-case phrase matches, but the pre-tokenized lowercase token list does not —
// suggesting the filtering index drops one of the lowered tokens; confirm against FILTERING_TOKENIZER.
assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text(filteringTokenizerName).containsAll("Weltmeisterschaft gewonnen"), SIMPLE_TEXT_FILTERING.getName(), 696188882, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(filteringTokenizerName).containsAll(Arrays.asList("weltmeisterschaft", "gewonnen")), SIMPLE_TEXT_FILTERING.getName(), 1945779923, true));
assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsAll("Weltmeisterschaft Nationalmannschaft Friedrichstraße"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, 625333664, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(filteringTokenizerName).containsAll("Weltmeisterschaft Nationalmannschaft Friedrichstraße"), SIMPLE_TEXT_FILTERING.getName(), -1661851778, true));
// Prefix tokenizer: tokens are truncated prefixes, so "civic"/"lover" match doc 0's
// "civil"/"lovers" under the prefix index but not under the default (exact-token) index.
final String prefixTokenizerName = PrefixTextTokenizer.NAME;
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsAny("civic lover"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, 1358697044, true));
assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text(prefixTokenizerName).containsAll("civic lover"), SIMPLE_TEXT_PREFIX.getName(), 2070491434, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsAll("못핵"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, -1414597326, true));
assertEquals(Collections.singletonList(3L), querySimpleDocumentsWithIndex(Query.field("text").text(prefixTokenizerName).containsAll("못핵"), SIMPLE_TEXT_PREFIX.getName(), 1444383389, true));
// Suffixes tokenizer
// Note that prefix scans using the suffixes tokenizer are equivalent to infix searches on the original tokens
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsPrefix("meister"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, -2049073113, true));
assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsPrefix("meister"), SIMPLE_TEXT_SUFFIXES.getName(), -628393471, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsAnyPrefix("meister ivi"), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, 279029713, true));
// containsAnyPrefix can return the matches in either order, so compare as a set.
assertEquals(ImmutableSet.of(0L, 2L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsAnyPrefix("meister ivi"), SIMPLE_TEXT_SUFFIXES.getName(), 1699709355, true)));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(DefaultTextTokenizer.NAME).containsAllPrefixes("meister won", false), TextIndexTestUtils.SIMPLE_DEFAULT_NAME, 993745490, true));
assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsAllPrefixes("meister won", false), SIMPLE_TEXT_SUFFIXES.getName(), -1880542164, true));
// Exact-token containment against the suffix index: single-letter suffixes "y"/"e" exist,
// but "bloody"/"civilize" are not suffixes of any token in the documents.
assertEquals(Arrays.asList(0L, 2L), querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsAny("y e"), SIMPLE_TEXT_SUFFIXES.getName(), -1665999070, true));
assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsAny("bloody civilize"), SIMPLE_TEXT_SUFFIXES.getName(), 1290016358, true));
assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text(AllSuffixesTextTokenizer.NAME).containsAll("ood ivil nds"), SIMPLE_TEXT_SUFFIXES.getName(), -1619880168, true));
commit(context);
}
}
Aggregations