Example use of com.apple.foundationdb.record.provider.common.text.TextSamples in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryComplexDocumentsWithAdditionalFilters.
/**
 * Exercises text queries on {@code ComplexDocument} records where the text predicate is combined
 * with a second filter: a tag comparison, another text predicate, a negation, a disjunction, or
 * a rank predicate.
 *
 * <p>Eight documents are saved. Document {@code i} gets doc id {@code i}, group {@code i % 2},
 * tag {@code "3:" + (i % 3)} and score {@code i}. The store is opened with
 * {@code COMPLEX_TEXT_BY_GROUP} plus a rank index on {@code score} grouped by {@code group}.
 *
 * <p>NOTE(review): the expected tuples look like {@code (group, doc_id)} pairs and the trailing
 * {@code int} passed to each query helper looks like an expected plan hash; both helpers are
 * defined outside this method, so confirm against them.
 *
 * @throws Exception if opening the store, saving, querying, or committing fails
 */
@Test
public void queryComplexDocumentsWithAdditionalFilters() throws Exception {
    final List<String> textSamples = Arrays.asList(
            TextSamples.ANGSTROM,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.AETHELRED,
            TextSamples.FRENCH,
            TextSamples.GERMAN,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.YIDDISH,
            "Napoleon and the Duke of Wellington met in Waterloo in 1815.");
    final List<ComplexDocument> documents = IntStream.range(0, textSamples.size())
            .mapToObj(i -> ComplexDocument.newBuilder()
                    .setDocId(i)
                    .setGroup(i % 2)
                    .setText(textSamples.get(i))
                    .addTag("3:" + (i % 3))
                    .setScore(i)
                    .build())
            .collect(Collectors.toList());
    final Index rankIndex = new Index("Complex$rank(score)", field("score").groupBy(field("group")), IndexTypes.RANK);

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            final RecordTypeBuilder complexDocRecordType = metaDataBuilder.getRecordType(COMPLEX_DOC);
            metaDataBuilder.addIndex(complexDocRecordType, COMPLEX_TEXT_BY_GROUP);
            metaDataBuilder.addIndex(complexDocRecordType, rankIndex);
        });
        documents.forEach(recordStore::saveRecord);

        // Text predicate plus a tag equality filter.
        assertEquals(Collections.singletonList(Tuple.from(1L, 5L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("fearful passage love", 7),
                        Query.field("tag").oneOfThem().equalsValue("3:2"),
                        1, 758136568));

        // Text predicate plus a second text predicate on the same field.
        assertEquals(Arrays.asList(Tuple.from(1L, 1L), Tuple.from(1L, 5L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("fearful passage love", 7),
                        Query.field("text").text().containsPrefix("continu"),
                        1, -1043653062));

        // Text predicate plus a conjunction of two further text predicates.
        assertEquals(Collections.singletonList(Tuple.from(1L, 7L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().contains("napoleon"),
                        Query.and(
                                Query.field("text").text().containsPrefix("th"),
                                Query.field("text").text().contains("waterloo")),
                        1, -754900112));

        // Text predicate plus a negated tag filter. The original asserted this exact query
        // twice in a row; the redundant copy has been dropped.
        assertEquals(Collections.singletonList(Tuple.from(1L, 1L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("fearful passage love", 7),
                        Query.not(Query.field("tag").oneOfThem().equalsValue("3:2")),
                        1, 758136569));

        // Disjunction of two text predicates, run through the OR-specific helper.
        assertEquals(Arrays.asList(Tuple.from(0L, 0L), Tuple.from(0L, 6L)),
                queryComplexDocumentsWithOr(
                        (OrComponent) Query.or(
                                Query.field("text").text().containsAll("unit named after"),
                                Query.field("text").text().containsPhrase("אן ארמיי און פלאט")),
                        0, -1558384887));

        // The remaining queries use the five-argument helper overload, which takes an extra
        // boolean before the group (its meaning is defined by the helper, not shown here).
        assertEquals(Collections.singletonList(Tuple.from(1L, 5L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("fearful passage love", 7),
                        Query.or(
                                Query.field("tag").oneOfThem().equalsValue("3:2"),
                                Query.field("tag").oneOfThem().equalsValue("3:0")),
                        true, 1, -27568755));
        assertEquals(Collections.singletonList(Tuple.from(1L, 1L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("fearful passage love", 7),
                        Query.rank(field("score").groupBy(field("group"))).lessThan(2L),
                        true, 1, -2132208833));
        assertEquals(Collections.singletonList(Tuple.from(1L, 5L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAllPrefixes("fear pass love", true),
                        Query.field("tag").oneOfThem().equalsValue("3:2"),
                        true, 1, -419325379));
        assertEquals(Collections.singletonList(Tuple.from(1L, 5L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAllPrefixes("fear pass love", false),
                        Query.field("tag").oneOfThem().equalsValue("3:2"),
                        false, 1, -1902024530));
        assertEquals(Collections.singletonList(Tuple.from(1L, 1L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAllPrefixes("fear pass love"),
                        Query.rank(field("score").groupBy(field("group"))).lessThan(2L),
                        true, 1, 669157421));

        commit(context);
    }
}
Example use of com.apple.foundationdb.record.provider.common.text.TextSamples in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryMapsWithGroups.
/**
 * Queries {@code MapDocument} records through {@code MAP_ON_VALUE_GROUPED_INDEX}.
 *
 * <p>The four text samples are consumed in pairs: document {@code i} gets an entry keyed
 * {@code "a"} holding sample {@code 2 * i}, an entry keyed {@code "b"} holding sample
 * {@code 2 * i + 1}, and group {@code i % 2}. That yields two documents, with ids 0 and 1.
 *
 * <p>NOTE(review): the trailing {@code int} handed to the query helper is presumably an expected
 * plan hash; the helper is defined outside this method, so confirm there.
 *
 * @throws Exception if opening the store, saving, querying, or committing fails
 */
@Test
public void queryMapsWithGroups() throws Exception {
    final List<String> samples = Arrays.asList(
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.AETHELRED,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.ANGSTROM);
    final int documentCount = samples.size() / 2;
    final List<MapDocument> mapDocuments = IntStream.range(0, documentCount)
            .mapToObj(docId -> {
                final MapDocument.Entry entryA = MapDocument.Entry.newBuilder()
                        .setKey("a")
                        .setValue(samples.get(docId * 2))
                        .build();
                final MapDocument.Entry entryB = MapDocument.Entry.newBuilder()
                        .setKey("b")
                        .setValue(samples.get(docId * 2 + 1))
                        .build();
                return MapDocument.newBuilder()
                        .setDocId(docId)
                        .addEntry(entryA)
                        .addEntry(entryB)
                        .setGroup(docId % 2)
                        .build();
            })
            .collect(Collectors.toList());

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.addIndex(MAP_DOC, MAP_ON_VALUE_GROUPED_INDEX);
        });
        mapDocuments.forEach(recordStore::saveRecord);

        // Phrase lookup under key "a" for group 1.
        assertEquals(Collections.singletonList(1L),
                queryMapDocumentsWithGroupedIndex(
                        "a",
                        Query.field("value").text().containsPhrase("both alike in dignity"),
                        1L,
                        1376087127));

        // Any-token lookup under key "b" for group 0.
        assertEquals(Collections.singletonList(0L),
                queryMapDocumentsWithGroupedIndex(
                        "b",
                        Query.field("value").text().containsAny("king anders"),
                        0L,
                        -1204479544));

        commit(context);
    }
}
Example use of com.apple.foundationdb.record.provider.common.text.TextSamples in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryMultiTypeDocuments.
/**
 * Queries a text index that spans both {@code SIMPLE_DOC} and {@code COMPLEX_DOC} record types.
 *
 * <p>Six documents are saved: even positions become {@code SimpleDocument}s and odd positions
 * become {@code ComplexDocument}s, each with doc id {@code i} and group {@code i % 4}. The
 * meta-data hook sets the complex type's primary key to {@code doc_id}, removes the default
 * simple text index, and registers {@code MULTI_TYPE_INDEX} over both types. Each query is then
 * run against both types, only the simple type, and only the complex type.
 *
 * <p>NOTE(review): the trailing {@code int} handed to the query helper is presumably an expected
 * plan hash; the helper is defined outside this method, so confirm there.
 *
 * @throws Exception if opening the store, saving, querying, or committing fails
 */
@Test
public void queryMultiTypeDocuments() throws Exception {
    final List<String> simpleAndComplex = Arrays.asList(SIMPLE_DOC, COMPLEX_DOC);
    final List<String> simpleOnly = Collections.singletonList(SIMPLE_DOC);
    final List<String> complexOnly = Collections.singletonList(COMPLEX_DOC);

    final List<String> samples = Arrays.asList(
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.ANGSTROM,
            TextSamples.AETHELRED,
            TextSamples.FRENCH,
            TextSamples.GERMAN);
    final List<Message> records = IntStream.range(0, samples.size())
            .mapToObj(position -> {
                final String body = samples.get(position);
                final int group = position % 4;
                // Odd positions are complex documents; everything else is a simple document.
                if (position % 2 != 0) {
                    return ComplexDocument.newBuilder().setDocId(position).setText(body).setGroup(group).build();
                }
                return SimpleDocument.newBuilder().setDocId(position).setText(body).setGroup(group).build();
            })
            .collect(Collectors.toList());

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.getRecordType(COMPLEX_DOC).setPrimaryKey(field("doc_id"));
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addMultiTypeIndex(
                    Arrays.asList(builder.getRecordType(SIMPLE_DOC), builder.getRecordType(COMPLEX_DOC)),
                    MULTI_TYPE_INDEX);
        });
        records.forEach(recordStore::saveRecord);

        // Phrase query: present in documents 0 (simple) and 1 (complex).
        assertEquals(Arrays.asList(0L, 1L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPhrase("where we lay our scene"),
                        simpleAndComplex, 1755757799));
        assertEquals(Collections.singletonList(0L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPhrase("where we lay our scene"),
                        simpleOnly, -1489953261));
        assertEquals(Collections.singletonList(1L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPhrase("where we lay our scene"),
                        complexOnly, -1333764399));

        // Prefix query: matches documents 2 and 4 (simple) and 5 (complex).
        assertEquals(Arrays.asList(2L, 4L, 5L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPrefix("na"),
                        simpleAndComplex, -714642562));
        assertEquals(Arrays.asList(2L, 4L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPrefix("na"),
                        simpleOnly, 334613674));
        assertEquals(Collections.singletonList(5L),
                queryMultiTypeDocuments(
                        Query.field("text").text().containsPrefix("na"),
                        complexOnly, 490802536));

        commit(context);
    }
}
Example use of com.apple.foundationdb.record.provider.common.text.TextSamples in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryComplexDocuments.
/**
 * Runs each kind of text predicate against {@code ComplexDocument} records through the
 * {@code COMPLEX_TEXT_BY_GROUP} index, one group at a time.
 *
 * <p>Four documents are saved; document {@code i} gets doc id {@code i}, group {@code i % 2}
 * and the {@code i}-th text sample. The predicates covered are {@code contains},
 * {@code containsAll} (with and without a numeric second argument), {@code containsAny},
 * {@code containsPhrase}, {@code containsPrefix}, {@code containsAnyPrefix} and
 * {@code containsAllPrefixes}.
 *
 * <p>NOTE(review): the expected tuples look like {@code (group, doc_id)} pairs and the trailing
 * {@code int} handed to the query helper looks like an expected plan hash; the helper is defined
 * outside this method, so confirm there.
 *
 * @throws Exception if opening the store, saving, querying, or committing fails
 */
@Test
public void queryComplexDocuments() throws Exception {
    final List<String> samples = Arrays.asList(
            TextSamples.ANGSTROM,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.AETHELRED,
            TextSamples.FRENCH);
    final List<ComplexDocument> complexDocuments = IntStream.range(0, samples.size())
            .mapToObj(docId -> ComplexDocument.newBuilder()
                    .setDocId(docId)
                    .setGroup(docId % 2)
                    .setText(samples.get(docId))
                    .build())
            .collect(Collectors.toList());

    // Expected results that several assertions share.
    final List<Tuple> firstDocInGroupZero = Collections.singletonList(Tuple.from(0L, 0L));
    final List<Tuple> secondDocInGroupOne = Collections.singletonList(Tuple.from(1L, 1L));

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder ->
                builder.addIndex(builder.getRecordType(COMPLEX_DOC), COMPLEX_TEXT_BY_GROUP));
        complexDocuments.forEach(recordStore::saveRecord);

        // Single-token containment.
        assertEquals(firstDocInGroupZero,
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().contains("angstrom"),
                        0, 372972877));

        // All-token containment: nothing in group 0, one hit in group 1.
        assertEquals(Collections.emptyList(),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("civil blood parents' strife"),
                        0L, -1615886689));
        assertEquals(secondDocInGroupOne,
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("civil blood parents' strife"),
                        1L, -1615886658));

        // Same tokens with a numeric second argument: 4 yields no hit, 35 yields the hit.
        assertEquals(Collections.emptyList(),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("civil blood parents' strife", 4),
                        1L, -1436111364));
        assertEquals(secondDocInGroupOne,
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAll("civil blood parents' strife", 35),
                        1L, -1436081573));

        // Any-token containment.
        assertEquals(Arrays.asList(Tuple.from(0L, 0L), Tuple.from(0L, 2L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAny("angstrom parents king napoleons"),
                        0L, -1092421072));

        // Phrase containment.
        assertEquals(Collections.singletonList(Tuple.from(1L, 3L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsPhrase("recu un Thiers"),
                        1L, 1395848801));

        // Prefix containment; the second result list is order-sensitive as written.
        assertEquals(firstDocInGroupZero,
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsPrefix("ang"),
                        0L, -1013515738));
        assertEquals(Arrays.asList(Tuple.from(1L, 3L), Tuple.from(1L, 1L)),
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsPrefix("un"),
                        1L, -995158140));

        // Any-prefix containment, compared as sets so result order is ignored.
        assertEquals(ImmutableSet.of(Tuple.from(0L, 0L), Tuple.from(0L, 2L)),
                ImmutableSet.copyOf(
                        queryComplexDocumentsWithIndex(
                                Query.field("text").text().containsAnyPrefix("ang par nap kin"),
                                0L, -1089713854)));

        // All-prefixes containment.
        assertEquals(firstDocInGroupZero,
                queryComplexDocumentsWithIndex(
                        Query.field("text").text().containsAllPrefixes("ang uni name", false),
                        0L, 646414402));

        commit(context);
    }
}
Example use of com.apple.foundationdb.record.provider.common.text.TextSamples in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryComplexDocumentsCovering.
/**
 * Checks that a phrase query restricted to group 0 is planned as a covering scan of
 * {@code COMPLEX_TEXT_BY_GROUP} when only {@code group} and {@code doc_id} are required.
 *
 * <p>Four documents are saved; document {@code i} gets doc id {@code i}, group {@code i % 2},
 * the {@code i}-th text sample and score {@code i}. The same filter is planned twice: once with
 * {@code group} and {@code doc_id} requested as separate fields, and once requested as a single
 * concatenated key expression. Both plans must pass the identical checks in
 * {@link #assertCoveredPhraseQuery(RecordQuery)}.
 *
 * @throws Exception if opening the store, saving, planning, executing, or committing fails
 */
@Test
public void queryComplexDocumentsCovering() throws Exception {
    final List<String> textSamples = Arrays.asList(
            TextSamples.FRENCH,
            TextSamples.GERMAN,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.YIDDISH);
    final List<ComplexDocument> documents = IntStream.range(0, textSamples.size())
            .mapToObj(i -> ComplexDocument.newBuilder()
                    .setDocId(i)
                    .setGroup(i % 2)
                    .setText(textSamples.get(i))
                    .setScore(i)
                    .build())
            .collect(Collectors.toList());

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, metaDataBuilder -> {
            metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            metaDataBuilder.addIndex(COMPLEX_DOC, COMPLEX_TEXT_BY_GROUP);
        });
        documents.forEach(recordStore::saveRecord);

        // Try to plan a covered query with separate group and doc_id fields
        assertCoveredPhraseQuery(RecordQuery.newBuilder()
                .setRecordType(COMPLEX_DOC)
                .setRequiredResults(Arrays.asList(field("group"), field("doc_id")))
                .setFilter(Query.and(
                        Query.field("group").equalsValue(0L),
                        Query.field("text").text().containsPhrase("continuance of their parents' rage")))
                .build());

        // Try to plan a covered query with one concatenated field
        assertCoveredPhraseQuery(RecordQuery.newBuilder()
                .setRecordType(COMPLEX_DOC)
                .setRequiredResults(Collections.singletonList(concatenateFields("group", "doc_id")))
                .setFilter(Query.and(
                        Query.field("group").equalsValue(0L),
                        Query.field("text").text().containsPhrase("continuance of their parents' rage")))
                .build());

        commit(context);
    }
}

/**
 * Plans {@code query}, verifies the plan shape and its three plan hashes, executes it, and
 * verifies that the only result is the document with group 0 and doc id 2.
 *
 * <p>The expectations are fixed to the data and filter used by
 * {@link #queryComplexDocumentsCovering()}: a phrase comparison on
 * {@code "continuance of their parents' rage"} within grouping bounds {@code [[0],[0]]}.
 *
 * @param query the query to plan and run
 * @throws Exception if planning or executing the query fails
 */
private void assertCoveredPhraseQuery(RecordQuery query) throws Exception {
    final RecordQueryPlan plan = planner.plan(query);
    assertThat(plan, coveringIndexScan(textIndexScan(allOf(
            indexName(COMPLEX_TEXT_BY_GROUP.getName()),
            groupingBounds(hasTupleString("[[0],[0]]")),
            textComparison(equalTo(new Comparisons.TextComparison(Comparisons.Type.TEXT_CONTAINS_PHRASE, "continuance of their parents' rage", null, DefaultTextTokenizer.NAME)))))));
    assertEquals(822541560, plan.planHash(PlanHashable.PlanHashKind.LEGACY));
    assertEquals(-1798902497, plan.planHash(PlanHashable.PlanHashKind.FOR_CONTINUATION));
    assertEquals(770172924, plan.planHash(PlanHashable.PlanHashKind.STRUCTURAL_WITHOUT_LITERALS));

    final List<ComplexDocument> results = recordStore.executeQuery(plan)
            .map(rec -> ComplexDocument.newBuilder().mergeFrom(rec.getRecord()).build())
            .asList()
            .get();
    // Expected value goes first so that a failure reports "expected"/"actual" the right way round.
    assertEquals(1, results.size());
    final ComplexDocument result = results.get(0);
    assertEquals(0L, result.getGroup());
    assertEquals(2L, result.getDocId());
    // Every saved document had a score set, yet the returned message must not carry one;
    // presumably because a covering plan rebuilds the record from index entries only.
    assertThat(result.hasScore(), is(false));
}
Aggregations