Use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.
From the class TextIndexTest, method querySimpleDocumentsWithoutPositions.
/**
 * Runs text queries against an index that is first declared as {@code SIMPLE_TEXT_NO_POSITIONS} and
 * is then re-declared, under the same name and root expression, as a plain {@code IndexTypes.TEXT} index.
 *
 * <p>Phase one saves four documents and checks that position-free predicates are answered from the
 * index while position-dependent predicates fall back to a scan. Phase two saves copies of the same
 * documents under shifted ids and checks that position-dependent predicates are now answered from
 * the index too, but that only the newly written copies match them correctly.</p>
 */
@Test
public void querySimpleDocumentsWithoutPositions() throws Exception {
    final String indexName = SIMPLE_TEXT_NO_POSITIONS.getName();
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(
            TextSamples.ANGSTROM,
            TextSamples.AETHELRED,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.FRENCH));

    // Phase one: swap the default text index for the one declared without positions, then query.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_NO_POSITIONS);
        });
        for (SimpleDocument document : documents) {
            recordStore.saveRecord(document);
        }

        // Predicates that do *not* need positions should be planned against the index.
        assertEquals(Arrays.asList(1L, 2L, 3L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAny("king civil récu"), indexName, 0, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("unclean verona"), indexName, 0, true));
        assertEquals(Arrays.asList(0L, 1L, 2L, 3L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("th"), indexName, 0, true));

        // Predicates that *do* need positions (phrase, proximity) must be planned as scans.
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithScan(Query.field("text").text().containsPhrase("civil blood makes civil hands unclean"), 0));
        assertEquals(Collections.singletonList(3L),
                querySimpleDocumentsWithScan(Query.field("text").text().containsAll("France Napoleons", 3), 0));

        commit(context);
    }

    // Copies of the original documents whose ids are shifted past the originals (0..3 become 4..7).
    final long idOffset = documents.size();
    final List<SimpleDocument> newDocuments = documents.stream()
            .map(original -> original.toBuilder().setDocId(original.getDocId() + idOffset).build())
            .collect(Collectors.toList());

    // Phase two: upgrade to writing position information by re-declaring the index as a plain text index.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> {
            builder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
            builder.addIndex(SIMPLE_DOC, new Index(indexName, SIMPLE_TEXT_NO_POSITIONS.getRootExpression(), IndexTypes.TEXT));
        });
        for (SimpleDocument document : newDocuments) {
            recordStore.saveRecord(document);
        }

        // Predicates that do *not* need positions are planned the same way as before.
        assertEquals(Arrays.asList(1L, 2L, 3L, 5L, 6L, 7L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAny("king civil récu"), indexName, 0, true));
        assertEquals(Arrays.asList(2L, 6L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("unclean verona"), indexName, 0, true));
        assertEquals(Arrays.asList(0L, 1L, 2L, 4L, 5L, 6L, 3L, 7L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("th"), indexName, 0, true));

        // Predicates that *do* need positions now use the index. Documents written before the
        // upgrade show up spuriously in some of these results.
        assertEquals(Arrays.asList(2L, 6L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase("civil blood makes civil hands unclean"), indexName, 0, true));
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase("unclean verona"), indexName, 0, true));
        assertEquals(Arrays.asList(3L, 7L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("France Napoleons", 3), indexName, 0, true));
        assertEquals(Collections.singletonList(3L),
                querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("Thiers Napoleons", 3), indexName, 0, true));

        commit(context);
    }
}
Use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.
From the class TextIndexTest, method textIndexPerf100InsertOneBatch.
/**
 * Performance probe: saves 100 randomly generated documents one after another inside a single
 * transaction, logs the wall-clock time from opening the context through the commit, and then
 * calls {@code printUsage()}.
 */
@Tag(Tags.Performance)
@Test
public void textIndexPerf100InsertOneBatch() throws Exception {
    // Create 100 records
    final Random random = new Random();
    final List<SimpleDocument> records = getRandomRecords(random, 100);

    final long startNanos = System.nanoTime();
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        for (SimpleDocument document : records) {
            recordStore.saveRecord(document);
        }
        commit(context);
    }
    final long elapsedNanos = System.nanoTime() - startNanos;

    // Scale nanoseconds to seconds for the log line.
    LOGGER.info("performed 100 serial insertions in {} seconds.", elapsedNanos * 1e-9);
    printUsage();
}
Use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.
From the class TextIndexTest, method saveSimpleDocumentsWithPositionsOptionChange.
/**
 * Saves three documents in three separate transactions while the same-named index alternates
 * between a plain {@code IndexTypes.TEXT} definition and {@code SIMPLE_TEXT_NO_POSITIONS}, then
 * scans the index to check what was stored for each document.
 *
 * <p>The two documents saved under the plain text definition are expected to carry position lists;
 * the document saved under {@code SIMPLE_TEXT_NO_POSITIONS} is expected to carry an empty list.</p>
 */
@Test
public void saveSimpleDocumentsWithPositionsOptionChange() throws Exception {
    final SimpleDocument shakespeareDocument = SimpleDocument.newBuilder()
            .setDocId(1623L)
            .setText(TextSamples.ROMEO_AND_JULIET_PROLOGUE)
            .build();
    final SimpleDocument yiddishDocument = SimpleDocument.newBuilder()
            .setDocId(1945L)
            .setText(TextSamples.YIDDISH)
            .build();
    final SimpleDocument frenchDocument = SimpleDocument.newBuilder()
            .setDocId(1871L)
            .setText(TextSamples.FRENCH)
            .build();

    // First transaction: the index is a plain text index, so this document is saved *with* positions.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder ->
                builder.addIndex(SIMPLE_DOC, new Index(SIMPLE_TEXT_NO_POSITIONS.getName(), SIMPLE_TEXT_NO_POSITIONS.getRootExpression(), IndexTypes.TEXT)));
        recordStore.saveRecord(shakespeareDocument);
        commit(context);
    }

    // Second transaction: same index name, but this document is saved *without* positions.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> builder.addIndex(SIMPLE_DOC, SIMPLE_TEXT_NO_POSITIONS));
        recordStore.saveRecord(yiddishDocument);
        commit(context);
    }

    // Third transaction: back to the plain text index, so this document is saved *with* positions.
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder ->
                builder.addIndex(SIMPLE_DOC, new Index(SIMPLE_TEXT_NO_POSITIONS.getName(), SIMPLE_TEXT_NO_POSITIONS.getRootExpression(), IndexTypes.TEXT)));
        recordStore.saveRecord(frenchDocument);

        // A token from the first document: positions were recorded.
        final List<Map.Entry<Tuple, List<Integer>>> civilEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_NO_POSITIONS, Tuple.from("civil"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(22, 25))), civilEntries);

        // A token from the second document: the entry exists but its position list is empty.
        final List<Map.Entry<Tuple, List<Integer>>> yiddishEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_NO_POSITIONS, Tuple.from("דיאלעקט"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1945L), Collections.emptyList())), yiddishEntries);

        // A token from the third document: positions were recorded again.
        final List<Map.Entry<Tuple, List<Integer>>> frenchEntries =
                scanMapEntries(recordStore, SIMPLE_TEXT_NO_POSITIONS, Tuple.from("recu"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1871L), Collections.singletonList(5))), frenchEntries);

        commit(context);
    }
}
Use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.
From the class TextIndexTest, method backwardsRangeScanRaceCondition.
// Regression test. An older implementation used a reverse range scan to find the keys immediately
// before and after a map key in order to decide where an insertion should go. A race condition
// could make that scan return two keys that were both greater than the map key. This test was
// able to reproduce that error when run alone.
//
// It saves one large document and commits, then deletes and re-saves the same document in a
// second transaction that is never committed. If a RuntimeException escapes the second
// transaction, the first LoggableException in its cause chain (if there is one) is logged with
// its key/value info and rethrown; otherwise the original exception is rethrown.
@Test
public void backwardsRangeScanRaceCondition() throws Exception {
    final Random random = new Random(0x5ca1ab1e);
    final List<String> lexicon = Arrays.asList(TextSamples.ROMEO_AND_JULIET_PROLOGUE.split(" "));
    final SimpleDocument bigDocument = getRandomRecords(random, 1, lexicon, 100, 0).get(0);

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> builder.setSplitLongRecords(true));
        LOGGER.info(KeyValueLogMessage.of("saving document", LogMessageKeys.DOCUMENT, bigDocument));
        recordStore.saveRecord(bigDocument);
        commit(context);
    }

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context, builder -> builder.setSplitLongRecords(true));
        recordStore.deleteRecord(Tuple.from(bigDocument.getDocId()));
        recordStore.saveRecord(bigDocument);
        // Deliberately not committed.
    } catch (RuntimeException e) {
        // Walk the cause chain until a LoggableException is found or the chain runs out.
        Throwable cause = e;
        while (cause != null && !(cause instanceof LoggableException)) {
            cause = cause.getCause();
        }
        if (cause == null) {
            // Nothing loggable in the chain: surface the original failure untouched.
            throw e;
        }
        final LoggableException loggable = (LoggableException) cause;
        LOGGER.error(KeyValueLogMessage.build("unable to save record").addKeysAndValues(loggable.getLogInfo()).toString(), cause);
        throw loggable;
    }
}
Use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in project fdb-record-layer by FoundationDB.
From the class TextIndexTest, method querySimpleDocumentsWithAdditionalFilters.
/**
 * Checks text predicates combined with other filters: a {@code group} comparison, a second text
 * predicate, {@code Query.or}, and {@code Query.not}.
 *
 * <p>Each assertion compares the returned document ids with the expected ids. The integer passed
 * to the query helpers is presumably the expected plan hash; confirm against the helper
 * definitions, which are not visible here.</p>
 */
@Test
public void querySimpleDocumentsWithAdditionalFilters() throws Exception {
    final String strifePhrase = "bury their parents' strife";
    final String civilUncleanBlood = "civil unclean blood";
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.ROMEO_AND_JULIET_PROLOGUE,
            TextSamples.AETHELRED,
            TextSamples.ANGSTROM));

    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        for (SimpleDocument document : documents) {
            recordStore.saveRecord(document);
        }

        // Equality text predicates
        assertEquals(Collections.singletonList(3L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("group").equalsValue(1L), Query.field("text").text().contains("was")),
                        661433949, false));
        assertEquals(Collections.singletonList(0L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("group").equalsValue(0L), Query.field("text").text().containsPhrase(strifePhrase)),
                        -1454788243, false));
        assertEquals(Collections.singletonList(1L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("group").equalsValue(1L), Query.field("text").text().containsPhrase(strifePhrase)),
                        -1454788242, false));
        assertEquals(Arrays.asList(0L, 1L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("group").lessThanOrEquals(2L), Query.field("text").text().containsAny(strifePhrase)),
                        -1259238340, false));

        // In theory, this could be an index intersection, but it is not.
        assertEquals(Collections.singletonList(2L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("text").text().contains("the"), Query.field("text").text().contains("king")),
                        742257848, false));

        // Prefix text predicates
        assertEquals(Arrays.asList(0L, 1L),
                querySimpleDocumentsWithIndex(
                        Query.and(
                                Query.field("group").lessThanOrEquals(2L),
                                Query.field("text").text().containsPrefix("par"),
                                Query.field("text").text().containsPrefix("blo")),
                        -416906621, false));
        assertEquals(Arrays.asList(1L, 3L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("group").equalsValue(1L), Query.field("text").text().containsPrefix("an")),
                        1318510566, false));
        assertEquals(Arrays.asList(0L, 1L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("text").text().containsAll(civilUncleanBlood), Query.field("text").text().containsPrefix("blo")),
                        912028198, false));

        // Performs a union of the two text queries. Compared as a set, so result order is not checked.
        assertEquals(ImmutableSet.of(0L, 1L, 2L),
                ImmutableSet.copyOf(querySimpleDocumentsWithIndex(
                        Query.or(Query.field("text").text().containsPrefix("ency"), Query.field("text").text().containsPrefix("civ")),
                        -1250585991, false)));
        assertEquals(Arrays.asList(0L, 2L),
                querySimpleDocumentsWithIndex(
                        Query.and(
                                Query.field("group").equalsValue(0L),
                                Query.or(
                                        Query.field("text").text().containsAll(civilUncleanBlood, 4),
                                        Query.field("text").text().containsAll("king was 1016"))),
                        1313228370, false));
        assertEquals(ImmutableSet.of(0L, 2L),
                ImmutableSet.copyOf(querySimpleDocumentsWithIndex(
                        Query.and(
                                Query.field("group").equalsValue(0L),
                                Query.or(
                                        Query.field("text").text().containsAll(civilUncleanBlood, 4),
                                        Query.field("text").text().containsPrefix("ency"))),
                        873750052, false)));

        // A bare "not". There is little this query can do other than scan, because by its very
        // nature it can return a lot of results.
        assertEquals(Collections.singletonList(3L),
                querySimpleDocumentsWithScan(
                        Query.not(Query.field("text").text().containsAny("king unclean")),
                        784296935));

        // Scans the index for the first predicate and then applies the second as a "not".
        // In theory, it could scan the index twice and filter out the "not".
        assertEquals(Arrays.asList(0L, 1L, 3L),
                querySimpleDocumentsWithIndex(
                        Query.and(Query.field("text").text().contains("the"), Query.not(Query.field("text").text().contains("king"))),
                        742257849, false));

        commit(context);
    }
}
Aggregations