Example use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method textIndexPerf1000ParallelInsert.
/**
 * Performance test: saves 1000 randomly generated documents from ten
 * concurrently running worker threads and logs the elapsed time.
 *
 * <p>The records are split into equally sized contiguous slices, one per
 * worker. Each worker writes its slice in batches of at most ten documents,
 * issuing one {@code fdb.run} call per batch. The database factory's maximum
 * attempt count is raised to {@link Integer#MAX_VALUE} for the duration of the
 * test and restored afterwards.
 *
 * @throws Exception if store setup fails or any worker fails; a worker failure
 *         surfaces through the {@code get()} on the combined future
 */
@Tag(Tags.Performance)
@Test
public void textIndexPerf1000ParallelInsert() throws Exception {
    // Create 1000 records
    Random r = new Random();
    List<SimpleDocument> records = getRandomRecords(r, 1000);
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        recordStore.asBuilder().create();
        commit(context);
    }
    final FDBRecordStore.Builder storeBuilder = recordStore.asBuilder();
    final int batchSize = 10;
    long startTime = System.nanoTime();
    int oldMaxAttempts = FDBDatabaseFactory.instance().getMaxAttempts();
    // NOTE(review): presumably raised so that conflicting batches keep retrying
    // instead of failing the test — confirm against FDBDatabaseFactory.
    FDBDatabaseFactory.instance().setMaxAttempts(Integer.MAX_VALUE);
    try {
        CompletableFuture<?>[] workerFutures = new CompletableFuture<?>[10];
        // Integer division: any remainder records would not be assigned to a
        // worker (1000 / 10 divides evenly here).
        int recordsPerWorker = records.size() / workerFutures.length;
        for (int i = 0; i < workerFutures.length; i++) {
            List<SimpleDocument> workerDocs = records.subList(i * recordsPerWorker, (i + 1) * recordsPerWorker);
            CompletableFuture<Void> workerFuture = new CompletableFuture<>();
            Thread workerThread = new Thread(() -> {
                try {
                    for (int j = 0; j < workerDocs.size(); j += batchSize) {
                        // Use retry loop to catch not_committed errors
                        // Clamp the upper bound so a final, shorter batch cannot
                        // run past the end of this worker's slice.
                        List<SimpleDocument> batchDocuments = workerDocs.subList(j, Math.min(j + batchSize, workerDocs.size()));
                        fdb.run(context -> {
                            try {
                                FDBRecordStore store = storeBuilder.copyBuilder().setContext(context).open();
                                for (SimpleDocument document : batchDocuments) {
                                    store.saveRecord(document);
                                }
                                return null;
                            } catch (RecordCoreException e) {
                                throw e;
                            } catch (Exception e) {
                                throw new RecordCoreException(e);
                            }
                        });
                    }
                    workerFuture.complete(null);
                } catch (Throwable t) {
                    // Catch Throwable, not just RuntimeException: if an Error
                    // escaped without completing the future, the allOf(...).get()
                    // below would block forever instead of failing the test.
                    workerFuture.completeExceptionally(t);
                }
            });
            workerThread.setName("insert-worker-" + i);
            workerThread.start();
            workerFutures[i] = workerFuture;
        }
        // Blocks until every worker has completed its future, normally or exceptionally.
        CompletableFuture.allOf(workerFutures).get();
        long endTime = System.nanoTime();
        LOGGER.info("performed 1000 parallel insertions in {} seconds.", (endTime - startTime) * 1e-9);
        printUsage();
    } finally {
        // Always restore the shared factory setting changed above.
        FDBDatabaseFactory.instance().setMaxAttempts(oldMaxAttempts);
    }
}
Example use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method saveSimpleDocuments.
/**
 * Saves and deletes several {@link SimpleDocument} records in one transaction
 * and, after each operation, checks both the contents of the text index
 * ({@code TextIndexTestUtils.SIMPLE_DEFAULT_NAME}) and the index key/byte
 * counters reported by the {@code get*IndexKey*} helpers.
 *
 * <p>The counters are reset with {@code resetTimer} between operations, so the
 * order of the statements below is significant: each assertion refers to the
 * single save or delete that immediately precedes it.
 *
 * @throws Exception if any store operation or index scan fails
 */
@Test
public void saveSimpleDocuments() throws Exception {
    // Fixtures: three documents with text, one with no text field set, and one with empty text.
    final SimpleDocument simpleDocument = SimpleDocument.newBuilder().setDocId(1066L).setText("This is a simple document. There isn't much going on here, if I'm honest.").setGroup(0).build();
    final SimpleDocument buffaloDocument = SimpleDocument.newBuilder().setDocId(1415L).setText("Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo Buffalo buffalo buffalo.").setGroup(1).build();
    final SimpleDocument shakespeareDocument = SimpleDocument.newBuilder().setDocId(1623L).setText(TextSamples.ROMEO_AND_JULIET_PROLOGUE).setGroup(2).build();
    final SimpleDocument noTextDocument = SimpleDocument.newBuilder().setDocId(0L).setGroup(0).build();
    final SimpleDocument emptyDocument = SimpleDocument.newBuilder().setDocId(1L).setGroup(1).setText("").build();
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        Index index = recordStore.getRecordMetaData().getIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);

        // 1) First save: expect one saved index key per space-separated word of the text.
        recordStore.saveRecord(simpleDocument);
        final int firstKeys = getSaveIndexKeyCount(recordStore);
        assertEquals(simpleDocument.getText().split(" ").length, firstKeys);
        // Remember the byte counts of this save; later steps compare against them.
        final int firstKeyBytesWritten = getSaveIndexKeyBytes(recordStore);
        final int firstValueBytesWritten = getSaveIndexValueBytes(recordStore);
        // "document" is the fifth word of the text, i.e. offset 4.
        List<Map.Entry<Tuple, List<Integer>>> entryList = scanMapEntries(recordStore, index, Tuple.from("document"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1066L), Collections.singletonList(4))), entryList);

        // 2) Second save: eleven occurrences of one word yield a single key holding offsets 0..10.
        resetTimer(recordStore);
        recordStore.saveRecord(buffaloDocument);
        final int secondKeys = getSaveIndexKeyCount(recordStore);
        assertEquals(1, secondKeys);
        entryList = scanMapEntries(recordStore, index, Tuple.from("buffalo"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1415L), IntStream.range(0, 11).boxed().collect(Collectors.toList()))), entryList);

        // 3) Third save: the prologue is expected to write 82 index keys.
        resetTimer(recordStore);
        recordStore.saveRecord(shakespeareDocument);
        final int thirdKeys = getSaveIndexKeyCount(recordStore);
        assertEquals(82, thirdKeys);
        final int thirdBytesWritten = getSaveIndexKeyBytes(recordStore) + getSaveIndexValueBytes(recordStore);
        entryList = scanMapEntries(recordStore, index, Tuple.from("parents"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1623L), Arrays.asList(57, 72))), entryList);
        // Scan a range of tokens: entries from both documents come back ordered by (token, doc id).
        entryList = toMapEntries(scanIndex(recordStore, index, TupleRange.prefixedBy("h")), null);
        assertEquals(Arrays.asList(entryOf(Tuple.from("hands", 1623), Collections.singletonList(26)), entryOf(Tuple.from("here", 1066), Collections.singletonList(10)), entryOf(Tuple.from("here", 1623), Collections.singletonList(101)), entryOf(Tuple.from("honest", 1066), Collections.singletonList(13)), entryOf(Tuple.from("hours", 1623), Collections.singletonList(87)), entryOf(Tuple.from("households", 1623), Collections.singletonList(1))), entryList);
        // The same range resolved to records: one record per index entry, in the same order.
        List<Message> recordList = recordStore.scanIndexRecords(index.getName(), BY_TEXT_TOKEN, TupleRange.prefixedBy("h"), null, ScanProperties.FORWARD_SCAN).map(FDBIndexedRecord::getRecord).asList().get();
        assertEquals(Arrays.asList(shakespeareDocument, simpleDocument, shakespeareDocument, simpleDocument, shakespeareDocument, shakespeareDocument), recordList);

        // 4) A document without a text field touches no index keys.
        resetTimer(recordStore);
        recordStore.saveRecord(noTextDocument);
        assertEquals(0, getSaveIndexKeyCount(recordStore));
        assertEquals(0, getLoadIndexKeyCount(recordStore));

        // 5) Neither does a document whose text is the empty string.
        resetTimer(recordStore);
        recordStore.saveRecord(emptyDocument);
        assertEquals(0, getSaveIndexKeyCount(recordStore));
        assertEquals(0, getLoadIndexKeyCount(recordStore));

        // 6) Delete the prologue document.
        resetTimer(recordStore);
        recordStore.deleteRecord(Tuple.from(1623L));
        // all deleted but four overlaps with first record
        assertEquals(thirdKeys - 4, getDeleteIndexKeyCount(recordStore));
        // four keys of overlap overwritten
        assertEquals(4, getSaveIndexKeyCount(recordStore));
        // Bytes deleted are bounded: more than (thirdKeys - 1) and less than what the third save wrote.
        assertThat(getDeleteIndexKeyBytes(recordStore) + getDeleteIndexValueBytes(recordStore), allOf(greaterThan(thirdKeys - 1), lessThan(thirdBytesWritten)));
        entryList = scanMapEntries(recordStore, index, Tuple.from("parents"));
        assertEquals(Collections.emptyList(), entryList);

        // 7) Save a copy of the first document under a new id (1707) so it shares every token with 1066.
        resetTimer(recordStore);
        recordStore.saveRecord(simpleDocument.toBuilder().setDocId(1707L).build());
        assertEquals(firstKeys * 2, getLoadIndexKeyCount(recordStore));
        assertEquals(firstKeys, getSaveIndexKeyCount(recordStore));
        // should overwrite all the same keys
        assertEquals(firstKeyBytesWritten, getSaveIndexKeyBytes(recordStore));
        final int seventhValueBytesWritten = getSaveIndexValueBytes(recordStore);
        // contains same info as first value bytes + extra keys, but not key prefixes
        assertThat(seventhValueBytesWritten, allOf(greaterThan(firstValueBytesWritten), lessThan(firstKeyBytesWritten + firstValueBytesWritten)));
        entryList = scanMapEntries(recordStore, index, Tuple.from("document"));
        assertEquals(Arrays.asList(entryOf(Tuple.from(1066L), Collections.singletonList(4)), entryOf(Tuple.from(1707L), Collections.singletonList(4))), entryList);

        // 8) Delete the original document (1066); only the 1707 copy should remain in the index.
        resetTimer(recordStore);
        recordStore.deleteRecord(Tuple.from(1066L));
        assertEquals(firstKeys, getLoadIndexKeyCount(recordStore));
        // each of the original keys are deleted
        assertEquals(firstKeys, getDeleteIndexKeyCount(recordStore));
        assertEquals(firstKeyBytesWritten, getDeleteIndexKeyBytes(recordStore));
        assertEquals(firstValueBytesWritten + seventhValueBytesWritten, getDeleteIndexValueBytes(recordStore));
        // a new set of keys are all written
        assertEquals(firstKeys, getSaveIndexKeyCount(recordStore));
        // they should have the same size (though their contents are different)
        assertEquals(firstKeyBytesWritten, getSaveIndexKeyBytes(recordStore));
        assertEquals(firstValueBytesWritten, getSaveIndexValueBytes(recordStore));
        entryList = scanMapEntries(recordStore, index, Tuple.from("document"));
        assertEquals(Collections.singletonList(entryOf(Tuple.from(1707L), Collections.singletonList(4))), entryList);
        commit(context);
    }
}
Example use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method querySimpleDocuments.
/**
 * Saves seven sample documents (ids 0 through 6 are what the assertions below
 * expect) and runs each kind of text predicate against them through
 * {@code querySimpleDocumentsWithIndex}, comparing the returned document ids
 * with the expected ones.
 *
 * <p>NOTE(review): the integer passed to {@code querySimpleDocumentsWithIndex}
 * is presumably the expected plan hash and the trailing boolean presumably
 * states whether the predicate is expected to be fully satisfied by the index
 * scan — confirm against that helper, which is not visible here.
 *
 * <p>NOTE(review): several cases pass the same token once as a plain string
 * and once inside a list and expect different results (for example
 * {@code containsAll("Ångström")} matches document 0, while the list form
 * matches nothing). Presumably string arguments are tokenized and normalized
 * while list arguments are taken as already-normalized tokens — confirm.
 *
 * @throws Exception if saving or querying fails
 */
@Test
public void querySimpleDocuments() throws Exception {
    final List<SimpleDocument> documents = TextIndexTestUtils.toSimpleDocuments(Arrays.asList(TextSamples.ANGSTROM, TextSamples.AETHELRED, TextSamples.ROMEO_AND_JULIET_PROLOGUE, TextSamples.YIDDISH, TextSamples.CHINESE_SIMPLIFIED, TextSamples.KOREAN, "a b a b a b c"));
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        documents.forEach(recordStore::saveRecord);
        // Contains
        assertEquals(Arrays.asList(0L, 1L, 2L), querySimpleDocumentsWithIndex(Query.field("text").text().contains("the"), 329921958, true));
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().contains("angstrom"), -1859676822, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().contains("Ångström"), 2028628575, true));
        assertEquals(Collections.singletonList(3L), querySimpleDocumentsWithIndex(Query.field("text").text().contains("שפראך"), 1151275308, true));
        // Contains all
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("Ångström"), 1999999424, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll(Collections.singletonList("Ångström")), 2028628575, true));
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("the angstrom"), 865061914, true));
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll(Arrays.asList("the", "angstrom")), 4380219, true));
        // An empty-string token in the list does not prevent the match.
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll(Arrays.asList("", "angstrom")), -1000802292, true));
        assertEquals(Collections.singletonList(5L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("한국어를"), -1046915537, true));
        // Contains all within a distance
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("Ångström named", 4), -1408252035, true));
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("Ångström named", 3), -1408252996, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("Ångström named", 2), -1408253957, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll(Arrays.asList("Ångström", "named"), 4), -2041874864, true));
        // Document 6 is "a b a b a b c": the cases below probe the smallest distance that still matches.
        assertEquals(Collections.singletonList(6L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("a c", 2), 2135218554, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("a c", 1), 2135217593, true));
        assertEquals(Collections.singletonList(6L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("b c", 2), -416938407, true));
        assertEquals(Collections.singletonList(6L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAll("b c", 1), -416939368, true));
        // Contains any
        assertEquals(Collections.singletonList(0L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAny("Ångström"), -147781547, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsAny(Collections.singletonList("Ångström")), -119152396, true));
        assertEquals(Arrays.asList(0L, 1L, 2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAny("the angstrom"), -1282719057, true));
        assertEquals(Arrays.asList(0L, 1L, 2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAny(Arrays.asList("the", "angstrom")), -2143400752, true));
        // Contains phrase
        assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase("Civil blood makes. Civil hands unclean"), -993768059, true));
        // In the list form, an empty string stands in for one phrase position ("hands" in the string form above).
        assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase(Arrays.asList("civil", "blood", "makes", "civil", "", "unclean")), 1855137352, true));
        assertEquals(Collections.emptyList(), querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase(Arrays.asList("Civil", "blood", "makes", "civil", "", "unclean")), 853144168, true));
        assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase(Arrays.asList("", "civil", "blood", "makes", "civil", "", "unclean", "")), 930039198, true));
        assertEquals(Collections.singletonList(6L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPhrase("a b a b c"), -623744405, true));
        // Contains prefix
        // The expected list here is not in ascending id order; the list comparison pins the order returned.
        assertEquals(Arrays.asList(2L, 0L, 1L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("un"), 1067159426, true));
        assertEquals(Collections.singletonList(3L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("א"), -1009839303, true));
        assertEquals(Collections.singletonList(4L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("苹果"), -1529274452, true));
        assertEquals(Collections.singletonList(5L), querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix(Normalizer.normalize("한국", Normalizer.Form.NFKD)), -1860545817, true));
        assertEquals(Collections.singletonList(5L), // note that the second character is only 2 of the 3 Jamo components
                querySimpleDocumentsWithIndex(Query.field("text").text().containsPrefix("한구"), 1377518291, true));
        // Contains any prefix
        // Results are compared as sets, so result order is not checked for these cases.
        assertEquals(ImmutableSet.of(0L, 1L, 2L, 3L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAnyPrefix("civ א un"), 1227233680, true)));
        assertEquals(ImmutableSet.of(0L, 1L, 2L, 3L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAnyPrefix("cIv ַא Un"), -794472473, true)));
        assertEquals(ImmutableSet.of(0L, 1L, 2L, 3L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAnyPrefix(Arrays.asList("civ", "א", "un")), 1486849487, true)));
        assertEquals(ImmutableSet.of(2L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAnyPrefix(Arrays.asList("civ", "אַ", "Un")), 1905505336, true)));
        // Contains all prefixes
        // Each query is run twice: with the one-argument overload (helper flag false) and with an
        // explicit second argument of false (helper flag true).
        assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAllPrefixes("civ un"), 1757831895, false));
        assertEquals(Collections.singletonList(2L), querySimpleDocumentsWithIndex(Query.field("text").text().containsAllPrefixes("civ un", false), -900079353, true));
        assertEquals(ImmutableSet.of(0L, 1L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAllPrefixes("wa th"), -1203466155, false)));
        assertEquals(ImmutableSet.of(0L, 1L), new HashSet<>(querySimpleDocumentsWithIndex(Query.field("text").text().containsAllPrefixes("wa th", false), -433119192, true)));
        commit(context);
    }
}
Example use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method getRandomRecords.
/**
 * Builds {@code count} documents with random ids and random text.
 *
 * <p>For every document an id is drawn from {@code r.nextLong()} first, then a
 * word list is obtained from {@code getRandomWords}; the words are joined with
 * single spaces to form the document text. The weights handed to
 * {@code getRandomWords} are computed once from the lexicon by
 * {@code getZipfProportions}.
 *
 * @param r source of randomness for both ids and words
 * @param count number of documents to generate
 * @param lexicon words to draw from
 * @param tokenAverage passed through to {@code getRandomWords}; presumably the mean token count per document — confirm
 * @param tokenSd passed through to {@code getRandomWords}; presumably the standard deviation of the token count — confirm
 * @return a new list of {@code count} documents, in generation order
 */
@Nonnull
private List<SimpleDocument> getRandomRecords(@Nonnull Random r, int count, @Nonnull List<String> lexicon, int tokenAverage, int tokenSd) {
    final List<SimpleDocument> documents = new ArrayList<>(count);
    final double[] lexiconWeights = getZipfProportions(lexicon);
    for (int produced = 0; produced < count; produced++) {
        // Draw the id before the words so the sequence of random draws is unchanged.
        final long docId = r.nextLong();
        final String text = String.join(" ", getRandomWords(r, lexicon, lexiconWeights, tokenAverage, tokenSd));
        documents.add(SimpleDocument.newBuilder().setDocId(docId).setText(text).build());
    }
    return documents;
}
Example use of com.apple.foundationdb.record.TestRecordsTextProto.SimpleDocument in the fdb-record-layer project by FoundationDB.
From the class TextIndexTest, method queryDocumentsWithScanLimit.
/**
 * Checks that a text query honours a scanned-records limit and can be resumed
 * from its continuation until the source is exhausted.
 *
 * <p>One hundred documents are saved in batches of ten, each in its own
 * transaction; even ids get the text {@code "some"} and odd ids {@code "text"},
 * so no document contains both tokens. The query
 * {@code containsAll("some text")} is then executed repeatedly with a
 * scanned-records limit of 50. Every execution must return no records. Each
 * one that stops early must have loaded exactly 50 text entries and report
 * {@code SCAN_LIMIT_REACHED}; the final one must report an exhausted source
 * with a null continuation.
 *
 * @throws Exception if saving, planning or executing the query fails
 */
@Test
public void queryDocumentsWithScanLimit() throws Exception {
    // Load a big (ish) data set
    final int recordCount = 100;
    final int batchSize = 10;
    for (int i = 0; i < recordCount; i += batchSize) {
        try (FDBRecordContext context = openContext()) {
            openRecordStore(context);
            for (int j = 0; j < batchSize; j++) {
                SimpleDocument document = SimpleDocument.newBuilder().setDocId(i + j).setText((i + j) % 2 == 0 ? "some" : "text").build();
                recordStore.saveRecord(document);
            }
            commit(context);
        }
    }
    try (FDBRecordContext context = openContext()) {
        openRecordStore(context);
        RecordQuery query = RecordQuery.newBuilder().setRecordType(SIMPLE_DOC).setFilter(Query.field("text").text().containsAll("some text")).build();
        RecordQueryPlan plan = planner.plan(query);
        boolean done = false;
        int totalKeysLoaded = 0;
        byte[] continuation = null;
        while (!done) {
            final int priorKeysLoaded = getLoadTextEntryCount(recordStore);
            // Built fresh for every execution.
            // NOTE(review): ExecuteProperties may carry per-execution scan-limit
            // state — do not hoist out of the loop without confirming.
            ExecuteProperties executeProperties = ExecuteProperties.newBuilder().setScannedRecordsLimit(50).build();
            // try-with-resources so the cursor is closed on every iteration,
            // including when one of the assertions below fails.
            try (RecordCursor<FDBQueriedRecord<Message>> cursor = recordStore.executeQuery(plan, continuation, executeProperties)) {
                assertEquals(Collections.emptyList(), cursor.asList().get());
                // After draining, ask the cursor why it stopped.
                RecordCursorResult<FDBQueriedRecord<Message>> noNextResult = cursor.getNext();
                assertThat(noNextResult.hasNext(), is(false));
                final int newKeysLoaded = getLoadTextEntryCount(recordStore);
                totalKeysLoaded += newKeysLoaded - priorKeysLoaded;
                if (!noNextResult.getNoNextReason().isSourceExhausted()) {
                    assertEquals(50, newKeysLoaded - priorKeysLoaded);
                    assertEquals(RecordCursor.NoNextReason.SCAN_LIMIT_REACHED, noNextResult.getNoNextReason());
                    assertNotNull(noNextResult.getContinuation().toBytes());
                } else {
                    assertNull(noNextResult.getContinuation().toBytes());
                    done = true;
                }
                // Resume the next execution from where this one stopped.
                continuation = noNextResult.getContinuation().toBytes();
            }
        }
        // NOTE(review): the two entries beyond recordCount are presumably extra
        // reads made when scans resume or terminate — confirm against the index scan.
        assertEquals(recordCount + 2, totalKeysLoaded);
        commit(context);
    }
}
Aggregations