Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
From the class ElasticsearchIOTestCommon, method testWriteWithErrors.
void testWriteWithErrors() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withMaxBatchSize(BATCH_SIZE);
  List<String> input =
      ElasticsearchIOTestUtils.createDocuments(
          numDocs, ElasticsearchIOTestUtils.InjectionMode.INJECT_SOME_INVALID_DOCS);
  expectedException.expect(isA(IOException.class));
  expectedException.expectMessage(
      new CustomMatcher<String>("RegExp matcher") {
        @Override
        public boolean matches(Object o) {
          String message = (String) o;
          // the other messages are matched using .+
          return message.matches(
              "(?is).*Error writing to Elasticsearch, some elements could not be inserted"
                  + ".*Document id .+: failed to parse \\(.+\\).*Caused by: .+ \\(.+\\).*"
                  + "Document id .+: failed to parse \\(.+\\).*Caused by: .+ \\(.+\\).*");
        }
      });
  // Bundle size is the runner's decision, so we cannot force a bundle size here;
  // instead we test the Writer as a DoFn outside of a runner.
  try (DoFnTester<Document, Document> fnTester =
      DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
    // inserts into Elasticsearch
    fnTester.processBundle(serializeDocs(write, input));
  }
}
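The serializeDocs helper is not shown on this page. A minimal sketch of what it plausibly does, inferred from the serialization loop in testWriteWithMaxBatchSize further down (the name, signature, and details are assumptions, not the verified Beam implementation):

// Hedged sketch: converts raw JSON strings into Document elements carrying their
// bulk API directives, mirroring the per-document loop in testWriteWithMaxBatchSize.
private List<Document> serializeDocs(Write write, List<String> input) throws IOException {
  List<Document> serialized = new ArrayList<>();
  for (String doc : input) {
    // Build the bulk API entity for this document against the detected ES version.
    String bulkDirective =
        DocToBulk.createBulkApiEntity(
            write.getDocToBulk(), doc, getBackendVersion(connectionConfiguration));
    serialized.add(
        Document.create()
            .withInputDoc(doc)
            .withBulkDirective(bulkDirective)
            .withTimestamp(Instant.now()));
  }
  return serialized;
}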
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
From the class ElasticsearchIOTestCommon, method testDocumentCoder.
void testDocumentCoder() throws Exception {
  List<String> data =
      ElasticsearchIOTestUtils.createDocuments(numDocs, InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
  int randomNum = ThreadLocalRandom.current().nextInt(0, data.size());
  Instant now = Instant.now();
  Write write = ElasticsearchIO.write().withConnectionConfiguration(connectionConfiguration);
  Document expected =
      serializeDocs(write, data)
          .get(randomNum)
          .withTimestamp(now)
          .withHasError(randomNum % 2 == 0);
  PipedInputStream in = new PipedInputStream();
  PipedOutputStream out = new PipedOutputStream(in);
  DocumentCoder coder = DocumentCoder.of();
  coder.encode(expected, out);
  Document actual = coder.decode(in);
  assertEquals(expected, actual);
}
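The same round trip can be expressed with Beam's coder testing utilities instead of hand-managed piped streams; a minimal equivalent sketch, assuming DocumentCoder behaves like any other Beam Coder:

// Equivalent check via org.apache.beam.sdk.testing.CoderProperties:
// encodes the value, decodes it back, and asserts the round trip is lossless.
CoderProperties.coderDecodeEncodeEqual(DocumentCoder.of(), expected);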
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
From the class ElasticsearchIOTestCommon, method testWriteWithDocVersion.
void testWriteWithDocVersion() throws Exception {
  List<ObjectNode> jsonData =
      ElasticsearchIOTestUtils.createJsonDocuments(
          numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
  List<String> data = new ArrayList<>();
  for (ObjectNode doc : jsonData) {
    doc.put("my_version", "1");
    data.add(doc.toString());
  }
  insertTestDocuments(connectionConfiguration, data, restClient);
  long currentNumDocs = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
  assertEquals(numDocs, currentNumDocs);
  // Check that all docs have the same "my_version"
  assertEquals(
      numDocs,
      countByMatch(connectionConfiguration, restClient, "my_version", "1", null, KV.of(1, numDocs)));
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withIdFn(new ExtractValueFn("id"))
          .withDocVersionFn(new ExtractValueFn("my_version"))
          .withDocVersionType("external");
  data = new ArrayList<>();
  for (ObjectNode doc : jsonData) {
    // Set the version to a number larger than the one originally set, and larger than the
    // next logical version number that ES would assign by default.
    doc.put("my_version", "3");
    data.add(doc.toString());
  }
  // Documents with a higher external version replace the existing ones; a lower
  // version would be rejected by ES.
  pipeline.apply(Create.of(data)).apply(write);
  pipeline.run();
  currentNumDocs = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
  assertEquals(numDocs, currentNumDocs);
  // Both my_version and the document's version in ES should have changed.
  assertEquals(
      0,
      countByMatch(connectionConfiguration, restClient, "my_version", "1", null, KV.of(1, numDocs)));
  assertEquals(
      numDocs,
      countByMatch(connectionConfiguration, restClient, "my_version", "3", null, KV.of(3, numDocs)));
}
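ExtractValueFn is a test-class helper not shown on this page. A plausible minimal sketch (the actual implementation may differ; JsonNode is com.fasterxml.jackson.databind.JsonNode) reads a named field from the parsed document so ElasticsearchIO can use it as the document id or version:

// Hedged sketch of ExtractValueFn: pulls a field's text value out of the parsed JSON.
static class ExtractValueFn implements ElasticsearchIO.Write.FieldValueExtractFn {
  private final String fieldName;

  ExtractValueFn(String fieldName) {
    this.fieldName = fieldName;
  }

  @Override
  public String apply(JsonNode input) {
    return input.path(fieldName).asText();
  }
}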
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
From the class ElasticsearchIOTestCommon, method testWriteWithMaxBatchSize.
void testWriteWithMaxBatchSize() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withMaxBatchSize(BATCH_SIZE);
  // Bundle size is the runner's decision, so we cannot force a bundle size here;
  // instead we test the Writer as a DoFn outside of a runner.
  try (DoFnTester<Document, Document> fnTester =
      DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
    List<String> input =
        ElasticsearchIOTestUtils.createDocuments(
            numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
    List<Document> serializedInput = new ArrayList<>();
    for (String doc : input) {
      String bulkDoc =
          DocToBulk.createBulkApiEntity(
              write.getDocToBulk(), doc, getBackendVersion(connectionConfiguration));
      Document r =
          Document.create()
              .withInputDoc(doc)
              .withBulkDirective(bulkDoc)
              .withTimestamp(Instant.now());
      serializedInput.add(r);
    }
    long numDocsProcessed = 0;
    long numDocsInserted = 0;
    for (Document document : serializedInput) {
      fnTester.processElement(document);
      numDocsProcessed++;
      // check every 100 docs to avoid overloading ES
      if ((numDocsProcessed % 100) == 0) {
        // force a refresh of the index after inserting so that the inserted docs
        // are searchable immediately
        long currentNumDocs =
            refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
        if ((numDocsProcessed % BATCH_SIZE) == 0) {
          /* batch end */
          assertEquals(
              "we are at the end of a batch; all processed documents should have been inserted",
              numDocsProcessed,
              currentNumDocs);
          numDocsInserted = currentNumDocs;
        } else {
          /* not batch end */
          assertEquals(
              "we are not at the end of a batch; no additional documents should have been inserted",
              numDocsInserted,
              currentNumDocs);
        }
      }
    }
  }
}
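Outside of DoFnTester, the same transform would normally be applied in a pipeline, as the testWriteWithDocVersion example above does; a minimal sketch reusing this test's fields:

// Typical runner-managed usage of the same write: bundle boundaries are then
// decided by the runner, which is exactly why the test above bypasses it.
pipeline
    .apply(Create.of(input))
    .apply(
        ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withMaxBatchSize(BATCH_SIZE));
pipeline.run().waitUntilFinish();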
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
From the class ElasticsearchIOTestCommon, method testWriteRetryValidRequest.
void testWriteRetryValidRequest() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withRetryConfiguration(
              ElasticsearchIO.RetryConfiguration.create(MAX_ATTEMPTS, Duration.millis(35000))
                  .withRetryPredicate(CUSTOM_RETRY_PREDICATE));
  executeWriteTest(write);
}