Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
The class ElasticsearchIOTestCommon, method testWriteWithAllowedErrors.
void testWriteWithAllowedErrors() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withMaxBatchSize(BATCH_SIZE)
          .withAllowableResponseErrors(Collections.singleton("json_parse_exception"));
  List<String> input =
      ElasticsearchIOTestUtils.createDocuments(
          numDocs, ElasticsearchIOTestUtils.InjectionMode.INJECT_SOME_INVALID_DOCS);
  // Bundle size is the runner's decision, so we cannot force it from the test;
  // instead we test the Writer as a DoFn outside of a runner.
  try (DoFnTester<Document, Document> fnTester =
      DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
    // inserts into Elasticsearch
    fnTester.processBundle(serializeDocs(write, input));
  }
}
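The serializeDocs helper is not shown on this page. A minimal sketch of what it might look like, assuming the name and signature implied by the call above and mirroring the inline serialization loop in testWriteWithMaxBatchSizeBytes below:

private List<Document> serializeDocs(Write write, List<String> input) throws IOException {
  // Hypothetical helper: turn each raw JSON doc into a Document carrying its
  // bulk-API directive, exactly as done inline in testWriteWithMaxBatchSizeBytes.
  List<Document> serializedInput = new ArrayList<>();
  for (String doc : input) {
    String bulkDirective =
        DocToBulk.createBulkApiEntity(
            write.getDocToBulk(), doc, getBackendVersion(connectionConfiguration));
    serializedInput.add(
        Document.create()
            .withInputDoc(doc)
            .withBulkDirective(bulkDirective)
            .withTimestamp(Instant.now()));
  }
  return serializedInput;
}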
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
The class ElasticsearchIOTestCommon, method testWriteRetry.
/**
 * Tests that retries are invoked when Elasticsearch returns a specific error code. We trigger
 * this by issuing corrupt data and retrying on the `400` error code. Normal behaviour is to
 * retry on `429` only, but that is difficult to simulate reliably. The log message is used to
 * verify the expected behavior.
 */
void testWriteRetry() throws Throwable {
  expectedException.expectCause(isA(IOException.class));
  // MAX_ATTEMPTS is 3, but EXPECTED_RETRIES is 2: the retry count excludes the 1st
  // attempt, in which the error was identified and the retries were started.
  expectedException.expectMessage(
      String.format(ElasticsearchIO.BulkIO.RETRY_FAILED_LOG, EXPECTED_RETRIES));
  ElasticsearchIO.Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withRetryConfiguration(
              ElasticsearchIO.RetryConfiguration.create(MAX_ATTEMPTS, Duration.millis(35000))
                  .withRetryPredicate(CUSTOM_RETRY_PREDICATE));
  pipeline.apply(Create.of(Arrays.asList(BAD_FORMATTED_DOC))).apply(write);
  pipeline.run();
}
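CUSTOM_RETRY_PREDICATE is defined elsewhere in the test class. A plausible definition, assuming the DefaultRetryPredicate constructor that accepts an HTTP status code (the default predicate matches 429 only):

// Assumed definition: retry on HTTP 400, which the corrupt document reliably
// triggers, instead of the default 429 (TOO_MANY_REQUESTS).
private static final RetryConfiguration.RetryPredicate CUSTOM_RETRY_PREDICATE =
    new RetryConfiguration.DefaultRetryPredicate(400);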
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
The class ElasticsearchIOTestCommon, method testWriteWithMaxBatchSizeBytes.
void testWriteWithMaxBatchSizeBytes() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withMaxBatchSizeBytes(BATCH_SIZE_BYTES);
  // Bundle size is the runner's decision, so we cannot force it from the test;
  // instead we test the Writer as a DoFn outside of a runner.
  try (DoFnTester<Document, Document> fnTester =
      DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
    List<String> input =
        ElasticsearchIOTestUtils.createDocuments(
            numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
    List<Document> serializedInput = new ArrayList<>();
    for (String doc : input) {
      String bulkDoc =
          DocToBulk.createBulkApiEntity(
              write.getDocToBulk(), doc, getBackendVersion(connectionConfiguration));
      Document r =
          Document.create()
              .withInputDoc(doc)
              .withBulkDirective(bulkDoc)
              .withTimestamp(Instant.now());
      serializedInput.add(r);
    }
    long numDocsProcessed = 0;
    long sizeProcessed = 0;
    long numDocsInserted = 0;
    long batchInserted = 0;
    for (Document document : serializedInput) {
      fnTester.processElement(document);
      numDocsProcessed++;
      sizeProcessed += document.getBulkDirective().getBytes(StandardCharsets.UTF_8).length;
      // check every 40 docs to avoid overloading Elasticsearch
      if ((numDocsProcessed % 40) == 0) {
        // force the index to refresh after inserting, so that the inserted docs
        // are searchable immediately
        long currentNumDocs =
            refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
        if (sizeProcessed / BATCH_SIZE_BYTES > batchInserted) {
          /* batch end */
          assertThat(
              "we have passed a batch size boundary, so some documents should have been inserted",
              currentNumDocs,
              greaterThan(numDocsInserted));
          numDocsInserted = currentNumDocs;
          batchInserted = (sizeProcessed / BATCH_SIZE_BYTES);
        } else {
          /* not batch end */
          assertEquals(
              "we are not at the end of a batch, so no more documents should have been inserted",
              numDocsInserted,
              currentNumDocs);
        }
      }
    }
  }
}
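refreshIndexAndGetCurrentNumDocs is another test utility that is not shown here. A minimal sketch of the idea, using the Elasticsearch low-level REST client's _refresh and _count endpoints (the index handling and JSON parsing are assumptions):

// Sketch only. Uses org.elasticsearch.client.{Request, Response, RestClient}
// and Jackson's ObjectMapper.
static long refreshIndexAndGetCurrentNumDocs(
    ConnectionConfiguration connectionConfiguration, RestClient restClient) throws IOException {
  String index = connectionConfiguration.getIndex();
  // Refresh so that all pending writes become visible to search and _count.
  restClient.performRequest(new Request("POST", "/" + index + "/_refresh"));
  Response response = restClient.performRequest(new Request("GET", "/" + index + "/_count"));
  JsonNode json = new ObjectMapper().readTree(response.getEntity().getContent());
  return json.path("count").asLong();
}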
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
The class ElasticsearchIOTestCommon, method testPipelineDone.
void testPipelineDone() throws Exception {
  Write write = ElasticsearchIO.write().withConnectionConfiguration(connectionConfiguration);
  List<String> data =
      ElasticsearchIOTestUtils.createDocuments(
          numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
  pipeline.apply(Create.of(data)).apply(write);
  assertEquals(State.DONE, pipeline.run().waitUntilFinish());
}
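Beyond checking the pipeline state, a natural follow-up (not shown in this snippet) would reuse the same count utility as testWriteWithMaxBatchSizeBytes to verify that every document was written:

// Hedged follow-up check: all numDocs documents should be searchable after the
// pipeline finishes and the index is refreshed.
long currentNumDocs = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
assertEquals(numDocs, currentNumDocs);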
Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.
The class ElasticsearchIOTestCommon, method testWriteAppendOnlyDeleteNotAllowed.
void testWriteAppendOnlyDeleteNotAllowed() throws Exception {
  Write write =
      ElasticsearchIO.write()
          .withConnectionConfiguration(connectionConfiguration)
          .withIdFn(new ExtractValueFn("id"))
          .withAppendOnly(true)
          .withIsDeleteFn(doc -> true);
  executeWriteTest(write);
}
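ExtractValueFn is also defined elsewhere in the test class. A minimal sketch, assuming it implements Write.FieldValueExtractFn (a SerializableFunction<JsonNode, String>) and reads the named field from the parsed document; here it supplies the document id that the delete would target, which, as the method name suggests, withAppendOnly(true) is expected to reject:

// Assumed sketch: extract a field value (here "id") from the parsed JSON
// document, for use as the Elasticsearch document id.
static class ExtractValueFn implements ElasticsearchIO.Write.FieldValueExtractFn {
  private final String fieldName;

  ExtractValueFn(String fieldName) {
    this.fieldName = fieldName;
  }

  @Override
  public String apply(JsonNode input) {
    return input.path(fieldName).asText();
  }
}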