Usage example of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document in the Apache Beam project.
From the class ElasticsearchIOTestCommon, method testDocumentCoder.
/**
 * Round-trips a serialized {@code Document} through {@code DocumentCoder} and asserts the
 * decoded value equals the original.
 *
 * <p>Fix: the previous version piped the encoded bytes through a
 * {@code PipedOutputStream}/{@code PipedInputStream} pair on a single thread. A
 * {@code PipedInputStream} has a fixed internal buffer (1024 bytes by default), so encoding
 * any document larger than the buffer would block forever with no reader thread to drain it.
 * An in-memory byte array avoids the deadlock and needs no closing.
 *
 * @throws Exception if document creation, serialization, or coding fails
 */
void testDocumentCoder() throws Exception {
List<String> data = ElasticsearchIOTestUtils.createDocuments(numDocs, InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
// Pick an arbitrary document so the test exercises varying payloads across runs.
int randomNum = ThreadLocalRandom.current().nextInt(0, data.size());
Instant now = Instant.now();
Write write = ElasticsearchIO.write().withConnectionConfiguration(connectionConfiguration);
// Alternate the hasError flag based on the index so both branches get coverage over time.
Document expected = serializeDocs(write, data).get(randomNum).withTimestamp(now).withHasError(randomNum % 2 == 0);
DocumentCoder coder = DocumentCoder.of();
// Encode into an in-memory buffer, then decode from the captured bytes.
java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
coder.encode(expected, out);
Document actual = coder.decode(new java.io.ByteArrayInputStream(out.toByteArray()));
assertEquals(expected, actual);
}
Usage example of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document in the Apache Beam project.
From the class ElasticsearchIOTestCommon, method testWriteWithMaxBatchSize.
/**
 * Verifies that the bulk writer flushes exactly at {@code BATCH_SIZE} boundaries: document
 * counts in the index only advance when a full batch has been processed, and stay flat
 * in between.
 *
 * @throws Exception if document creation, serialization, or the DoFn invocation fails
 */
void testWriteWithMaxBatchSize() throws Exception {
Write write = ElasticsearchIO.write().withConnectionConfiguration(connectionConfiguration).withMaxBatchSize(BATCH_SIZE);
// so we test the Writer as a DoFn outside of a runner.
try (DoFnTester<Document, Document> fnTester = DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
List<String> rawDocs = ElasticsearchIOTestUtils.createDocuments(numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
int esVersion = getBackendVersion(connectionConfiguration);
// Wrap each raw JSON document with its bulk-API directive and a timestamp.
List<Document> bulkReady = new ArrayList<>();
for (String raw : rawDocs) {
String directive = DocToBulk.createBulkApiEntity(write.getDocToBulk(), raw, esVersion);
bulkReady.add(Document.create().withInputDoc(raw).withBulkDirective(directive).withTimestamp(Instant.now()));
}
long processed = 0;
long insertedSoFar = 0;
for (Document doc : bulkReady) {
fnTester.processElement(doc);
processed++;
// test every 100 docs to avoid overloading ES
if (processed % 100 != 0) {
continue;
}
// force the index to upgrade after inserting for the inserted docs
// to be searchable immediately
long indexedNow = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
if (processed % BATCH_SIZE == 0) {
/* bundle end */
assertEquals("we are at the end of a bundle, we should have inserted all processed documents", processed, indexedNow);
insertedSoFar = indexedNow;
} else {
/* not bundle end */
assertEquals("we are not at the end of a bundle, we should have inserted no more documents", insertedSoFar, indexedNow);
}
}
}
}
Aggregations