Search in sources :

Example 11 with Write

use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.

the class ElasticsearchIOTestCommon method testWriteWithErrors.

/**
 * Writes a batch that contains some invalid documents and expects the write to fail with an
 * {@link IOException} whose message lists the documents that could not be inserted.
 *
 * <p>The expectation is registered on {@code expectedException} before the bundle is processed,
 * so the exception is meant to escape from this method.
 *
 * @throws Exception the failure raised while the bundle is processed
 */
void testWriteWithErrors() throws Exception {
    // (?is) = case-insensitive + DOTALL. The message must contain the generic header followed by
    // two "Document id ...: failed to parse (...)" entries, each with its own "Caused by: ... (...)".
    // Everything that varies from run to run is matched with ".+".
    final String expectedMessagePattern = "(?is).*Error writing to Elasticsearch, some elements could not be inserted" + ".*Document id .+: failed to parse \\(.+\\).*Caused by: .+ \\(.+\\).*" + "Document id .+: failed to parse \\(.+\\).*Caused by: .+ \\(.+\\).*";

    Write write = ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withMaxBatchSize(BATCH_SIZE);
    List<String> input = ElasticsearchIOTestUtils.createDocuments(
            numDocs, ElasticsearchIOTestUtils.InjectionMode.INJECT_SOME_INVALID_DOCS);

    expectedException.expect(isA(IOException.class));
    expectedException.expectMessage(new CustomMatcher<String>("RegExp matcher") {

        @Override
        public boolean matches(Object o) {
            return ((String) o).matches(expectedMessagePattern);
        }
    });

    // The bulk function is driven directly through a DoFnTester, i.e. outside of a runner.
    BulkIO.BulkIOBundleFn bundleFn = new BulkIO.BulkIOBundleFn(write.getBulkIO());
    try (DoFnTester<Document, Document> fnTester = DoFnTester.of(bundleFn)) {
        // Inserts into Elasticsearch; this is where the expected failure is triggered.
        fnTester.processBundle(serializeDocs(write, input));
    }
}
Also used : Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) IOException(java.io.IOException) BulkIO(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BulkIO) Document(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document)

Example 12 with Write

use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.

the class ElasticsearchIOTestCommon method testDocumentCoder.

/**
 * Checks that a {@code Document} survives an encode/decode round trip through
 * {@code DocumentCoder} unchanged.
 *
 * <p>A randomly chosen serialized document is stamped with the current time and an error flag
 * derived from the parity of its index, encoded, decoded again and compared with the original.
 *
 * @throws Exception if document creation, encoding or decoding fails
 */
void testDocumentCoder() throws Exception {
    List<String> data = ElasticsearchIOTestUtils.createDocuments(numDocs, InjectionMode.DO_NOT_INJECT_INVALID_DOCS);
    int randomNum = ThreadLocalRandom.current().nextInt(0, data.size());
    Instant now = Instant.now();
    Write write = ElasticsearchIO.write().withConnectionConfiguration(connectionConfiguration);
    Document expected = serializeDocs(write, data).get(randomNum).withTimestamp(now).withHasError(randomNum % 2 == 0);
    DocumentCoder coder = DocumentCoder.of();
    // Round-trip through an in-memory byte array. A PipedOutputStream/PipedInputStream pair has a
    // small fixed-size buffer and is meant to be used from two threads: writing and then reading
    // on the same thread blocks forever as soon as the encoded document outgrows that buffer.
    // Byte-array streams have no such limit and hold no resources that need closing.
    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
    coder.encode(expected, out);
    Document actual = coder.decode(new java.io.ByteArrayInputStream(out.toByteArray()));
    assertEquals(expected, actual);
}
Also used : Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) Instant(org.joda.time.Instant) DocumentCoder(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.DocumentCoder) PipedOutputStream(java.io.PipedOutputStream) PipedInputStream(java.io.PipedInputStream) Document(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document)

Example 13 with Write

use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.

the class ElasticsearchIOTestCommon method testWriteWithDocVersion.

/**
 * Exercises a write configured with a document-version function and the {@code "external"}
 * version type.
 *
 * <p>The index is first seeded with documents whose {@code my_version} field is {@code "1"}.
 * The same documents are then written again through the pipeline with {@code my_version} set to
 * {@code "3"}, and the test asserts that the document count is unchanged while no document
 * matches the old value any more and all of them match the new one.
 *
 * @throws Exception if seeding the index, running the pipeline or querying the index fails
 */
void testWriteWithDocVersion() throws Exception {
    List<ObjectNode> jsonDocs = ElasticsearchIOTestUtils.createJsonDocuments(
            numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);

    // Seed the index with every document carrying my_version = "1".
    List<String> initialDocs = new ArrayList<>();
    for (ObjectNode jsonDoc : jsonDocs) {
        jsonDoc.put("my_version", "1");
        initialDocs.add(jsonDoc.toString());
    }
    insertTestDocuments(connectionConfiguration, initialDocs, restClient);

    long docsAfterSeeding = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
    assertEquals(numDocs, docsAfterSeeding);
    // Every seeded document must carry the same "my_version".
    assertEquals(
            numDocs,
            countByMatch(connectionConfiguration, restClient, "my_version", "1", null, KV.of(1, numDocs)));

    // Ids and versions are both extracted from the documents themselves.
    Write write = ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withIdFn(new ExtractValueFn("id"))
            .withDocVersionFn(new ExtractValueFn("my_version"))
            .withDocVersionType("external");

    // Bump the version past both the seeded value and the next version number that ES would
    // assign by default, then serialize the same documents again.
    List<String> bumpedDocs = new ArrayList<>();
    for (ObjectNode jsonDoc : jsonDocs) {
        jsonDoc.put("my_version", "3");
        bumpedDocs.add(jsonDoc.toString());
    }

    // Write the bumped documents; the assertions below expect each one to replace its
    // seeded counterpart.
    pipeline.apply(Create.of(bumpedDocs)).apply(write);
    pipeline.run();

    long docsAfterRewrite = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);
    assertEquals(numDocs, docsAfterRewrite);
    // Both my_version and the document version should have changed.
    assertEquals(
            0,
            countByMatch(connectionConfiguration, restClient, "my_version", "1", null, KV.of(1, numDocs)));
    assertEquals(
            numDocs,
            countByMatch(connectionConfiguration, restClient, "my_version", "3", null, KV.of(3, numDocs)));
}
Also used : Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ArrayList(java.util.ArrayList)

Example 14 with Write

use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.

the class ElasticsearchIOTestCommon method testWriteWithMaxBatchSize.

/**
 * Feeds documents one at a time into the bulk function of a write configured with
 * {@code withMaxBatchSize(BATCH_SIZE)} and checks when they become visible in the index.
 *
 * <p>After every 100th element the index is refreshed and counted. If the number of processed
 * elements is a multiple of {@code BATCH_SIZE}, all processed documents must be present;
 * otherwise the count must still equal the one recorded at the previous such boundary.
 *
 * @throws Exception if document preparation, element processing or index access fails
 */
void testWriteWithMaxBatchSize() throws Exception {
    Write write = ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withMaxBatchSize(BATCH_SIZE);
    // The bulk function is driven directly through a DoFnTester, i.e. outside of a runner.
    try (DoFnTester<Document, Document> fnTester = DoFnTester.of(new BulkIO.BulkIOBundleFn(write.getBulkIO()))) {
        List<String> rawDocs = ElasticsearchIOTestUtils.createDocuments(
                numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS);

        // Pair every raw document with its bulk API directive and a timestamp.
        List<Document> documents = new ArrayList<>();
        for (String rawDoc : rawDocs) {
            String bulkDirective = DocToBulk.createBulkApiEntity(
                    write.getDocToBulk(), rawDoc, getBackendVersion(connectionConfiguration));
            documents.add(
                    Document.create()
                            .withInputDoc(rawDoc)
                            .withBulkDirective(bulkDirective)
                            .withTimestamp(Instant.now()));
        }

        long processedCount = 0;
        long insertedCount = 0;
        for (Document document : documents) {
            fnTester.processElement(document);
            processedCount++;

            // Only look at the index every 100 docs to avoid overloading ES.
            if ((processedCount % 100) != 0) {
                continue;
            }

            // Refresh the index so that the documents inserted so far are searchable immediately.
            long indexedCount = refreshIndexAndGetCurrentNumDocs(connectionConfiguration, restClient);

            if ((processedCount % BATCH_SIZE) != 0) {
                /* not bundle end */
                assertEquals("we are not at the end of a bundle, we should have inserted no more documents", insertedCount, indexedCount);
                continue;
            }

            /* bundle end */
            assertEquals("we are at the end of a bundle, we should have inserted all processed documents", processedCount, indexedCount);
            insertedCount = indexedCount;
        }
    }
}
Also used : Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) ArrayList(java.util.ArrayList) BulkIO(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BulkIO) Document(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document)

Example 15 with Write

use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write in project beam by apache.

the class ElasticsearchIOTestCommon method testWriteRetryValidRequest.

/**
 * Runs the shared write test with a retry configuration attached: up to {@code MAX_ATTEMPTS}
 * attempts, a duration of 35000 ms and {@code CUSTOM_RETRY_PREDICATE} as the retry predicate.
 *
 * @throws Exception if the delegated write test fails
 */
void testWriteRetryValidRequest() throws Exception {
    Write retryingWrite = ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withRetryConfiguration(
                    ElasticsearchIO.RetryConfiguration.create(MAX_ATTEMPTS, Duration.millis(35000))
                            .withRetryPredicate(CUSTOM_RETRY_PREDICATE));
    executeWriteTest(retryingWrite);
}
Also used : Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write)

Aggregations

Write (org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write)17 Document (org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document)6 BulkIO (org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BulkIO)5 ArrayList (java.util.ArrayList)4 IOException (java.io.IOException)3 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)3 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)2 PipedInputStream (java.io.PipedInputStream)2 PipedOutputStream (java.io.PipedOutputStream)2 DocumentCoder (org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.DocumentCoder)2 Instant (org.joda.time.Instant)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 Serializable (java.io.Serializable)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)1