Search in sources :

Example 1 with ConnectionConfiguration

Use of org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration in the Apache Beam project.

From class ElasticsearchIOTestCommon, method testMaxParallelRequestsPerWindow.

/**
 * Checks the keys produced by {@code StatefulBatching} when the write spec is configured with
 * {@code withMaxParallelRequestsPerWindow(1)}.
 *
 * <p>The test wraps generated input documents as timestamped {@code Document}s, runs them through
 * the batching transform built from the write's bulk IO spec, and asserts that the only key
 * emitted is {@code 0}. It also runs {@code AssertThatHasExpectedContents(0, data)} against the
 * batches (presumably verifying that the batch for key 0 holds the input documents; confirm
 * against that helper's definition).
 *
 * @throws Exception if building or running the pipeline fails
 */
void testMaxParallelRequestsPerWindow() throws Exception {
    // Wrap every generated (valid) input document in a Document stamped with the current time.
    List<Document> data =
        ElasticsearchIOTestUtils.createDocuments(
                numDocs, ElasticsearchIOTestUtils.InjectionMode.DO_NOT_INJECT_INVALID_DOCS)
            .stream()
            .map(rawDoc -> Document.create().withInputDoc(rawDoc).withTimestamp(Instant.now()))
            .collect(Collectors.toList());

    // Write spec limited to a single parallel request per window.
    Write singleRequestWrite =
        ElasticsearchIO.write()
            .withConnectionConfiguration(connectionConfiguration)
            .withMaxParallelRequestsPerWindow(1);

    PCollection<Document> input = pipeline.apply(Create.of(data));
    PCollection<KV<Integer, Iterable<Document>>> batches =
        input.apply(StatefulBatching.fromSpec(singleRequestWrite.getBulkIO()));

    // Project each keyed batch down to just its key.
    SerializableFunction<KV<Integer, Iterable<Document>>, Integer> extractKey = KV::getKey;
    PCollection<Integer> keyValues = batches.apply(MapElements.into(integers()).via(extractKey));

    // Expected number of distinct keys = maxParallelRequestsPerWindow * number of windows.
    // With 1 request (key) per window and a single (global) window, that is exactly one key,
    // and its value is 0.
    PAssert.that(keyValues).containsInAnyOrder(0);
    PAssert.that(batches).satisfies(new AssertThatHasExpectedContents(0, data));

    pipeline.run();
}
Also used : Read(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Read) Count(org.apache.beam.sdk.transforms.Count) Arrays(java.util.Arrays) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) InjectionMode(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.InjectionMode) ElasticsearchIOTestUtils.countByMatch(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.countByMatch) DoFnTester(org.apache.beam.sdk.transforms.DoFnTester) FAMOUS_SCIENTISTS(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.FAMOUS_SCIENTISTS) PipedInputStream(java.io.PipedInputStream) Create(org.apache.beam.sdk.transforms.Create) Map(java.util.Map) Is.is(org.hamcrest.core.Is.is) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Assert.fail(org.junit.Assert.fail) JsonNode(com.fasterxml.jackson.databind.JsonNode) Document(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document) NStringEntity(org.apache.http.nio.entity.NStringEntity) ElasticsearchIO.getBackendVersion(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.getBackendVersion) ValueProvider(org.apache.beam.sdk.options.ValueProvider) DocToBulk(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.DocToBulk) MapElements(org.apache.beam.sdk.transforms.MapElements) ElasticsearchIOTestUtils.mapToInputId(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.mapToInputId) SourceTestUtils.readFromSource(org.apache.beam.sdk.testing.SourceTestUtils.readFromSource) CustomMatcher(org.hamcrest.CustomMatcher) ConnectionConfiguration(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.ConnectionConfiguration) HttpEntity(org.apache.http.HttpEntity) ContentType(org.apache.http.entity.ContentType) Set(java.util.Set) ElasticsearchIOTestUtils.flushAndRefreshAllIndices(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.flushAndRefreshAllIndices) Collectors(java.util.stream.Collectors) 
TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Response(org.elasticsearch.client.Response) State(org.apache.beam.sdk.PipelineResult.State) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) SCRIPT_SOURCE(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.SCRIPT_SOURCE) BulkIO(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BulkIO) IntStream(java.util.stream.IntStream) RestClient(org.elasticsearch.client.RestClient) TypeDescriptors.integers(org.apache.beam.sdk.values.TypeDescriptors.integers) KV(org.apache.beam.sdk.values.KV) ElasticsearchIOTestUtils.refreshIndexAndGetCurrentNumDocs(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.refreshIndexAndGetCurrentNumDocs) Duration(org.joda.time.Duration) RetryPredicate(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.RetryConfiguration.RetryPredicate) ElasticsearchIOTestUtils.insertTestDocuments(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.insertTestDocuments) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) INVALID_DOCS_IDS(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.INVALID_DOCS_IDS) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) ArrayList(java.util.ArrayList) Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) DefaultRetryPredicate(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.RetryConfiguration.DefaultRetryPredicate) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Matchers.lessThan(org.hamcrest.Matchers.lessThan) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExpectedException(org.junit.rules.ExpectedException) 
BoundedElasticsearchSource(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource) Description(org.hamcrest.Description) SourceTestUtils(org.apache.beam.sdk.testing.SourceTestUtils) Logger(org.slf4j.Logger) StatefulBatching(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BulkIO.StatefulBatching) PAssert(org.apache.beam.sdk.testing.PAssert) ElasticsearchIOTestUtils.countByScientistName(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.countByScientistName) DocumentCoder(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.DocumentCoder) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) PipedOutputStream(java.io.PipedOutputStream) PCollection(org.apache.beam.sdk.values.PCollection) Request(org.elasticsearch.client.Request) Is.isA(org.hamcrest.core.Is.isA) NUM_SCIENTISTS(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIOTestUtils.NUM_SCIENTISTS) BoundedSource(org.apache.beam.sdk.io.BoundedSource) DEFAULT_RETRY_PREDICATE(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.RetryConfiguration.DEFAULT_RETRY_PREDICATE) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) IsIterableContainingInAnyOrder(org.hamcrest.collection.IsIterableContainingInAnyOrder) Write(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Write) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) KV(org.apache.beam.sdk.values.KV) Document(org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.Document)

Aggregations

JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 IOException (java.io.IOException)1 PipedInputStream (java.io.PipedInputStream)1 PipedOutputStream (java.io.PipedOutputStream)1 Serializable (java.io.Serializable)1 StandardCharsets (java.nio.charset.StandardCharsets)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)1 BiFunction (java.util.function.BiFunction)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 State (org.apache.beam.sdk.PipelineResult.State)1 BoundedSource (org.apache.beam.sdk.io.BoundedSource)1 BoundedElasticsearchSource (org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO.BoundedElasticsearchSource)1