
Example 76 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class DynamoDBIOWriteTest method testWritePutItemsWithPartialSuccess.

@Test
public void testWritePutItemsWithPartialSuccess() {
    List<WriteRequest> writes = putRequests(Item.range(0, 10));
    when(client.batchWriteItem(any(BatchWriteItemRequest.class)))
        .thenReturn(partialWriteSuccess(writes.subList(4, 10)))
        .thenReturn(partialWriteSuccess(writes.subList(8, 10)))
        .thenReturn(new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of()));
    pipeline
        .apply(Create.of(10)) // number of items to produce
        .apply(ParDo.of(new GenerateItems())) // 10 items in one bundle
        .apply(
            "write",
            DynamoDBIO.<Item>write()
                .withWriteRequestMapperFn(putRequestMapper)
                .withAwsClientsProvider(StaticAwsClientsProvider.of(client))
                .withRetryConfiguration(try4Times));
    PipelineResult result = pipeline.run();
    result.waitUntilFinish();
    verify(client, times(3)).batchWriteItem(any(BatchWriteItemRequest.class));
    InOrder ordered = inOrder(client);
    ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes)));
    ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(4, 10))));
    ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(8, 10))));
}
Also used : BatchWriteItemResult(com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult) InOrder(org.mockito.InOrder) WriteRequest(com.amazonaws.services.dynamodbv2.model.WriteRequest) PipelineResult(org.apache.beam.sdk.PipelineResult) BatchWriteItemRequest(com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest) Test(org.junit.Test)
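
The helpers referenced above (try4Times, putRequestMapper, GenerateItems, partialWriteSuccess) are defined elsewhere in DynamoDBIOWriteTest. As a minimal sketch, the retry configuration and request mapper could look roughly like the following, assuming the RetryConfiguration.create(attempts, duration) factory of the AWS SDK v1 DynamoDBIO module and a hypothetical Item.toAttributeMap() accessor; the table name and duration are placeholders, not the test's values.

import org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.joda.time.Duration;
import com.amazonaws.services.dynamodbv2.model.PutRequest;
import com.amazonaws.services.dynamodbv2.model.WriteRequest;

// Allow up to 4 attempts (matching the name try4Times) within an illustrative 1-minute window.
private static final DynamoDBIO.RetryConfiguration try4Times =
    DynamoDBIO.RetryConfiguration.create(4, Duration.standardMinutes(1));

// Map each Item to the (table name, WriteRequest) pair that DynamoDBIO.write() expects.
private static final SerializableFunction<Item, KV<String, WriteRequest>> putRequestMapper =
    item ->
        KV.of(
            "test-table", // hypothetical table name
            new WriteRequest()
                .withPutRequest(new PutRequest().withItem(item.toAttributeMap()))); // toAttributeMap() is assumed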

Example 77 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class KafkaIOTest method testUnboundedSourceWithoutBoundedWrapper.

@Test
// TODO : BEAM-4086 : enable once flakiness is fixed.
@Ignore
public void testUnboundedSourceWithoutBoundedWrapper() {
    // This is the same as testUnboundedSource() but without the BoundedSource wrapper.
    // Most of the tests in this file set 'maxNumRecords' on the source, which wraps
    // the unbounded source in a bounded source; as a result, those test pipelines run as
    // bounded/batch pipelines under the direct runner.
    // This test runs without such a wrapper and depends on the watermark
    // progressing to infinity to end the test (see TimestampPolicyWithEndOfSource above).
    final int numElements = 1000;
    final int numPartitions = 10;
    String topic = "testUnboundedSourceWithoutBoundedWrapper";
    KafkaIO.Read<byte[], Long> reader =
        KafkaIO.<byte[], Long>read()
            .withBootstrapServers(topic)
            .withTopic(topic)
            .withConsumerFactoryFn(
                new ConsumerFactoryFn(
                    ImmutableList.of(topic), numPartitions, numElements, OffsetResetStrategy.EARLIEST))
            .withKeyDeserializer(ByteArrayDeserializer.class)
            .withValueDeserializer(LongDeserializer.class)
            .withTimestampPolicyFactory(
                new TimestampPolicyWithEndOfSource<>(numElements / numPartitions - 1));
    p.apply("readFromKafka", reader.withoutMetadata())
        .apply(Values.create())
        .apply(Window.into(FixedWindows.of(Duration.standardDays(100))));
    PipelineResult result = p.run();
    MetricName elementsRead = SourceMetrics.elementsRead().getName();
    MetricQueryResults metrics =
        result.metrics().queryMetrics(
            MetricsFilter.builder()
                .addNameFilter(MetricNameFilter.inNamespace(elementsRead.getNamespace()))
                .build());
    assertThat(
        metrics.getCounters(),
        hasItem(
            attemptedMetricsResult(
                elementsRead.getNamespace(), elementsRead.getName(), "readFromKafka", (long) numElements)));
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) PipelineResult(org.apache.beam.sdk.PipelineResult) Matchers.containsString(org.hamcrest.Matchers.containsString) Ignore(org.junit.Ignore) Test(org.junit.Test)
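
TimestampPolicyWithEndOfSource is a test-only policy defined inside KafkaIOTest and not shown here. The sketch below illustrates the idea the test depends on: once a partition has delivered its last expected record, the watermark jumps to the end of time so the unbounded pipeline can terminate. Class and field names are assumptions, not the actual KafkaIOTest implementation.

import org.apache.beam.sdk.io.kafka.KafkaRecord;
import org.apache.beam.sdk.io.kafka.TimestampPolicy;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.joda.time.Instant;

// Sketch of an end-of-source timestamp policy; names and details are assumptions.
static class EndOfSourcePolicy extends TimestampPolicy<byte[], Long> {
  private final long maxOffset; // last offset expected per partition
  private Instant lastTimestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
  private long lastOffset = -1;

  EndOfSourcePolicy(long maxOffset) {
    this.maxOffset = maxOffset;
  }

  @Override
  public Instant getTimestampForRecord(PartitionContext ctx, KafkaRecord<byte[], Long> record) {
    lastOffset = record.getOffset();
    lastTimestamp = new Instant(record.getTimestamp());
    return lastTimestamp;
  }

  @Override
  public Instant getWatermark(PartitionContext ctx) {
    // Once the last expected record has been read, advance the watermark to infinity.
    return lastOffset >= maxOffset ? BoundedWindow.TIMESTAMP_MAX_VALUE : lastTimestamp;
  }
}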

Example 78 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class JdbcIOIT method testWriteThenRead.

/**
 * Tests writing then reading data for a postgres database.
 */
@Test
public void testWriteThenRead() {
    PipelineResult writeResult = runWrite();
    writeResult.waitUntilFinish();
    PipelineResult readResult = runRead();
    readResult.waitUntilFinish();
    gatherAndPublishMetrics(writeResult, readResult);
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)
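
gatherAndPublishMetrics reads timing metrics from both PipelineResults; its body lives in JdbcIOIT. As a hedged sketch, pulling a counter out of a finished PipelineResult generally looks like the following; the namespace and metric name are placeholders, not the ones JdbcIOIT publishes.

import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;

// Query a named counter from the write pipeline's result after waitUntilFinish().
MetricQueryResults metrics =
    writeResult.metrics().queryMetrics(
        MetricsFilter.builder()
            .addNameFilter(MetricNameFilter.named("jdbcioit", "write_time")) // placeholder names
            .build());
for (MetricResult<Long> counter : metrics.getCounters()) {
  System.out.println(counter.getName() + " = " + counter.getAttempted());
}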

Example 79 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class KafkaIOIT method testKafkaIOReadsAndWritesCorrectlyInBatch.

@Test
public void testKafkaIOReadsAndWritesCorrectlyInBatch() throws IOException {
    // Map from record count to the expected hash of a collection of that many 100-byte records
    // (10-byte key, 90-byte value).
    Map<Long, String> expectedHashes =
        ImmutableMap.of(
            1000L, "4507649971ee7c51abbb446e65a5c660",
            100_000_000L, "0f12c27c9a7672e14775594be66cad9a");
    expectedHashcode = getHashForRecordCount(sourceOptions.numRecords, expectedHashes);
    writePipeline
        .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
        .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
        .apply("Write to Kafka", writeToKafka());
    PCollection<String> hashcode =
        readPipeline
            .apply("Read from bounded Kafka", readFromBoundedKafka())
            .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
            .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.thatSingleton(hashcode).isEqualTo(expectedHashcode);
    PipelineResult writeResult = writePipeline.run();
    writeResult.waitUntilFinish();
    PipelineResult readResult = readPipeline.run();
    PipelineResult.State readState = readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
    cancelIfTimeouted(readResult, readState);
    if (!options.isWithTestcontainers()) {
        Set<NamedTestResult> metrics = readMetrics(writeResult, readResult);
        IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings);
    }
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) PipelineResult(org.apache.beam.sdk.PipelineResult) SyntheticOptions.fromJsonString(org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString) HashingFn(org.apache.beam.sdk.io.common.HashingFn) SyntheticBoundedSource(org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) Test(org.junit.Test)
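
cancelIfTimeouted is a small guard defined in KafkaIOIT. A sketch of what it could look like, relying on the documented behavior that waitUntilFinish(Duration) returns null when the timeout expires before the pipeline reaches a terminal state; the real helper may differ.

import java.io.IOException;
import org.apache.beam.sdk.PipelineResult;

// Sketch only; not necessarily the KafkaIOIT implementation.
private static void cancelIfTimeouted(PipelineResult readResult, PipelineResult.State readState)
    throws IOException {
  if (readState == null) {
    // The read pipeline did not finish within the configured read timeout; stop it explicitly.
    readResult.cancel();
  }
}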

Example 80 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class XmlIOIT method writeThenReadAll.

@Test
public void writeThenReadAll() {
    PCollection<String> testFileNames =
        pipeline
            .apply("Generate sequence", GenerateSequence.from(0).to(numberOfTextLines))
            .apply("Create xml records", MapElements.via(new LongToBird()))
            .apply("Gather write start time", ParDo.of(new TimeMonitor<>(XMLIOIT_NAMESPACE, "writeStart")))
            .apply(
                "Write xml files",
                FileIO.<Bird>write()
                    .via(XmlIO.sink(Bird.class).withRootElement("birds").withCharset(charset))
                    .to(filenamePrefix)
                    .withPrefix("birds")
                    .withSuffix(".xml"))
            .getPerDestinationOutputFilenames()
            .apply("Gather write end time", ParDo.of(new TimeMonitor<>(XMLIOIT_NAMESPACE, "writeEnd")))
            .apply("Get file names", Values.create());
    PCollection<Bird> birds =
        testFileNames
            .apply("Find files", FileIO.matchAll())
            .apply("Read matched files", FileIO.readMatches())
            .apply("Gather read start time", ParDo.of(new TimeMonitor<>(XMLIOIT_NAMESPACE, "readStart")))
            .apply(
                "Read xml files",
                XmlIO.<Bird>readFiles()
                    .withRecordClass(Bird.class)
                    .withRootElement("birds")
                    .withRecordElement("bird")
                    .withCharset(charset))
            .apply("Gather read end time", ParDo.of(new TimeMonitor<>(XMLIOIT_NAMESPACE, "readEnd")));
    PCollection<String> consolidatedHashcode =
        birds
            .apply("Map xml records to strings", MapElements.via(new BirdToString()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()));
    PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);
    testFileNames.apply(
        "Delete test files",
        ParDo.of(new FileBasedIOITHelper.DeleteFileFn())
            .withSideInputs(consolidatedHashcode.apply(View.asSingleton())));
    PipelineResult result = pipeline.run();
    result.waitUntilFinish();
    collectAndPublishResults(result);
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) FileBasedIOITHelper(org.apache.beam.sdk.io.common.FileBasedIOITHelper) PipelineResult(org.apache.beam.sdk.PipelineResult) HashingFn(org.apache.beam.sdk.io.common.HashingFn) Test(org.junit.Test)
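
LongToBird, BirdToString, and the Bird record class are defined in XmlIOIT and omitted here. A rough sketch of the shapes they would need so that XmlIO.sink(Bird.class) and XmlIO.readFiles() can (de)serialize the records via JAXB; field names and values are assumptions, not the test's code.

import javax.xml.bind.annotation.XmlRootElement;
import org.apache.beam.sdk.transforms.SimpleFunction;

// Hypothetical record class; JAXB needs a no-arg constructor, and the @XmlRootElement name
// matches the withRecordElement("bird") setting above.
@XmlRootElement(name = "bird")
static class Bird {
  public long id;
  public String name;
}

// Map a sequence number to a Bird record (used by the "Create xml records" step).
static class LongToBird extends SimpleFunction<Long, Bird> {
  @Override
  public Bird apply(Long input) {
    Bird bird = new Bird();
    bird.id = input;
    bird.name = "bird-" + input; // placeholder value
    return bird;
  }
}

// Render a Bird back to a stable string for hashing (used by "Map xml records to strings").
static class BirdToString extends SimpleFunction<Bird, String> {
  @Override
  public String apply(Bird input) {
    return input.id + "," + input.name;
  }
}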

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult): 105
Test (org.junit.Test): 66
Pipeline (org.apache.beam.sdk.Pipeline): 29
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18
PCollection (org.apache.beam.sdk.values.PCollection): 18
TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14
ArrayList (java.util.ArrayList): 12
Category (org.junit.experimental.categories.Category): 12
KV (org.apache.beam.sdk.values.KV): 11
Rule (org.junit.Rule): 11
IOException (java.io.IOException): 10
ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10
DoFn (org.apache.beam.sdk.transforms.DoFn): 10
HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9
RunWith (org.junit.runner.RunWith): 9
MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8
ParDo (org.apache.beam.sdk.transforms.ParDo): 8
Duration (org.joda.time.Duration): 8
Map (java.util.Map): 7
TableReference (com.google.api.services.bigquery.model.TableReference): 6