Search in sources:

Example 26 with PipelineResult

Usage of org.apache.beam.sdk.PipelineResult in the beam project by apache.

From the class KafkaIOTest, method testUnboundedSourceMetrics:

/**
 * Applies a Kafka read step named {@code readFromKafka}, runs the pipeline and checks the source
 * metrics exposed by the returned {@link PipelineResult}: the attempted element and byte counters
 * (overall and for split {@code "0"}), the presence of the two per-split backlog gauges, and the
 * checkpoint-mark "commits enqueued" counter.
 */
@Test
public void testUnboundedSourceMetrics() {
    final int numElements = 1000;
    final String readStep = "readFromKafka";
    final String splitId = "0";
    // Expected attempted counter values asserted below.
    final long expectedElements = 1000L;
    final long expectedBytes = 12000L;

    p.apply(
        readStep,
        mkKafkaReadTransform(numElements, new ValueAsTimestampFn())
            .withConsumerConfigUpdates(ImmutableMap.of(ConsumerConfig.GROUP_ID_CONFIG, "test.group"))
            .commitOffsetsInFinalize()
            .withoutMetadata());
    PipelineResult pipelineResult = p.run();

    // Names of the metrics that the read step is expected to report.
    MetricName readCount = SourceMetrics.elementsRead().getName();
    MetricName readCountForSplit = SourceMetrics.elementsReadBySplit(splitId).getName();
    MetricName readBytes = SourceMetrics.bytesRead().getName();
    MetricName readBytesForSplit = SourceMetrics.bytesReadBySplit(splitId).getName();
    MetricName backlogCountForSplit = SourceMetrics.backlogElementsOfSplit(splitId).getName();
    MetricName backlogBytesForSplit = SourceMetrics.backlogBytesOfSplit(splitId).getName();

    // Counters: every one of them must appear with the expected attempted value.
    Iterable<MetricResult<Long>> attemptedCounters =
        pipelineResult.metrics().allMetrics().getCounters();
    assertThat(
        attemptedCounters,
        hasItem(
            attemptedMetricsResult(
                readCount.getNamespace(), readCount.getName(), readStep, expectedElements)));
    assertThat(
        attemptedCounters,
        hasItem(
            attemptedMetricsResult(
                readCountForSplit.getNamespace(),
                readCountForSplit.getName(),
                readStep,
                expectedElements)));
    assertThat(
        attemptedCounters,
        hasItem(
            attemptedMetricsResult(
                readBytes.getNamespace(), readBytes.getName(), readStep, expectedBytes)));
    assertThat(
        attemptedCounters,
        hasItem(
            attemptedMetricsResult(
                readBytesForSplit.getNamespace(),
                readBytesForSplit.getName(),
                readStep,
                expectedBytes)));

    // Gauges: their values may be inconsistent in some environments, so only existence is asserted.
    MetricsFilter backlogCountFilter =
        MetricsFilter.builder()
            .addNameFilter(
                MetricNameFilter.named(
                    backlogCountForSplit.getNamespace(), backlogCountForSplit.getName()))
            .build();
    MetricQueryResults backlogCountResults = pipelineResult.metrics().queryMetrics(backlogCountFilter);
    assertThat(backlogCountResults.getGauges(), IsIterableWithSize.iterableWithSize(1));

    MetricsFilter backlogBytesFilter =
        MetricsFilter.builder()
            .addNameFilter(
                MetricNameFilter.named(
                    backlogBytesForSplit.getNamespace(), backlogBytesForSplit.getName()))
            .build();
    MetricQueryResults backlogBytesResults = pipelineResult.metrics().queryMetrics(backlogBytesFilter);
    assertThat(backlogBytesResults.getGauges(), IsIterableWithSize.iterableWithSize(1));

    // checkpointMarkCommitsEnqueued: exactly one counter, with a positive attempted value.
    MetricsFilter commitsEnqueuedFilter =
        MetricsFilter.builder()
            .addNameFilter(
                MetricNameFilter.named(
                    KafkaUnboundedReader.METRIC_NAMESPACE,
                    KafkaUnboundedReader.CHECKPOINT_MARK_COMMITS_ENQUEUED_METRIC))
            .build();
    MetricQueryResults commitsEnqueued = pipelineResult.metrics().queryMetrics(commitsEnqueuedFilter);
    assertThat(commitsEnqueued.getCounters(), IsIterableWithSize.iterableWithSize(1));
    assertThat(
        commitsEnqueued.getCounters().iterator().next().getAttempted(), greaterThan(0L));
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) PipelineResult(org.apache.beam.sdk.PipelineResult) Matchers.containsString(org.hamcrest.Matchers.containsString) MetricResult(org.apache.beam.sdk.metrics.MetricResult) Test(org.junit.Test)

Example 27 with PipelineResult

Usage of org.apache.beam.sdk.PipelineResult in the beam project by apache.

From the class KafkaIOTest, method testSinkMetrics:

/**
 * Reads from the Kafka source, writes the records to a Kafka sink backed by a mock producer, and
 * verifies that the sink's "elements written" counter is reported for the {@code writeToKafka}
 * step.
 *
 * @throws Exception if the pipeline run, the producer wrapper or the completion thread fails
 */
@Test
public void testSinkMetrics() throws Exception {
    // Simply read from kafka source and write to kafka sink. Then verify the metrics are reported.
    int numElements = 1000;
    try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) {
        ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start();
        try {
            String topic = "test";
            p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
                .apply(
                    "writeToKafka",
                    KafkaIO.<Integer, Long>write()
                        .withBootstrapServers("none")
                        .withTopic(topic)
                        .withKeySerializer(IntegerSerializer.class)
                        .withValueSerializer(LongSerializer.class)
                        .withProducerFactoryFn(new ProducerFactoryFn(producerWrapper.producerKey)));
            PipelineResult result = p.run();
            MetricName elementsWritten = SinkMetrics.elementsWritten().getName();
            // Query only the namespace of the sink metric, then look for the attempted counter.
            MetricQueryResults metrics =
                result
                    .metrics()
                    .queryMetrics(
                        MetricsFilter.builder()
                            .addNameFilter(MetricNameFilter.inNamespace(elementsWritten.getNamespace()))
                            .build());
            assertThat(
                metrics.getCounters(),
                hasItem(
                    attemptedMetricsResult(
                        elementsWritten.getNamespace(),
                        elementsWritten.getName(),
                        "writeToKafka",
                        1000L)));
        } finally {
            // Always stop the completion thread, even when the pipeline run or the assertion above
            // throws; otherwise the started thread would be left running after a failed test.
            completionThread.shutdown();
        }
    }
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) PipelineResult(org.apache.beam.sdk.PipelineResult) Matchers.containsString(org.hamcrest.Matchers.containsString) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) Test(org.junit.Test)

Example 28 with PipelineResult

Usage of org.apache.beam.sdk.PipelineResult in the beam project by apache.

From the class KafkaIOIT, method readMetrics:

/**
 * Builds the timing results for a write pipeline and a read pipeline.
 *
 * <p>For each pipeline the start and end time metrics are read from {@code NAMESPACE} and their
 * difference, divided by 1e3, becomes the value of the named result. A third result carries the
 * sum of the write and read values under {@code RUN_TIME_METRIC_NAME}.
 *
 * @param writeResult result of the pipeline whose write time is measured
 * @param readResult result of the pipeline whose read time is measured
 * @return an immutable set holding the read, write and run time results
 */
private Set<NamedTestResult> readMetrics(PipelineResult writeResult, PipelineResult readResult) {
    // Turns the (end - start) span of one metric into a named result; 1e3 makes the value a double.
    BiFunction<MetricsReader, String, NamedTestResult> toElapsedResult =
        (metricsReader, name) -> {
            long begin = metricsReader.getStartTimeMetric(name);
            long finish = metricsReader.getEndTimeMetric(name);
            double elapsed = (finish - begin) / 1e3;
            return NamedTestResult.create(TEST_ID, TIMESTAMP, name, elapsed);
        };

    MetricsReader writeMetricsReader = new MetricsReader(writeResult, NAMESPACE);
    NamedTestResult writeElapsed = toElapsedResult.apply(writeMetricsReader, WRITE_TIME_METRIC_NAME);

    MetricsReader readMetricsReader = new MetricsReader(readResult, NAMESPACE);
    NamedTestResult readElapsed = toElapsedResult.apply(readMetricsReader, READ_TIME_METRIC_NAME);

    NamedTestResult totalElapsed =
        NamedTestResult.create(
            TEST_ID,
            TIMESTAMP,
            RUN_TIME_METRIC_NAME,
            writeElapsed.getValue() + readElapsed.getValue());

    return ImmutableSet.of(readElapsed, writeElapsed, totalElapsed);
}
Also used : Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) DockerImageName(org.testcontainers.utility.DockerImageName) BiFunction(java.util.function.BiFunction) PipelineResult(org.apache.beam.sdk.PipelineResult) Default(org.apache.beam.sdk.options.Default) MetricsReader(org.apache.beam.sdk.testutils.metrics.MetricsReader) Combine(org.apache.beam.sdk.transforms.Combine) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Timestamp(com.google.cloud.Timestamp) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) Metrics(org.apache.beam.sdk.metrics.Metrics) Description(org.apache.beam.sdk.options.Description) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) Read(org.apache.beam.sdk.io.Read) IOITHelper(org.apache.beam.sdk.io.common.IOITHelper) ByteArraySerializer(org.apache.kafka.common.serialization.ByteArraySerializer) InfluxDBSettings(org.apache.beam.sdk.testutils.publishing.InfluxDBSettings) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) Nullable(org.checkerframework.checker.nullness.qual.Nullable) DoFn(org.apache.beam.sdk.transforms.DoFn) MapElements(org.apache.beam.sdk.transforms.MapElements) KafkaContainer(org.testcontainers.containers.KafkaContainer) HashingFn(org.apache.beam.sdk.io.common.HashingFn) AfterClass(org.junit.AfterClass) PAssert(org.apache.beam.sdk.testing.PAssert) Counter(org.apache.beam.sdk.metrics.Counter) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) Set(java.util.Set) IOException(java.io.IOException) SyntheticSourceOptions(org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions) Test(org.junit.Test) UUID(java.util.UUID) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) 
SyntheticBoundedSource(org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) IOITMetrics(org.apache.beam.sdk.testutils.metrics.IOITMetrics) Rule(org.junit.Rule) SyntheticOptions.fromJsonString(org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString) ParDo(org.apache.beam.sdk.transforms.ParDo) Validation(org.apache.beam.sdk.options.Validation) Assert.assertEquals(org.junit.Assert.assertEquals) IOTestPipelineOptions(org.apache.beam.sdk.io.common.IOTestPipelineOptions) MetricsReader(org.apache.beam.sdk.testutils.metrics.MetricsReader) NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) SyntheticOptions.fromJsonString(org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString)

Example 29 with PipelineResult

Usage of org.apache.beam.sdk.PipelineResult in the beam project by apache.

From the class KafkaIOIT, method testKafkaIOReadsAndWritesCorrectlyInStreaming:

/**
 * Writes synthetic records to Kafka with one pipeline, reads them back with a second pipeline
 * switched to streaming mode, and asserts that the number of elements counted on the read side
 * equals {@code sourceOptions.numRecords}. Unless the test runs with Testcontainers, the timing
 * metrics of both pipelines are published to InfluxDB afterwards.
 *
 * @throws IOException declared by the test; the visible body does not show which call raises it
 */
@Test
public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException {
    // Use batch pipeline to write records.
    writePipeline
        .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
        .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
        .apply("Write to Kafka", writeToKafka());

    // Use streaming pipeline to read Kafka records.
    readPipeline.getOptions().as(Options.class).setStreaming(true);
    readPipeline
        .apply("Read from unbounded Kafka", readFromKafka())
        .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
        .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
        .apply("Counting element", ParDo.of(new CountingFn(NAMESPACE, READ_ELEMENT_METRIC_NAME)));

    // The write pipeline must finish before the read pipeline is started.
    PipelineResult writeOutcome = writePipeline.run();
    writeOutcome.waitUntilFinish();

    // Wait for the read pipeline for at most the configured timeout, then let the helper decide
    // whether it has to be cancelled.
    PipelineResult readOutcome = readPipeline.run();
    PipelineResult.State stateAfterWait =
        readOutcome.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));
    cancelIfTimeouted(readOutcome, stateAfterWait);

    assertEquals(
        sourceOptions.numRecords,
        readElementMetric(readOutcome, NAMESPACE, READ_ELEMENT_METRIC_NAME));

    // Metrics are not published for Testcontainers runs.
    if (options.isWithTestcontainers()) {
        return;
    }
    Set<NamedTestResult> timingResults = readMetrics(writeOutcome, readOutcome);
    IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, timingResults, settings);
}
Also used : SyntheticBoundedSource(org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource) TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) SyntheticSourceOptions(org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions) IOTestPipelineOptions(org.apache.beam.sdk.io.common.IOTestPipelineOptions) NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 30 with PipelineResult

Usage of org.apache.beam.sdk.PipelineResult in the beam project by apache.

From the class PubsubReadIT, method testReadPubsubMessageId:

/**
 * Reads Pub/Sub messages (with attributes and message id) from a public topic in a non-blocking
 * pipeline run, waits up to five minutes each for the start signal and for the success signal
 * driven by {@code NonEmptyMessageIdCheck}, and finally attempts to cancel the job.
 *
 * @throws Exception if running the pipeline or waiting on the signals fails
 */
@Test
public void testReadPubsubMessageId() throws Exception {
    final String topic = "projects/pubsub-public-data/topics/taxirides-realtime";

    // The pipeline will never terminate on its own, so run() must not block.
    pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);

    PCollection<PubsubMessage> received =
        pipeline.apply(PubsubIO.readMessagesWithAttributesAndMessageId().fromTopic(topic));
    received.apply(
        "isMessageIdNonNull",
        signal.signalSuccessWhen(received.getCoder(), new NonEmptyMessageIdCheck()));

    Supplier<Void> startSignal = signal.waitForStart(Duration.standardMinutes(5));
    pipeline.apply(signal.signalStart());

    PipelineResult runningJob = pipeline.run();
    startSignal.get();
    signal.waitForSuccess(Duration.standardMinutes(5));

    try {
        runningJob.cancel();
    } catch (UnsupportedOperationException ignored) {
        // A runner may not support cancel; nothing else to do in that case.
    }
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) Test(org.junit.Test)

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult)105 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)29 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)18 PCollection (org.apache.beam.sdk.values.PCollection)18 TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor)14 ArrayList (java.util.ArrayList)12 Category (org.junit.experimental.categories.Category)12 KV (org.apache.beam.sdk.values.KV)11 Rule (org.junit.Rule)11 IOException (java.io.IOException)10 ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 DoFn (org.apache.beam.sdk.transforms.DoFn)10 HashingFn (org.apache.beam.sdk.io.common.HashingFn)9 RunWith (org.junit.runner.RunWith)9 MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults)8 ParDo (org.apache.beam.sdk.transforms.ParDo)8 Duration (org.joda.time.Duration)8 Map (java.util.Map)7 TableReference (com.google.api.services.bigquery.model.TableReference)6