Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class KafkaIOTest, method testUnboundedSourceMetrics.
@Test
public void testUnboundedSourceMetrics() {
  int numElements = 1000;
  String readStep = "readFromKafka";

  p.apply(
      readStep,
      mkKafkaReadTransform(numElements, new ValueAsTimestampFn())
          .withConsumerConfigUpdates(
              ImmutableMap.of(ConsumerConfig.GROUP_ID_CONFIG, "test.group"))
          .commitOffsetsInFinalize()
          .withoutMetadata());

  PipelineResult result = p.run();

  String splitId = "0";
  MetricName elementsRead = SourceMetrics.elementsRead().getName();
  MetricName elementsReadBySplit = SourceMetrics.elementsReadBySplit(splitId).getName();
  MetricName bytesRead = SourceMetrics.bytesRead().getName();
  MetricName bytesReadBySplit = SourceMetrics.bytesReadBySplit(splitId).getName();
  MetricName backlogElementsOfSplit = SourceMetrics.backlogElementsOfSplit(splitId).getName();
  MetricName backlogBytesOfSplit = SourceMetrics.backlogBytesOfSplit(splitId).getName();

  MetricQueryResults metrics = result.metrics().allMetrics();
  Iterable<MetricResult<Long>> counters = metrics.getCounters();

  // 12000 bytes = 1000 records x 12 bytes each (a 4-byte int key plus an 8-byte long value).
  assertThat(counters, hasItem(attemptedMetricsResult(
      elementsRead.getNamespace(), elementsRead.getName(), readStep, 1000L)));
  assertThat(counters, hasItem(attemptedMetricsResult(
      elementsReadBySplit.getNamespace(), elementsReadBySplit.getName(), readStep, 1000L)));
  assertThat(counters, hasItem(attemptedMetricsResult(
      bytesRead.getNamespace(), bytesRead.getName(), readStep, 12000L)));
  assertThat(counters, hasItem(attemptedMetricsResult(
      bytesReadBySplit.getNamespace(), bytesReadBySplit.getName(), readStep, 12000L)));

  // Since gauge values may be inconsistent in some environments, assert only on their existence.
  MetricQueryResults backlogElementsMetrics = result.metrics().queryMetrics(
      MetricsFilter.builder()
          .addNameFilter(MetricNameFilter.named(
              backlogElementsOfSplit.getNamespace(), backlogElementsOfSplit.getName()))
          .build());
  assertThat(backlogElementsMetrics.getGauges(), IsIterableWithSize.iterableWithSize(1));

  MetricQueryResults backlogBytesMetrics = result.metrics().queryMetrics(
      MetricsFilter.builder()
          .addNameFilter(MetricNameFilter.named(
              backlogBytesOfSplit.getNamespace(), backlogBytesOfSplit.getName()))
          .build());
  assertThat(backlogBytesMetrics.getGauges(), IsIterableWithSize.iterableWithSize(1));

  // Check the checkpointMarkCommitsEnqueued metric.
  MetricQueryResults commitsEnqueuedMetrics = result.metrics().queryMetrics(
      MetricsFilter.builder()
          .addNameFilter(MetricNameFilter.named(
              KafkaUnboundedReader.METRIC_NAMESPACE,
              KafkaUnboundedReader.CHECKPOINT_MARK_COMMITS_ENQUEUED_METRIC))
          .build());
  assertThat(commitsEnqueuedMetrics.getCounters(), IsIterableWithSize.iterableWithSize(1));
  assertThat(commitsEnqueuedMetrics.getCounters().iterator().next().getAttempted(), greaterThan(0L));
}
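For anyone adapting this test, the underlying pattern is: run the pipeline, filter MetricResults by namespace and name, and read the attempted value. A minimal sketch with placeholder metric names (myNamespace and myCounter are illustrative, not from the test above):

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;

// Minimal sketch: query one attempted counter from a finished pipeline.
// "myNamespace" and "myCounter" are placeholder names for illustration.
static long attemptedCounterValue(PipelineResult result) {
  MetricQueryResults query =
      result.metrics().queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.named("myNamespace", "myCounter"))
              .build());
  long total = 0;
  for (MetricResult<Long> counter : query.getCounters()) {
    // getAttempted() is available on all runners; getCommitted() may throw
    // UnsupportedOperationException on runners that do not track committed metrics.
    total += counter.getAttempted();
  }
  return total;
}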
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class KafkaIOTest, method testSinkMetrics.
@Test
public void testSinkMetrics() throws Exception {
  // Simply read from a Kafka source and write to a Kafka sink, then verify
  // that the sink metrics are reported.
  int numElements = 1000;
  try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) {
    ProducerSendCompletionThread completionThread =
        new ProducerSendCompletionThread(producerWrapper.mockProducer).start();
    String topic = "test";

    p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
        .apply(
            "writeToKafka",
            KafkaIO.<Integer, Long>write()
                .withBootstrapServers("none")
                .withTopic(topic)
                .withKeySerializer(IntegerSerializer.class)
                .withValueSerializer(LongSerializer.class)
                .withProducerFactoryFn(new ProducerFactoryFn(producerWrapper.producerKey)));

    PipelineResult result = p.run();

    MetricName elementsWritten = SinkMetrics.elementsWritten().getName();
    MetricQueryResults metrics = result.metrics().queryMetrics(
        MetricsFilter.builder()
            .addNameFilter(MetricNameFilter.inNamespace(elementsWritten.getNamespace()))
            .build());
    assertThat(metrics.getCounters(), hasItem(attemptedMetricsResult(
        elementsWritten.getNamespace(), elementsWritten.getName(), "writeToKafka", 1000L)));

    completionThread.shutdown();
  }
}
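MockProducerWrapper, ProducerSendCompletionThread, and ProducerFactoryFn are test helpers defined elsewhere in KafkaIOTest. For orientation, a factory passed to withProducerFactoryFn is just a SerializableFunction from the producer config map to a Producer; a hypothetical sketch backed by Kafka's MockProducer (an assumption about the shape of such a helper, not a copy of the real one):

import java.util.Map;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.kafka.clients.producer.MockProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.serialization.IntegerSerializer;
import org.apache.kafka.common.serialization.LongSerializer;

// Hypothetical sketch: KafkaIO calls the factory with the producer config
// map and writes through whatever Producer it returns.
static class MockProducerFactory
    implements SerializableFunction<Map<String, Object>, Producer<Integer, Long>> {
  @Override
  public Producer<Integer, Long> apply(Map<String, Object> config) {
    // autoComplete=false lets the test complete sends on its own schedule,
    // which is what ProducerSendCompletionThread does in the test above.
    return new MockProducer<>(false, new IntegerSerializer(), new LongSerializer());
  }
}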
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class KafkaIOIT, method readMetrics.
private Set<NamedTestResult> readMetrics(PipelineResult writeResult, PipelineResult readResult) {
  BiFunction<MetricsReader, String, NamedTestResult> supplier =
      (reader, metricName) -> {
        long start = reader.getStartTimeMetric(metricName);
        long end = reader.getEndTimeMetric(metricName);
        // Timestamps are in milliseconds; report the elapsed time in seconds.
        return NamedTestResult.create(TEST_ID, TIMESTAMP, metricName, (end - start) / 1e3);
      };
  NamedTestResult writeTime =
      supplier.apply(new MetricsReader(writeResult, NAMESPACE), WRITE_TIME_METRIC_NAME);
  NamedTestResult readTime =
      supplier.apply(new MetricsReader(readResult, NAMESPACE), READ_TIME_METRIC_NAME);
  NamedTestResult runTime = NamedTestResult.create(
      TEST_ID, TIMESTAMP, RUN_TIME_METRIC_NAME, writeTime.getValue() + readTime.getValue());
  return ImmutableSet.of(readTime, writeTime, runTime);
}
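The start and end values come from the "Measure write time" and "Measure read time" TimeMonitor steps in the streaming test below, which record wall-clock timestamps in milliseconds, so dividing by 1e3 reports each phase in seconds, and the run time is simply their sum. A small illustrative fragment (the timestamps are made up):

// Illustrative only: a phase that starts at t = 1_000 ms and ends at
// t = 61_000 ms is reported as 60.0 seconds.
long start = 1_000L;
long end = 61_000L;
NamedTestResult example =
    NamedTestResult.create(TEST_ID, TIMESTAMP, "example_time", (end - start) / 1e3);
// example.getValue() == 60.0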
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class KafkaIOIT, method testKafkaIOReadsAndWritesCorrectlyInStreaming.
@Test
public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException {
  // Use a batch pipeline to write the records.
  writePipeline
      .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME)))
      .apply("Write to Kafka", writeToKafka());

  // Use a streaming pipeline to read the Kafka records.
  readPipeline.getOptions().as(Options.class).setStreaming(true);
  readPipeline
      .apply("Read from unbounded Kafka", readFromKafka())
      .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME)))
      .apply("Map records to strings", MapElements.via(new MapKafkaRecordsToStrings()))
      .apply("Counting element", ParDo.of(new CountingFn(NAMESPACE, READ_ELEMENT_METRIC_NAME)));

  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  PipelineResult readResult = readPipeline.run();
  PipelineResult.State readState =
      readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout()));

  // Cancel the streaming read if it did not finish in time (see the sketch after this test).
  cancelIfTimeouted(readResult, readState);

  assertEquals(
      sourceOptions.numRecords,
      readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME));

  if (!options.isWithTestcontainers()) {
    Set<NamedTestResult> metrics = readMetrics(writeResult, readResult);
    IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings);
  }
}
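cancelIfTimeouted is a helper defined elsewhere in KafkaIOIT. A minimal sketch of such a helper, assuming only the documented behavior of waitUntilFinish(Duration), which returns null when the timeout elapses before the pipeline reaches a terminal state:

import java.io.IOException;
import org.apache.beam.sdk.PipelineResult;

// Sketch (assumed implementation): cancel the streaming job if it did not
// reach a terminal state before the read timeout, so the test does not hang.
private void cancelIfTimeouted(PipelineResult readResult, PipelineResult.State readState)
    throws IOException {
  // waitUntilFinish(duration) returns null if the pipeline is still running.
  if (readState == null) {
    readResult.cancel();
  }
}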
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class PubsubReadIT, method testReadPubsubMessageId.
@Test
public void testReadPubsubMessageId() throws Exception {
  // The pipeline will never terminate on its own.
  pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);

  PCollection<PubsubMessage> messages =
      pipeline.apply(
          PubsubIO.readMessagesWithAttributesAndMessageId()
              .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));
  messages.apply(
      "isMessageIdNonNull",
      signal.signalSuccessWhen(messages.getCoder(), new NonEmptyMessageIdCheck()));

  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();

  signal.waitForSuccess(Duration.standardMinutes(5));

  // A runner may not support cancel.
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
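NonEmptyMessageIdCheck is defined elsewhere in PubsubReadIT. TestPubsubSignal.signalSuccessWhen expects a SerializableFunction over the set of elements observed so far; a plausible sketch of such a check (an assumption about the real implementation, not a copy of it):

import java.util.Set;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.transforms.SerializableFunction;

// Hypothetical sketch of the success predicate: succeed once at least one
// message has arrived and every observed message carries a non-empty messageId.
private static class NonEmptyMessageIdCheck
    implements SerializableFunction<Set<PubsubMessage>, Boolean> {
  @Override
  public Boolean apply(Set<PubsubMessage> messages) {
    if (messages.isEmpty()) {
      return false;
    }
    for (PubsubMessage message : messages) {
      if (message.getMessageId() == null || message.getMessageId().isEmpty()) {
        return false;
      }
    }
    return true;
  }
}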