Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class PubsubReadIT, the method testReadPublicData:
@Test
public void testReadPublicData() throws Exception {
  // The pipeline will never terminate on its own.
  pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);
  PCollection<String> messages =
      pipeline.apply(
          PubsubIO.readStrings()
              .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));
  messages.apply(
      "waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true));
  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();
  signal.waitForSuccess(Duration.standardMinutes(5));
  // A runner may not support cancel.
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
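Cancellation is optional for runners, which is why the test wraps cancel() in a try/catch. A minimal sketch of the same guard factored into a helper (the name cancelIfRunning is illustrative, not part of the Beam test), which additionally skips the call when the job has already reached a terminal state:

import java.io.IOException;
import org.apache.beam.sdk.PipelineResult;

// Illustrative helper: cancel a streaming job only if it is still running.
static void cancelIfRunning(PipelineResult job) throws IOException {
  if (!job.getState().isTerminal()) {
    try {
      job.cancel();
    } catch (UnsupportedOperationException exc) {
      // Some runners do not support cancellation; treat it as a no-op, as the test above does.
    }
  }
}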
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class MongoDBIOIT, the method testWriteAndRead:
@Test
public void testWriteAndRead() {
  initialCollectionSize = getCollectionSizeInBytes(collection);
  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply("Produce documents", MapElements.via(new LongToDocumentFn()))
      .apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply("Write documents to MongoDB",
          MongoDbIO.write()
              .withUri(mongoUrl)
              .withDatabase(options.getMongoDBDatabaseName())
              .withCollection(collection));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();
  finalCollectionSize = getCollectionSizeInBytes(collection);
  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply("Read all documents",
              MongoDbIO.read()
                  .withUri(mongoUrl)
                  .withDatabase(options.getMongoDBDatabaseName())
                  .withCollection(collection))
          .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));
  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);
  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
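collectAndPublishMetrics is not shown in this snippet. As an illustration of what can be read back from the two PipelineResults, the sketch below queries the "write_time" distribution recorded by the TimeMonitor above and derives the elapsed write time. The helper name writeTimeMillis is illustrative, and it assumes TimeMonitor records wall-clock timestamps into that distribution; neither detail is shown in the test itself:

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.DistributionResult;
import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;

// Illustrative helper: elapsed milliseconds between the first and last "write_time" samples.
static long writeTimeMillis(PipelineResult writeResult, String namespace) {
  MetricQueryResults metrics =
      writeResult.metrics().queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.named(namespace, "write_time"))
              .build());
  long elapsed = 0;
  for (MetricResult<DistributionResult> result : metrics.getDistributions()) {
    // getAttempted() is always available; getCommitted() may throw on runners that do not
    // support committed metrics.
    DistributionResult distribution = result.getAttempted();
    elapsed = Math.max(elapsed, distribution.getMax() - distribution.getMin());
  }
  return elapsed;
}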
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class StreamingSourceMetricsTest, the method testUnboundedSourceMetrics:
@Test
@Category(StreamingTest.class)
public void testUnboundedSourceMetrics() {
  final long minElements = 1000;
  // Use a GenerateSequence for the UnboundedSequence, but push the watermark to infinity at
  // minElements to let the test pipeline cleanly shut it down. Shutdown will occur shortly
  // afterwards, but at least minElements will be reported in the metrics.
  PCollection<Long> pc =
      pipeline.apply(
          GenerateSequence.from(1)
              .withRate(minElements / 10, Duration.millis(500L))
              .withTimestampFn(
                  t -> t < minElements ? Instant.now() : BoundedWindow.TIMESTAMP_MAX_VALUE));
  assertThat(pc.isBounded(), is(PCollection.IsBounded.UNBOUNDED));
  PipelineResult pipelineResult = pipeline.run();
  MetricQueryResults metrics =
      pipelineResult.metrics().queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(
                  MetricNameFilter.named(ELEMENTS_READ.getNamespace(), ELEMENTS_READ.getName()))
              .build());
  assertThat(
      metrics.getCounters(),
      hasItem(metricsResult(
          ELEMENTS_READ.getNamespace(),
          ELEMENTS_READ.getName(),
          "GenerateSequence/Read(UnboundedCountingSource)",
          greaterThanOrEqualTo(minElements),
          false)));
}
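The trailing false passed to metricsResult selects the attempted rather than the committed value, since committed metrics are not supported by every runner. Reusing metrics, ELEMENTS_READ and minElements from the test above, an equivalent direct read of the attempted counter value, without the Hamcrest matcher, might look like this (illustrative only; MetricResult comes from org.apache.beam.sdk.metrics):

for (MetricResult<Long> counter : metrics.getCounters()) {
  if (counter.getName().getName().equals(ELEMENTS_READ.getName())) {
    // getAttempted() returns the counter value as reported, even if the runner cannot commit it.
    assertThat(counter.getAttempted(), greaterThanOrEqualTo(minElements));
  }
}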
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class V1WriteIT, the method testDatastoreWriterFnWithDuplicatedEntities:
/**
 * Tests {@link DatastoreV1.DatastoreWriterFn} with duplicated entries. Once a duplicated entry
 * is found, the current batch gets flushed.
 */
@Test
public void testDatastoreWriterFnWithDuplicatedEntities() throws Exception {
  List<Mutation> mutations = new ArrayList<>(200);
  V1TestOptions options = TestPipeline.testingPipelineOptions().as(V1TestOptions.class);
  Pipeline pipeline = TestPipeline.create(options);
  for (int i = 1; i <= 200; i++) {
    Key key = makeKey("key" + i, i + 1).build();
    mutations.add(makeUpsert(Entity.newBuilder().setKey(key).build()).build());
    if (i % 30 == 0) {
      // Every 30th key is added a second time, creating a duplicate in the batch.
      mutations.add(makeUpsert(Entity.newBuilder().setKey(key).build()).build());
    }
  }
  DatastoreV1.DatastoreWriterFn datastoreWriter =
      new DatastoreV1.DatastoreWriterFn(
          TestPipeline.testingPipelineOptions().as(GcpOptions.class).getProject(), null);
  PTransform<PCollection<? extends Mutation>, PCollection<Void>> datastoreWriterTransform =
      ParDo.of(datastoreWriter);
  // The following three lines turn the original ArrayList into a single element of the first
  // PCollection.
  List<Mutation> newArrayList = new ArrayList<>(mutations);
  Create.Values<Iterable<Mutation>> mutationIterable =
      Create.of(Collections.singleton(newArrayList));
  PCollection<Iterable<Mutation>> input = pipeline.apply(mutationIterable);
  // Flatten splits the iterable into individual elements within the same bundle, which forces
  // the mutations to be processed in the same order in which they were added to the original
  // list.
  input.apply(Flatten.<Mutation>iterables()).apply(datastoreWriterTransform);
  PipelineResult pResult = pipeline.run();
  MetricQueryResults metricResults =
      pResult.metrics().queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.named(DatastoreV1.DatastoreWriterFn.class, "batchSize"))
              .build());
  AtomicLong timesCommitted = new AtomicLong();
  metricResults.getDistributions().forEach(distribution -> {
    if (distribution.getName().getName().equals("batchSize")) {
      timesCommitted.set(distribution.getCommitted().getCount());
    }
  });
  // Six duplicated keys (i = 30, 60, ..., 180) each force a flush, plus one final flush for the
  // remaining mutations: seven commits in total.
  assertEquals(7, timesCommitted.get());
}
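Below is a purely illustrative simulation of the flush-on-duplicate behaviour described in the javadoc, which is where the expected count of seven commits comes from. It is not the real DatastoreWriterFn, which also flushes when a batch grows to its size limit; that never happens here because the duplicates flush the batch roughly every 30 mutations:

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.google.datastore.v1.Mutation;

// Counts the commits made by a writer that flushes whenever it sees a key already in the batch.
static int expectedCommits(List<Mutation> mutations) {
  Set<String> keysInBatch = new HashSet<>();
  int commits = 0;
  for (Mutation mutation : mutations) {
    String key = mutation.getUpsert().getKey().toString();
    if (!keysInBatch.add(key)) {
      // Duplicate within the current batch: flush it and start a new batch with this mutation.
      commits++;
      keysInBatch.clear();
      keysInBatch.add(key);
    }
  }
  // Flush whatever is left at the end.
  return keysInBatch.isEmpty() ? commits : commits + 1;
}

Applied to the mutation list built above, this returns 7, matching the assertion.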
Use of org.apache.beam.sdk.PipelineResult in project beam by apache.
From the class LoadTest, the method run:
/**
 * Runs the load test, then collects the results and publishes them to the configured data stores
 * and/or the console.
 */
public PipelineResult run() throws IOException {
  final Timestamp timestamp = Timestamp.now();
  loadTest();
  final PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish(Duration.standardMinutes(options.getLoadTestTimeout()));
  final String testId = UUID.randomUUID().toString();
  final List<NamedTestResult> metrics = readMetrics(timestamp, pipelineResult, testId);
  ConsoleResultPublisher.publish(metrics, testId, timestamp.toString());
  handleFailure(pipelineResult, metrics);
  if (options.getPublishToInfluxDB()) {
    InfluxDBPublisher.publishWithSettings(metrics, settings);
  }
  return pipelineResult;
}
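handleFailure and readMetrics are not shown in this snippet. For handleFailure, a plausible sketch (an assumption, not the actual Beam implementation) is to fail the load test whenever the PipelineResult ends in a non-successful terminal state:

import java.util.List;
import org.apache.beam.sdk.PipelineResult;

// Hypothetical check: abort if the pipeline failed or was cancelled.
private void handleFailure(PipelineResult pipelineResult, List<NamedTestResult> testResults) {
  PipelineResult.State state = pipelineResult.getState();
  if (state == PipelineResult.State.FAILED || state == PipelineResult.State.CANCELLED) {
    throw new RuntimeException("Load test finished in state " + state);
  }
}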