Example 31 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From class PubsubReadIT, method testReadPublicData.

@Test
public void testReadPublicData() throws Exception {
    // The pipeline will never terminate on its own
    pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);
    PCollection<String> messages =
        pipeline.apply(
            PubsubIO.readStrings()
                .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));
    messages.apply("waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true));
    Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
    pipeline.apply(signal.signalStart());
    PipelineResult job = pipeline.run();
    start.get();
    signal.waitForSuccess(Duration.standardMinutes(5));
    // A runner may not support cancel
    try {
        job.cancel();
    } catch (UnsupportedOperationException exc) {
        // noop
    }
}
Also used : TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Set(java.util.Set) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Supplier(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier) Rule(org.junit.Rule) Strings(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings) TestPipeline(org.apache.beam.sdk.testing.TestPipeline)
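
The try/catch around cancel() is a reusable idiom for streaming tests: not every runner implements cancellation, so the call is guarded. A minimal sketch of that pattern on its own, assuming only a pipeline built elsewhere (the runAndCancel helper is hypothetical; PipelineResult.cancel() is declared to throw IOException):

import java.io.IOException;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;

public class CancelGuardSketch {
    static void runAndCancel(Pipeline pipeline) throws IOException {
        // Run without blocking; a streaming job never terminates on its own.
        PipelineResult job = pipeline.run();
        // ... wait until the test's success condition has been observed ...
        try {
            job.cancel();
        } catch (UnsupportedOperationException exc) {
            // Some runners do not implement cancel(); the job must then be
            // shut down by other means, or left to time out.
        }
    }
}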

Example 32 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From class MongoDBIOIT, method testWriteAndRead.

@Test
public void testWriteAndRead() {
    initialCollectionSize = getCollectionSizeInBytes(collection);
    writePipeline.apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords())).apply("Produce documents", MapElements.via(new LongToDocumentFn())).apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time"))).apply("Write documents to MongoDB", MongoDbIO.write().withUri(mongoUrl).withDatabase(options.getMongoDBDatabaseName()).withCollection(collection));
    PipelineResult writeResult = writePipeline.run();
    writeResult.waitUntilFinish();
    finalCollectionSize = getCollectionSizeInBytes(collection);
    PCollection<String> consolidatedHashcode =
        readPipeline
            .apply(
                "Read all documents",
                MongoDbIO.read()
                    .withUri(mongoUrl)
                    .withDatabase(options.getMongoDBDatabaseName())
                    .withCollection(collection))
            .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
            .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
            .apply("Calculate hashcode", Combine.globally(new HashingFn()));
    String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
    PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);
    PipelineResult readResult = readPipeline.run();
    readResult.waitUntilFinish();
    collectAndPublishMetrics(writeResult, readResult);
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) PipelineResult(org.apache.beam.sdk.PipelineResult) HashingFn(org.apache.beam.sdk.io.common.HashingFn) Test(org.junit.Test)
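
The structure above generalizes to most write-then-verify IO tests: the write pipeline is run to completion before the read pipeline starts, and PAssert is attached before run() because assertions execute as part of the job. A bare-bones sketch of that ordering, with a Create.of placeholder standing in for the real IO transforms (names here are illustrative, not the MongoDB ones above):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class WriteThenReadSketch {
    public static void main(String[] args) {
        Pipeline writePipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        // writePipeline.apply(... write transforms ...);
        PipelineResult writeResult = writePipeline.run();
        // Block so the written data exists before the read pipeline starts.
        writeResult.waitUntilFinish();

        Pipeline readPipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        PCollection<String> readBack = readPipeline.apply(Create.of("placeholder"));
        // Assertions must be attached before run(); they execute inside the job.
        PAssert.thatSingleton(readBack).isEqualTo("placeholder");
        PipelineResult readResult = readPipeline.run();
        readResult.waitUntilFinish();
    }
}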

Example 33 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From class StreamingSourceMetricsTest, method testUnboundedSourceMetrics.

@Test
@Category(StreamingTest.class)
public void testUnboundedSourceMetrics() {
    final long minElements = 1000;
    // Use GenerateSequence as the unbounded source, but push the watermark to infinity at
    // minElements so the test pipeline can cleanly shut it down. Shutdown occurs shortly
    // afterwards, but at least minElements will have been reported in the metrics.
    PCollection<Long> pc =
        pipeline.apply(
            GenerateSequence.from(1)
                .withRate(minElements / 10, Duration.millis(500L))
                .withTimestampFn(
                    t -> t < minElements ? Instant.now() : BoundedWindow.TIMESTAMP_MAX_VALUE));
    assertThat(pc.isBounded(), is(PCollection.IsBounded.UNBOUNDED));
    PipelineResult pipelineResult = pipeline.run();
    MetricQueryResults metrics =
        pipelineResult
            .metrics()
            .queryMetrics(
                MetricsFilter.builder()
                    .addNameFilter(
                        MetricNameFilter.named(ELEMENTS_READ.getNamespace(), ELEMENTS_READ.getName()))
                    .build());
    assertThat(
        metrics.getCounters(),
        hasItem(
            metricsResult(
                ELEMENTS_READ.getNamespace(),
                ELEMENTS_READ.getName(),
                "GenerateSequence/Read(UnboundedCountingSource)",
                greaterThanOrEqualTo(minElements),
                false)));
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MetricNameFilter(org.apache.beam.sdk.metrics.MetricNameFilter) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) SourceMetrics(org.apache.beam.sdk.metrics.SourceMetrics) MetricResultsMatchers.metricsResult(org.apache.beam.sdk.metrics.MetricResultsMatchers.metricsResult) StreamingTest(org.apache.beam.runners.spark.StreamingTest) PipelineResult(org.apache.beam.sdk.PipelineResult) Duration(org.joda.time.Duration) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) MetricsFilter(org.apache.beam.sdk.metrics.MetricsFilter) Source(org.apache.beam.sdk.io.Source) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Rule(org.junit.Rule) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) Matchers.is(org.hamcrest.Matchers.is) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat)
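
The queryMetrics call at the end is the general idiom for reading counters off a PipelineResult after a run. A minimal sketch of that step in isolation, assuming a job that reported a counter under the given namespace and name (printCounters is a hypothetical helper):

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.MetricNameFilter;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;

public class CounterQuerySketch {
    static void printCounters(PipelineResult result, String namespace, String name) {
        MetricQueryResults metrics = result.metrics().queryMetrics(
            MetricsFilter.builder()
                .addNameFilter(MetricNameFilter.named(namespace, name))
                .build());
        for (MetricResult<Long> counter : metrics.getCounters()) {
            // getAttempted() is available on every runner; getCommitted() may throw
            // UnsupportedOperationException where committed metrics are not reported.
            System.out.println(counter.getName() + " = " + counter.getAttempted());
        }
    }
}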

Example 34 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From class V1WriteIT, method testDatastoreWriterFnWithDuplicatedEntities.

/**
 * Tests {@link DatastoreV1.DatastoreWriterFn} with duplicated entities. Once a duplicate
 * entity is found, the batch gets flushed.
 */
@Test
public void testDatastoreWriterFnWithDuplicatedEntities() throws Exception {
    List<Mutation> mutations = new ArrayList<>(200);
    V1TestOptions options = TestPipeline.testingPipelineOptions().as(V1TestOptions.class);
    Pipeline pipeline = TestPipeline.create(options);
    for (int i = 1; i <= 200; i++) {
        Key key = makeKey("key" + i, i + 1).build();
        mutations.add(makeUpsert(Entity.newBuilder().setKey(key).build()).build());
        if (i % 30 == 0) {
            mutations.add(makeUpsert(Entity.newBuilder().setKey(key).build()).build());
        }
    }
    DatastoreV1.DatastoreWriterFn datastoreWriter =
        new DatastoreV1.DatastoreWriterFn(
            TestPipeline.testingPipelineOptions().as(GcpOptions.class).getProject(), null);
    PTransform<PCollection<? extends Mutation>, PCollection<Void>> datastoreWriterTransform =
        ParDo.of(datastoreWriter);
    // The next three lines wrap the original ArrayList in a singleton iterable so that the
    // whole list becomes a single element of the input PCollection.
    List<Mutation> newArrayList = new ArrayList<>(mutations);
    Create.Values<Iterable<Mutation>> mutationIterable =
        Create.of(Collections.singleton(newArrayList));
    PCollection<Iterable<Mutation>> input = pipeline.apply(mutationIterable);
    // Flatten then splits that iterable into individual elements within the same bundle,
    // forcing the mutations to be processed in the same order they were added to the
    // original list.
    input.apply(Flatten.<Mutation>iterables()).apply(datastoreWriterTransform);
    PipelineResult pResult = pipeline.run();
    MetricQueryResults metricResults =
        pResult.metrics().queryMetrics(
            MetricsFilter.builder()
                .addNameFilter(MetricNameFilter.named(DatastoreV1.DatastoreWriterFn.class, "batchSize"))
                .build());
    AtomicLong timesCommitted = new AtomicLong();
    metricResults.getDistributions().forEach(distribution -> {
        if (distribution.getName().getName().equals("batchSize")) {
            timesCommitted.set(distribution.getCommitted().getCount());
        }
    });
    assertEquals(7, timesCommitted.get());
}
Also used : ArrayList(java.util.ArrayList) MetricQueryResults(org.apache.beam.sdk.metrics.MetricQueryResults) PipelineResult(org.apache.beam.sdk.PipelineResult) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) AtomicLong(java.util.concurrent.atomic.AtomicLong) Create(org.apache.beam.sdk.transforms.Create) Mutation(com.google.datastore.v1.Mutation) Key(com.google.datastore.v1.Key) DatastoreHelper.makeKey(com.google.datastore.v1.client.DatastoreHelper.makeKey) Test(org.junit.Test)
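
The distribution queried above carries more than the commit count: DistributionResult also exposes the sum, minimum, and maximum of the reported values. A short sketch of reading all four statistics, assuming (as in the test) a runner that supports committed metrics; the unfiltered MetricsFilter here simply returns every distribution:

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.DistributionResult;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;

public class DistributionStatsSketch {
    static void printDistributions(PipelineResult result) {
        for (MetricResult<DistributionResult> dist :
                result.metrics().queryMetrics(MetricsFilter.builder().build()).getDistributions()) {
            // getCommitted() may throw UnsupportedOperationException on runners that
            // only report attempted metrics; fall back to getAttempted() there.
            DistributionResult stats = dist.getCommitted();
            System.out.printf("%s: count=%d sum=%d min=%d max=%d%n",
                dist.getName(), stats.getCount(), stats.getSum(), stats.getMin(), stats.getMax());
        }
    }
}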

Example 35 with PipelineResult

Use of org.apache.beam.sdk.PipelineResult in project beam by apache.

From class LoadTest, method run.

/**
 * Runs the load test, then collects and publishes the test results to various data stores
 * and/or the console.
 */
public PipelineResult run() throws IOException {
    final Timestamp timestamp = Timestamp.now();
    loadTest();
    final PipelineResult pipelineResult = pipeline.run();
    pipelineResult.waitUntilFinish(Duration.standardMinutes(options.getLoadTestTimeout()));
    final String testId = UUID.randomUUID().toString();
    final List<NamedTestResult> metrics = readMetrics(timestamp, pipelineResult, testId);
    ConsoleResultPublisher.publish(metrics, testId, timestamp.toString());
    handleFailure(pipelineResult, metrics);
    if (options.getPublishToInfluxDB()) {
        InfluxDBPublisher.publishWithSettings(metrics, settings);
    }
    return pipelineResult;
}
Also used : NamedTestResult(org.apache.beam.sdk.testutils.NamedTestResult) PipelineResult(org.apache.beam.sdk.PipelineResult) SyntheticOptions.fromJsonString(org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString) Timestamp(com.google.cloud.Timestamp)
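
The bounded waitUntilFinish(Duration) used here returns the pipeline's state, but on a timeout some runners return null or a non-terminal state instead of throwing. A defensive sketch of checking the outcome; the failure handling below is an illustrative assumption, not LoadTest's actual handleFailure logic:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.joda.time.Duration;

public class TimeoutCheckSketch {
    static PipelineResult runWithTimeout(Pipeline pipeline, long timeoutMinutes) {
        PipelineResult result = pipeline.run();
        PipelineResult.State state = result.waitUntilFinish(Duration.standardMinutes(timeoutMinutes));
        // Guard both timeout shapes: a null return and a non-terminal state.
        if (state == null || !state.isTerminal()) {
            throw new RuntimeException("Pipeline still running after " + timeoutMinutes + " minutes");
        }
        if (state != PipelineResult.State.DONE) {
            throw new RuntimeException("Pipeline terminated in state " + state);
        }
        return result;
    }
}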

Aggregations

Types most frequently used together with PipelineResult across the indexed examples (usage counts):

PipelineResult (org.apache.beam.sdk.PipelineResult): 105
Test (org.junit.Test): 66
Pipeline (org.apache.beam.sdk.Pipeline): 29
TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 18
PCollection (org.apache.beam.sdk.values.PCollection): 18
TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor): 14
ArrayList (java.util.ArrayList): 12
Category (org.junit.experimental.categories.Category): 12
KV (org.apache.beam.sdk.values.KV): 11
Rule (org.junit.Rule): 11
IOException (java.io.IOException): 10
ExampleUtils (org.apache.beam.examples.common.ExampleUtils): 10
DoFn (org.apache.beam.sdk.transforms.DoFn): 10
HashingFn (org.apache.beam.sdk.io.common.HashingFn): 9
RunWith (org.junit.runner.RunWith): 9
MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults): 8
ParDo (org.apache.beam.sdk.transforms.ParDo): 8
Duration (org.joda.time.Duration): 8
Map (java.util.Map): 7
TableReference (com.google.api.services.bigquery.model.TableReference): 6