Search in sources :

Example 46 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class KafkaToPubsubE2ETest method testKafkaToPubsubE2E.

/**
 * End-to-end test: starts the Kafka-to-Pub/Sub pipeline, sends a Kafka message, and waits up to
 * one minute for the test Pub/Sub topic to receive a message whose {@code payload} equals the
 * UTF-8 bytes of {@code PUBSUB_MESSAGE}.
 *
 * <p>The pipeline is always cancelled before the test returns, including when sending the message
 * or the Pub/Sub assertion fails, so a failed run does not leave the job running.
 *
 * @throws Exception if sending the message, the assertion, or cancelling the pipeline fails
 */
@Test
public void testKafkaToPubsubE2E() throws Exception {
    PipelineResult job = KafkaToPubsub.run(pipeline, OPTIONS.as(KafkaToPubsubOptions.class));
    // Remembers a failure raised by the test body so that a problem during cancellation is
    // attached to it as suppressed instead of replacing it.
    Throwable failure = null;
    try {
        sendKafkaMessage();
        testPubsub.assertThatTopicEventuallyReceives(hasProperty("payload", equalTo(PUBSUB_MESSAGE.getBytes(StandardCharsets.UTF_8)))).waitForUpTo(Duration.standardMinutes(1));
    } catch (Throwable t) {
        failure = t;
        throw t;
    } finally {
        try {
            job.cancel();
        } catch (UnsupportedOperationException e) {
            // The runner cannot cancel pipelines: report that as a test failure.
            AssertionError stopFailure = new AssertionError("Could not stop pipeline.", e);
            if (failure == null) {
                throw stopFailure;
            }
            failure.addSuppressed(stopFailure);
        } catch (Exception e) {
            // Any other cancellation error propagates unchanged unless the body already failed.
            if (failure == null) {
                throw e;
            }
            failure.addSuppressed(e);
        }
    }
}
Also used : KafkaToPubsubOptions(org.apache.beam.examples.complete.kafkatopubsub.options.KafkaToPubsubOptions) PipelineResult(org.apache.beam.sdk.PipelineResult) Test(org.junit.Test)

Example 47 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class TestJetRunner method run.

/**
 * Runs the pipeline through {@code delegate} and waits for it to finish.
 *
 * <p>Member instances are created via {@code initMemberInstances(factory)} before the run and are
 * always passed to {@code killMemberInstances} afterwards, whether the run succeeds or throws.
 *
 * @param pipeline the pipeline to execute
 * @return the delegate's result, after {@code waitUntilFinish()} has returned
 */
@Override
public PipelineResult run(Pipeline pipeline) {
    final Collection<JetInstance> members = initMemberInstances(factory);
    try {
        final PipelineResult outcome = delegate.run(pipeline);
        if (!(outcome instanceof FailedRunningPipelineResults)) {
            // Normal path: wait for completion and hand the result back.
            outcome.waitUntilFinish();
            return outcome;
        }
        // The delegate reported a failed run: surface its cause to the caller.
        final FailedRunningPipelineResults failed = (FailedRunningPipelineResults) outcome;
        throw failed.getCause();
    } finally {
        killMemberInstances(members, factory);
    }
}
Also used : JetInstance(com.hazelcast.jet.JetInstance) PipelineResult(org.apache.beam.sdk.PipelineResult)

Example 48 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class WindowedWordCount method runWindowedWordCount.

/**
 * Builds and runs the windowed word-count pipeline: reads text, stamps each element via
 * {@code AddTimestampFn}, assigns fixed windows, counts words, and writes the formatted counts
 * through {@code WriteOneFilePerWindow}.
 *
 * <p>If waiting for the pipeline throws, the pipeline is cancelled and the method returns
 * normally.
 *
 * @param options pipeline options supplying input file, output, timestamp bounds, window size
 *     (in minutes) and number of shards
 * @throws IOException declared by the signature; NOTE(review): presumably raised by
 *     {@code cancel()} — confirm against the Beam API
 */
static void runWindowedWordCount(Options options) throws IOException {
    final String destination = options.getOutput();
    final Instant earliest = new Instant(options.getMinTimestampMillis());
    final Instant latest = new Instant(options.getMaxTimestampMillis());
    Pipeline p = Pipeline.create(options);

    // Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or an
    // unbounded input source.
    PCollection<String> lines = p.apply(TextIO.read().from(options.getInputFile()));
    PCollection<String> input = lines.apply(ParDo.of(new AddTimestampFn(earliest, latest)));

    // Concept #3: window into fixed windows. The window size is taken from the options, in
    // minutes (it can be changed with a command-line option). See the Beam documentation for
    // how fixed windows work and for the other kinds of windowing (e.g. sliding windows).
    Duration windowLength = Duration.standardMinutes(options.getWindowSize());
    PCollection<String> windowedWords = input.apply(Window.into(FixedWindows.of(windowLength)));

    // Concept #4: re-use the existing CountWords transform, which has no knowledge of windows,
    // over a PCollection containing windowed values.
    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

    // Concept #5: format the results and write to a sharded file partitioned by window. Because
    // failures may be followed by retries, the writes must be idempotent; the details of writing
    // to files are elided here.
    PCollection<String> formatted = wordCounts.apply(MapElements.via(new WordCount.FormatAsTextFn()));
    formatted.apply(new WriteOneFilePerWindow(destination, options.getNumShards()));

    PipelineResult result = p.run();
    try {
        result.waitUntilFinish();
    } catch (Exception failure) {
        // NOTE(review): the failure is neither logged nor rethrown; the pipeline is only cancelled.
        result.cancel();
    }
}
Also used : WriteOneFilePerWindow(org.apache.beam.examples.common.WriteOneFilePerWindow) Instant(org.joda.time.Instant) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV) IOException(java.io.IOException) Pipeline(org.apache.beam.sdk.Pipeline)

Example 49 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class TrafficRoutes method runTrafficRoutes.

/**
 * Builds and runs the traffic-routes pipeline: reads lines with timestamps, extracts station
 * speeds, applies sliding windows, tracks speed, and writes table rows to the BigQuery table
 * named by the options.
 *
 * @param options supplies the input file, the sliding-window duration and period (both in
 *     minutes), and the BigQuery project, dataset and table
 * @throws IOException declared by the signature; the visible code does not show which call
 *     raises it
 */
public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
    // ExampleUtils sets up the resources the example requires.
    ExampleUtils utils = new ExampleUtils(options);
    utils.setup();

    Pipeline p = Pipeline.create(options);

    // Destination table for the computed statistics.
    TableReference destinationTable = new TableReference();
    destinationTable.setProjectId(options.getProject());
    destinationTable.setDatasetId(options.getBigQueryDataset());
    destinationTable.setTableId(options.getBigQueryTable());

    // Sliding-window configuration: window length and how often a new window starts.
    Duration windowLength = Duration.standardMinutes(options.getWindowDuration());
    Duration slidePeriod = Duration.standardMinutes(options.getWindowSlideEvery());
    SlidingWindows slidingWindows = SlidingWindows.of(windowLength).every(slidePeriod);

    p.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
        .apply(ParDo.of(new ExtractStationSpeedFn()))
        .apply(Window.into(slidingWindows))
        .apply(new TrackSpeed())
        .apply(BigQueryIO.writeTableRows().to(destinationTable).withSchema(FormatStatsFn.getSchema()));

    // Run the pipeline.
    PipelineResult result = p.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    utils.waitToFinish(result);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 50 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class DirectRunnerTest method testTwoPOutputsInPipelineWithCascade.

/**
 * Runs a {@link Pipeline} with two {@link POutput POutputs} in cascade: the first output writes a
 * start signal, and only once that signal has been observed are the messages for the second
 * output published.
 */
@Test(timeout = 10000)
public void testTwoPOutputsInPipelineWithCascade() throws InterruptedException {
    StaticQueue<Integer> startSignal = StaticQueue.of("start", VarIntCoder.of());
    StaticQueue<Integer> messageQueue = StaticQueue.of("messages", VarIntCoder.of());
    Pipeline p = getPipeline(false);

    // First output: emits the start signal.
    p.begin().apply("outputStartSignal", outputStartTo(startSignal));

    // Second output: sums every message in the global window once the watermark passes its end.
    PCollection<Integer> incoming = p.apply("processMessages", messageQueue.read());
    Window<Integer> globalWindowing =
        Window.<Integer>into(new GlobalWindows())
            .triggering(AfterWatermark.pastEndOfWindow())
            .discardingFiredPanes()
            .withAllowedLateness(Duration.ZERO);
    PCollection<Integer> total = incoming.apply(globalWindowing).apply(Sum.integersGlobally());

    // Once the data has been written the sum should be 6.
    PAssert.that(total).containsInAnyOrder(6);

    PipelineResult pipelineResult = p.run();

    // Wait until a message has been written to the start queue.
    while (startSignal.take() == null) {
        // nothing received yet; poll again
    }

    // Publish the messages and close the queue.
    messageQueue.add(1).add(2).add(3).terminate();
    pipelineResult.waitUntilFinish();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) PipelineResult(org.apache.beam.sdk.PipelineResult) DirectPipelineResult(org.apache.beam.runners.direct.DirectRunner.DirectPipelineResult) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult)105 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)29 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)18 PCollection (org.apache.beam.sdk.values.PCollection)18 TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor)14 ArrayList (java.util.ArrayList)12 Category (org.junit.experimental.categories.Category)12 KV (org.apache.beam.sdk.values.KV)11 Rule (org.junit.Rule)11 IOException (java.io.IOException)10 ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 DoFn (org.apache.beam.sdk.transforms.DoFn)10 HashingFn (org.apache.beam.sdk.io.common.HashingFn)9 RunWith (org.junit.runner.RunWith)9 MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults)8 ParDo (org.apache.beam.sdk.transforms.ParDo)8 Duration (org.joda.time.Duration)8 Map (java.util.Map)7 TableReference (com.google.api.services.bigquery.model.TableReference)6