Search in sources :

Example 91 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class AutoComplete method runAutocompletePipeline.

/**
 * Builds and runs the autocomplete pipeline configured by {@code options}.
 *
 * <p>Lines are read from {@code options.getInputFile()}, hashtags are extracted, the elements are
 * windowed (30-minute sliding windows every 5 seconds when streaming, the global window
 * otherwise) and the top 10 completions are computed. Depending on the options, the result is
 * written to Datastore, written to BigQuery, and/or reduced to a checksum that is asserted against
 * {@code options.getExpectedChecksum()}.
 *
 * @param options pipeline options selecting the input, the execution mode and the outputs
 * @throws IOException declared by the signature; presumably raised by the ExampleUtils helpers
 *     called here (not visible in this method)
 */
public static void runAutocompletePipeline(Options options) throws IOException {
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);
    // We support running the same pipeline in either
    // batch or windowed streaming mode.
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        windowFn = new GlobalWindows();
    }
    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite =
        p.apply(TextIO.read().from(options.getInputFile()))
            .apply(ParDo.of(new ExtractHashtags()))
            .apply(Window.into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));
    if (options.getOutputToDatastore()) {
        toWrite
            .apply(
                "FormatForDatastore",
                ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
            .apply(
                DatastoreIO.v1()
                    .write()
                    // Fall back to the pipeline's own project when no output project is given.
                    .withProjectId(
                        MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
    }
    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();
        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());
        toWrite
            .apply(ParDo.of(new FormatForBigquery()))
            .apply(
                BigQueryIO.writeTableRows()
                    .to(tableRef)
                    .withSchema(FormatForBigquery.getSchema())
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    // Streaming appends to the table; batch replaces its contents.
                    .withWriteDisposition(
                        options.isStreaming()
                            ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                            : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }
    if (options.getOutputToChecksum()) {
        PCollection<Long> checksum =
            toWrite
                .apply(
                    ParDo.of(
                        new DoFn<KV<String, List<CompletionCandidate>>, Long>() {

                            @ProcessElement
                            public void process(ProcessContext c) {
                                KV<String, List<CompletionCandidate>> elm = c.element();
                                // Primitive accumulator: avoids boxing the per-element sum.
                                long listHash =
                                    elm.getValue().stream()
                                        .mapToLong(CompletionCandidate::hashCode)
                                        .sum();
                                // Widen the key hash to long before adding so the sum is 64-bit.
                                c.output((long) elm.getKey().hashCode() + listHash);
                            }
                        }))
                .apply(Sum.longsGlobally());
        PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
    }
    // Run the pipeline.
    PipelineResult result = p.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) TableReference(com.google.api.services.bigquery.model.TableReference)

Example 92 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class TrafficMaxLaneFlow method runTrafficMaxLaneFlow.

/**
 * Builds and runs the max-lane-flow pipeline configured by {@code options}.
 *
 * <p>The pipeline reads the input file with extracted timestamps, extracts flow info, applies
 * sliding windows whose size and period (both in minutes) come from the options, computes the
 * max lane flow and writes the rows to the BigQuery table named by the options.
 *
 * @param options options supplying the input file, window settings and BigQuery destination
 * @throws IOException declared by the signature; presumably raised by the ExampleUtils helpers
 *     called here (not visible in this method)
 */
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
    // Let ExampleUtils provision the resources this example relies on.
    ExampleUtils utils = new ExampleUtils(options);
    utils.setup();

    // Destination table, assembled from the project/dataset/table options.
    TableReference destination = new TableReference();
    destination.setProjectId(options.getProject());
    destination.setDatasetId(options.getBigQueryDataset());
    destination.setTableId(options.getBigQueryTable());

    Pipeline flowPipeline = Pipeline.create(options);
    flowPipeline
        .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
        .apply(ParDo.of(new ExtractFlowInfoFn()))
        .apply(
            Window.into(
                SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                    .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
        .apply(new MaxLaneFlow())
        .apply(BigQueryIO.writeTableRows().to(destination).withSchema(FormatMaxesFn.getSchema()));

    // Launch the pipeline, then hand the result to ExampleUtils, which will try to cancel the
    // pipeline and the injector before the program exits.
    PipelineResult outcome = flowPipeline.run();
    utils.waitToFinish(outcome);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 93 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class StreamingWordExtract method main.

/**
 * Entry point: configures the options for streaming, builds the word-extraction pipeline and
 * starts it.
 *
 * <p>The pipeline reads lines from the input file, extracts words, upper-cases them, converts
 * each to a table row and writes the rows to the BigQuery table
 * {@code <project>:<dataset>.<table>} taken from the options.
 *
 * @param args command-line arguments parsed into {@code StreamingWordExtractOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    StreamingWordExtractOptions opts =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(StreamingWordExtractOptions.class);
    opts.setStreaming(true);
    opts.setBigQuerySchema(StringToRowConverter.getSchema());

    ExampleUtils utils = new ExampleUtils(opts);
    utils.setup();

    Pipeline wordPipeline = Pipeline.create(opts);

    // Table spec in the form project:dataset.table.
    String destinationTable =
        opts.getProject() + ":" + opts.getBigQueryDataset() + "." + opts.getBigQueryTable();

    wordPipeline
        .apply("ReadLines", TextIO.read().from(opts.getInputFile()))
        .apply(ParDo.of(new ExtractWords()))
        .apply(ParDo.of(new Uppercase()))
        .apply(ParDo.of(new StringToRowConverter()))
        .apply(
            BigQueryIO.writeTableRows()
                .to(destinationTable)
                .withSchema(StringToRowConverter.getSchema()));

    PipelineResult outcome = wordPipeline.run();
    // ExampleUtils will try to cancel the pipeline before the program exits.
    utils.waitToFinish(outcome);
}
Also used : ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 94 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class DirectRunnerTest method tearsDownFnsBeforeFinishing.

/**
 * Checks that a DoFn's teardown has already run by the time {@code waitUntilFinish()} returns.
 *
 * <p>The teardown sleeps for one second and then records {@code System.nanoTime()}; the test
 * asserts that this timestamp was recorded and is earlier than the time read after the wait.
 */
@Test
public void tearsDownFnsBeforeFinishing() {
    // Sentinel: a non-positive value means teardown never recorded a timestamp.
    TEARDOWN_CALL.set(-1);
    Pipeline testPipeline = getPipeline();

    DoFn<String, String> slowTeardownFn = new DoFn<String, String>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            // no-op
        }

        @Teardown
        public void teardown() {
            // Delay the callback so a run that finishes without waiting for teardown is caught.
            try {
                Thread.sleep(1000);
            } catch (final InterruptedException e) {
                throw new AssertionError(e);
            }
            TEARDOWN_CALL.set(System.nanoTime());
        }
    };
    testPipeline.apply(Create.of("a")).apply(ParDo.of(slowTeardownFn));

    PipelineResult runResult = testPipeline.run();
    runResult.waitUntilFinish();

    long finishedAt = System.nanoTime();
    long tornDownAt = TEARDOWN_CALL.get();
    assertThat(tornDownAt, greaterThan(0L));
    assertThat(finishedAt, greaterThan(tornDownAt));
}
Also used : DoFn(org.apache.beam.sdk.transforms.DoFn) PipelineResult(org.apache.beam.sdk.PipelineResult) DirectPipelineResult(org.apache.beam.runners.direct.DirectRunner.DirectPipelineResult) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 95 with PipelineResult

use of org.apache.beam.sdk.PipelineResult in project beam by apache.

the class DirectRunnerTest method cancelShouldStopPipeline.

/**
 * Checks that cancelling a running pipeline lets {@code waitUntilFinish()} return.
 *
 * <p>One task runs the pipeline and publishes its {@link PipelineResult} through a queue; a
 * second task takes that result and cancels it. The test then waits on the result, which would
 * block indefinitely if the cancellation had no effect.
 *
 * @throws Exception if either task fails or the waiting thread is interrupted
 */
@Test
public void cancelShouldStopPipeline() throws Exception {
    PipelineOptions opts = TestPipeline.testingPipelineOptions();
    // NOTE(review): presumably makes run() return without waiting for completion - confirm.
    opts.as(DirectOptions.class).setBlockOnRun(false);
    opts.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(opts);
    p.apply(GenerateSequence.from(0).withRate(1L, Duration.standardSeconds(1)));
    // Hands the PipelineResult from the running task over to the cancelling task.
    final BlockingQueue<PipelineResult> resultExchange = new ArrayBlockingQueue<>(1);
    Runnable cancelRunnable = () -> {
        try {
            resultExchange.take().cancel();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException(e);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    };
    Callable<PipelineResult> runPipelineRunnable = () -> {
        PipelineResult res = p.run();
        try {
            resultExchange.put(res);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException(e);
        }
        return res;
    };
    ExecutorService executor = Executors.newCachedThreadPool();
    try {
        Future<?> cancelResult = executor.submit(cancelRunnable);
        Future<PipelineResult> result = executor.submit(runPipelineRunnable);
        cancelResult.get();
        // If cancel doesn't work, this will hang forever
        result.get().waitUntilFinish();
    } finally {
        // On success both tasks have already completed; on failure this interrupts whichever
        // task is still running so the pool's threads do not outlive the test.
        executor.shutdownNow();
    }
}
Also used : PipelineResult(org.apache.beam.sdk.PipelineResult) DirectPipelineResult(org.apache.beam.runners.direct.DirectRunner.DirectPipelineResult) IOException(java.io.IOException) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) Test(org.junit.Test)

Aggregations

PipelineResult (org.apache.beam.sdk.PipelineResult)105 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)29 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)18 PCollection (org.apache.beam.sdk.values.PCollection)18 TimeMonitor (org.apache.beam.sdk.testutils.metrics.TimeMonitor)14 ArrayList (java.util.ArrayList)12 Category (org.junit.experimental.categories.Category)12 KV (org.apache.beam.sdk.values.KV)11 Rule (org.junit.Rule)11 IOException (java.io.IOException)10 ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 DoFn (org.apache.beam.sdk.transforms.DoFn)10 HashingFn (org.apache.beam.sdk.io.common.HashingFn)9 RunWith (org.junit.runner.RunWith)9 MetricQueryResults (org.apache.beam.sdk.metrics.MetricQueryResults)8 ParDo (org.apache.beam.sdk.transforms.ParDo)8 Duration (org.joda.time.Duration)8 Map (java.util.Map)7 TableReference (com.google.api.services.bigquery.model.TableReference)6