Search in sources :

Example 6 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class LeaderBoard method main.

/**
 * Runs the LeaderBoard example as a streaming pipeline.
 *
 * <p>Game events are read from the Pub/Sub topic given in the options and parsed once. The parsed
 * collection then feeds two independent branches: one computes team scores and writes them with
 * {@code WriteWindowedToBigQuery}, the other computes user scores and writes them with
 * {@code WriteToBigQuery}.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception propagated from any of the setup or run calls below
 */
public static void main(String[] args) throws Exception {
    Options options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Streaming is forced on regardless of what was passed on the command line.
    options.setStreaming(true);

    ExampleUtils utils = new ExampleUtils(options);
    Pipeline p = Pipeline.create(options);

    // Pub/Sub source: element timestamps are taken from the attribute named by
    // GameConstants.TIMESTAMP_ATTRIBUTE, then every message is handed to ParseEventFn.
    PCollection<GameActionInfo> parsedEvents =
        p.apply(
                PubsubIO.readStrings()
                    .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
                    .fromTopic(options.getTopic()))
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

    // Values used by both output branches.
    Duration allowedLateness = Duration.standardMinutes(options.getAllowedLateness());
    String project = options.as(GcpOptions.class).getProject();

    // Branch 1: team scores, written to the "<table>_team" table.
    Duration teamWindow = Duration.standardMinutes(options.getTeamWindowDuration());
    String teamTable = options.getLeaderBoardTableName() + "_team";
    parsedEvents
        .apply("CalculateTeamScores", new CalculateTeamScores(teamWindow, allowedLateness))
        .apply(
            "WriteTeamScoreSums",
            new WriteWindowedToBigQuery<>(
                project, options.getDataset(), teamTable, configureWindowedTableWrite()));

    // Branch 2: user scores, written to the "<table>_user" table.
    String userTable = options.getLeaderBoardTableName() + "_user";
    parsedEvents
        .apply("CalculateUserScores", new CalculateUserScores(allowedLateness))
        .apply(
            "WriteUserScoreSums",
            new WriteToBigQuery<>(
                project,
                options.getDataset(),
                userTable,
                configureGlobalWindowBigQueryWrite()));

    // Launch the job, then let ExampleUtils wait for it to finish; it also captures
    // cancellation requests coming from the command line.
    PipelineResult pipelineResult = p.run();
    utils.waitToFinish(pipelineResult);
}
Also used : GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) ExampleOptions(org.apache.beam.examples.common.ExampleOptions) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 7 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class StatefulTeamScore method main.

/**
 * Runs the StatefulTeamScore example as a streaming pipeline.
 *
 * <p>Game events are read from Pub/Sub, parsed, keyed by team, passed through
 * {@code UpdateTeamScoreFn} (configured with the threshold score from the options) and the
 * results are written to the "<table>_team_leader" BigQuery table.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception propagated from any of the setup or run calls below
 */
public static void main(String[] args) throws Exception {
    Options options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Streaming is forced on regardless of what was passed on the command line.
    options.setStreaming(true);

    ExampleUtils utils = new ExampleUtils(options);
    Pipeline p = Pipeline.create(options);

    // Destination of the final write step.
    String project = options.as(GcpOptions.class).getProject();
    String leaderTable = options.getLeaderBoardTableName() + "_team_leader";

    p.apply(
            PubsubIO.readStrings()
                .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
                .fromTopic(options.getTopic()))
        .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
        // Key every event by its team so the next step sees one key per team.
        .apply(
            "MapTeamAsKey",
            MapElements.into(
                    TypeDescriptors.kvs(
                        TypeDescriptors.strings(), TypeDescriptor.of(GameActionInfo.class)))
                .via((GameActionInfo event) -> KV.of(event.team, event)))
        .apply("UpdateTeamScore", ParDo.of(new UpdateTeamScoreFn(options.getThresholdScore())))
        .apply(
            "WriteTeamLeaders",
            new WriteWindowedToBigQuery<>(
                project,
                options.getDataset(),
                leaderTable,
                configureCompleteWindowedTableWrite()));

    // Launch the job, then let ExampleUtils wait for it to finish; it also captures
    // cancellation requests coming from the command line.
    PipelineResult pipelineResult = p.run();
    utils.waitToFinish(pipelineResult);
}
Also used : GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 8 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class AutoComplete method runAutocompletePipeline.

/**
 * Builds and runs the autocomplete pipeline described by {@code options}.
 *
 * <p>The input file is read, hashtags are extracted, the elements are windowed (sliding windows
 * when streaming, the global window otherwise) and the top 10 completions are computed. The
 * result is then sent to any combination of Datastore, BigQuery and a checksum assertion,
 * depending on which output flags are set.
 *
 * @param options pipeline options; its BigQuery schema is overwritten with
 *     {@code FormatForBigquery.getSchema()} before anything else happens
 * @throws IOException declared by the signature; presumably raised while ExampleUtils sets up
 *     resources such as the BigQuery table (confirm against ExampleUtils)
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, when Datastore output
 *     is requested in streaming mode
 */
public static void runAutocompletePipeline(Options options) throws IOException {
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);

    // The same pipeline runs in either batch or windowed streaming mode; only the
    // windowing strategy differs.
    WindowFn<Object, ?> windowFn;
    if (options.isStreaming()) {
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    } else {
        windowFn = new GlobalWindows();
    }

    // Create the pipeline.
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, List<CompletionCandidate>>> toWrite =
        p.apply(TextIO.read().from(options.getInputFile()))
            .apply(ParDo.of(new ExtractHashtags()))
            .apply(Window.into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    if (options.getOutputToDatastore()) {
        toWrite
            .apply(
                "FormatForDatastore",
                ParDo.of(
                    new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
            .apply(
                DatastoreIO.v1()
                    .write()
                    // Fall back to the pipeline's own project when no output project is set.
                    .withProjectId(
                        MoreObjects.firstNonNull(
                            options.getOutputProject(), options.getProject())));
    }

    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();
        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());
        toWrite
            .apply(ParDo.of(new FormatForBigquery()))
            .apply(
                BigQueryIO.writeTableRows()
                    .to(tableRef)
                    .withSchema(FormatForBigquery.getSchema())
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    // Streaming runs append to the table; batch runs replace its contents.
                    .withWriteDisposition(
                        options.isStreaming()
                            ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                            : BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
    }

    if (options.getOutputToChecksum()) {
        PCollection<Long> checksum =
            toWrite
                .apply(
                    ParDo.of(
                        new DoFn<KV<String, List<CompletionCandidate>>, Long>() {

                            @ProcessElement
                            public void process(ProcessContext c) {
                                KV<String, List<CompletionCandidate>> elm = c.element();
                                // Primitive arithmetic throughout: the value is boxed only once,
                                // when it is handed to output().
                                long listHash =
                                    elm.getValue().stream()
                                        .mapToLong(CompletionCandidate::hashCode)
                                        .sum();
                                c.output((long) elm.getKey().hashCode() + listHash);
                            }
                        }))
                .apply(Sum.longsGlobally());
        PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
    }

    // Run the pipeline.
    PipelineResult result = p.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) TableReference(com.google.api.services.bigquery.model.TableReference)

Example 9 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class TrafficMaxLaneFlow method runTrafficMaxLaneFlow.

/**
 * Builds and runs the max-lane-flow pipeline described by {@code options}.
 *
 * <p>Lines are read from the input file with their timestamps, converted to flow info, placed in
 * sliding windows sized by the options, reduced by {@code MaxLaneFlow} and written to the
 * BigQuery table named in the options.
 *
 * @param options pipeline options supplying the input file, window settings and output table
 * @throws IOException declared by the signature; presumably raised by the ExampleUtils setup
 *     (confirm against ExampleUtils)
 */
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
    // ExampleUtils sets up the resources this example requires before the pipeline is built.
    ExampleUtils utils = new ExampleUtils(options);
    utils.setup();

    Pipeline p = Pipeline.create(options);

    // Output table, addressed as project / dataset / table.
    TableReference destination = new TableReference();
    destination.setProjectId(options.getProject());
    destination.setDatasetId(options.getBigQueryDataset());
    destination.setTableId(options.getBigQueryTable());

    // Sliding-window geometry, both values given in minutes by the options.
    Duration windowSize = Duration.standardMinutes(options.getWindowDuration());
    Duration windowPeriod = Duration.standardMinutes(options.getWindowSlideEvery());

    p.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
        .apply(ParDo.of(new ExtractFlowInfoFn()))
        .apply(Window.into(SlidingWindows.of(windowSize).every(windowPeriod)))
        .apply(new MaxLaneFlow())
        .apply(
            BigQueryIO.writeTableRows().to(destination).withSchema(FormatMaxesFn.getSchema()));

    // Launch the job; ExampleUtils will try to cancel the pipeline and the injector before
    // the program exits.
    PipelineResult pipelineResult = p.run();
    utils.waitToFinish(pipelineResult);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 10 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class StreamingWordExtract method main.

/**
 * Configures and launches the streaming word-extraction pipeline.
 *
 * <p>Lines are read from the input file, split into words, upper-cased, converted to table rows
 * and written to the BigQuery table named in the options.
 *
 * @param args command-line arguments, parsed and validated into
 *     {@code StreamingWordExtractOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    StreamingWordExtractOptions options =
        PipelineOptionsFactory.fromArgs(args)
            .withValidation()
            .as(StreamingWordExtractOptions.class);
    options.setStreaming(true);
    options.setBigQuerySchema(StringToRowConverter.getSchema());

    ExampleUtils utils = new ExampleUtils(options);
    utils.setup();

    Pipeline p = Pipeline.create(options);

    // Table spec in the form "<project>:<dataset>.<table>".
    String tableSpec =
        options.getProject()
            + ":"
            + options.getBigQueryDataset()
            + "."
            + options.getBigQueryTable();

    p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
        .apply(ParDo.of(new ExtractWords()))
        .apply(ParDo.of(new Uppercase()))
        .apply(ParDo.of(new StringToRowConverter()))
        .apply(
            BigQueryIO.writeTableRows()
                .to(tableSpec)
                .withSchema(StringToRowConverter.getSchema()));

    // Launch the job; ExampleUtils will try to cancel the pipeline before the program exits.
    PipelineResult pipelineResult = p.run();
    utils.waitToFinish(pipelineResult);
}
Also used : ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 Pipeline (org.apache.beam.sdk.Pipeline)10 PipelineResult (org.apache.beam.sdk.PipelineResult)10 TableReference (com.google.api.services.bigquery.model.TableReference)6 GcpOptions (org.apache.beam.sdk.extensions.gcp.options.GcpOptions)3 KV (org.apache.beam.sdk.values.KV)2 TableRow (com.google.api.services.bigquery.model.TableRow)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ExampleOptions (org.apache.beam.examples.common.ExampleOptions)1 GameConstants (org.apache.beam.examples.complete.game.utils.GameConstants)1 WriteWindowedToBigQuery (org.apache.beam.examples.complete.game.utils.WriteWindowedToBigQuery)1 PubsubIO (org.apache.beam.sdk.io.gcp.pubsub.PubsubIO)1 Counter (org.apache.beam.sdk.metrics.Counter)1 Metrics (org.apache.beam.sdk.metrics.Metrics)1 Default (org.apache.beam.sdk.options.Default)1 Description (org.apache.beam.sdk.options.Description)1 PipelineOptionsFactory (org.apache.beam.sdk.options.PipelineOptionsFactory)1 StreamingOptions (org.apache.beam.sdk.options.StreamingOptions)1 Combine (org.apache.beam.sdk.transforms.Combine)1