Search in sources :

Example 1 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class TrafficMaxLaneFlow method main.

/**
   * Sets up and starts streaming pipeline.
   *
   * @throws IOException if there is a problem setting up resources
   */
public static void main(String[] args) throws IOException {
    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficMaxLaneFlowOptions.class);
    options.setBigQuerySchema(FormatMaxesFn.getSchema());
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();
    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());
    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile())).apply(ParDo.of(new ExtractFlowInfoFn())).apply(Window.<KV<String, LaneInfo>>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration())).every(Duration.standardMinutes(options.getWindowSlideEvery())))).apply(new MaxLaneFlow()).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));
    // Run the pipeline.
    PipelineResult result = pipeline.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
    exampleUtils.waitToFinish(result);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 2 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class TrafficRoutes method main.

/**
   * Sets up and starts streaming pipeline.
   *
   * @throws IOException if there is a problem setting up resources
   */
public static void main(String[] args) throws IOException {
    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficRoutesOptions.class);
    options.setBigQuerySchema(FormatStatsFn.getSchema());
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();
    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());
    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile())).apply(ParDo.of(new ExtractStationSpeedFn())).apply(Window.<KV<String, StationSpeed>>into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration())).every(Duration.standardMinutes(options.getWindowSlideEvery())))).apply(new TrackSpeed()).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));
    // Run the pipeline.
    PipelineResult result = pipeline.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
    exampleUtils.waitToFinish(result);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 3 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class TrafficRoutes method runTrafficRoutes.

public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
    // Using ExampleUtils to set up required resources.
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();
    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = new TableReference();
    tableRef.setProjectId(options.getProject());
    tableRef.setDatasetId(options.getBigQueryDataset());
    tableRef.setTableId(options.getBigQueryTable());
    pipeline.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile())).apply(ParDo.of(new ExtractStationSpeedFn())).apply(Window.into(SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration())).every(Duration.standardMinutes(options.getWindowSlideEvery())))).apply(new TrackSpeed()).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));
    // Run the pipeline.
    PipelineResult result = pipeline.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
    exampleUtils.waitToFinish(result);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 4 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class TriggerExample method main.

public static void main(String[] args) throws Exception {
    TrafficFlowOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficFlowOptions.class);
    options.setStreaming(true);
    options.setBigQuerySchema(getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);
    exampleUtils.setup();
    Pipeline pipeline = Pipeline.create(options);
    TableReference tableRef = getTableReference(options.getProject(), options.getBigQueryDataset(), options.getBigQueryTable());
    PCollectionList<TableRow> resultList = pipeline.apply("ReadMyFile", TextIO.read().from(options.getInput())).apply("InsertRandomDelays", ParDo.of(new InsertDelays())).apply(ParDo.of(new ExtractFlowInfo())).apply(new CalculateTotalFlow(options.getWindowDuration()));
    for (int i = 0; i < resultList.size(); i++) {
        resultList.get(i).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(getSchema()));
    }
    PipelineResult result = pipeline.run();
    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    exampleUtils.waitToFinish(result);
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 5 with ExampleUtils

use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.

the class GameStats method main.

public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Enforce that this pipeline is always run in streaming mode.
    options.setStreaming(true);
    ExampleUtils exampleUtils = new ExampleUtils(options);
    Pipeline pipeline = Pipeline.create(options);
    // Read Events from Pub/Sub using custom timestamps
    PCollection<GameActionInfo> rawEvents = pipeline.apply(PubsubIO.readStrings().withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())).apply("ParseGameEvent", ParDo.of(new ParseEventFn()));
    // Extract username/score pairs from the event stream
    PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore", MapElements.into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())).via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
    // Calculate the total score per user over fixed windows, and
    // cumulative updates for late data.
    final PCollectionView<Map<String, Integer>> spammersView = userEvents.apply("FixedWindowsUser", Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))).apply("CalculateSpammyUsers", new CalculateSpammyUsers()).apply("CreateSpammersView", View.asMap());
    // [START DocInclude_FilterAndCalc]
    // Calculate the total score per team over fixed windows,
    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
    // suspected robots-- to filter out scores from those users from the sum.
    // Write the results to BigQuery.
    rawEvents.apply("WindowIntoFixedWindows", Window.into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))).apply("FilterOutSpammers", ParDo.of(new DoFn<GameActionInfo, GameActionInfo>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            // If the user is not in the spammers Map, output the data element.
            if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
                c.output(c.element());
            }
        }
    }).withSideInputs(spammersView)).apply("ExtractTeamScore", new ExtractAndSumScore("team")).apply("WriteTeamSums", new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_team", configureWindowedWrite()));
    // [START DocInclude_SessionCalc]
    // Detect user sessions-- that is, a burst of activity separated by a gap from further
    // activity. Find and record the mean session lengths.
    // This information could help the game designers track the changing user engagement
    // as their set of games changes.
    userEvents.apply("WindowIntoSessions", Window.<KV<String, Integer>>into(Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))).withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)).apply(Combine.perKey(x -> 0)).apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())).apply("WindowToExtractSessionMean", Window.into(FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))).apply(Mean.<Integer>globally().withoutDefaults()).apply("WriteAvgSessionLength", new WriteWindowedToBigQuery<>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_sessions", configureSessionWindowWrite()));
    // [END DocInclude_Rewindow]
    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
    // command line.
    PipelineResult result = pipeline.run();
    exampleUtils.waitToFinish(result);
}
Also used : KV(org.apache.beam.sdk.values.KV) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) PipelineResult(org.apache.beam.sdk.PipelineResult) Default(org.apache.beam.sdk.options.Default) Combine(org.apache.beam.sdk.transforms.Combine) Duration(org.joda.time.Duration) LoggerFactory(org.slf4j.LoggerFactory) WriteWindowedToBigQuery(org.apache.beam.examples.complete.game.utils.WriteWindowedToBigQuery) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Metrics(org.apache.beam.sdk.metrics.Metrics) Description(org.apache.beam.sdk.options.Description) PTransform(org.apache.beam.sdk.transforms.PTransform) GameConstants(org.apache.beam.examples.complete.game.utils.GameConstants) Sessions(org.apache.beam.sdk.transforms.windowing.Sessions) Map(java.util.Map) Window(org.apache.beam.sdk.transforms.windowing.Window) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) MapElements(org.apache.beam.sdk.transforms.MapElements) Logger(org.slf4j.Logger) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Counter(org.apache.beam.sdk.metrics.Counter) Sum(org.apache.beam.sdk.transforms.Sum) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) PCollection(org.apache.beam.sdk.values.PCollection) Mean(org.apache.beam.sdk.transforms.Mean) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PubsubIO(org.apache.beam.sdk.io.gcp.pubsub.PubsubIO) ParDo(org.apache.beam.sdk.transforms.ParDo) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Instant(org.joda.time.Instant) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Values(org.apache.beam.sdk.transforms.Values) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ExampleUtils (org.apache.beam.examples.common.ExampleUtils)10 Pipeline (org.apache.beam.sdk.Pipeline)10 PipelineResult (org.apache.beam.sdk.PipelineResult)10 TableReference (com.google.api.services.bigquery.model.TableReference)6 GcpOptions (org.apache.beam.sdk.extensions.gcp.options.GcpOptions)3 KV (org.apache.beam.sdk.values.KV)2 TableRow (com.google.api.services.bigquery.model.TableRow)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ExampleOptions (org.apache.beam.examples.common.ExampleOptions)1 GameConstants (org.apache.beam.examples.complete.game.utils.GameConstants)1 WriteWindowedToBigQuery (org.apache.beam.examples.complete.game.utils.WriteWindowedToBigQuery)1 PubsubIO (org.apache.beam.sdk.io.gcp.pubsub.PubsubIO)1 Counter (org.apache.beam.sdk.metrics.Counter)1 Metrics (org.apache.beam.sdk.metrics.Metrics)1 Default (org.apache.beam.sdk.options.Default)1 Description (org.apache.beam.sdk.options.Description)1 PipelineOptionsFactory (org.apache.beam.sdk.options.PipelineOptionsFactory)1 StreamingOptions (org.apache.beam.sdk.options.StreamingOptions)1 Combine (org.apache.beam.sdk.transforms.Combine)1