use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.
the class LeaderBoard method main.
/**
 * Entry point for the LeaderBoard pipeline.
 *
 * <p>Parses the command-line options, forces streaming mode, reads game events from the Pub/Sub
 * topic named in the options, and fans the parsed events out into two BigQuery outputs: team
 * score sums (table suffix {@code _team}) and user score sums (table suffix {@code _user}).
 *
 * @param args command-line arguments, converted into {@code Options}
 * @throws Exception propagated from pipeline construction or execution
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Streaming is switched on unconditionally, whatever the command line said.
  options.setStreaming(true);
  ExampleUtils utils = new ExampleUtils(options);
  Pipeline p = Pipeline.create(options);

  // Element timestamps are taken from the Pub/Sub attribute named by
  // GameConstants.TIMESTAMP_ATTRIBUTE; each message is then parsed into a GameActionInfo.
  PCollection<GameActionInfo> events =
      p.apply(
              PubsubIO.readStrings()
                  .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
                  .fromTopic(options.getTopic()))
          .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));

  // Branch 1: team scores, parameterised by the team window duration and the allowed lateness.
  Duration teamWindow = Duration.standardMinutes(options.getTeamWindowDuration());
  Duration teamLateness = Duration.standardMinutes(options.getAllowedLateness());
  events
      .apply("CalculateTeamScores", new CalculateTeamScores(teamWindow, teamLateness))
      .apply(
          "WriteTeamScoreSums",
          new WriteWindowedToBigQuery<>(
              options.as(GcpOptions.class).getProject(),
              options.getDataset(),
              options.getLeaderBoardTableName() + "_team",
              configureWindowedTableWrite()));

  // Branch 2: user scores, parameterised by the allowed lateness only.
  Duration userLateness = Duration.standardMinutes(options.getAllowedLateness());
  events
      .apply("CalculateUserScores", new CalculateUserScores(userLateness))
      .apply(
          "WriteUserScoreSums",
          new WriteToBigQuery<>(
              options.as(GcpOptions.class).getProject(),
              options.getDataset(),
              options.getLeaderBoardTableName() + "_user",
              configureGlobalWindowBigQueryWrite()));

  // Launch the pipeline, then block until it finishes; ExampleUtils captures cancellation
  // requests coming from the command line.
  PipelineResult job = p.run();
  utils.waitToFinish(job);
}
use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.
the class StatefulTeamScore method main.
/**
 * Entry point for the StatefulTeamScore pipeline.
 *
 * <p>Parses the command-line options, forces streaming mode, reads game events from the Pub/Sub
 * topic named in the options, keys each parsed event by its team, runs {@code UpdateTeamScoreFn}
 * with the configured threshold score, and writes the results to the BigQuery table whose name
 * ends in {@code _team_leader}.
 *
 * @param args command-line arguments, converted into {@code Options}
 * @throws Exception propagated from pipeline construction or execution
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Streaming is switched on unconditionally, whatever the command line said.
  options.setStreaming(true);
  ExampleUtils utils = new ExampleUtils(options);
  Pipeline p = Pipeline.create(options);

  p.apply(
          // Element timestamps are taken from the Pub/Sub attribute named by
          // GameConstants.TIMESTAMP_ATTRIBUTE.
          PubsubIO.readStrings()
              .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE)
              .fromTopic(options.getTopic()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Re-key every event by its team so the next step receives (team, event) pairs.
      .apply(
          "MapTeamAsKey",
          MapElements.into(
                  TypeDescriptors.kvs(
                      TypeDescriptors.strings(), TypeDescriptor.of(GameActionInfo.class)))
              .via((GameActionInfo event) -> KV.of(event.team, event)))
      .apply("UpdateTeamScore", ParDo.of(new UpdateTeamScoreFn(options.getThresholdScore())))
      .apply(
          "WriteTeamLeaders",
          new WriteWindowedToBigQuery<>(
              options.as(GcpOptions.class).getProject(),
              options.getDataset(),
              options.getLeaderBoardTableName() + "_team_leader",
              configureCompleteWindowedTableWrite()));

  // Launch the pipeline, then block until it finishes; ExampleUtils captures cancellation
  // requests coming from the command line.
  PipelineResult job = p.run();
  utils.waitToFinish(job);
}
use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.
the class AutoComplete method runAutocompletePipeline.
/**
 * Builds and runs the autocomplete pipeline described by {@code options}.
 *
 * <p>Reads text from the configured input file, extracts hashtags, windows them (a single global
 * window in batch mode, sliding windows in streaming mode) and computes the top 10 completions.
 * Depending on the option flags the result is written to Datastore, written to BigQuery, and/or
 * reduced to a checksum that is asserted against the expected checksum.
 *
 * @param options pipeline options; Datastore output must be disabled when streaming is enabled
 * @throws IOException declared by the signature; NOTE(review): presumably raised while
 *     ExampleUtils sets up the BigQuery table - confirm against ExampleUtils
 */
public static void runAutocompletePipeline(Options options) throws IOException {
  options.setBigQuerySchema(FormatForBigquery.getSchema());
  ExampleUtils utils = new ExampleUtils(options);

  // The same pipeline is supported in batch mode and in windowed streaming mode; only the
  // windowing strategy differs.
  WindowFn<Object, ?> windowFn;
  if (!options.isStreaming()) {
    windowFn = new GlobalWindows();
  } else {
    checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
    windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
  }

  // Assemble the core of the pipeline.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, List<CompletionCandidate>>> completions =
      pipeline
          .apply(TextIO.read().from(options.getInputFile()))
          .apply(ParDo.of(new ExtractHashtags()))
          .apply(Window.into(windowFn))
          .apply(ComputeTopCompletions.top(10, options.getRecursive()));

  // Optional sink: Datastore. The target project falls back to the pipeline's own project
  // when no explicit output project is configured.
  if (options.getOutputToDatastore()) {
    completions
        .apply(
            "FormatForDatastore",
            ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
        .apply(
            DatastoreIO.v1()
                .write()
                .withProjectId(
                    MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
  }

  // Optional sink: BigQuery.
  if (options.getOutputToBigQuery()) {
    utils.setupBigQueryTable();

    TableReference tableRef =
        new TableReference()
            .setProjectId(options.getProject())
            .setDatasetId(options.getBigQueryDataset())
            .setTableId(options.getBigQueryTable());

    // Streaming runs append to the table; batch runs truncate it first.
    BigQueryIO.Write.WriteDisposition writeDisposition;
    if (options.isStreaming()) {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
    } else {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
    }

    completions
        .apply(ParDo.of(new FormatForBigquery()))
        .apply(
            BigQueryIO.writeTableRows()
                .to(tableRef)
                .withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(writeDisposition));
  }

  // Optional check: sum a per-element hash over the whole output and compare it with the
  // expected checksum.
  if (options.getOutputToChecksum()) {
    PCollection<Long> checksum =
        completions
            .apply(
                ParDo.of(
                    new DoFn<KV<String, List<CompletionCandidate>>, Long>() {
                      @ProcessElement
                      public void process(ProcessContext c) {
                        KV<String, List<CompletionCandidate>> entry = c.element();
                        // Per-element hash = hash of the key + sum of the candidates' hashes,
                        // all accumulated as long.
                        long candidateHashSum =
                            entry.getValue().stream()
                                .mapToLong(CompletionCandidate::hashCode)
                                .sum();
                        long keyHash = entry.getKey().hashCode();
                        c.output(keyHash + candidateHashSum);
                      }
                    }))
            .apply(Sum.longsGlobally());

    PAssert.that(checksum).containsInAnyOrder(options.getExpectedChecksum());
  }

  // Launch the pipeline.
  PipelineResult job = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  utils.waitToFinish(job);
}
use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.
the class TrafficMaxLaneFlow method runTrafficMaxLaneFlow.
/**
 * Builds and runs the max-lane-flow pipeline described by {@code options}.
 *
 * <p>Sets up the example resources through {@code ExampleUtils}, reads and timestamps the input
 * file, extracts flow information, applies sliding windows whose size and period (both in
 * minutes) come from the options, computes the maximum lane flow and writes the rows to the
 * configured BigQuery table.
 *
 * @param options pipeline options supplying the input file, windowing and BigQuery settings
 * @throws IOException declared by the signature; NOTE(review): presumably raised by
 *     {@code ExampleUtils.setup()} - confirm against ExampleUtils
 */
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
  // ExampleUtils provisions the resources this example requires before the pipeline is built.
  ExampleUtils utils = new ExampleUtils(options);
  utils.setup();

  Pipeline p = Pipeline.create(options);

  // Destination table, assembled from the project / dataset / table options.
  TableReference tableRef =
      new TableReference()
          .setProjectId(options.getProject())
          .setDatasetId(options.getBigQueryDataset())
          .setTableId(options.getBigQueryTable());

  // Sliding-window geometry, both values expressed in minutes.
  Duration windowSize = Duration.standardMinutes(options.getWindowDuration());
  Duration windowPeriod = Duration.standardMinutes(options.getWindowSlideEvery());

  p.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      .apply(ParDo.of(new ExtractFlowInfoFn()))
      .apply(Window.into(SlidingWindows.of(windowSize).every(windowPeriod)))
      .apply(new MaxLaneFlow())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));

  // Launch the pipeline.
  PipelineResult job = p.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  utils.waitToFinish(job);
}
use of org.apache.beam.examples.common.ExampleUtils in project beam by apache.
the class StreamingWordExtract method main.
/**
 * Configures and launches the streaming word-extraction pipeline.
 *
 * <p>Streaming mode is forced on and the BigQuery schema is taken from
 * {@code StringToRowConverter}. The pipeline reads lines from the configured input file,
 * extracts words, upper-cases them, converts them to table rows and writes those rows to the
 * BigQuery table identified by {@code project:dataset.table}.
 *
 * @param args command-line arguments, converted into {@code StreamingWordExtractOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
  StreamingWordExtractOptions options =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(StreamingWordExtractOptions.class);
  options.setStreaming(true);
  options.setBigQuerySchema(StringToRowConverter.getSchema());

  ExampleUtils utils = new ExampleUtils(options);
  utils.setup();

  Pipeline p = Pipeline.create(options);

  // Table spec in the "project:dataset.table" form.
  String tableSpec =
      options.getProject()
          + ":"
          + options.getBigQueryDataset()
          + "."
          + options.getBigQueryTable();

  p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
      .apply(ParDo.of(new ExtractWords()))
      .apply(ParDo.of(new Uppercase()))
      .apply(ParDo.of(new StringToRowConverter()))
      .apply(
          BigQueryIO.writeTableRows().to(tableSpec).withSchema(StringToRowConverter.getSchema()));

  PipelineResult job = p.run();

  // ExampleUtils will try to cancel the pipeline before the program exits.
  utils.waitToFinish(job);
}
Aggregations