Examples with Pipeline - org.apache.beam.sdk.Pipeline

Example 21 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class CrashingRunnerTest method runThrows.

/**
 * Builds a pipeline whose options select {@code CrashingRunner}, then expects
 * {@code run()} to throw an {@code IllegalArgumentException} whose message contains
 * both "Cannot call #run" and the test-pipeline-options property name.
 */
@Test
public void runThrows() {
    // Options that route execution to the crashing runner.
    final PipelineOptions crashingOptions = PipelineOptionsFactory.create();
    crashingOptions.setRunner(CrashingRunner.class);

    // A minimal pipeline with a single Create transform applied.
    final Pipeline pipeline = Pipeline.create(crashingOptions);
    pipeline.apply(Create.of(1, 2, 3));

    // Expectations must be registered before the call that is expected to fail.
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Cannot call #run");
    thrown.expectMessage(TestPipeline.PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);

    pipeline.run();
}

Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 22 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class TrafficMaxLaneFlow method main.

/**
 * Configures and launches the streaming pipeline.
 *
 * <p>Steps: parse and validate the options, set up resources through {@code ExampleUtils},
 * read the input file with timestamps, extract flow info, apply sliding windows,
 * apply {@code MaxLaneFlow}, and write the rows to the configured BigQuery table.
 *
 * @param args command-line arguments parsed into {@code TrafficMaxLaneFlowOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    final TrafficMaxLaneFlowOptions flowOptions =
            PipelineOptionsFactory.fromArgs(args)
                    .withValidation()
                    .as(TrafficMaxLaneFlowOptions.class);
    flowOptions.setBigQuerySchema(FormatMaxesFn.getSchema());

    // ExampleUtils takes care of setting up the required resources.
    final ExampleUtils resources = new ExampleUtils(flowOptions);
    resources.setup();

    final Pipeline flowPipeline = Pipeline.create(flowOptions);

    // Destination table, identified by project, dataset and table id from the options.
    final TableReference outputTable =
            new TableReference()
                    .setProjectId(flowOptions.getProject())
                    .setDatasetId(flowOptions.getBigQueryDataset())
                    .setTableId(flowOptions.getBigQueryTable());

    flowPipeline
            .apply("ReadLines", new ReadFileAndExtractTimestamps(flowOptions.getInputFile()))
            .apply(ParDo.of(new ExtractFlowInfoFn()))
            // Sliding windows: size and slide period are both given in minutes by the options.
            .apply(Window.<KV<String, LaneInfo>>into(
                    SlidingWindows.of(Duration.standardMinutes(flowOptions.getWindowDuration()))
                            .every(Duration.standardMinutes(flowOptions.getWindowSlideEvery()))))
            .apply(new MaxLaneFlow())
            .apply(BigQueryIO.writeTableRows()
                    .to(outputTable)
                    .withSchema(FormatMaxesFn.getSchema()));

    // Launch the pipeline.
    final PipelineResult outcome = flowPipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    resources.waitToFinish(outcome);
}

Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline)

Example 23 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class TrafficRoutes method main.

/**
 * Configures and launches the streaming pipeline.
 *
 * <p>Steps: parse and validate the options, set up resources through {@code ExampleUtils},
 * read the input file with timestamps, extract station speeds, apply sliding windows,
 * apply {@code TrackSpeed}, and write the rows to the configured BigQuery table.
 *
 * @param args command-line arguments parsed into {@code TrafficRoutesOptions}
 * @throws IOException if there is a problem setting up resources
 */
public static void main(String[] args) throws IOException {
    final TrafficRoutesOptions routeOptions =
            PipelineOptionsFactory.fromArgs(args)
                    .withValidation()
                    .as(TrafficRoutesOptions.class);
    routeOptions.setBigQuerySchema(FormatStatsFn.getSchema());

    // ExampleUtils takes care of setting up the required resources.
    final ExampleUtils resources = new ExampleUtils(routeOptions);
    resources.setup();

    final Pipeline routePipeline = Pipeline.create(routeOptions);

    // Destination table, identified by project, dataset and table id from the options.
    final TableReference outputTable =
            new TableReference()
                    .setProjectId(routeOptions.getProject())
                    .setDatasetId(routeOptions.getBigQueryDataset())
                    .setTableId(routeOptions.getBigQueryTable());

    routePipeline
            .apply("ReadLines", new ReadFileAndExtractTimestamps(routeOptions.getInputFile()))
            .apply(ParDo.of(new ExtractStationSpeedFn()))
            // Sliding windows: size and slide period are both given in minutes by the options.
            .apply(Window.<KV<String, StationSpeed>>into(
                    SlidingWindows.of(Duration.standardMinutes(routeOptions.getWindowDuration()))
                            .every(Duration.standardMinutes(routeOptions.getWindowSlideEvery()))))
            .apply(new TrackSpeed())
            .apply(BigQueryIO.writeTableRows()
                    .to(outputTable)
                    .withSchema(FormatStatsFn.getSchema()));

    // Launch the pipeline.
    final PipelineResult outcome = routePipeline.run();

    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
    resources.waitToFinish(outcome);
}

Example 24 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class BigQueryTornadoes method main.

/**
 * Reads rows from the input BigQuery table, applies {@code CountTornadoes}, and writes
 * the result to the output table, creating it if needed and truncating existing contents.
 * Blocks until the pipeline finishes.
 *
 * @param args command-line arguments parsed into {@code Options}
 */
public static void main(String[] args) {
    final Options tornadoOptions =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    final Pipeline pipeline = Pipeline.create(tornadoOptions);

    // Output table schema: two INTEGER columns, "month" and "tornado_count".
    final TableFieldSchema monthColumn =
            new TableFieldSchema().setName("month").setType("INTEGER");
    final TableFieldSchema countColumn =
            new TableFieldSchema().setName("tornado_count").setType("INTEGER");
    final List<TableFieldSchema> columns = new ArrayList<>();
    columns.add(monthColumn);
    columns.add(countColumn);
    final TableSchema outputSchema = new TableSchema().setFields(columns);

    pipeline
            .apply(BigQueryIO.read().from(tornadoOptions.getInput()))
            .apply(new CountTornadoes())
            .apply(BigQueryIO.writeTableRows()
                    .to(tornadoOptions.getOutput())
                    .withSchema(outputSchema)
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

    pipeline.run().waitUntilFinish();
}

Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableSchema(com.google.api.services.bigquery.model.TableSchema) ArrayList(java.util.ArrayList) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Pipeline(org.apache.beam.sdk.Pipeline)

Example 25 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DistinctExample method main.

/**
 * Reads text lines from the input location, applies {@code Distinct} to them, and writes
 * the result to the output location. Blocks until the pipeline finishes.
 *
 * @param args command-line arguments parsed into {@code Options}
 * @throws Exception declared by the signature; this body has no explicit throw
 */
public static void main(String[] args) throws Exception {
    final Options dedupOptions =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    final Pipeline pipeline = Pipeline.create(dedupOptions);

    pipeline
            .apply("ReadLines", TextIO.read().from(dedupOptions.getInput()))
            .apply(Distinct.<String>create())
            .apply("DedupedShakespeare", TextIO.write().to(dedupOptions.getOutput()));

    pipeline.run().waitUntilFinish();
}

Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline)

Aggregations

Pipeline (org.apache.beam.sdk.Pipeline): 184; Test (org.junit.Test): 123; TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 86; DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 39; KV (org.apache.beam.sdk.values.KV): 35; Job (com.google.api.services.dataflow.model.Job): 26; DoFn (org.apache.beam.sdk.transforms.DoFn): 24; PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 22; DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage): 21; TableRow (com.google.api.services.bigquery.model.TableRow): 16; PipelineResult (org.apache.beam.sdk.PipelineResult): 14; Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 13; TableSchema (com.google.api.services.bigquery.model.TableSchema): 12; ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions): 12; Map (java.util.Map): 11; TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 10; ArrayList (java.util.ArrayList): 10; Instant (org.joda.time.Instant): 10; TableReference (com.google.api.services.bigquery.model.TableReference): 9; JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema): 9