Search in sources :

Example 51 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class UnboundedReadFromBoundedSourceTest method testBoundedToUnboundedSourceAdapterCheckpoint.

/**
 * Drains {@code boundedSource} through a {@link BoundedToUnboundedSourceAdapter} reader while
 * periodically taking and finalizing checkpoints, then verifies the outcome.
 *
 * <p>A checkpoint is taken and finalized each time the number of elements read so far is a
 * multiple of nine. Once the reader reports no more elements, a last checkpoint is taken and
 * must carry no residual elements (either {@code null} or empty). Finally the elements read are
 * compared with {@code expectedElements} by count and as an unordered set.
 *
 * @param boundedSource the bounded source wrapped by the adapter under test
 * @param expectedElements the elements the reader is expected to produce (order-insensitive)
 * @throws Exception if the reader or a checkpoint operation fails
 */
private <T> void testBoundedToUnboundedSourceAdapterCheckpoint(BoundedSource<T> boundedSource, List<T> expectedElements) throws Exception {
    BoundedToUnboundedSourceAdapter<T> unboundedSource = new BoundedToUnboundedSourceAdapter<>(boundedSource);
    PipelineOptions options = PipelineOptionsFactory.create();
    // No checkpoint to resume from, hence the null second argument.
    BoundedToUnboundedSourceAdapter<T>.Reader<T> reader = unboundedSource.createReader(options, null);

    List<T> readElements = Lists.newArrayList();
    boolean more = reader.start();
    while (more) {
        readElements.add(reader.getCurrent());
        // Take and finalize a checkpoint after every ninth element.
        if (readElements.size() % 9 == 0) {
            reader.getCheckpointMark().finalizeCheckpoint();
        }
        more = reader.advance();
    }

    // After the reader is exhausted, nothing may be left over in the checkpoint.
    Checkpoint<T> finalCheckpoint = reader.getCheckpointMark();
    assertTrue(finalCheckpoint.getResidualElements() == null || finalCheckpoint.getResidualElements().isEmpty());

    assertEquals(expectedElements.size(), readElements.size());
    assertEquals(Sets.newHashSet(expectedElements), Sets.newHashSet(readElements));
}
Also used : BoundedToUnboundedSourceAdapter(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 52 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class TestPipeline method testingPipelineOptions.

/**
 * Creates {@link PipelineOptions} for testing.
 *
 * <p>When the system property named by {@code PROPERTY_BEAM_TEST_PIPELINE_OPTIONS} is set to a
 * non-empty value, it is parsed with {@code MAPPER} as a {@code String[]} of arguments and the
 * options are built from those arguments. Otherwise default options are created and, if the
 * system property named by {@code PROPERTY_USE_DEFAULT_DUMMY_RUNNER} parses as {@code true},
 * the runner is set to {@code CrashingRunner}.
 *
 * <p>In both cases the application name is set, stable unique names are checked at
 * {@code CheckEnabled.ERROR}, and the resulting options are registered with
 * {@code FileSystems.setDefaultPipelineOptions} before being returned.
 *
 * @return the configured options
 * @throws RuntimeException wrapping the {@link IOException} if the options system property
 *     cannot be read as a string array
 */
public static PipelineOptions testingPipelineOptions() {
    try {
        @Nullable String serializedOptions = System.getProperty(PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);
        boolean noOptionsProvided = Strings.isNullOrEmpty(serializedOptions);

        PipelineOptions options;
        if (noOptionsProvided) {
            options = PipelineOptionsFactory.create();
        } else {
            String[] parsedArgs = MAPPER.readValue(serializedOptions, String[].class);
            options = PipelineOptionsFactory.fromArgs(parsedArgs).as(TestPipelineOptions.class);
        }
        options.as(ApplicationNameOptions.class).setAppName(getAppName());

        // Only when no options were supplied do we consult the dummy-runner switch.
        // parseBoolean yields false for null/empty input, so no separate emptiness check is needed.
        if (noOptionsProvided && Boolean.parseBoolean(System.getProperty(PROPERTY_USE_DEFAULT_DUMMY_RUNNER))) {
            options.setRunner(CrashingRunner.class);
        }

        options.setStableUniqueNames(CheckEnabled.ERROR);
        FileSystems.setDefaultPipelineOptions(options);
        return options;
    } catch (IOException e) {
        throw new RuntimeException("Unable to instantiate test options from system property " + PROPERTY_BEAM_TEST_PIPELINE_OPTIONS + ":" + System.getProperty(PROPERTY_BEAM_TEST_PIPELINE_OPTIONS), e);
    }
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) IOException(java.io.IOException) ApplicationNameOptions(org.apache.beam.sdk.options.ApplicationNameOptions) Nullable(javax.annotation.Nullable)

Example 53 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class MinimalWordCount method main.

/**
 * Builds and runs a minimal word-count pipeline: reads text lines, splits them into words,
 * counts each distinct word, formats every count as {@code "word: count"} and writes the
 * results to files prefixed with {@code wordcounts}.
 *
 * @param args command-line arguments; not used by this example
 */
public static void main(String[] args) {
    // PipelineOptions carries the execution settings for the pipeline, such as which runner
    // to use. With nothing configured, this example runs on the DirectRunner by default,
    // based on the class path configured in its dependencies.
    PipelineOptions options = PipelineOptionsFactory.create();
    // The Pipeline is created from the options defined above.
    Pipeline p = Pipeline.create(options);

    // Splits each input line into words and emits every non-empty word.
    DoFn<String, String> extractWords = new DoFn<String, String>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            String[] tokens = c.element().split(ExampleUtils.TOKENIZER_PATTERN);
            for (String token : tokens) {
                if (token.isEmpty()) {
                    continue;
                }
                c.output(token);
            }
        }
    };

    // Renders a (word, count) pair as a printable line.
    SimpleFunction<KV<String, Long>, String> formatResult = new SimpleFunction<KV<String, Long>, String>() {

        @Override
        public String apply(KV<String, Long> input) {
            return input.getKey() + ": " + input.getValue();
        }
    };

    // Concept #1: the root transform, TextIO.Read, reads a set of input text files and yields
    // a PCollection in which each element is one line of the input text. The input here is a
    // public data set consisting of the complete works of Shakespeare.
    p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
        .apply("ExtractWords", ParDo.of(extractWords))
        .apply(Count.<String>perElement())
        .apply("FormatResults", MapElements.via(formatResult))
        .apply(TextIO.write().to("wordcounts"));

    // Run the pipeline and block until it completes.
    p.run().waitUntilFinish();
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline)

Example 54 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkRunnerRegistrarTest method testFullName.

/**
 * Verifies that passing {@code --runner=} followed by the fully qualified class name of
 * {@code FlinkRunner} makes the resulting options report {@code FlinkRunner} as the runner.
 */
@Test
public void testFullName() {
    String[] args = new String[] { String.format("--runner=%s", FlinkRunner.class.getName()) };
    PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
    // failure message report the expected and actual runner the right way round.
    assertEquals(FlinkRunner.class, opts.getRunner());
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 55 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkRunnerRegistrarTest method testClassName.

/**
 * Verifies that passing {@code --runner=} followed by the simple class name of
 * {@code FlinkRunner} makes the resulting options report {@code FlinkRunner} as the runner.
 */
@Test
public void testClassName() {
    String[] args = new String[] { String.format("--runner=%s", FlinkRunner.class.getSimpleName()) };
    PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
    // failure message report the expected and actual runner the right way round.
    assertEquals(FlinkRunner.class, opts.getRunner());
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Aggregations

PipelineOptions (org.apache.beam.sdk.options.PipelineOptions) 92; Test (org.junit.Test) 79; File (java.io.File) 26; ArrayList (java.util.ArrayList) 16; Pipeline (org.apache.beam.sdk.Pipeline) 10; Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata) 9; Path (java.nio.file.Path) 6; BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) 6; SerializedPipelineOptions (org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions) 5; KV (org.apache.beam.sdk.values.KV) 5; Matchers.containsString (org.hamcrest.Matchers.containsString) 5; Table (com.google.api.services.bigquery.model.Table) 4; TableReference (com.google.api.services.bigquery.model.TableReference) 4; TableRow (com.google.api.services.bigquery.model.TableRow) 4; HashBasedTable (com.google.common.collect.HashBasedTable) 4; BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) 4; BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) 4; TestPipeline (org.apache.beam.sdk.testing.TestPipeline) 4; TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 3; TableSchema (com.google.api.services.bigquery.model.TableSchema) 3