Search in sources :

Example 61 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class BigQueryIOTest method testBigQueryTableSourceThroughJsonAPI.

/**
 * Reads a table held by the fake BigQuery services through a {@code BigQueryTableSource},
 * asserts that the rows read back equal the rows that were inserted, and then runs a
 * split-at-fraction consistency check against the same source.
 */
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
    // Fake services standing in for the real BigQuery endpoints.
    FakeDatasetService fakeDatasets = new FakeDatasetService();
    FakeBigQueryServices services = new FakeBigQueryServices()
            .withJobService(new FakeJobService())
            .withDatasetService(fakeDatasets);

    // Six rows that are inserted into the fake table and expected back from the source.
    List<TableRow> expectedRows = ImmutableList.of(
            new TableRow().set("name", "a").set("number", "1"),
            new TableRow().set("name", "b").set("number", "2"),
            new TableRow().set("name", "c").set("number", "3"),
            new TableRow().set("name", "d").set("number", "4"),
            new TableRow().set("name", "e").set("number", "5"),
            new TableRow().set("name", "f").set("number", "6"));

    // Create the dataset and table, then populate the table.
    TableReference tableRef = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
    fakeDatasets.createDataset(tableRef.getProjectId(), tableRef.getDatasetId(), "", "");
    fakeDatasets.createTable(new Table().setTableReference(tableRef));
    fakeDatasets.insertAll(tableRef, expectedRows, null);

    Path tempDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
    BoundedSource<TableRow> source = BigQueryTableSource.create(
            "testStepUuid", StaticValueProvider.of(tableRef), services);

    // The pipeline options carry the temp location created above.
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    pipelineOptions.setTempLocation(tempDir.toString());

    List<TableRow> actualRows = SourceTestUtils.readFromSource(source, pipelineOptions);
    Assert.assertThat(actualRows, CoreMatchers.is(expectedRows));
    SourceTestUtils.assertSplitAtFractionBehavior(
            source, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, pipelineOptions);
}
Also used : Path(java.nio.file.Path) BigQueryHelpers.createTempTableReference(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference) TableReference(com.google.api.services.bigquery.model.TableReference) HashBasedTable(com.google.common.collect.HashBasedTable) Table(com.google.api.services.bigquery.model.Table) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) TableRow(com.google.api.services.bigquery.model.TableRow) BigQueryHelpers.toJsonString(org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString) Test(org.junit.Test)

Example 62 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkReduceFunction method reduce.

/**
 * Picks a combine runner based on the windowing strategy and hands it the incoming
 * elements together with the output collector.
 *
 * <p>A sorting runner is used when the window function is non-merging or when its window
 * coder equals the {@code IntervalWindow} coder; otherwise a hashing runner is used.
 *
 * @param elements the windowed key/accumulator pairs passed on to the combine runner
 * @param out the collector passed on to the combine runner
 * @throws Exception if any of the delegated calls throws
 */
@Override
public void reduce(Iterable<WindowedValue<KV<K, AccumT>>> elements, Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    PipelineOptions pipelineOptions = serializedOptions.getPipelineOptions();
    FlinkSideInputReader sideInputs_ = new FlinkSideInputReader(sideInputs, getRuntimeContext());

    AbstractFlinkCombineRunner<K, AccumT, AccumT, OutputT, W> combineRunner;
    if (windowingStrategy.getWindowFn().isNonMerging()
            || windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
        combineRunner = new SortingFlinkCombineRunner<>();
    } else {
        combineRunner = new HashingFlinkCombineRunner<>();
    }

    AbstractFlinkCombineRunner.FinalFlinkCombiner<K, AccumT, OutputT> finalCombiner =
            new AbstractFlinkCombineRunner.FinalFlinkCombiner<K, AccumT, OutputT>(combineFn);
    combineRunner.combine(finalCombiner, windowingStrategy, sideInputs_, pipelineOptions, elements, out);
}
Also used : SerializedPipelineOptions(org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 63 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkMergingNonShuffleReduceFunction method reduce.

/**
 * Picks a combine runner based on the window coder and hands it the incoming elements
 * together with the output collector.
 *
 * <p>A sorting runner is used when the window function's coder equals the
 * {@code IntervalWindow} coder; any other coder gets a hashing runner.
 *
 * @param elements the windowed key/input pairs passed on to the combine runner
 * @param out the collector passed on to the combine runner
 * @throws Exception if any of the delegated calls throws
 */
@Override
public void reduce(Iterable<WindowedValue<KV<K, InputT>>> elements, Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    PipelineOptions pipelineOptions = serializedOptions.getPipelineOptions();
    FlinkSideInputReader sideInputs_ = new FlinkSideInputReader(sideInputs, getRuntimeContext());

    AbstractFlinkCombineRunner<K, InputT, AccumT, OutputT, W> combineRunner;
    if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
        combineRunner = new HashingFlinkCombineRunner<>();
    } else {
        combineRunner = new SortingFlinkCombineRunner<>();
    }

    AbstractFlinkCombineRunner.CompleteFlinkCombiner<K, InputT, AccumT, OutputT> completeCombiner =
            new AbstractFlinkCombineRunner.CompleteFlinkCombiner<K, InputT, AccumT, OutputT>(combineFn);
    combineRunner.combine(completeCombiner, windowingStrategy, sideInputs_, pipelineOptions, elements, out);
}
Also used : SerializedPipelineOptions(org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 64 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class TestCountingSourceTest method testRespectsCheckpointContract.

/**
 * Reads two values from a {@code TestCountingSource} created with argument 3, finalizes a
 * checkpoint, and verifies that a reader created from that checkpoint starts at value 2
 * and then cannot advance further.
 */
@Test
public void testRespectsCheckpointContract() throws IOException {
    TestCountingSource countingSource = new TestCountingSource(3);
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

    // First reader: no checkpoint supplied; expect values 0 and 1.
    TestCountingSource.CountingSourceReader firstReader = countingSource.createReader(pipelineOptions, null);
    assertTrue(firstReader.start());
    assertEquals(0L, (long) firstReader.getCurrent().getValue());
    assertTrue(firstReader.advance());
    assertEquals(1L, (long) firstReader.getCurrent().getValue());

    // Take and finalize a checkpoint after the second value.
    TestCountingSource.CounterMark mark = firstReader.getCheckpointMark();
    mark.finalizeCheckpoint();

    // Second reader: created from the checkpoint; expect value 2 and then no more.
    TestCountingSource.CountingSourceReader resumedReader = countingSource.createReader(pipelineOptions, mark);
    assertTrue(resumedReader.start());
    assertEquals(2L, (long) resumedReader.getCurrent().getValue());
    assertFalse(resumedReader.advance());
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 65 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class CrashingRunnerTest method fromOptionsCreatesInstance.

@Test
public void fromOptionsCreatesInstance() {
    PipelineOptions opts = PipelineOptionsFactory.create();
    opts.setRunner(CrashingRunner.class);
    PipelineRunner<? extends PipelineResult> runner = PipelineRunner.fromOptions(opts);
    assertTrue("Should have created a CrashingRunner", runner instanceof CrashingRunner);
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Aggregations

PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 92; Test (org.junit.Test): 79; File (java.io.File): 26; ArrayList (java.util.ArrayList): 16; Pipeline (org.apache.beam.sdk.Pipeline): 10; Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata): 9; Path (java.nio.file.Path): 6; BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString): 6; SerializedPipelineOptions (org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions): 5; KV (org.apache.beam.sdk.values.KV): 5; Matchers.containsString (org.hamcrest.Matchers.containsString): 5; Table (com.google.api.services.bigquery.model.Table): 4; TableReference (com.google.api.services.bigquery.model.TableReference): 4; TableRow (com.google.api.services.bigquery.model.TableRow): 4; HashBasedTable (com.google.common.collect.HashBasedTable): 4; BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter): 4; BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference): 4; TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 4; TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema): 3; TableSchema (com.google.api.services.bigquery.model.TableSchema): 3