Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FlinkPartialReduceFunction, method combine:
@Override
public void combine(
    Iterable<WindowedValue<KV<K, InputT>>> elements,
    Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
  PipelineOptions options = serializedOptions.getPipelineOptions();
  FlinkSideInputReader sideInputReader =
      new FlinkSideInputReader(sideInputs, getRuntimeContext());
  AbstractFlinkCombineRunner<K, InputT, AccumT, AccumT, W> reduceRunner;
  // Merging WindowFns whose windows are not IntervalWindows get the hash-based
  // combine runner; otherwise the sort-based runner is used.
  if (!windowingStrategy.getWindowFn().isNonMerging()
      && !windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
    reduceRunner = new HashingFlinkCombineRunner<>();
  } else {
    reduceRunner = new SortingFlinkCombineRunner<>();
  }
  reduceRunner.combine(
      new AbstractFlinkCombineRunner.PartialFlinkCombiner<K, InputT, AccumT>(combineFn),
      windowingStrategy, sideInputReader, options, elements, out);
}
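For context, the PipelineOptions deserialized inside this Flink function are normally constructed on the client with PipelineOptionsFactory. The following is a minimal sketch of that construction step; the MyOptions interface, its property, and the class name are illustrative and not part of the snippet above.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class MyOptionsExample {
  // Hypothetical options interface; only PipelineOptions itself comes from the snippet above.
  public interface MyOptions extends PipelineOptions {
    @Description("An example option read from the command line")
    @Default.String("example")
    String getMyValue();

    void setMyValue(String value);
  }

  public static void main(String[] args) {
    // Parse e.g. --myValue=... from the command line and validate the options.
    MyOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    System.out.println(options.getMyValue());
  }
}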
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class OffsetBasedSourceTest, method testProgressEmptySource:
@Test
public void testProgressEmptySource() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  CoarseRangeSource source = new CoarseRangeSource(13, 17, 1, 100);
  try (OffsetBasedReader<Integer> reader = source.createReader(options)) {
    // Before starting.
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Confirm that the source is empty.
    assertFalse(reader.start());

    // After reading the empty source.
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
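The helper below is a sketch of the standard start()/advance() read loop over a BoundedSource, roughly what a test utility such as SourceTestUtils.readFromSource does; the progress methods asserted in the test above can be queried at any point while this loop runs. The helper name readAll is illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;

// Hypothetical helper: drains a BoundedSource using the standard read loop.
static <T> List<T> readAll(BoundedSource<T> source, PipelineOptions options) throws IOException {
  List<T> values = new ArrayList<>();
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      values.add(reader.getCurrent());
    }
  }
  return values;
}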
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class CompressedSourceTest, method testSplittableProgress:
@Test
public void testSplittableProgress() throws IOException {
  File tmpFile = tmpFolder.newFile("nonempty.txt");
  String filename = tmpFile.toPath().toString();
  Files.write(new byte[2], tmpFile);
  PipelineOptions options = PipelineOptionsFactory.create();
  CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
  try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
    assertThat(readerOrig, not(instanceOf(CompressedReader.class)));
    assertThat(readerOrig, instanceOf(FileBasedReader.class));
    FileBasedReader<Byte> reader = (FileBasedReader<Byte>) readerOrig;

    // Check preconditions before starting.
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // First record: none consumed yet, remaining unknown.
    assertTrue(reader.start());
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Second record: one consumed, and we know we are on the last record.
    assertTrue(reader.advance());
    assertEquals(1, reader.getSplitPointsConsumed());
    assertEquals(1, reader.getSplitPointsRemaining());

    // Confirm the reader is exhausted and check post-conditions.
    assertFalse(reader.advance());
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(2, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
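As a related usage note, a CompressedSource can also be given an explicit decompression mode instead of relying on auto-detection by file extension. A brief sketch, reusing the test's ByteSource with an illustrative file path:

// Sketch only: the path is illustrative; ByteSource is the same test source used above.
CompressedSource<Byte> gzipSource =
    CompressedSource.from(new ByteSource("/path/to/input.gz", 1))
        .withDecompression(CompressedSource.CompressionMode.GZIP);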
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FileBasedSourceTest, method testReadFileWithSplitsWithEmptyRange:
@Test
public void testReadFileWithSplitsWithEmptyRange() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 5; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 42, header);
  TestFileBasedSource source2 = new TestFileBasedSource(metadata, 64, 42, 62, header);
  TestFileBasedSource source3 = new TestFileBasedSource(metadata, 64, 62, Long.MAX_VALUE, header);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data);
  // Remove all occurrences of the header from the expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  List<String> results = new ArrayList<>();
  results.addAll(readFromSource(source1, options));
  results.addAll(readFromSource(source2, options));
  results.addAll(readFromSource(source3, options));

  assertThat(expectedResults, containsInAnyOrder(results.toArray()));
}
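Rather than hard-coding the three offset ranges as the test above does, a BoundedSource can be asked to split itself. The following sketch assumes `source` is a TestFileBasedSource covering the whole file and reuses the readFromSource helper from the test; note that split(...) declares `throws Exception`, so the enclosing test method would as well.

// Sketch: let the source compute sub-ranges of roughly 64 bytes each.
List<? extends BoundedSource<String>> splits = source.split(64, options);
List<String> results = new ArrayList<>();
for (BoundedSource<String> split : splits) {
  results.addAll(readFromSource(split, options));
}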
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FileBasedSourceTest, method testReadRangeAtEnd:
@Test
public void testReadRangeAtEnd() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 162, null);
  TestFileBasedSource source2 = new TestFileBasedSource(metadata, 1024, 162, Long.MAX_VALUE, null);

  List<String> results = new ArrayList<>();
  results.addAll(readFromSource(source1, options));
  results.addAll(readFromSource(source2, options));

  assertThat(data, containsInAnyOrder(results.toArray()));
}
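The same "union of the ranges equals the whole file" property can also be checked with a SourceTestUtils helper. A sketch, assuming `wholeSource` is a hypothetical TestFileBasedSource covering the entire file:

// Sketch: compare the two sub-range sources above against a reference over the whole file.
SourceTestUtils.assertSourcesEqualReferenceSource(
    wholeSource, Arrays.asList(source1, source2), options);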