Search in sources :

Example 71 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class OffsetBasedSourceTest method testProgress.

/**
 * Walks a reader over {@code CoarseRangeSource(13, 17, 1, 2)} record by record and checks the
 * reported fraction consumed, split points consumed and split points remaining at each step.
 */
@Test
public void testProgress() throws IOException {
    final double tolerance = 1e-6;
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CoarseRangeSource coarseSource = new CoarseRangeSource(13, 17, 1, 2);
    try (OffsetBasedReader<Integer> rangeReader = coarseSource.createReader(pipelineOptions)) {
        // Before start(): nothing consumed, and the remaining split point count is not known yet.
        assertEquals(0.0, rangeReader.getFractionConsumed(), tolerance);
        assertEquals(0, rangeReader.getSplitPointsConsumed());
        assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, rangeReader.getSplitPointsRemaining());

        // start() lands on 14 (the source is built with granularity 2), which is a split point.
        assertTrue(rangeReader.start());
        assertTrue(rangeReader.isAtSplitPoint());
        assertEquals(14, rangeReader.getCurrent().intValue());
        assertEquals(0, rangeReader.getSplitPointsConsumed());
        assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, rangeReader.getSplitPointsRemaining());

        // Next record is 15; the counters are expected to stay where they were.
        assertTrue(rangeReader.advance());
        assertEquals(15, rangeReader.getCurrent().intValue());
        assertEquals(0, rangeReader.getSplitPointsConsumed());
        assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, rangeReader.getSplitPointsRemaining());

        // Next record is 16, a split point. The following offset (17) lies outside [13, 17),
        // so the remaining count is expected to switch from UNKNOWN to exactly 1.
        assertTrue(rangeReader.advance());
        assertTrue(rangeReader.isAtSplitPoint());
        assertEquals(16, rangeReader.getCurrent().intValue());
        assertEquals(1, rangeReader.getSplitPointsConsumed());
        assertEquals(1, rangeReader.getSplitPointsRemaining());

        // Next record is 17, not a split point; both counters are unchanged.
        assertTrue(rangeReader.advance());
        assertEquals(17, rangeReader.getCurrent().intValue());
        assertEquals(1, rangeReader.getSplitPointsConsumed());
        assertEquals(1, rangeReader.getSplitPointsRemaining());

        // The reader is exhausted: everything consumed and nothing remaining.
        assertFalse(rangeReader.advance());
        assertEquals(1.0, rangeReader.getFractionConsumed(), tolerance);
        assertEquals(2, rangeReader.getSplitPointsConsumed());
        assertEquals(0, rangeReader.getSplitPointsRemaining());
    }
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 72 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class OffsetBasedSourceTest method testReadingGranularityAndFractionConsumed.

/**
 * Checks that the reader snaps to multiples of the configured granularity (note: this exercises
 * test code) and that {@code getFractionConsumed} stays within sensible bounds while doing so.
 */
@Test
public void testReadingGranularityAndFractionConsumed() throws IOException {
    final double tolerance = 1e-6;
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CoarseRangeSource source = new CoarseRangeSource(13, 35, 1, 10);
    try (CoarseRangeReader coarseReader = source.createReader(pipelineOptions)) {
        List<Integer> seen = new ArrayList<>();

        // Nothing has been consumed before the reader is started.
        assertEquals(0.0, coarseReader.getFractionConsumed(), tolerance);
        assertTrue(coarseReader.start());
        seen.add(coarseReader.getCurrent());

        // After every successful advance the fraction must be non-null and lie in (0, 1].
        while (coarseReader.advance()) {
            Double consumed = coarseReader.getFractionConsumed();
            assertNotNull(consumed);
            assertTrue(consumed.toString(), consumed > 0.0 && consumed <= 1.0);
            seen.add(coarseReader.getCurrent());
        }
        assertEquals(1.0, coarseReader.getFractionConsumed(), tolerance);

        // The test expects 20 records, running from 20 through 39.
        int lastIndex = seen.size() - 1;
        assertEquals(20, seen.size());
        assertEquals(20, seen.get(0).intValue());
        assertEquals(39, seen.get(lastIndex).intValue());

        // Swap in a narrower source for the second reader below.
        source = new CoarseRangeSource(13, 17, 1, 10);
    }
    // For the range [13, 17) with granularity 10 the reader is expected to yield nothing.
    try (BoundedSource.BoundedReader<Integer> emptyReader = source.createReader(pipelineOptions)) {
        assertFalse(emptyReader.start());
    }
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 73 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class OffsetBasedSourceTest method testSplitAtFraction.

/**
 * Reads four records, then checks that {@code splitAtFraction} refuses fractions at or behind the
 * current position, accepts one ahead of it, and that the resulting primary source yields exactly
 * what the original reader goes on to produce.
 */
@Test
public void testSplitAtFraction() throws IOException {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CoarseRangeSource coarseSource = new CoarseRangeSource(13, 35, 1, 10);
    try (CoarseRangeReader coarseReader = coarseSource.createReader(pipelineOptions)) {
        List<Integer> readSoFar = new ArrayList<>();

        // Consume the first record, then three more.
        assertTrue(coarseReader.start());
        readSoFar.add(coarseReader.getCurrent());
        for (int step = 0; step < 3; step++) {
            assertTrue(coarseReader.advance());
            readSoFar.add(coarseReader.getCurrent());
        }

        // Split requests at the very beginning or behind the current position return null.
        assertNull(coarseReader.splitAtFraction(0.0));
        assertNull(coarseReader.splitAtFraction(coarseReader.getFractionConsumed() - 0.1));

        // A split request ahead of the current position yields a residual source.
        BoundedSource<Integer> residualSource =
            coarseReader.splitAtFraction(coarseReader.getFractionConsumed() + 0.1);
        BoundedSource<Integer> primarySource = coarseReader.getCurrentSource();
        List<Integer> fromPrimary = readFromSource(primarySource, pipelineOptions);
        List<Integer> fromResidual = readFromSource(residualSource, pipelineOptions);

        // Every residual record must lie beyond the reader's current offset.
        for (Integer residualItem : fromResidual) {
            assertTrue(residualItem > coarseReader.getCurrentOffset());
        }
        assertFalse(fromPrimary.isEmpty());
        assertFalse(fromResidual.isEmpty());

        // The primary part must end no later than the residual part begins.
        Integer lastPrimary = fromPrimary.get(fromPrimary.size() - 1);
        Integer firstResidual = fromResidual.get(0);
        assertTrue(lastPrimary <= firstResidual);

        // Draining the original reader must reproduce exactly the primary source's records.
        while (coarseReader.advance()) {
            readSoFar.add(coarseReader.getCurrent());
        }
        assertEquals(readSoFar, fromPrimary);
    }
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 74 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class OffsetBasedSourceTest method testEstimatedSizeBytes.

/**
 * Checks that the estimated size of a source equals the number of offsets it spans multiplied by
 * the source's bytes-per-offset value.
 */
@Test
public void testEstimatedSizeBytes() throws Exception {
    final long startOffset = 300L;
    final long endOffset = 1000L;
    final long minimumBundle = 150L;
    CoarseRangeSource rangeSource =
        new CoarseRangeSource(startOffset, endOffset, minimumBundle, 1);
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    // Expected size: span of the offset range times the per-offset byte count.
    assertEquals(
        (endOffset - startOffset) * rangeSource.getBytesPerOffset(),
        rangeSource.getEstimatedSizeBytes(pipelineOptions));
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 75 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FileBasedSourceTest method testReadAllSplitsOfFilePattern.

/**
 * Writes three files matching the pattern {@code file*} plus one that does not, splits a source
 * built over that pattern, and checks that reading every split returns exactly the contents of
 * the three matching files (in any order).
 */
@Test
public void testReadAllSplitsOfFilePattern() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    List<String> data1 = createStringDataset(3, 50);
    File file1 = createFileWithData("file1", data1);
    List<String> data2 = createStringDataset(3, 50);
    createFileWithData("file2", data2);
    List<String> data3 = createStringDataset(3, 50);
    createFileWithData("file3", data3);
    // "otherfile" does not match "file*", so its data is left out of the expected results below.
    List<String> data4 = createStringDataset(3, 50);
    createFileWithData("otherfile", data4);

    String filePattern = new File(file1.getParent(), "file*").getPath();
    TestFileBasedSource source = new TestFileBasedSource(filePattern, 64, null);
    List<? extends BoundedSource<String>> sources = source.split(512, null);
    // Not a trivial split.
    assertTrue(sources.size() > 1);

    List<String> results = new ArrayList<>();
    for (BoundedSource<String> split : sources) {
        results.addAll(readFromSource(split, options));
    }

    List<String> expectedResults = new ArrayList<>();
    expectedResults.addAll(data1);
    expectedResults.addAll(data2);
    expectedResults.addAll(data3);

    // The value under test goes first and the expectation goes into the matcher, so a failure
    // message describes the records actually read rather than the expected data.
    assertThat(results, containsInAnyOrder(expectedResults.toArray()));
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ArrayList(java.util.ArrayList) File(java.io.File) Test(org.junit.Test)

Aggregations

PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)92 Test (org.junit.Test)79 File (java.io.File)26 ArrayList (java.util.ArrayList)16 Pipeline (org.apache.beam.sdk.Pipeline)10 Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata)9 Path (java.nio.file.Path)6 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)6 SerializedPipelineOptions (org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions)5 KV (org.apache.beam.sdk.values.KV)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 Table (com.google.api.services.bigquery.model.Table)4 TableReference (com.google.api.services.bigquery.model.TableReference)4 TableRow (com.google.api.services.bigquery.model.TableRow)4 HashBasedTable (com.google.common.collect.HashBasedTable)4 BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter)4 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)4 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3