use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class OffsetBasedSourceTest method testProgress.
/**
 * Steps a reader over a {@code CoarseRangeSource} built as (13, 17, 1, 2) and checks the
 * fraction consumed and the split-point counters at every stage: before start, after each
 * record, and once the reader is exhausted.
 */
@Test
public void testProgress() throws IOException {
  final double tolerance = 1e-6;
  final long unknownSplitPoints = BoundedReader.SPLIT_POINTS_UNKNOWN;
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  CoarseRangeSource rangeSource = new CoarseRangeSource(13, 17, 1, 2);
  try (OffsetBasedReader<Integer> reader = rangeSource.createReader(pipelineOptions)) {
    // Before start(): no progress, no split points consumed, remaining count unknown.
    assertEquals(0.0, reader.getFractionConsumed(), tolerance);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(unknownSplitPoints, reader.getSplitPointsRemaining());

    // With a granularity of 2 the first record produced is 14, and it is a split point.
    assertTrue(reader.start());
    assertTrue(reader.isAtSplitPoint());
    assertEquals(14, reader.getCurrent().intValue());
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(unknownSplitPoints, reader.getSplitPointsRemaining());

    // 15 comes next; it is not a split point, so the counters stay where they were.
    assertTrue(reader.advance());
    assertEquals(15, reader.getCurrent().intValue());
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(unknownSplitPoints, reader.getSplitPointsRemaining());

    // 16 is a split point. The following offset (17) lies outside [13, 17), so the
    // remaining split points go from UNKNOWN to exactly 1.
    assertTrue(reader.advance());
    assertTrue(reader.isAtSplitPoint());
    assertEquals(16, reader.getCurrent().intValue());
    assertEquals(1, reader.getSplitPointsConsumed());
    assertEquals(1, reader.getSplitPointsRemaining());

    // 17 is still produced, but it is not a split point; counters are unchanged.
    assertTrue(reader.advance());
    assertEquals(17, reader.getCurrent().intValue());
    assertEquals(1, reader.getSplitPointsConsumed());
    assertEquals(1, reader.getSplitPointsRemaining());

    // Exhausted: everything consumed, nothing left to split.
    assertFalse(reader.advance());
    assertEquals(1.0, reader.getFractionConsumed(), tolerance);
    assertEquals(2, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class OffsetBasedSourceTest method testReadingGranularityAndFractionConsumed.
/**
 * Verifies that the test reader snaps to multiples of the configured granularity (this is
 * exercising test code) and that {@code getFractionConsumed} stays within (0, 1] while
 * records are being read.
 */
@Test
public void testReadingGranularityAndFractionConsumed() throws IOException {
  final double tolerance = 1e-6;
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  CoarseRangeSource wideSource = new CoarseRangeSource(13, 35, 1, 10);
  try (CoarseRangeReader reader = wideSource.createReader(pipelineOptions)) {
    List<Integer> seen = new ArrayList<>();

    assertEquals(0.0, reader.getFractionConsumed(), tolerance);
    assertTrue(reader.start());
    seen.add(reader.getCurrent());
    while (reader.advance()) {
      // After every successful advance the fraction must be known and inside (0, 1].
      Double consumed = reader.getFractionConsumed();
      assertNotNull(consumed);
      String label = consumed.toString();
      assertTrue(label, consumed > 0.0);
      assertTrue(label, consumed <= 1.0);
      seen.add(reader.getCurrent());
    }
    assertEquals(1.0, reader.getFractionConsumed(), tolerance);

    // Twenty records are expected, running from 20 through 39.
    int lastIndex = seen.size() - 1;
    assertEquals(20, seen.size());
    assertEquals(20, seen.get(0).intValue());
    assertEquals(39, seen.get(lastIndex).intValue());
  }

  // With the same granularity of 10, the range [13, 17) yields no records at all.
  CoarseRangeSource narrowSource = new CoarseRangeSource(13, 17, 1, 10);
  try (BoundedSource.BoundedReader<Integer> reader =
      narrowSource.createReader(pipelineOptions)) {
    assertFalse(reader.start());
  }
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class OffsetBasedSourceTest method testSplitAtFraction.
/**
 * Reads a few records, then checks dynamic splitting: splits at or behind the current
 * position are rejected, a split ahead of it yields a non-empty primary and residual, and the
 * records the live reader goes on to produce match what the primary source contains.
 */
@Test
public void testSplitAtFraction() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  CoarseRangeSource source = new CoarseRangeSource(13, 35, 1, 10);
  try (CoarseRangeReader reader = source.createReader(options)) {
    List<Integer> originalItems = new ArrayList<>();

    // Consume the first record plus three more before attempting any split.
    assertTrue(reader.start());
    originalItems.add(reader.getCurrent());
    for (int i = 0; i < 3; i++) {
      assertTrue(reader.advance());
      originalItems.add(reader.getCurrent());
    }

    // Splitting at the very beginning, or behind the current position, must be refused.
    assertNull(reader.splitAtFraction(0.0));
    assertNull(reader.splitAtFraction(reader.getFractionConsumed() - 0.1));

    // Splitting ahead of the current position must succeed. Fail with a clear message here
    // rather than with a NullPointerException when the residual is read below.
    BoundedSource<Integer> residual = reader.splitAtFraction(reader.getFractionConsumed() + 0.1);
    assertNotNull(
        "splitAtFraction ahead of the current position should return a residual source",
        residual);
    BoundedSource<Integer> primary = reader.getCurrentSource();

    List<Integer> primaryItems = readFromSource(primary, options);
    List<Integer> residualItems = readFromSource(residual, options);

    // Every residual record lies beyond what the reader has already reached.
    for (Integer item : residualItems) {
      assertTrue(item > reader.getCurrentOffset());
    }
    assertFalse(primaryItems.isEmpty());
    assertFalse(residualItems.isEmpty());
    assertTrue(primaryItems.get(primaryItems.size() - 1) <= residualItems.get(0));

    // Draining the live reader must yield exactly the primary's records.
    while (reader.advance()) {
      originalItems.add(reader.getCurrent());
    }
    assertEquals(originalItems, primaryItems);
  }
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class OffsetBasedSourceTest method testEstimatedSizeBytes.
/**
 * Checks that the estimated size of a source equals the length of its offset range multiplied
 * by the source's bytes-per-offset value.
 */
@Test
public void testEstimatedSizeBytes() throws Exception {
  final long start = 300;
  final long end = 1000;
  final long minBundleSize = 150;

  CoarseRangeSource testSource = new CoarseRangeSource(start, end, minBundleSize, 1);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  long expectedBytes = (end - start) * testSource.getBytesPerOffset();
  assertEquals(expectedBytes, testSource.getEstimatedSizeBytes(pipelineOptions));
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testReadAllSplitsOfFilePattern.
/**
 * Splits a source defined by the file pattern {@code file*} and checks that reading every
 * split returns exactly the contents of the three files named file1, file2 and file3.
 */
@Test
public void testReadAllSplitsOfFilePattern() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data1 = createStringDataset(3, 50);
  File file1 = createFileWithData("file1", data1);

  List<String> data2 = createStringDataset(3, 50);
  createFileWithData("file2", data2);

  List<String> data3 = createStringDataset(3, 50);
  createFileWithData("file3", data3);

  // "otherfile" does not start with "file", so its data is deliberately left out of the
  // expected results below. Presumably it lands in the same directory as file1 and serves
  // as a decoy for the pattern -- confirm against createFileWithData.
  List<String> data4 = createStringDataset(3, 50);
  createFileWithData("otherfile", data4);

  TestFileBasedSource source =
      new TestFileBasedSource(new File(file1.getParent(), "file*").getPath(), 64, null);
  List<? extends BoundedSource<String>> sources = source.split(512, null);
  // Not a trivial split.
  assertTrue(sources.size() > 1);

  List<String> results = new ArrayList<>();
  for (BoundedSource<String> split : sources) {
    results.addAll(readFromSource(split, options));
  }

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data1);
  expectedResults.addAll(data2);
  expectedResults.addAll(data3);

  // Actual value first, matcher built from the expected values second, so that a failure
  // message reports the expected and actual sides correctly.
  assertThat(results, containsInAnyOrder(expectedResults.toArray()));
}
Aggregations