Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.
From class OffsetBasedSourceTest, method testSplitAtFractionExhaustive.
/**
 * Runs the exhaustive split-at-fraction check on a {@code CoarseRangeSource} constructed with
 * the arguments (13, 35, 1, 10), using freshly created default pipeline options.
 *
 * @throws Exception if the exhaustive check fails or the source cannot be read
 */
@Test
public void testSplitAtFractionExhaustive() throws Exception {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final CoarseRangeSource coarseSource = new CoarseRangeSource(13, 35, 1, 10);

  assertSplitAtFractionExhaustive(coarseSource, pipelineOptions);
}
Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.
From class FileBasedSourceTest, method testReadRangeFromFileWithSplitsFromMiddleOfHeader.
/**
 * Verifies reading a headered file when the source's start offset lands inside (or right at
 * the end of) the first header.
 *
 * <p>The file holds ten groups, each made of the header {@code "<h>"} followed by the strings
 * from {@code createStringDataset(3, 9)}. The expected output is every input entry from index
 * 10 onward with all header lines dropped; the same expectation is checked for start offsets
 * 1, 2 and 3.
 *
 * @throws IOException if the test file cannot be written, matched, or read
 */
@Test
public void testReadRangeFromFileWithSplitsFromMiddleOfHeader() throws IOException {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final String headerLine = "<h>";

  // Build the file contents: ten repetitions of header + generated records.
  final List<String> fileLines = new ArrayList<>();
  int group = 0;
  while (group < 10) {
    fileLines.add(headerLine);
    fileLines.addAll(createStringDataset(3, 9));
    group++;
  }
  final File dataFile = createFileWithData("file", fileLines);

  // Expected records: skip the first ten entries, and never include a header line.
  final List<String> expected = new ArrayList<>();
  for (String line : fileLines.subList(10, fileLines.size())) {
    if (!headerLine.equals(line)) {
      expected.add(line);
    }
  }

  final Metadata fileMetadata = FileSystems.matchSingleFileSpec(dataFile.getPath());

  // Start offset 1 begins after "<", 2 after "<h", and 3 after the full "<h>" header.
  for (int startOffset = 1; startOffset <= 3; startOffset++) {
    TestFileBasedSource headerSplitSource =
        new TestFileBasedSource(fileMetadata, 64, startOffset, Long.MAX_VALUE, headerLine);
    assertThat(
        expected,
        containsInAnyOrder(readFromSource(headerSplitSource, pipelineOptions).toArray()));
  }
}
Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.
From class OffsetBasedSourceTest, method testProgress.
/**
 * Walks a reader over {@code new CoarseRangeSource(13, 17, 1, 2)} one element at a time and
 * checks, after every step, the reported fraction consumed and the consumed / remaining
 * split-point counters.
 *
 * @throws IOException if the reader fails while starting, advancing, or closing
 */
@Test
public void testProgress() throws IOException {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final CoarseRangeSource coarseSource = new CoarseRangeSource(13, 17, 1, 2);

  try (OffsetBasedReader<Integer> offsetReader = coarseSource.createReader(pipelineOptions)) {
    // Before start(): nothing consumed, remaining split points not yet known.
    assertEquals(0.0, offsetReader.getFractionConsumed(), 1e-6);
    assertEquals(0, offsetReader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, offsetReader.getSplitPointsRemaining());

    // start() yields 14 (granularity is 2), which is a split point.
    assertTrue(offsetReader.start());
    assertTrue(offsetReader.isAtSplitPoint());
    assertEquals(14, offsetReader.getCurrent().intValue());
    assertEquals(0, offsetReader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, offsetReader.getSplitPointsRemaining());

    // First advance() yields 15, which is not a split point; counters are unchanged.
    assertTrue(offsetReader.advance());
    assertEquals(15, offsetReader.getCurrent().intValue());
    assertEquals(0, offsetReader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, offsetReader.getSplitPointsRemaining());

    // Second advance() yields 16, a split point. The next offset (17) falls outside the
    // range [13, 17), so the remaining count switches from UNKNOWN to 1.
    assertTrue(offsetReader.advance());
    assertTrue(offsetReader.isAtSplitPoint());
    assertEquals(16, offsetReader.getCurrent().intValue());
    assertEquals(1, offsetReader.getSplitPointsConsumed());
    assertEquals(1, offsetReader.getSplitPointsRemaining());

    // Third advance() yields 17, not a split point; counters stay at 1 / 1.
    assertTrue(offsetReader.advance());
    assertEquals(17, offsetReader.getCurrent().intValue());
    assertEquals(1, offsetReader.getSplitPointsConsumed());
    assertEquals(1, offsetReader.getSplitPointsRemaining());

    // Final advance() hits the end: everything consumed, nothing remaining.
    assertFalse(offsetReader.advance());
    assertEquals(1.0, offsetReader.getFractionConsumed(), 1e-6);
    assertEquals(2, offsetReader.getSplitPointsConsumed());
    assertEquals(0, offsetReader.getSplitPointsRemaining());
  }
}
Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.
From class OffsetBasedSourceTest, method testReadingGranularityAndFractionConsumed.
/**
 * Checks that the test reader snaps to multiples of the configured granularity (note: this
 * exercises test code itself) and that {@code getFractionConsumed} behaves sensibly while
 * doing so.
 *
 * <p>First, a source built with (13, 35, 1, 10) is read to the end: the fraction must start at
 * 0, stay within (0, 1] after each advance, and finish at 1, and the 20 items read must run
 * from 20 to 39. Second, a source built with (13, 17, 1, 10) must report no elements at all.
 *
 * @throws IOException if a reader fails while starting, advancing, or closing
 */
@Test
public void testReadingGranularityAndFractionConsumed() throws IOException {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  final CoarseRangeSource wideSource = new CoarseRangeSource(13, 35, 1, 10);
  try (CoarseRangeReader coarseReader = wideSource.createReader(pipelineOptions)) {
    final List<Integer> readItems = new ArrayList<>();

    assertEquals(0.0, coarseReader.getFractionConsumed(), 1e-6);
    assertTrue(coarseReader.start());
    readItems.add(coarseReader.getCurrent());

    while (coarseReader.advance()) {
      // After every successful advance the fraction must be known and lie in (0, 1].
      Double consumed = coarseReader.getFractionConsumed();
      assertNotNull(consumed);
      assertTrue(consumed.toString(), consumed > 0.0);
      assertTrue(consumed.toString(), consumed <= 1.0);
      readItems.add(coarseReader.getCurrent());
    }
    assertEquals(1.0, coarseReader.getFractionConsumed(), 1e-6);

    final int lastIndex = readItems.size() - 1;
    assertEquals(20, readItems.size());
    assertEquals(20, readItems.get(0).intValue());
    assertEquals(39, readItems.get(lastIndex).intValue());
  }

  // With these arguments the reader is expected to produce nothing, so start() is false.
  final CoarseRangeSource narrowSource = new CoarseRangeSource(13, 17, 1, 10);
  try (BoundedSource.BoundedReader<Integer> emptyReader =
      narrowSource.createReader(pipelineOptions)) {
    assertFalse(emptyReader.start());
  }
}
Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.
From class OffsetBasedSourceTest, method testSplitAtFraction.
/**
 * Exercises {@code splitAtFraction} on a partially consumed reader over
 * {@code new CoarseRangeSource(13, 35, 1, 10)}.
 *
 * <p>After reading four elements, splitting at fraction 0.0 and at a fraction slightly behind
 * the current position must both be rejected (return {@code null}). Splitting slightly ahead
 * of the current position must give a residual whose items all lie past the reader's current
 * offset, and a primary whose items equal exactly what the original reader goes on to produce.
 *
 * @throws IOException if a reader fails while starting, advancing, or closing
 */
@Test
public void testSplitAtFraction() throws IOException {
  final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final CoarseRangeSource coarseSource = new CoarseRangeSource(13, 35, 1, 10);

  try (CoarseRangeReader coarseReader = coarseSource.createReader(pipelineOptions)) {
    final List<Integer> itemsFromOriginal = new ArrayList<>();

    // Consume the first element, then three more, recording each one.
    assertTrue(coarseReader.start());
    itemsFromOriginal.add(coarseReader.getCurrent());
    for (int step = 0; step < 3; step++) {
      assertTrue(coarseReader.advance());
      itemsFromOriginal.add(coarseReader.getCurrent());
    }

    // Splits at the very beginning or behind the current position must be refused.
    assertNull(coarseReader.splitAtFraction(0.0));
    assertNull(coarseReader.splitAtFraction(coarseReader.getFractionConsumed() - 0.1));

    // A split ahead of the current position hands back the residual; the reader keeps the
    // primary as its current source.
    final BoundedSource<Integer> residualSource =
        coarseReader.splitAtFraction(coarseReader.getFractionConsumed() + 0.1);
    final BoundedSource<Integer> primarySource = coarseReader.getCurrentSource();
    final List<Integer> itemsFromPrimary = readFromSource(primarySource, pipelineOptions);
    final List<Integer> itemsFromResidual = readFromSource(residualSource, pipelineOptions);

    for (Integer residualItem : itemsFromResidual) {
      assertTrue(residualItem > coarseReader.getCurrentOffset());
    }
    assertFalse(itemsFromPrimary.isEmpty());
    assertFalse(itemsFromResidual.isEmpty());
    assertTrue(
        itemsFromPrimary.get(itemsFromPrimary.size() - 1) <= itemsFromResidual.get(0));

    // Drain what is left of the original reader; it must match the primary exactly.
    while (coarseReader.advance()) {
      itemsFromOriginal.add(coarseReader.getCurrent());
    }
    assertEquals(itemsFromOriginal, itemsFromPrimary);
  }
}
Aggregations