Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FlinkPartialReduceFunction, method combine:
@Override
public void combine(
    Iterable<WindowedValue<KV<K, InputT>>> elements,
    Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
  PipelineOptions options = serializedOptions.getPipelineOptions();
  FlinkSideInputReader sideInputReader =
      new FlinkSideInputReader(sideInputs, getRuntimeContext());
  AbstractFlinkCombineRunner<K, InputT, AccumT, AccumT, W> reduceRunner;
  // Merging WindowFns whose windows are not IntervalWindows get the hash-based
  // combine runner; otherwise the sort-based runner is used.
  if (!windowingStrategy.getWindowFn().isNonMerging()
      && !windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
    reduceRunner = new HashingFlinkCombineRunner<>();
  } else {
    reduceRunner = new SortingFlinkCombineRunner<>();
  }
  reduceRunner.combine(
      new AbstractFlinkCombineRunner.PartialFlinkCombiner<K, InputT, AccumT>(combineFn),
      windowingStrategy, sideInputReader, options, elements, out);
}
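For context, the PipelineOptions deserialized inside this Flink function are normally constructed on the client with PipelineOptionsFactory. The following is a minimal sketch of that construction step; the MyOptions interface, its property, and the class name are illustrative and not part of the snippet above.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class MyOptionsExample {
  // Hypothetical options interface; only PipelineOptions itself comes from the snippet above.
  public interface MyOptions extends PipelineOptions {
    @Description("An example option read from the command line")
    @Default.String("example")
    String getMyValue();

    void setMyValue(String value);
  }

  public static void main(String[] args) {
    // Parse e.g. --myValue=... from the command line and validate the options.
    MyOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    System.out.println(options.getMyValue());
  }
}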
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class OffsetBasedSourceTest, method testProgressEmptySource:
@Test
public void testProgressEmptySource() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  CoarseRangeSource source = new CoarseRangeSource(13, 17, 1, 100);
  try (OffsetBasedReader<Integer> reader = source.createReader(options)) {
    // Before starting.
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Confirm that the source is empty.
    assertFalse(reader.start());

    // After reading the empty source.
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
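The helper below is a sketch of the standard start()/advance() read loop over a BoundedSource, roughly what a test utility such as SourceTestUtils.readFromSource does; the progress methods asserted in the test above can be queried at any point while this loop runs. The helper name readAll is illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.options.PipelineOptions;

// Hypothetical helper: drains a BoundedSource using the standard read loop.
static <T> List<T> readAll(BoundedSource<T> source, PipelineOptions options) throws IOException {
  List<T> values = new ArrayList<>();
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      values.add(reader.getCurrent());
    }
  }
  return values;
}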
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class CompressedSourceTest, method testSplittableProgress:
@Test
public void testSplittableProgress() throws IOException {
  File tmpFile = tmpFolder.newFile("nonempty.txt");
  String filename = tmpFile.toPath().toString();
  Files.write(new byte[2], tmpFile);
  PipelineOptions options = PipelineOptionsFactory.create();
  CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
  try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
    assertThat(readerOrig, not(instanceOf(CompressedReader.class)));
    assertThat(readerOrig, instanceOf(FileBasedReader.class));
    FileBasedReader<Byte> reader = (FileBasedReader<Byte>) readerOrig;

    // Check preconditions before starting.
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // First record: none consumed yet, remaining unknown.
    assertTrue(reader.start());
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Second record: one consumed, and we know we are on the last record.
    assertTrue(reader.advance());
    assertEquals(1, reader.getSplitPointsConsumed());
    assertEquals(1, reader.getSplitPointsRemaining());

    // Confirm the reader is exhausted and check post-conditions.
    assertFalse(reader.advance());
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(2, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
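As a related usage note, a CompressedSource can also be given an explicit decompression mode instead of relying on auto-detection by file extension. A brief sketch, reusing the test's ByteSource with an illustrative file path:

// Sketch only: the path is illustrative; ByteSource is the same test source used above.
CompressedSource<Byte> gzipSource =
    CompressedSource.from(new ByteSource("/path/to/input.gz", 1))
        .withDecompression(CompressedSource.CompressionMode.GZIP);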
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FileBasedSourceTest, method testReadFileWithSplitsWithEmptyRange:
@Test
public void testReadFileWithSplitsWithEmptyRange() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 5; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 42, header);
  TestFileBasedSource source2 = new TestFileBasedSource(metadata, 64, 42, 62, header);
  TestFileBasedSource source3 = new TestFileBasedSource(metadata, 64, 62, Long.MAX_VALUE, header);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data);
  // Remove all occurrences of the header from the expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  List<String> results = new ArrayList<>();
  results.addAll(readFromSource(source1, options));
  results.addAll(readFromSource(source2, options));
  results.addAll(readFromSource(source3, options));

  assertThat(expectedResults, containsInAnyOrder(results.toArray()));
}
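Rather than hard-coding the three offset ranges as the test above does, a BoundedSource can be asked to split itself. The following sketch assumes `source` is a TestFileBasedSource covering the whole file and reuses the readFromSource helper from the test; note that split(...) declares `throws Exception`, so the enclosing test method would as well.

// Sketch: let the source compute sub-ranges of roughly 64 bytes each.
List<? extends BoundedSource<String>> splits = source.split(64, options);
List<String> results = new ArrayList<>();
for (BoundedSource<String> split : splits) {
  results.addAll(readFromSource(split, options));
}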
Use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
From the class FileBasedSourceTest, method testReadRangeAtEnd:
@Test
public void testReadRangeAtEnd() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 162, null);
  TestFileBasedSource source2 = new TestFileBasedSource(metadata, 1024, 162, Long.MAX_VALUE, null);

  List<String> results = new ArrayList<>();
  results.addAll(readFromSource(source1, options));
  results.addAll(readFromSource(source2, options));

  assertThat(data, containsInAnyOrder(results.toArray()));
}
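The same "union of the ranges equals the whole file" property can also be checked with a SourceTestUtils helper. A sketch, assuming `wholeSource` is a hypothetical TestFileBasedSource covering the entire file:

// Sketch: compare the two sub-range sources above against a reference over the whole file.
SourceTestUtils.assertSourcesEqualReferenceSource(
    wholeSource, Arrays.asList(source1, source2), options);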