Use of org.apache.beam.sdk.util.ShardedFile in the Apache Beam project.
Class WindowedWordCountIT, method testWindowedWordCountPipeline.
/**
 * Runs {@code WindowedWordCount} with the given options and verifies what it wrote.
 *
 * <p>Expected output is one sharded-file pattern for each of six consecutive ten-minute windows,
 * the first of which starts at {@code options.getMinTimestampMillis()}. Expected word counts are
 * computed here by reading the input file and tokenizing every line with
 * {@code ExampleUtils.TOKENIZER_PATTERN}.
 *
 * @param options supplies the input file, the output location and the minimum timestamp
 * @throws Exception if reading the input, running the pipeline, or any other step throws
 */
private void testWindowedWordCountPipeline(WindowedWordCountITOptions options) throws Exception {
  ResourceId outputResource = FileBasedSink.convertToFileResourceIfPossible(options.getOutput());
  PerWindowFiles perWindowPolicy = new PerWindowFiles(outputResource);

  // One expected shard pattern per window; window starts are offset 0, 10, ..., 50 minutes.
  List<ShardedFile> expectedShards = Lists.newArrayListWithCapacity(6);
  for (int offsetMinutes = 0; offsetMinutes <= 50; offsetMinutes += 10) {
    Instant start =
        new Instant(options.getMinTimestampMillis())
            .plus(Duration.standardMinutes(offsetMinutes));
    Instant end = start.plus(Duration.standardMinutes(10));
    String prefix = perWindowPolicy.filenamePrefixForWindow(new IntervalWindow(start, end));
    ResourceId prefixResource =
        outputResource
            .getCurrentDirectory()
            .resolve(prefix, StandardResolveOptions.RESOLVE_FILE);
    // Trailing "*" turns the per-window prefix into a pattern covering all of its shards.
    expectedShards.add(new NumberedShardedFile(prefixResource.toString() + "*"));
  }

  // The integration-test input is small, so the expected counts are tallied in memory.
  ShardedFile input = new ExplicitShardedFile(Collections.singleton(options.getInputFile()));
  SortedMap<String, Long> expectedCounts = new TreeMap<>();
  for (String line : input.readFilesWithRetries(Sleeper.DEFAULT, BACK_OFF_FACTORY.backoff())) {
    // Limit -1 keeps trailing empty tokens; they are skipped below along with all other empties.
    for (String token : line.split(ExampleUtils.TOKENIZER_PATTERN, -1)) {
      if (token.isEmpty()) {
        continue;
      }
      Long seen = expectedCounts.get(token);
      expectedCounts.put(token, seen == null ? 1L : seen + 1L);
    }
  }

  WindowedWordCount.runWindowedWordCount(options);
  assertThat(expectedShards, containsWordCounts(expectedCounts));
}
Aggregations