use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class AutoCompleteTest method testWindowedAutoComplete.
/**
 * Applies ComputeTopCompletions (top 2 candidates) to timestamped words that have been placed
 * into sliding windows of size 2, and asserts the per-prefix candidates produced for each window.
 */
@Test
public void testWindowedAutoComplete() {
  // Two "xA" and one "xB" at instant 1, followed by two "xB" at instant 2.
  Instant atOne = new Instant(1);
  Instant atTwo = new Instant(2);
  List<TimestampedValue<String>> words =
      Arrays.asList(
          TimestampedValue.of("xA", atOne),
          TimestampedValue.of("xA", atOne),
          TimestampedValue.of("xB", atOne),
          TimestampedValue.of("xB", atTwo),
          TimestampedValue.of("xB", atTwo));

  // ReifyTimestamps turns the TimestampedValue wrappers back into plain strings.
  PCollection<String> input = p.apply(Create.of(words)).apply(new ReifyTimestamps<String>());

  PCollection<String> windowed =
      input.apply(Window.<String>into(SlidingWindows.of(new Duration(2))));
  PCollection<KV<String, List<CompletionCandidate>>> output =
      windowed.apply(new ComputeTopCompletions(2, recursive));

  PAssert.that(output)
      .containsInAnyOrder(
          // Window [0, 2): only the elements at instant 1.
          KV.of("x", parseList("xA:2", "xB:1")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:1")),
          // Window [1, 3): all five elements.
          KV.of("x", parseList("xB:3", "xA:2")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:3")),
          // Window [2, 4): only the elements at instant 2.
          KV.of("x", parseList("xB:2")),
          KV.of("xB", parseList("xB:2")));

  p.run().waitUntilFinish();
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class AvroIOTest method testWindowedAvroIOWrite.
/**
 * Writes 200 GenericClass elements through a TestStream into one-minute fixed windows using
 * AvroIO windowed writes with two shards, then reads every expected output file back and checks
 * that the union of their contents equals the elements that were written.
 *
 * @throws Throwable if creating the temp directory, running the pipeline, or reading files fails
 */
@Test
@Category({ ValidatesRunner.class, UsesTestStream.class })
public void testWindowedAvroIOWrite() throws Throwable {
  Path baseDir = Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testwrite");
  String baseFilename = baseDir.resolve("prefix").toString();
  Instant base = new Instant(0);
  List<GenericClass> allElements = new ArrayList<>();
  Random random = new Random();

  // Elements 0..99 get a random timestamp within the first minute.
  List<TimestampedValue<GenericClass>> firstWindowElements = new ArrayList<>();
  List<Instant> firstWindowTimestamps =
      Lists.newArrayList(
          base.plus(Duration.standardSeconds(0)),
          base.plus(Duration.standardSeconds(10)),
          base.plus(Duration.standardSeconds(20)),
          base.plus(Duration.standardSeconds(30)));
  for (int i = 0; i < 100; ++i) {
    GenericClass item = new GenericClass(i, String.valueOf(i));
    allElements.add(item);
    firstWindowElements.add(
        TimestampedValue.of(
            item, firstWindowTimestamps.get(random.nextInt(firstWindowTimestamps.size()))));
  }

  // Elements 100..199 get a random timestamp within the second minute.
  List<TimestampedValue<GenericClass>> secondWindowElements = new ArrayList<>();
  List<Instant> secondWindowTimestamps =
      Lists.newArrayList(
          base.plus(Duration.standardSeconds(60)),
          base.plus(Duration.standardSeconds(70)),
          base.plus(Duration.standardSeconds(80)),
          base.plus(Duration.standardSeconds(90)));
  for (int i = 100; i < 200; ++i) {
    GenericClass item = new GenericClass(i, String.valueOf(i));
    // Reuse the same instance for the expectation list, as the first loop does.
    allElements.add(item);
    secondWindowElements.add(
        TimestampedValue.of(
            item, secondWindowTimestamps.get(random.nextInt(secondWindowTimestamps.size()))));
  }

  // addElements takes (first, rest...), so each batch is split into head and tail.
  TimestampedValue<GenericClass>[] firstWindowArray =
      firstWindowElements.toArray(new TimestampedValue[100]);
  TimestampedValue<GenericClass>[] secondWindowArray =
      secondWindowElements.toArray(new TimestampedValue[100]);
  TestStream<GenericClass> values =
      TestStream.create(AvroCoder.of(GenericClass.class))
          .advanceWatermarkTo(new Instant(0))
          .addElements(
              firstWindowArray[0], Arrays.copyOfRange(firstWindowArray, 1, firstWindowArray.length))
          .advanceWatermarkTo(new Instant(0).plus(Duration.standardMinutes(1)))
          .addElements(
              secondWindowArray[0],
              Arrays.copyOfRange(secondWindowArray, 1, secondWindowArray.length))
          .advanceWatermarkToInfinity();

  FilenamePolicy policy = new WindowedFilenamePolicy(baseFilename);
  windowedAvroWritePipeline
      .apply(values)
      .apply(Window.<GenericClass>into(FixedWindows.of(Duration.standardMinutes(1))))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(baseFilename)
              .withFilenamePolicy(policy)
              .withWindowedWrites()
              .withNumShards(2));
  windowedAvroWritePipeline.run();

  // Validate that the data written matches the expected elements, regardless of order.
  // One file is expected per (shard, window) pair: 2 shards x 2 windows.
  // NOTE(review): the "-of-1" suffix has to agree with the name WindowedFilenamePolicy produces
  // for a two-shard write; that policy is not visible here -- confirm before changing it.
  List<File> expectedFiles = new ArrayList<>();
  for (int shard = 0; shard < 2; shard++) {
    for (int window = 0; window < 2; window++) {
      Instant windowStart = new Instant(0).plus(Duration.standardMinutes(window));
      IntervalWindow intervalWindow = new IntervalWindow(windowStart, Duration.standardMinutes(1));
      expectedFiles.add(
          new File(
              baseFilename
                  + "-"
                  + intervalWindow.toString()
                  + "-"
                  + shard
                  + "-of-1"
                  + "-pane-0-final"));
    }
  }

  List<GenericClass> actualElements = new ArrayList<>();
  for (File outputFile : expectedFiles) {
    assertTrue("Expected output file " + outputFile.getAbsolutePath(), outputFile.exists());
    try (DataFileReader<GenericClass> reader =
        new DataFileReader<>(
            outputFile,
            new ReflectDatumReader<GenericClass>(
                ReflectData.get().getSchema(GenericClass.class)))) {
      Iterators.addAll(actualElements, reader);
    }
    outputFile.delete();
  }
  assertThat(actualElements, containsInAnyOrder(allElements.toArray()));
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class CreateTest method testCreateParameterizedType.
/**
 * Creates a PCollection directly from two TimestampedValue&lt;String&gt; elements and asserts that
 * the resulting collection contains exactly those two elements.
 */
@Test
@Category(NeedsRunner.class)
public void testCreateParameterizedType() throws Exception {
  Instant epoch = new Instant(0);
  TimestampedValue<String> a = TimestampedValue.of("a", epoch);
  TimestampedValue<String> b = TimestampedValue.of("b", epoch);

  PCollection<TimestampedValue<String>> output = p.apply(Create.of(a, b));

  PAssert.that(output).containsInAnyOrder(a, b);
  p.run();
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class DoFnTesterTest method processElementWithOutputTimestamp.
/**
 * Feeds four elements through a DoFnTester wrapping CounterDoFn and checks that the
 * timestamp-aware peek/take accessors expose the expected outputs, and that every accessor is
 * empty once the outputs have been taken.
 */
@Test
public void processElementWithOutputTimestamp() throws Exception {
  // Expected outputs, paired with the timestamps the assertions below look for.
  TimestampedValue<String> one = TimestampedValue.of("1", new Instant(1000L));
  TimestampedValue<String> two = TimestampedValue.of("2", new Instant(2000L));
  TimestampedValue<String> three = TimestampedValue.of("3", new Instant(3000L));
  TimestampedValue<String> four = TimestampedValue.of("4", new Instant(4000L));

  try (DoFnTester<Long, String> tester = DoFnTester.of(new CounterDoFn())) {
    tester.processElement(1L);
    tester.processElement(2L);
    assertThat(tester.peekOutputElementsWithTimestamp(), hasItems(one, two));

    tester.processElement(3L);
    tester.processElement(4L);
    // Peeking again still shows the first two outputs alongside the new ones.
    assertThat(tester.peekOutputElementsWithTimestamp(), hasItems(one, two, three, four));

    assertThat(tester.takeOutputElementsWithTimestamp(), hasItems(one, two, three, four));

    // After a take, the timestamped accessors have nothing left to return...
    assertTrue(tester.takeOutputElementsWithTimestamp().isEmpty());
    assertTrue(tester.peekOutputElementsWithTimestamp().isEmpty());

    // ...and neither do the untimestamped ones.
    assertTrue(tester.peekOutputElements().isEmpty());
    assertTrue(tester.takeOutputElements().isEmpty());
  }
}
use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.
the class DoFnTesterTest method processTimestampedElement.
/**
 * Passes a single timestamped element to a DoFnTester wrapping ReifyTimestamps and asserts that
 * the only output is equal to that same timestamped element.
 */
@Test
public void processTimestampedElement() throws Exception {
  TimestampedValue<Long> stamped = TimestampedValue.of(1L, new Instant(100));

  try (DoFnTester<Long, TimestampedValue<Long>> tester = DoFnTester.of(new ReifyTimestamps())) {
    tester.processTimestampedElement(stamped);

    List<TimestampedValue<Long>> emitted = tester.takeOutputElements();
    assertThat(emitted, contains(stamped));
  }
}
Aggregations