Search in sources:

Example 6 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

Class AutoCompleteTest, method testWindowedAutoComplete.

/**
 * Runs ComputeTopCompletions (top 2, using the fixture's {@code recursive} flag) over
 * sliding windows of size 2 and checks the per-window completion candidates.
 */
@Test
public void testWindowedAutoComplete() {
    // Two "xA" and one "xB" at timestamp 1, followed by two more "xB" at timestamp 2.
    Instant tsOne = new Instant(1);
    Instant tsTwo = new Instant(2);
    List<TimestampedValue<String>> timestampedWords = Arrays.asList(
        TimestampedValue.of("xA", tsOne),
        TimestampedValue.of("xA", tsOne),
        TimestampedValue.of("xB", tsOne),
        TimestampedValue.of("xB", tsTwo),
        TimestampedValue.of("xB", tsTwo));

    PCollection<String> wordsWithTimestamps =
        p.apply(Create.of(timestampedWords))
            .apply(new ReifyTimestamps<String>());

    PCollection<KV<String, List<CompletionCandidate>>> completions =
        wordsWithTimestamps
            .apply(Window.<String>into(SlidingWindows.of(new Duration(2))))
            .apply(new ComputeTopCompletions(2, recursive));

    PAssert.that(completions).containsInAnyOrder(
        // Window [0, 2): only the elements at timestamp 1.
        KV.of("x", parseList("xA:2", "xB:1")),
        KV.of("xA", parseList("xA:2")),
        KV.of("xB", parseList("xB:1")),
        // Window [1, 3): every element.
        KV.of("x", parseList("xB:3", "xA:2")),
        KV.of("xA", parseList("xA:2")),
        KV.of("xB", parseList("xB:3")),
        // Window starting at 2: only the two "xB" at timestamp 2.
        KV.of("x", parseList("xB:2")),
        KV.of("xB", parseList("xB:2")));

    p.run().waitUntilFinish();
}
Also used : CompletionCandidate(org.apache.beam.examples.complete.AutoComplete.CompletionCandidate) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) ComputeTopCompletions(org.apache.beam.examples.complete.AutoComplete.ComputeTopCompletions) Test(org.junit.Test)

Example 7 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

Class AvroIOTest, method testWindowedAvroIOWrite.

/**
 * Writes 200 timestamped elements through a TestStream into one-minute fixed windows using
 * windowed AvroIO writes with two shards, then reads every expected output file back and
 * checks that the combined contents match the elements that were written (in any order).
 */
@Test
@Category({ ValidatesRunner.class, UsesTestStream.class })
public void testWindowedAvroIOWrite() throws Throwable {
    Path outputDir = Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testwrite");
    String filePrefix = outputDir.resolve("prefix").toString();
    Instant epoch = new Instant(0);
    Random rng = new Random();
    List<GenericClass> expectedElements = new ArrayList<>();

    // Elements 0..99 get a randomly chosen timestamp inside the first minute.
    List<Instant> earlyTimestamps = Lists.newArrayList(
        epoch.plus(Duration.standardSeconds(0)),
        epoch.plus(Duration.standardSeconds(10)),
        epoch.plus(Duration.standardSeconds(20)),
        epoch.plus(Duration.standardSeconds(30)));
    List<TimestampedValue<GenericClass>> earlyElements = new ArrayList<>();
    for (int id = 0; id < 100; ++id) {
        GenericClass element = new GenericClass(id, String.valueOf(id));
        expectedElements.add(element);
        Instant timestamp = earlyTimestamps.get(rng.nextInt(earlyTimestamps.size()));
        earlyElements.add(TimestampedValue.of(element, timestamp));
    }

    // Elements 100..199 get a randomly chosen timestamp inside the second minute.
    List<Instant> lateTimestamps = Lists.newArrayList(
        epoch.plus(Duration.standardSeconds(60)),
        epoch.plus(Duration.standardSeconds(70)),
        epoch.plus(Duration.standardSeconds(80)),
        epoch.plus(Duration.standardSeconds(90)));
    List<TimestampedValue<GenericClass>> lateElements = new ArrayList<>();
    for (int id = 100; id < 200; ++id) {
        GenericClass element = new GenericClass(id, String.valueOf(id));
        // The expected list holds a separate, equal-valued instance.
        expectedElements.add(new GenericClass(id, String.valueOf(id)));
        Instant timestamp = lateTimestamps.get(rng.nextInt(lateTimestamps.size()));
        lateElements.add(TimestampedValue.of(element, timestamp));
    }

    // addElements takes a first element plus varargs, so split each array into head and tail.
    TimestampedValue<GenericClass>[] earlyArray = earlyElements.toArray(new TimestampedValue[100]);
    TimestampedValue<GenericClass>[] lateArray = lateElements.toArray(new TimestampedValue[100]);
    TestStream<GenericClass> stream =
        TestStream.create(AvroCoder.of(GenericClass.class))
            .advanceWatermarkTo(epoch)
            .addElements(earlyArray[0], Arrays.copyOfRange(earlyArray, 1, earlyArray.length))
            .advanceWatermarkTo(epoch.plus(Duration.standardMinutes(1)))
            .addElements(lateArray[0], Arrays.copyOfRange(lateArray, 1, lateArray.length))
            .advanceWatermarkToInfinity();

    FilenamePolicy filenamePolicy = new WindowedFilenamePolicy(filePrefix);
    windowedAvroWritePipeline
        .apply(stream)
        .apply(Window.<GenericClass>into(FixedWindows.of(Duration.standardMinutes(1))))
        .apply(
            AvroIO.write(GenericClass.class)
                .to(filePrefix)
                .withFilenamePolicy(filenamePolicy)
                .withWindowedWrites()
                .withNumShards(2));
    windowedAvroWritePipeline.run();

    // One file is expected per (shard, window) pair: two shards times two one-minute windows.
    List<File> expectedFiles = new ArrayList<>();
    for (int shard = 0; shard < 2; shard++) {
        for (int windowIndex = 0; windowIndex < 2; windowIndex++) {
            Instant windowStart = epoch.plus(Duration.standardMinutes(windowIndex));
            IntervalWindow window = new IntervalWindow(windowStart, Duration.standardMinutes(1));
            String fileName =
                filePrefix + "-" + window.toString() + "-" + shard + "-of-1" + "-pane-0-final";
            expectedFiles.add(new File(fileName));
        }
    }

    // Read every expected file back, removing each one after it has been consumed.
    List<GenericClass> readBack = new ArrayList<>();
    for (File expectedFile : expectedFiles) {
        assertTrue("Expected output file " + expectedFile.getAbsolutePath(), expectedFile.exists());
        ReflectDatumReader<GenericClass> datumReader =
            new ReflectDatumReader<GenericClass>(ReflectData.get().getSchema(GenericClass.class));
        try (DataFileReader<GenericClass> fileReader = new DataFileReader<>(expectedFile, datumReader)) {
            Iterators.addAll(readBack, fileReader);
        }
        expectedFile.delete();
    }
    assertThat(readBack, containsInAnyOrder(expectedElements.toArray()));
}
Also used : Path(java.nio.file.Path) Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) FilenamePolicy(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy) DataFileReader(org.apache.avro.file.DataFileReader) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Random(java.util.Random) File(java.io.File) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 8 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

Class CreateTest, method testCreateParameterizedType.

/**
 * Creates a PCollection whose element type is the parameterized type
 * {@code TimestampedValue<String>} and asserts that both elements come back out.
 */
@Test
@Category(NeedsRunner.class)
public void testCreateParameterizedType() throws Exception {
    TimestampedValue<String> valueA = TimestampedValue.of("a", new Instant(0));
    TimestampedValue<String> valueB = TimestampedValue.of("b", new Instant(0));

    PCollection<TimestampedValue<String>> created = p.apply(Create.of(valueA, valueB));

    PAssert.that(created).containsInAnyOrder(valueA, valueB);
    p.run();
}
Also used : TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 9 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

Class DoFnTesterTest, method processElementWithOutputTimestamp.

/**
 * Checks that DoFnTester exposes output elements together with their timestamps, that
 * peeking leaves the buffered output in place, and that taking drains it.
 */
@Test
public void processElementWithOutputTimestamp() throws Exception {
    // Outputs the assertions below expect for inputs 1..4.
    TimestampedValue<String> expectedOne = TimestampedValue.of("1", new Instant(1000L));
    TimestampedValue<String> expectedTwo = TimestampedValue.of("2", new Instant(2000L));
    TimestampedValue<String> expectedThree = TimestampedValue.of("3", new Instant(3000L));
    TimestampedValue<String> expectedFour = TimestampedValue.of("4", new Instant(4000L));

    try (DoFnTester<Long, String> fnTester = DoFnTester.of(new CounterDoFn())) {
        fnTester.processElement(1L);
        fnTester.processElement(2L);
        List<TimestampedValue<String>> peeked = fnTester.peekOutputElementsWithTimestamp();
        assertThat(peeked, hasItems(expectedOne, expectedTwo));

        // Peeking did not consume anything: the earlier outputs remain after more input.
        fnTester.processElement(3L);
        fnTester.processElement(4L);
        peeked = fnTester.peekOutputElementsWithTimestamp();
        assertThat(peeked, hasItems(expectedOne, expectedTwo, expectedThree, expectedFour));

        List<TimestampedValue<String>> taken = fnTester.takeOutputElementsWithTimestamp();
        assertThat(taken, hasItems(expectedOne, expectedTwo, expectedThree, expectedFour));

        // Once taken, the timestamped accessors have nothing left to return...
        assertTrue(fnTester.takeOutputElementsWithTimestamp().isEmpty());
        assertTrue(fnTester.peekOutputElementsWithTimestamp().isEmpty());
        // ...and the same holds for the accessors without timestamps.
        assertTrue(fnTester.peekOutputElements().isEmpty());
        assertTrue(fnTester.takeOutputElements().isEmpty());
    }
}
Also used : TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) Test(org.junit.Test)

Example 10 with TimestampedValue

use of org.apache.beam.sdk.values.TimestampedValue in project beam by apache.

Class DoFnTesterTest, method processTimestampedElement.

/**
 * Feeds a single timestamped element through ReifyTimestamps via DoFnTester and asserts
 * that the output is exactly that value paired with its timestamp.
 */
@Test
public void processTimestampedElement() throws Exception {
    TimestampedValue<Long> element = TimestampedValue.of(1L, new Instant(100));
    try (DoFnTester<Long, TimestampedValue<Long>> fnTester = DoFnTester.of(new ReifyTimestamps())) {
        fnTester.processTimestampedElement(element);
        List<TimestampedValue<Long>> produced = fnTester.takeOutputElements();
        assertThat(produced, contains(element));
    }
}
Also used : TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Instant(org.joda.time.Instant) Test(org.junit.Test)

Aggregations

- TimestampedValue (org.apache.beam.sdk.values.TimestampedValue): 10
- Instant (org.joda.time.Instant): 8
- Test (org.junit.Test): 7
- Category (org.junit.experimental.categories.Category): 3
- ArrayList (java.util.ArrayList): 2
- IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 2
- KV (org.apache.beam.sdk.values.KV): 2
- Function (com.google.common.base.Function): 1
- File (java.io.File): 1
- Path (java.nio.file.Path): 1
- Random (java.util.Random): 1
- LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue): 1
- DataFileReader (org.apache.avro.file.DataFileReader): 1
- CompletionCandidate (org.apache.beam.examples.complete.AutoComplete.CompletionCandidate): 1
- ComputeTopCompletions (org.apache.beam.examples.complete.AutoComplete.ComputeTopCompletions): 1
- BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter): 1
- Checkpoint (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter.Checkpoint): 1
- CreateStream (org.apache.beam.runners.spark.io.CreateStream): 1
- EvaluationContext (org.apache.beam.runners.spark.translation.EvaluationContext): 1
- TransformEvaluator (org.apache.beam.runners.spark.translation.TransformEvaluator): 1