Search in sources :

Example 1 with WindowRuntime

use of org.talend.components.processing.runtime.window.WindowRuntime in project components by Talend.

From the class WindowRuntimeTest, method testSessionWindow.

/**
 * Runs {@link WindowRuntime} configured with {@code windowSession = true},
 * {@code windowLength = 10} and {@code windowSlideLength = -1} over a small set of
 * timestamped records, then counts identical records and asserts the resulting counts.
 */
@Test
public void testSessionWindow() {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline pipeline = Pipeline.create(options);

    // Input records with explicit event timestamps. The same record appears in several
    // bursts separated by larger gaps in time.
    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(
            TimestampedValue.of(irA, new Instant(0L)),
            TimestampedValue.of(irB, new Instant(0L)),
            TimestampedValue.of(irC, new Instant(1L)),
            TimestampedValue.of(irA, new Instant(2L)),
            TimestampedValue.of(irA, new Instant(2L)),
            TimestampedValue.of(irB, new Instant(2L)),
            TimestampedValue.of(irB, new Instant(30L)),
            TimestampedValue.of(irA, new Instant(30L)),
            TimestampedValue.of(irA, new Instant(50L)),
            TimestampedValue.of(irC, new Instant(55L)),
            TimestampedValue.of(irA, new Instant(59L)));

    Create.TimestampedValues<IndexedRecord> source = Create.timestamped(data);
    source = (Create.TimestampedValues<IndexedRecord>) source.withCoder(LazyAvroCoder.of());
    PCollection<IndexedRecord> input = pipeline.apply(source);

    // Session configuration: the slide length is set to -1 and the session flag is enabled.
    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.setValue("windowLength", 10);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", true);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> windowed = windowRun.expand(input);
    PCollection<KV<IndexedRecord, Long>> windowedCounts = windowed.apply(Count.<IndexedRecord>perElement());

    // Expected counts, one entry per (record, window) pair.
    // NOTE(review): these values are consistent with per-record sessions that close after a
    // gap of 10 timestamp units - confirm against the WindowRuntime implementation.
    PAssert.that(windowedCounts).containsInAnyOrder(
            // irA: timestamps {0, 2, 2}, {30}, {50, 59}
            KV.of(irA, 3L),
            KV.of(irA, 1L),
            KV.of(irA, 2L),
            // irB: timestamps {0, 2}, {30}
            KV.of(irB, 2L),
            KV.of(irB, 1L),
            // irC: timestamps {1}, {55}
            KV.of(irC, 1L),
            KV.of(irC, 1L));
    pipeline.run();
}
Also used : WindowProperties(org.talend.components.processing.definition.window.WindowProperties) IndexedRecord(org.apache.avro.generic.IndexedRecord) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) WindowRuntime(org.talend.components.processing.runtime.window.WindowRuntime) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Create(org.apache.beam.sdk.transforms.Create) Test(org.junit.Test)

Example 2 with WindowRuntime

use of org.talend.components.processing.runtime.window.WindowRuntime in project components by Talend.

From the class WindowRuntimeTest, method testSlidingWindow.

/**
 * Runs {@link WindowRuntime} configured with {@code windowLength = 4},
 * {@code windowSlideLength = 2} and {@code windowSession = false} over timestamped
 * records, then counts identical records and asserts the resulting counts.
 */
@Test
public void testSlidingWindow() {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    pipelineOptions.setRunner(DirectRunner.class);
    final Pipeline pipeline = Pipeline.create(pipelineOptions);

    // Timestamped input: irA at 0, 2, 2, 4; irB at 0, 2, 3; irC at 1, 3.
    List<TimestampedValue<IndexedRecord>> timestampedRecords = Arrays.asList(
            TimestampedValue.of(irA, new Instant(0L)),
            TimestampedValue.of(irB, new Instant(0L)),
            TimestampedValue.of(irC, new Instant(1L)),
            TimestampedValue.of(irA, new Instant(2L)),
            TimestampedValue.of(irA, new Instant(2L)),
            TimestampedValue.of(irB, new Instant(2L)),
            TimestampedValue.of(irB, new Instant(3L)),
            TimestampedValue.of(irC, new Instant(3L)),
            TimestampedValue.of(irA, new Instant(4L)));

    Create.TimestampedValues<IndexedRecord> source =
            (Create.TimestampedValues<IndexedRecord>) Create.timestamped(timestampedRecords)
                    .withCoder(LazyAvroCoder.of());
    PCollection<IndexedRecord> input = pipeline.apply(source);

    // Sliding configuration: length 4, slide 2, session mode off.
    WindowProperties slidingProperties = new WindowProperties("window");
    slidingProperties.setValue("windowLength", 4);
    slidingProperties.setValue("windowSlideLength", 2);
    slidingProperties.setValue("windowSession", false);

    WindowRuntime runtime = new WindowRuntime();
    runtime.initialize(null, slidingProperties);

    PCollection<IndexedRecord> windowed = runtime.expand(input);
    PCollection<KV<IndexedRecord, Long>> countsPerWindow = windowed.apply(Count.<IndexedRecord>perElement());

    // Expected counts, one entry per (record, window) pair.
    // NOTE(review): these values match windows [-2,2), [0,4), [2,6), [4,8) over the
    // timestamps above - confirm the window alignment against WindowRuntime.
    PAssert.that(countsPerWindow).containsInAnyOrder(
            // irA
            KV.of(irA, 1L),
            KV.of(irA, 1L),
            KV.of(irA, 3L),
            KV.of(irA, 3L),
            // irB
            KV.of(irB, 1L),
            KV.of(irB, 3L),
            KV.of(irB, 2L),
            // irC
            KV.of(irC, 1L),
            KV.of(irC, 1L),
            KV.of(irC, 2L));
    pipeline.run();
}
Also used : WindowProperties(org.talend.components.processing.definition.window.WindowProperties) IndexedRecord(org.apache.avro.generic.IndexedRecord) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) WindowRuntime(org.talend.components.processing.runtime.window.WindowRuntime) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Create(org.apache.beam.sdk.transforms.Create) Test(org.junit.Test)

Example 3 with WindowRuntime

use of org.talend.components.processing.runtime.window.WindowRuntime in project components by Talend.

From the class WindowRuntimeTest, method testFixedWindow.

/**
 * Runs {@link WindowRuntime} configured with {@code windowLength = 2},
 * {@code windowSlideLength = -1} and {@code windowSession = false} over three timestamped
 * records, then counts identical records and asserts that each record is counted once.
 */
@Test
public void testFixedWindow() {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline pipeline = Pipeline.create(options);

    // Three distinct records, each with its own event timestamp.
    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(
            TimestampedValue.of(irA, new Instant(1L)),
            TimestampedValue.of(irB, new Instant(2L)),
            TimestampedValue.of(irC, new Instant(3L)));
    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) pipeline
            .apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    // Configure the window once, by property name, as the sibling tests do. The original
    // also set the same three values through the property fields, which was redundant.
    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.setValue("windowLength", 2);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> windowed = windowRun.expand(input);
    PCollection<KV<IndexedRecord, Long>> windowedCounts = windowed.apply(Count.<IndexedRecord>perElement());

    // Each record occurs exactly once in the input, so every count is 1 regardless of
    // which window a record lands in.
    PAssert.that(windowedCounts).containsInAnyOrder(
            KV.of(irA, 1L),
            KV.of(irB, 1L),
            KV.of(irC, 1L));
    pipeline.run();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) WindowProperties(org.talend.components.processing.definition.window.WindowProperties) WindowRuntime(org.talend.components.processing.runtime.window.WindowRuntime) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) IndexedRecord(org.apache.avro.generic.IndexedRecord) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord)3 Pipeline (org.apache.beam.sdk.Pipeline)3 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)3 KV (org.apache.beam.sdk.values.KV)3 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)3 Instant (org.joda.time.Instant)3 Test (org.junit.Test)3 WindowProperties (org.talend.components.processing.definition.window.WindowProperties)3 WindowRuntime (org.talend.components.processing.runtime.window.WindowRuntime)3 Create (org.apache.beam.sdk.transforms.Create)2 PCollection (org.apache.beam.sdk.values.PCollection)1