Search in sources :

Example 1 with ApexGroupByKeyOperator

Use of org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator in project beam by apache.

The method testGlobalWindowMinTimestamp of the class ApexGroupByKeyOperatorTest.

/**
 * Feeds two elements with the same key, both timestamped at
 * {@code BoundedWindow.TIMESTAMP_MIN_VALUE}, into an {@code ApexGroupByKeyOperator}
 * and checks that nothing is emitted until a watermark at
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE} is processed. After the watermark, exactly
 * two tuples are expected: the grouped values for the key, followed by the watermark.
 *
 * <p>The operator is cloned through {@code KryoCloneUtils} before any data is processed,
 * and the rest of the test runs against the clone.
 */
@Test
public void testGlobalWindowMinTimestamp() throws Exception {
    // Pipeline and input collection: 10-second fixed windows over String/Integer pairs.
    ApexPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    pipelineOptions.setRunner(TestApexRunner.class);
    Pipeline testPipeline = Pipeline.create(pipelineOptions);
    WindowingStrategy<?, ?> windowing = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
    PCollection<KV<String, Integer>> keyedInput = PCollection.createPrimitiveOutputInternal(testPipeline, windowing, IsBounded.BOUNDED);
    keyedInput.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

    // Start the operator once, then replace it with its Kryo clone.
    ApexGroupByKeyOperator<String, Integer> gbkOperator = new ApexGroupByKeyOperator<>(pipelineOptions, keyedInput, new ApexStateInternals.ApexStateBackend());
    gbkOperator.setup(null);
    gbkOperator.beginWindow(1);
    gbkOperator = KryoCloneUtils.cloneObject(gbkOperator);
    Assert.assertNotNull("Serialization", gbkOperator);

    // Capture everything the operator emits on its output port.
    final List<Object> emitted = Lists.newArrayList();
    Sink<Object> collectingSink = new Sink<Object>() {

        @Override
        public void put(Object emittedTuple) {
            emitted.add(emittedTuple);
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    gbkOperator.output.setSink(collectingSink);

    // The clone is set up and started again before it receives data.
    gbkOperator.setup(null);
    gbkOperator.beginWindow(1);

    // Two identical elements for key "foo", placed at the minimum timestamp.
    Instant minTimestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
    Instant windowEnd = minTimestamp.plus(10000);
    BoundedWindow firstWindow = new IntervalWindow(minTimestamp, windowEnd);
    PaneInfo pane = PaneInfo.NO_FIRING;
    WindowedValue<KV<String, Integer>> firstElement = WindowedValue.of(KV.of("foo", 1), minTimestamp, firstWindow, pane);
    gbkOperator.input.process(ApexStreamTuple.DataTuple.of(firstElement));
    WindowedValue<KV<String, Integer>> secondElement = WindowedValue.of(KV.of("foo", 1), minTimestamp, firstWindow, pane);
    gbkOperator.input.process(ApexStreamTuple.DataTuple.of(secondElement));

    // No output is expected before the watermark has been processed.
    Assert.assertEquals("number outputs", 0, emitted.size());

    long maxTimestampMillis = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
    ApexStreamTuple<WindowedValue<KV<String, Integer>>> finalWatermark = ApexStreamTuple.WatermarkTuple.of(maxTimestampMillis);
    gbkOperator.input.process(finalWatermark);

    // Expect the grouped data tuple first, then the watermark itself.
    Assert.assertEquals("number outputs", 2, emitted.size());
    @SuppressWarnings({ "unchecked", "rawtypes" }) ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> groupedTuple = (ApexStreamTuple.DataTuple) emitted.get(0);
    List<Integer> expectedValues = Lists.newArrayList(1, 1);
    Assert.assertEquals("iterable", KV.of("foo", expectedValues), groupedTuple.getValue().getValue());
    Assert.assertEquals("expected watermark", finalWatermark, emitted.get(1));
}
Also used : ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) Sink(com.datatorrent.api.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) ApexGroupByKeyOperator(org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator) ApexStateInternals(org.apache.beam.runners.apex.translation.utils.ApexStateInternals) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Aggregations

Sink (com.datatorrent.api.Sink)1 ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions)1 ApexGroupByKeyOperator (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)1 ApexStateInternals (org.apache.beam.runners.apex.translation.utils.ApexStateInternals)1 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)1 Pipeline (org.apache.beam.sdk.Pipeline)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)1 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 Instant (org.joda.time.Instant)1 Test (org.junit.Test)1