
Example 66 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class GroupReduceCompilationTest, method testGroupedReduceWithFieldPositionKeyNonCombinable:

@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.groupBy(1).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {

            @Override
            public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that the reducer uses the sorted group-reduce strategy
        // (no combiner appears, since the function is non-combinable)
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
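The function in this test is deliberately non-combinable: it only overrides reduce(), so the optimizer cannot pre-aggregate before the shuffle. For contrast, a minimal sketch of a combinable variant — a function that additionally implements GroupCombineFunction, which is what allows the DataSet optimizer to insert a combine step. The class name is ours, not from the test:

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical combinable counterpart of the reducer above. Because it also
// implements GroupCombineFunction, the optimizer may add a combiner on the
// input side of the shuffle.
public class CombinableReducer
        extends RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>
        implements GroupCombineFunction<Tuple2<String, Double>, Tuple2<String, Double>> {

    @Override
    public void reduce(Iterable<Tuple2<String, Double>> values,
                       Collector<Tuple2<String, Double>> out) {
        // final reduction; emits results through the Collector
    }

    @Override
    public void combine(Iterable<Tuple2<String, Double>> values,
                        Collector<Tuple2<String, Double>> out) {
        // partial pre-aggregation; must emit the same type it consumes
    }
}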

Example 67 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class GroupReduceCompilationTest, method testAllGroupReduceNoCombiner:

@Test
public void testAllGroupReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduceGroup(new RichGroupReduceFunction<Double, Double>() {

            @Override
            public void reduce(Iterable<Double> values, Collector<Double> out) {
            }
        }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // the all-group-reduce gets no combiner, because the non-parallel source
        // forces an input parallelism of one
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that reduce has the right strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 68 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class DataStreamAllroundTestProgram, method main:

public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    setupEnvironment(env, pt);
    // add a keyed stateful map operator, which uses Kryo for state serialization
    DataStream<Event> eventStream =
            env.addSource(createEventSource(pt))
                    .name(EVENT_SOURCE.getName())
                    .uid(EVENT_SOURCE.getUid())
                    .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
                    .keyBy(Event::getKey)
                    .map(createArtificialKeyedStateMapper(
                            // map function simply forwards the inputs
                            (MapFunction<Event, Event>) in -> in,
                            // state object is a ComplexPayload
                            (Event event, ComplexPayload lastState) -> {
                                if (lastState != null
                                        && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
                                        && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                                    throwIncorrectRestoredStateException(
                                            (event.getSequenceNumber() - 1),
                                            KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName(),
                                            lastState.getStrPayload());
                                }
                                return new ComplexPayload(event, KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName());
                            },
                            // KryoSerializer plus a custom serializer, registered directly
                            Arrays.asList(
                                    new KryoSerializer<>(ComplexPayload.class, env.getConfig()),
                                    new StatefulComplexPayloadSerializer()),
                            // KryoSerializer registered via type
                            Collections.singletonList(ComplexPayload.class)))
                    .returns(Event.class)
                    .name(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName())
                    .uid(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getUid());
    // add a keyed stateful map operator, which uses Avro for state serialization
    eventStream =
            eventStream
                    .keyBy(Event::getKey)
                    .map(createArtificialKeyedStateMapper(
                            // map function simply forwards the inputs
                            (MapFunction<Event, Event>) in -> in,
                            // state object is a ComplexPayloadAvro
                            (Event event, ComplexPayloadAvro lastState) -> {
                                if (lastState != null
                                        && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
                                        && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
                                    throwIncorrectRestoredStateException(
                                            (event.getSequenceNumber() - 1),
                                            KEYED_STATE_OPER_WITH_AVRO_SER.getName(),
                                            lastState.getStrPayload());
                                }
                                ComplexPayloadAvro payload = new ComplexPayloadAvro();
                                payload.setEventTime(event.getEventTime());
                                payload.setInnerPayLoad(new InnerPayLoadAvro(event.getSequenceNumber()));
                                payload.setStrPayload(KEYED_STATE_OPER_WITH_AVRO_SER.getName());
                                payload.setStringList(Arrays.asList(String.valueOf(event.getKey()), event.getPayload()));
                                return payload;
                            },
                            // custom AvroSerializer, registered directly
                            Collections.singletonList(new AvroSerializer<>(ComplexPayloadAvro.class)),
                            // AvroSerializer registered via type
                            Collections.singletonList(ComplexPayloadAvro.class)))
                    .returns(Event.class)
                    .name(KEYED_STATE_OPER_WITH_AVRO_SER.getName())
                    .uid(KEYED_STATE_OPER_WITH_AVRO_SER.getUid());
    DataStream<Event> eventStream2 =
            eventStream
                    .map(createArtificialOperatorStateMapper((MapFunction<Event, Event>) in -> in))
                    .returns(Event.class)
                    .name(OPERATOR_STATE_OPER.getName())
                    .uid(OPERATOR_STATE_OPER.getUid());
    // apply a tumbling window that simply forwards the window elements;
    // this lets the job cover timer state
    @SuppressWarnings("Convert2Lambda")
    DataStream<Event> eventStream3 =
            applyTumblingWindows(eventStream2.keyBy(Event::getKey), pt)
                    .apply(new WindowFunction<Event, Event, Integer, TimeWindow>() {

                        @Override
                        public void apply(Integer integer, TimeWindow window, Iterable<Event> input, Collector<Event> out) {
                            for (Event e : input) {
                                out.collect(e);
                            }
                        }
                    })
                    .name(TIME_WINDOW_OPER.getName())
                    .uid(TIME_WINDOW_OPER.getUid());
    eventStream3 = DataStreamAllroundTestJobFactory.verifyCustomStatefulTypeSerializer(eventStream3);
    if (isSimulateFailures(pt)) {
        eventStream3 = eventStream3.map(createFailureMapper(pt)).setParallelism(1).name(FAILURE_MAPPER_NAME.getName()).uid(FAILURE_MAPPER_NAME.getUid());
    }
    eventStream3
            .keyBy(Event::getKey)
            .flatMap(createSemanticsCheckMapper(pt))
            .name(SEMANTICS_CHECK_MAPPER.getName())
            .uid(SEMANTICS_CHECK_MAPPER.getUid())
            .addSink(new PrintSinkFunction<>())
            .name(SEMANTICS_CHECK_PRINT_SINK.getName())
            .uid(SEMANTICS_CHECK_PRINT_SINK.getUid());
    // check sliding-window aggregations: output all elements assigned to a window,
    // and later verify that each event was emitted slide_factor times
    DataStream<Tuple2<Integer, List<Event>>> eventStream4 =
            eventStream2
                    .keyBy(Event::getKey)
                    .window(createSlidingWindow(pt))
                    .apply(new WindowFunction<Event, Tuple2<Integer, List<Event>>, Integer, TimeWindow>() {

                        private static final long serialVersionUID = 3166250579972849440L;

                        @Override
                        public void apply(Integer key, TimeWindow window, Iterable<Event> input, Collector<Tuple2<Integer, List<Event>>> out) {
                            out.collect(Tuple2.of(key, StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList())));
                        }
                    })
                    .name(SLIDING_WINDOW_AGG.getName())
                    .uid(SLIDING_WINDOW_AGG.getUid());
    eventStream4
            .keyBy(events -> events.f0)
            .flatMap(createSlidingWindowCheckMapper(pt))
            .name(SLIDING_WINDOW_CHECK_MAPPER.getName())
            .uid(SLIDING_WINDOW_CHECK_MAPPER.getUid())
            .addSink(new PrintSinkFunction<>())
            .name(SLIDING_WINDOW_CHECK_PRINT_SINK.getName())
            .uid(SLIDING_WINDOW_CHECK_PRINT_SINK.getUid());
    env.execute("General purpose test job");
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) StatefulComplexPayloadSerializer(org.apache.flink.streaming.tests.artificialstate.StatefulComplexPayloadSerializer) SEMANTICS_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_PRINT_SINK) Arrays(java.util.Arrays) DataStreamAllroundTestJobFactory.applyTumblingWindows(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.applyTumblingWindows) DataStreamAllroundTestJobFactory.createFailureMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createFailureMapper) DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PrintSinkFunction(org.apache.flink.streaming.api.functions.sink.PrintSinkFunction) KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER) ComplexPayloadAvro(org.apache.flink.streaming.tests.avro.ComplexPayloadAvro) AvroSerializer(org.apache.flink.formats.avro.typeutils.AvroSerializer) MapFunction(org.apache.flink.api.common.functions.MapFunction) DataStreamAllroundTestJobFactory.createSlidingWindow(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindow) DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper) EVENT_SOURCE(org.apache.flink.streaming.tests.TestOperatorEnum.EVENT_SOURCE) DataStreamAllroundTestJobFactory.isSimulateFailures(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.isSimulateFailures) DataStreamAllroundTestJobFactory.setupEnvironment(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.setupEnvironment) Collector(org.apache.flink.util.Collector) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) StreamSupport(java.util.stream.StreamSupport) OPERATOR_STATE_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.OPERATOR_STATE_OPER) SLIDING_WINDOW_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_MAPPER) KEYED_STATE_OPER_WITH_AVRO_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_AVRO_SER) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) FAILURE_MAPPER_NAME(org.apache.flink.streaming.tests.TestOperatorEnum.FAILURE_MAPPER_NAME) Collectors(java.util.stream.Collectors) DataStream(org.apache.flink.streaming.api.datastream.DataStream) WindowFunction(org.apache.flink.streaming.api.functions.windowing.WindowFunction) SLIDING_WINDOW_AGG(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_AGG) List(java.util.List) DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper) DataStreamAllroundTestJobFactory.createEventSource(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createEventSource) ValueState(org.apache.flink.api.common.state.ValueState) ComplexPayload(org.apache.flink.streaming.tests.artificialstate.ComplexPayload) SEMANTICS_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_MAPPER) SLIDING_WINDOW_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_PRINT_SINK) TIME_WINDOW_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.TIME_WINDOW_OPER) Collections(java.util.Collections) InnerPayLoadAvro(org.apache.flink.streaming.tests.avro.InnerPayLoadAvro) DataStreamAllroundTestJobFactory.createSemanticsCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSemanticsCheckMapper) DataStreamAllroundTestJobFactory.createTimestampExtractor(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createTimestampExtractor) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
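All of the Collector traffic in this job flows through WindowFunction.apply(). The other place the interface shows up constantly in the DataStream API is FlatMapFunction, where one input may produce zero or more outputs. A minimal self-contained sketch; the sample data is made up:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapCollectorSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> lines = env.fromElements("to be or", "not to be");
        lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> out) {
                // one input line fans out into many output records
                for (String word : line.split("\\s+")) {
                    out.collect(word);
                }
            }
        }).print();
        env.execute("flatMap Collector sketch");
    }
}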

Example 69 with Collector

Use of org.apache.flink.util.Collector in project flink by apache.

From class GroupReduceOperatorTest, method testGroupReduceCollectionWithRuntimeContext:

@Test
public void testGroupReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();
        final RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> reducer = new RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            @Override
            public void reduce(Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                Iterator<Tuple2<String, Integer>> input = values.iterator();
                Tuple2<String, Integer> result = input.next();
                int sum = result.f1;
                while (input.hasNext()) {
                    Tuple2<String, Integer> next = input.next();
                    sum += next.f1;
                }
                result.f1 = sum;
                out.collect(result);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };
        GroupReduceOperatorBase<Tuple2<String, Integer>, Tuple2<String, Integer>, GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>> op = new GroupReduceOperatorBase<>(reducer, new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE), new int[] { 0 }, "TestReducer");
        List<Tuple2<String, Integer>> input = new ArrayList<>(asList(new Tuple2<>("foo", 1), new Tuple2<>("foo", 3), new Tuple2<>("bar", 2), new Tuple2<>("bar", 4)));
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
        Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(new Tuple2<>("foo", 4), new Tuple2<>("bar", 6)));
        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) Collector(org.apache.flink.util.Collector) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) HashSet(java.util.HashSet) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Test(org.junit.Test)
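Because reduce() receives its output channel as a plain Collector argument, a function like the one above can also be driven directly, without executeOnCollections. A sketch using Flink's ListCollector helper, assuming 'reducer' is the function defined in the test above and the inputs are illustrative:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.flink.util.ListCollector;

@Test
public void reduceEmitsSummedTupleThroughCollector() throws Exception {
    // ListCollector appends every collect()ed record to the backing list
    List<Tuple2<String, Integer>> output = new ArrayList<>();
    Collector<Tuple2<String, Integer>> out = new ListCollector<>(output);

    reducer.reduce(Arrays.asList(Tuple2.of("foo", 1), Tuple2.of("foo", 3)), out);

    // the function sums the values per group and emits a single tuple
    assertEquals(Tuple2.of("foo", 4), output.get(0));
}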

Example 70 with Collector

Use of org.apache.flink.util.Collector in project beam by apache.

From class SortingFlinkCombineRunner, method combine:

@Override
public void combine(
        FlinkCombiner<K, InputT, AccumT, OutputT> flinkCombiner,
        WindowingStrategy<Object, W> windowingStrategy,
        SideInputReader sideInputReader,
        PipelineOptions options,
        Iterable<WindowedValue<KV<K, InputT>>> elements,
        Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    @SuppressWarnings("unchecked") TimestampCombiner timestampCombiner = (TimestampCombiner) windowingStrategy.getTimestampCombiner();
    // collect all elements so that we can sort them; this has to fit into
    // memory, which seems imprudent, but it is correct for now
    List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
    for (WindowedValue<KV<K, InputT>> inputValue : elements) {
        for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
            sortedInput.add(exploded);
        }
    }
    sortedInput.sort(Comparator.comparing(o -> Iterables.getOnlyElement(o.getWindows()).maxTimestamp()));
    if (windowingStrategy.needsMerge()) {
        // we have to merge windows in an extra pre-processing step; we can't do it
        // as we go, since the window of early elements would not be correct when
        // calling the CombineFn
        mergeWindow(sortedInput);
    }
    // iterate over the elements that are sorted by window timestamp
    final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
    // create the accumulator using the first element's key
    WindowedValue<KV<K, InputT>> currentValue = iterator.next();
    K key = currentValue.getValue().getKey();
    W currentWindow = (W) Iterables.getOnlyElement(currentValue.getWindows());
    InputT firstValue = currentValue.getValue().getValue();
    AccumT accumulator = flinkCombiner.firstInput(key, firstValue, options, sideInputReader, currentValue.getWindows());
    // we use this to keep track of the timestamps assigned by the TimestampCombiner
    Instant windowTimestamp = timestampCombiner.assign(currentWindow, currentValue.getTimestamp());
    while (iterator.hasNext()) {
        WindowedValue<KV<K, InputT>> nextValue = iterator.next();
        W nextWindow = (W) Iterables.getOnlyElement(nextValue.getWindows());
        if (currentWindow.equals(nextWindow)) {
            // continue accumulating and merge windows
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.addInput(key, accumulator, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.combine(windowTimestamp, timestampCombiner.assign(currentWindow, nextValue.getTimestamp()));
        } else {
            // emit the value that we currently have
            out.collect(WindowedValue.of(KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())), windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
            currentWindow = nextWindow;
            currentValue = nextValue;
            InputT value = nextValue.getValue().getValue();
            accumulator = flinkCombiner.firstInput(key, value, options, sideInputReader, currentValue.getWindows());
            windowTimestamp = timestampCombiner.assign(currentWindow, nextValue.getTimestamp());
        }
    }
    // emit the final accumulator
    out.collect(WindowedValue.of(KV.of(key, flinkCombiner.extractOutput(key, accumulator, options, sideInputReader, currentValue.getWindows())), windowTimestamp, currentWindow, PaneInfo.NO_FIRING));
}
Also used: KV(org.apache.beam.sdk.values.KV) Iterator(java.util.Iterator) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) List(java.util.List) Collector(org.apache.flink.util.Collector) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Instant(org.joda.time.Instant) Comparator(java.util.Comparator) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) SideInputReader(org.apache.beam.runners.core.SideInputReader)

Aggregations

Collector (org.apache.flink.util.Collector): 80
Test (org.junit.Test): 60
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 33
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 32
Configuration (org.apache.flink.configuration.Configuration): 27
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 19
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 18
ArrayList (java.util.ArrayList): 16
DataStream (org.apache.flink.streaming.api.datastream.DataStream): 16
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 16
HashMap (java.util.HashMap): 14
List (java.util.List): 14
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 12
IOException (java.io.IOException): 11
Arrays (java.util.Arrays): 11
Map (java.util.Map): 11
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 11
Assert.assertTrue (org.junit.Assert.assertTrue): 11
InternalWindowFunction (org.apache.flink.streaming.runtime.operators.windowing.functions.InternalWindowFunction): 10
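For reference, the interface behind all of these examples is tiny: collect(T) emits one record downstream and close() releases any resources. A trivial sketch of an implementation, useful when wiring a UDF up by hand; this class is ours, not part of Flink (which ships ListCollector for the common test case):

import org.apache.flink.util.Collector;

// Minimal Collector that prints every emitted record.
public class PrintingCollector<T> implements Collector<T> {

    @Override
    public void collect(T record) {
        System.out.println(record); // forward the record; here, to stdout
    }

    @Override
    public void close() {
        // nothing to release
    }
}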