Search in sources :

Example 1 with WindowFunction

Use of org.apache.flink.streaming.api.functions.windowing.WindowFunction in the Apache Flink project.

Class StateDescriptorPassingTest, method testApplyWindowState.

/**
 * Builds a keyed, tumbling event-time window over a stream of {@link File} elements, applies a
 * no-op {@code WindowFunction}, and hands the resulting operator to
 * {@code validateListStateDescriptorConfigured}.
 *
 * <p>A Kryo {@code JavaSerializer} is registered for {@link File} on the environment before the
 * pipeline is assembled.
 */
@Test
public void testApplyWindowState() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    // Monotonous watermarks; every element is stamped with the current wall-clock time.
    final WatermarkStrategy<File> watermarkStrategy = WatermarkStrategy.<File>forMonotonousTimestamps().withTimestampAssigner((file, ts) -> System.currentTimeMillis());
    final DataStream<File> src = env.fromElements(new File("/")).assignTimestampsAndWatermarks(watermarkStrategy);

    // Key selector that always yields null; kept as an anonymous class, exactly as before.
    final KeySelector<File, String> nullKeySelector = new KeySelector<File, String>() {

        @Override
        public String getKey(File value) {
            return null;
        }
    };

    // Window function that emits nothing.
    final WindowFunction<File, String, String, TimeWindow> noOpWindowFunction = new WindowFunction<File, String, String, TimeWindow>() {

        @Override
        public void apply(String s, TimeWindow window, Iterable<File> input, Collector<String> out) {
        }
    };

    final SingleOutputStreamOperator<?> result = src.keyBy(nullKeySelector).window(TumblingEventTimeWindows.of(Time.milliseconds(1000))).apply(noOpWindowFunction);
    validateListStateDescriptorConfigured(result);
}
Also used : Kryo(com.esotericsoftware.kryo.Kryo) Collector(org.apache.flink.util.Collector) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) ProcessAllWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) JavaSerializer(com.esotericsoftware.kryo.serializers.JavaSerializer) Time(org.apache.flink.streaming.api.windowing.time.Time) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) StateDescriptor(org.apache.flink.api.common.state.StateDescriptor) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) WindowOperator(org.apache.flink.streaming.runtime.operators.windowing.WindowOperator) Assert.assertTrue(org.junit.Assert.assertTrue) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) Test(org.junit.Test) ProcessWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) File(java.io.File) DataStream(org.apache.flink.streaming.api.datastream.DataStream) WindowFunction(org.apache.flink.streaming.api.functions.windowing.WindowFunction) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) AllWindowFunction(org.apache.flink.streaming.api.functions.windowing.AllWindowFunction) ListSerializer(org.apache.flink.api.common.typeutils.base.ListSerializer) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) 
TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) Test(org.junit.Test)

Example 2 with WindowFunction

Use of org.apache.flink.streaming.api.functions.windowing.WindowFunction in the Apache Flink project.

Class DataStreamAllroundTestProgram, method main.

/**
 * Assembles and executes the streaming job named "General purpose test job".
 *
 * <p>The topology built here, in order:
 * <ol>
 *   <li>an event source with timestamps/watermarks, keyed by {@code Event::getKey};</li>
 *   <li>a keyed stateful mapper whose state is a {@code ComplexPayload} (serializers: a
 *       {@code KryoSerializer} and a {@code StatefulComplexPayloadSerializer});</li>
 *   <li>a keyed stateful mapper whose state is a {@code ComplexPayloadAvro} (serializer:
 *       {@code AvroSerializer});</li>
 *   <li>an operator-state mapper;</li>
 *   <li>a tumbling-window branch that forwards every element, optionally followed by a failure
 *       mapper, then a semantics-check flat map and a print sink;</li>
 *   <li>a sliding-window branch that emits {@code (key, elements)} tuples into a check flat map
 *       and a print sink.</li>
 * </ol>
 *
 * <p>Every operator is given an explicit name and uid taken from the corresponding operator
 * constant (e.g. {@code EVENT_SOURCE}, {@code TIME_WINDOW_OPER}).
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception declared by this method; the visible calls that may raise it include
 *     {@code env.execute}
 */
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Environment configuration is delegated to a helper driven by the parsed parameters.
    setupEnvironment(env, pt);
    // add a keyed stateful map operator, which uses Kryo for state serialization
    // The second lambda receives the incoming event and the previous state: it calls
    // throwIncorrectRestoredStateException when a non-null previous state does NOT carry this
    // operator's name while its sequence number is exactly one less than the event's, and
    // otherwise returns a fresh ComplexPayload tagged with this operator's name.
    DataStream<Event> eventStream = env.addSource(createEventSource(pt)).name(EVENT_SOURCE.getName()).uid(EVENT_SOURCE.getUid()).assignTimestampsAndWatermarks(createTimestampExtractor(pt)).keyBy(Event::getKey).map(createArtificialKeyedStateMapper(// map function simply forwards the inputs
    (MapFunction<Event, Event>) in -> in, // ComplexPayload state object
    (Event event, ComplexPayload lastState) -> {
        if (lastState != null && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName()) && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
            throwIncorrectRestoredStateException((event.getSequenceNumber() - 1), KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName(), lastState.getStrPayload());
        }
        return new ComplexPayload(event, KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName());
    }, Arrays.asList(new KryoSerializer<>(ComplexPayload.class, // KryoSerializer
    env.getConfig()), // custom
    new StatefulComplexPayloadSerializer()), // serializer
    Collections.singletonList(// KryoSerializer via type
    ComplexPayload.class))).returns(Event.class).name(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getName()).uid(KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER.getUid());
    // add a keyed stateful map operator, which uses Avro for state serialization
    // Same validation shape as the Kryo-backed operator above, but the new state is a
    // ComplexPayloadAvro populated field by field from the event.
    eventStream = eventStream.keyBy(Event::getKey).map(createArtificialKeyedStateMapper(// map function simply forwards the inputs
    (MapFunction<Event, Event>) in -> in, // ComplexPayloadAvro state object
    (Event event, ComplexPayloadAvro lastState) -> {
        if (lastState != null && !lastState.getStrPayload().equals(KEYED_STATE_OPER_WITH_AVRO_SER.getName()) && lastState.getInnerPayLoad().getSequenceNumber() == (event.getSequenceNumber() - 1)) {
            throwIncorrectRestoredStateException((event.getSequenceNumber() - 1), KEYED_STATE_OPER_WITH_AVRO_SER.getName(), lastState.getStrPayload());
        }
        ComplexPayloadAvro payload = new ComplexPayloadAvro();
        payload.setEventTime(event.getEventTime());
        payload.setInnerPayLoad(new InnerPayLoadAvro(event.getSequenceNumber()));
        payload.setStrPayload(KEYED_STATE_OPER_WITH_AVRO_SER.getName());
        payload.setStringList(Arrays.asList(String.valueOf(event.getKey()), event.getPayload()));
        return payload;
    }, Collections.singletonList(new AvroSerializer<>(ComplexPayloadAvro.class)), // custom AvroSerializer
    Collections.singletonList(// AvroSerializer via type
    ComplexPayloadAvro.class))).returns(Event.class).name(KEYED_STATE_OPER_WITH_AVRO_SER.getName()).uid(KEYED_STATE_OPER_WITH_AVRO_SER.getUid());
    // Operator-state mapper; the wrapped map function forwards each event unchanged.
    // eventStream2 feeds both the tumbling-window and the sliding-window branches below.
    DataStream<Event> eventStream2 = eventStream.map(createArtificialOperatorStateMapper((MapFunction<Event, Event>) in -> in)).returns(Event.class).name(OPERATOR_STATE_OPER.getName()).uid(OPERATOR_STATE_OPER.getUid());
    // apply a tumbling window that simply passes forward window elements;
    // this allows the job to cover timers state
    // NOTE(review): the "Convert2Lambda" suppression suggests the anonymous class is kept on
    // purpose rather than converted to a lambda; confirm before changing it.
    @SuppressWarnings("Convert2Lambda") DataStream<Event> eventStream3 = applyTumblingWindows(eventStream2.keyBy(Event::getKey), pt).apply(new WindowFunction<Event, Event, Integer, TimeWindow>() {

        @Override
        public void apply(Integer integer, TimeWindow window, Iterable<Event> input, Collector<Event> out) {
            for (Event e : input) {
                out.collect(e);
            }
        }
    }).name(TIME_WINDOW_OPER.getName()).uid(TIME_WINDOW_OPER.getUid());
    // Pass the windowed stream through the custom-stateful-type-serializer verification helper.
    eventStream3 = DataStreamAllroundTestJobFactory.verifyCustomStatefulTypeSerializer(eventStream3);
    // Optionally insert a failure mapper (parallelism 1) when failure simulation is enabled.
    if (isSimulateFailures(pt)) {
        eventStream3 = eventStream3.map(createFailureMapper(pt)).setParallelism(1).name(FAILURE_MAPPER_NAME.getName()).uid(FAILURE_MAPPER_NAME.getUid());
    }
    // Tumbling-window branch: keyed semantics-check flat map, output goes to a print sink.
    eventStream3.keyBy(Event::getKey).flatMap(createSemanticsCheckMapper(pt)).name(SEMANTICS_CHECK_MAPPER.getName()).uid(SEMANTICS_CHECK_MAPPER.getUid()).addSink(new PrintSinkFunction<>()).name(SEMANTICS_CHECK_PRINT_SINK.getName()).uid(SEMANTICS_CHECK_PRINT_SINK.getUid());
    // Check sliding windows aggregations. Output all elements assigned to a window and later on
    // check if each event was emitted slide_factor number of times
    DataStream<Tuple2<Integer, List<Event>>> eventStream4 = eventStream2.keyBy(Event::getKey).window(createSlidingWindow(pt)).apply(new WindowFunction<Event, Tuple2<Integer, List<Event>>, Integer, TimeWindow>() {

        private static final long serialVersionUID = 3166250579972849440L;

        @Override
        public void apply(Integer key, TimeWindow window, Iterable<Event> input, Collector<Tuple2<Integer, List<Event>>> out) {
            // Emit one tuple per window firing: the key plus all of the window's elements as a list.
            out.collect(Tuple2.of(key, StreamSupport.stream(input.spliterator(), false).collect(Collectors.toList())));
        }
    }).name(SLIDING_WINDOW_AGG.getName()).uid(SLIDING_WINDOW_AGG.getUid());
    // Sliding-window branch: keyed by the tuple's first field, checked, then printed.
    eventStream4.keyBy(events -> events.f0).flatMap(createSlidingWindowCheckMapper(pt)).name(SLIDING_WINDOW_CHECK_MAPPER.getName()).uid(SLIDING_WINDOW_CHECK_MAPPER.getUid()).addSink(new PrintSinkFunction<>()).name(SLIDING_WINDOW_CHECK_PRINT_SINK.getName()).uid(SLIDING_WINDOW_CHECK_PRINT_SINK.getUid());
    // Submit the assembled topology for execution.
    env.execute("General purpose test job");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) StatefulComplexPayloadSerializer(org.apache.flink.streaming.tests.artificialstate.StatefulComplexPayloadSerializer) SEMANTICS_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_PRINT_SINK) Arrays(java.util.Arrays) DataStreamAllroundTestJobFactory.applyTumblingWindows(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.applyTumblingWindows) DataStreamAllroundTestJobFactory.createFailureMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createFailureMapper) DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialKeyedStateMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PrintSinkFunction(org.apache.flink.streaming.api.functions.sink.PrintSinkFunction) KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_KRYO_AND_CUSTOM_SER) ComplexPayloadAvro(org.apache.flink.streaming.tests.avro.ComplexPayloadAvro) AvroSerializer(org.apache.flink.formats.avro.typeutils.AvroSerializer) MapFunction(org.apache.flink.api.common.functions.MapFunction) DataStreamAllroundTestJobFactory.createSlidingWindow(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindow) DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSlidingWindowCheckMapper) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) EVENT_SOURCE(org.apache.flink.streaming.tests.TestOperatorEnum.EVENT_SOURCE) DataStreamAllroundTestJobFactory.isSimulateFailures(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.isSimulateFailures) DataStreamAllroundTestJobFactory.setupEnvironment(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.setupEnvironment) Collector(org.apache.flink.util.Collector) 
TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) StreamSupport(java.util.stream.StreamSupport) OPERATOR_STATE_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.OPERATOR_STATE_OPER) SLIDING_WINDOW_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_MAPPER) KEYED_STATE_OPER_WITH_AVRO_SER(org.apache.flink.streaming.tests.TestOperatorEnum.KEYED_STATE_OPER_WITH_AVRO_SER) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) FAILURE_MAPPER_NAME(org.apache.flink.streaming.tests.TestOperatorEnum.FAILURE_MAPPER_NAME) Collectors(java.util.stream.Collectors) DataStream(org.apache.flink.streaming.api.datastream.DataStream) WindowFunction(org.apache.flink.streaming.api.functions.windowing.WindowFunction) SLIDING_WINDOW_AGG(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_AGG) List(java.util.List) DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createArtificialOperatorStateMapper) DataStreamAllroundTestJobFactory.createEventSource(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createEventSource) ValueState(org.apache.flink.api.common.state.ValueState) ComplexPayload(org.apache.flink.streaming.tests.artificialstate.ComplexPayload) SEMANTICS_CHECK_MAPPER(org.apache.flink.streaming.tests.TestOperatorEnum.SEMANTICS_CHECK_MAPPER) SLIDING_WINDOW_CHECK_PRINT_SINK(org.apache.flink.streaming.tests.TestOperatorEnum.SLIDING_WINDOW_CHECK_PRINT_SINK) TIME_WINDOW_OPER(org.apache.flink.streaming.tests.TestOperatorEnum.TIME_WINDOW_OPER) Collections(java.util.Collections) InnerPayLoadAvro(org.apache.flink.streaming.tests.avro.InnerPayLoadAvro) DataStreamAllroundTestJobFactory.createSemanticsCheckMapper(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createSemanticsCheckMapper) 
DataStreamAllroundTestJobFactory.createTimestampExtractor(org.apache.flink.streaming.tests.DataStreamAllroundTestJobFactory.createTimestampExtractor) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MapFunction(org.apache.flink.api.common.functions.MapFunction) InnerPayLoadAvro(org.apache.flink.streaming.tests.avro.InnerPayLoadAvro) StatefulComplexPayloadSerializer(org.apache.flink.streaming.tests.artificialstate.StatefulComplexPayloadSerializer) ComplexPayloadAvro(org.apache.flink.streaming.tests.avro.ComplexPayloadAvro) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) ComplexPayload(org.apache.flink.streaming.tests.artificialstate.ComplexPayload) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Aggregations

KryoSerializer (org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer): 2, DataStream (org.apache.flink.streaming.api.datastream.DataStream): 2, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 2, WindowFunction (org.apache.flink.streaming.api.functions.windowing.WindowFunction): 2, TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 2, Kryo (com.esotericsoftware.kryo.Kryo): 1, JavaSerializer (com.esotericsoftware.kryo.serializers.JavaSerializer): 1, File (java.io.File): 1, Arrays (java.util.Arrays): 1, Collections (java.util.Collections): 1, List (java.util.List): 1, Collectors (java.util.stream.Collectors): 1, StreamSupport (java.util.stream.StreamSupport): 1, WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy): 1, MapFunction (org.apache.flink.api.common.functions.MapFunction): 1, ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction): 1, ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 1, StateDescriptor (org.apache.flink.api.common.state.StateDescriptor): 1, ValueState (org.apache.flink.api.common.state.ValueState): 1, TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer): 1