Search in sources :

Example 61 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class AbstractEventTimeWindowCheckpointingITCase method doTestTumblingTimeWindowWithKVState.

public void doTestTumblingTimeWindowWithKVState(int maxParallelism) {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(maxParallelism);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS)).apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            private ValueState<Integer> count;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
                count = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Integer.class, 0));
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {
                // different count results for each key
                if (count.value() == 0) {
                    count.update(tuple.<Long>getField(0).intValue());
                }
                // validate that the function has been opened properly
                assertTrue(open);
                count.update(count.value() + 1);
                out.collect(new Tuple4<>(tuple.<Long>getField(0), window.getStart(), window.getEnd(), new IntType(count.value())));
            }
        }).addSink(new CountValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 62 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class AbstractEventTimeWindowCheckpointingITCase method testPreAggregatedSlidingTimeWindow.

@Test
public void testPreAggregatedSlidingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int WINDOW_SLIDE = windowSlide();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS), Time.of(WINDOW_SLIDE, MILLISECONDS)).reduce(new ReduceFunction<Tuple2<Long, IntType>>() {

            @Override
            public Tuple2<Long, IntType> reduce(Tuple2<Long, IntType> a, Tuple2<Long, IntType> b) {
                // validate that the function has been opened properly
                return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
            }
        }, new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> input, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                for (Tuple2<Long, IntType> in : input) {
                    out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                }
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)

Example 63 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class EventTimeAllWindowCheckpointingITCase method testSlidingTimeWindow.

@Test
public void testSlidingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = 3000;
    final int WINDOW_SIZE = 1000;
    final int WINDOW_SLIDE = 100;
    final int NUM_KEYS = 1;
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().timeWindowAll(Time.of(WINDOW_SIZE, MILLISECONDS), Time.of(WINDOW_SLIDE, MILLISECONDS)).apply(new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(TimeWindow window, Iterable<Tuple2<Long, IntType>> values, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                int sum = 0;
                long key = -1;
                for (Tuple2<Long, IntType> value : values) {
                    sum += value.f1.value;
                    key = value.f0;
                }
                out.collect(new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum)));
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SLIDE)).setParallelism(1);
        tryExecute(env, "Sliding Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 64 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class RescalingITCase method createJobGraphWithKeyedState.

private static JobGraph createJobGraphWithKeyedState(int parallelism, int maxParallelism, int numberKeys, int numberElements, boolean terminateAfterEmission, int checkpointingInterval) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    if (0 < maxParallelism) {
        env.getConfig().setMaxParallelism(maxParallelism);
    }
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());
    DataStream<Integer> input = env.addSource(new SubtaskIndexSource(numberKeys, numberElements, terminateAfterEmission)).keyBy(new KeySelector<Integer, Integer>() {

        private static final long serialVersionUID = -7952298871120320940L;

        @Override
        public Integer getKey(Integer value) throws Exception {
            return value;
        }
    });
    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);
    DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));
    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());
    return env.getStreamGraph().getJobGraph();
}
Also used : CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 65 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class CheckpointingCustomKvStateProgram method main.

public static void main(String[] args) throws Exception {
    final String jarFile = args[0];
    final String host = args[1];
    final int port = Integer.parseInt(args[2]);
    final String checkpointPath = args[3];
    final String outputPath = args[4];
    final int parallelism = 1;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment(host, port, jarFile);
    env.setParallelism(parallelism);
    env.getConfig().disableSysoutLogging();
    env.enableCheckpointing(100);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend(checkpointPath));
    DataStream<Integer> source = env.addSource(new InfiniteIntegerSource());
    source.map(new MapFunction<Integer, Tuple2<Integer, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<Integer, Integer> map(Integer value) throws Exception {
            return new Tuple2<>(ThreadLocalRandom.current().nextInt(parallelism), value);
        }
    }).keyBy(new KeySelector<Tuple2<Integer, Integer>, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer getKey(Tuple2<Integer, Integer> value) throws Exception {
            return value.f0;
        }
    }).flatMap(new ReducingStateFlatMap()).writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
}
Also used : RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend)

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)383 Test (org.junit.Test)286 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)192 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)81 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)75 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)48 EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)34 Properties (java.util.Properties)32 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)31 TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows)30 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)27 SuccessException (org.apache.flink.test.util.SuccessException)27 IOException (java.io.IOException)24 Configuration (org.apache.flink.configuration.Configuration)24 SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows)24 KeySelector (org.apache.flink.api.java.functions.KeySelector)22 ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger)21 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)20 MapFunction (org.apache.flink.api.common.functions.MapFunction)19