Search in sources :

Example 11 with SourceFunction

use of org.apache.flink.streaming.api.functions.source.SourceFunction in project flink by apache.

the class CoGroupJoinITCase method testCoGroup.

/**
 * Co-groups two finite keyed streams over 3 ms tumbling event-time windows and verifies the
 * rendered content of every emitted group.
 *
 * <p>Each result is rendered as {@code "F:<tuples of first input> S:<tuples of second input>"}.
 * The expected output includes groups in which one side is empty, i.e. a key/window that is
 * present in only one of the two inputs still yields a result.
 *
 * <p>NOTE(review): timestamps come from {@code Tuple2TimestampExtractor}, which is declared
 * outside this method; presumably it uses the integer field as the event time - confirm.
 */
@Test
public void testCoGroup() throws Exception {
    // Fresh buffer for this run; the sink at the end of the pipeline appends to it.
    testResults = new ArrayList<>();

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(1);

    DataStream<Tuple2<String, Integer>> firstInput = environment
            .addSource(new SourceFunction<Tuple2<String, Integer>>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void run(SourceContext<Tuple2<String, Integer>> context) throws Exception {
                    // Emits (key, i) for i = 0..8: three "a", three "b", then three "a" again.
                    final String[] keys = {"a", "a", "a", "b", "b", "b", "a", "a", "a"};
                    for (int i = 0; i < keys.length; i++) {
                        context.collect(Tuple2.of(keys[i], i));
                    }
                    // The source is finite, so an implicit MAX watermark
                    // follows once it finishes.
                }

                @Override
                public void cancel() {
                    // nothing to cancel: run() returns on its own
                }
            })
            .assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> secondInput = environment
            .addSource(new SourceFunction<Tuple2<String, Integer>>() {

                @Override
                public void run(SourceContext<Tuple2<String, Integer>> context) throws Exception {
                    context.collect(Tuple2.of("a", 0));
                    context.collect(Tuple2.of("a", 1));
                    context.collect(Tuple2.of("b", 3));
                    context.collect(Tuple2.of("c", 6));
                    context.collect(Tuple2.of("c", 7));
                    context.collect(Tuple2.of("c", 8));
                    // The source is finite, so an implicit MAX watermark
                    // follows once it finishes.
                }

                @Override
                public void cancel() {
                    // nothing to cancel: run() returns on its own
                }
            })
            .assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    TumblingEventTimeWindows threeMillisWindows = TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS));

    firstInput
            .coGroup(secondInput)
            .where(new Tuple2KeyExtractor())
            .equalTo(new Tuple2KeyExtractor())
            .window(threeMillisWindows)
            .apply(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {

                @Override
                public void coGroup(Iterable<Tuple2<String, Integer>> first, Iterable<Tuple2<String, Integer>> second, Collector<String> out) throws Exception {
                    // Render both sides of the group into one line: "F:<first> S:<second>".
                    StringBuilder rendered = new StringBuilder("F:");
                    for (Tuple2<String, Integer> left : first) {
                        rendered.append(left.toString());
                    }
                    rendered.append(" S:");
                    for (Tuple2<String, Integer> right : second) {
                        rendered.append(right.toString());
                    }
                    out.collect(rendered.toString());
                }
            })
            .addSink(new SinkFunction<String>() {

                @Override
                public void invoke(String record) throws Exception {
                    testResults.add(record);
                }
            });

    environment.execute("CoGroup Test");

    List<String> expected = Arrays.asList(
            "F:(a,0)(a,1)(a,2) S:(a,0)(a,1)",
            "F:(b,3)(b,4)(b,5) S:(b,3)",
            "F:(a,6)(a,7)(a,8) S:",
            "F: S:(c,6)(c,7)(c,8)");

    // Both lists are sorted before comparing, so the check does not depend on emission order.
    Collections.sort(expected);
    Collections.sort(testResults);
    Assert.assertEquals(expected, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 12 with SourceFunction

use of org.apache.flink.streaming.api.functions.source.SourceFunction in project flink by apache.

the class CoGroupJoinITCase method testSelfJoin.

/**
 * Joins a finite keyed stream with itself over 3 ms tumbling event-time windows and verifies
 * the emitted pairs.
 *
 * <p>Each result is rendered as {@code "<first>:<second>"}. The expected output lists every
 * ordered pair of same-key elements per window, including each element paired with itself.
 *
 * <p>NOTE(review): timestamps come from {@code Tuple3TimestampExtractor}, which is declared
 * outside this method; presumably it uses the integer field as the event time - confirm.
 */
@Test
public void testSelfJoin() throws Exception {
    // Fresh buffer for this run; the sink at the end of the pipeline appends to it.
    testResults = new ArrayList<>();

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(1);

    DataStream<Tuple3<String, String, Integer>> input = environment
            .addSource(new SourceFunction<Tuple3<String, String, Integer>>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void run(SourceContext<Tuple3<String, String, Integer>> context) throws Exception {
                    context.collect(Tuple3.of("a", "x", 0));
                    context.collect(Tuple3.of("a", "y", 1));
                    context.collect(Tuple3.of("a", "z", 2));
                    context.collect(Tuple3.of("b", "u", 3));
                    context.collect(Tuple3.of("b", "w", 5));
                    context.collect(Tuple3.of("a", "i", 6));
                    context.collect(Tuple3.of("a", "j", 7));
                    context.collect(Tuple3.of("a", "k", 8));
                    // The source is finite, so an implicit MAX watermark
                    // follows once it finishes.
                }

                @Override
                public void cancel() {
                    // nothing to cancel: run() returns on its own
                }
            })
            .assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    TumblingEventTimeWindows threeMillisWindows = TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS));

    // The same stream is used as both join inputs.
    input
            .join(input)
            .where(new Tuple3KeyExtractor())
            .equalTo(new Tuple3KeyExtractor())
            .window(threeMillisWindows)
            .apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {

                @Override
                public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
                    // Render the matched pair as "<first>:<second>".
                    return new StringBuilder().append(first).append(":").append(second).toString();
                }
            })
            .addSink(new SinkFunction<String>() {

                @Override
                public void invoke(String record) throws Exception {
                    testResults.add(record);
                }
            });

    environment.execute("Self-Join Test");

    List<String> expected = Arrays.asList(
            "(a,x,0):(a,x,0)", "(a,x,0):(a,y,1)", "(a,x,0):(a,z,2)",
            "(a,y,1):(a,x,0)", "(a,y,1):(a,y,1)", "(a,y,1):(a,z,2)",
            "(a,z,2):(a,x,0)", "(a,z,2):(a,y,1)", "(a,z,2):(a,z,2)",
            "(b,u,3):(b,u,3)", "(b,u,3):(b,w,5)",
            "(b,w,5):(b,u,3)", "(b,w,5):(b,w,5)",
            "(a,i,6):(a,i,6)", "(a,i,6):(a,j,7)", "(a,i,6):(a,k,8)",
            "(a,j,7):(a,i,6)", "(a,j,7):(a,j,7)", "(a,j,7):(a,k,8)",
            "(a,k,8):(a,i,6)", "(a,k,8):(a,j,7)", "(a,k,8):(a,k,8)");

    // Both lists are sorted before comparing, so the check does not depend on emission order.
    Collections.sort(expected);
    Collections.sort(testResults);
    Assert.assertEquals(expected, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) JoinFunction(org.apache.flink.api.common.functions.JoinFunction) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 13 with SourceFunction

use of org.apache.flink.streaming.api.functions.source.SourceFunction in project flink by apache.

the class CheckpointExceptionHandlerConfigurationTest method doTestPropagationFromCheckpointConfig.

/**
 * Assembles a minimal source-to-sink topology on an environment whose checkpoint config has
 * a checkpoint interval of 1000 and the given fail-on-checkpointing-errors flag.
 *
 * <p>NOTE(review): as visible here, the method only builds the topology; no assertion on the
 * propagated setting appears in this excerpt - confirm whether verification happens elsewhere.
 *
 * @param failTaskOnCheckpointErrors value passed to
 *     {@code CheckpointConfig#setFailOnCheckpointingErrors}
 */
public void doTestPropagationFromCheckpointConfig(boolean failTaskOnCheckpointErrors) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getCheckpointConfig().setCheckpointInterval(1000);
    env.getCheckpointConfig().setFailOnCheckpointingErrors(failTaskOnCheckpointErrors);

    // A source that emits nothing; it is only needed so that the topology has an input.
    SourceFunction<Integer> emptySource = new SourceFunction<Integer>() {

        @Override
        public void run(SourceContext<Integer> ctx) {
            // intentionally empty
        }

        @Override
        public void cancel() {
            // intentionally empty
        }
    };

    env.addSource(emptySource).addSink(new DiscardingSink<>());
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 14 with SourceFunction

use of org.apache.flink.streaming.api.functions.source.SourceFunction in project flink by apache.

the class StreamExecutionEnvironmentTest method testParallelismBounds.

/**
 * Exercises the accepted and rejected values for parallelism and max parallelism, both on an
 * individual operator and on the environment.
 *
 * <p>Checked here: {@code 0} is rejected everywhere, {@code 1} and {@code 1 << 15} are
 * accepted, {@code (1 << 15) + 1} is rejected as a max parallelism, an unset max parallelism
 * reads as {@code -1}, and a value set on the operator is kept after the job graph is
 * generated even though the environment was configured with a different one.
 */
@Test
public void testParallelismBounds() {
    // Largest value this test expects the setters to accept.
    final int upperBound = 1 << 15;

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    SourceFunction<Integer> noOpSource = new SourceFunction<Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            // intentionally empty
        }

        @Override
        public void cancel() {
            // intentionally empty
        }
    };

    SingleOutputStreamOperator<Object> stream = environment
            .addSource(noOpSource)
            .flatMap(new FlatMapFunction<Integer, Object>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(Integer value, Collector<Object> out) throws Exception {
                    // intentionally empty
                }
            });

    // --- operator: max parallelism is unset by default
    Assert.assertEquals(-1, stream.getTransformation().getMaxParallelism());

    // --- operator parallelism: 0 is rejected
    try {
        stream.setParallelism(0);
        Assert.fail();
    } catch (IllegalArgumentException ignored) {
        // expected rejection
    }

    // --- operator parallelism: 1 is accepted
    stream.setParallelism(1);
    Assert.assertEquals(1, stream.getParallelism());

    // --- operator parallelism: the upper bound is accepted
    stream.setParallelism(upperBound);
    Assert.assertEquals(upperBound, stream.getParallelism());

    // --- generating the job graph leaves the unset max parallelism untouched
    environment.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(-1, stream.getTransformation().getMaxParallelism());

    // --- a max parallelism configured on the environment shows up after generating
    environment.setMaxParallelism(42);
    environment.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(42, stream.getTransformation().getMaxParallelism());

    // --- environment max parallelism: 0 is rejected
    try {
        environment.setMaxParallelism(0);
        Assert.fail();
    } catch (IllegalArgumentException ignored) {
        // expected rejection
    }

    // --- environment max parallelism: one above the upper bound is rejected
    try {
        environment.setMaxParallelism(upperBound + 1);
        Assert.fail();
    } catch (IllegalArgumentException ignored) {
        // expected rejection
    }

    // --- operator max parallelism: 0 is rejected
    try {
        stream.setMaxParallelism(0);
        Assert.fail();
    } catch (IllegalArgumentException ignored) {
        // expected rejection
    }

    // --- operator max parallelism: one above the upper bound is rejected
    try {
        stream.setMaxParallelism(upperBound + 1);
        Assert.fail();
    } catch (IllegalArgumentException ignored) {
        // expected rejection
    }

    // --- operator max parallelism: 1 is accepted
    stream.setMaxParallelism(1);
    Assert.assertEquals(1, stream.getTransformation().getMaxParallelism());

    // --- operator max parallelism: the upper bound is accepted
    stream.setMaxParallelism(upperBound);
    Assert.assertEquals(upperBound, stream.getTransformation().getMaxParallelism());

    // --- the operator-level value wins over the environment-level one (42) after generating
    environment.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(upperBound, stream.getTransformation().getMaxParallelism());
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) NoSuchElementException(java.util.NoSuchElementException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 15 with SourceFunction

use of org.apache.flink.streaming.api.functions.source.SourceFunction in project flink by apache.

the class StreamExecutionEnvironmentTest method testSources.

/**
 * Checks which source function backs the streams created by the environment's source
 * factory methods.
 *
 * <p>A user-supplied {@code SourceFunction} must be returned as-is; {@code generateSequence}
 * must be backed by a {@code StatefulSequenceSource}; {@code fromElements} and
 * {@code fromCollection} must both be backed by a {@code FromElementsFunction}.
 * The lookup itself is done by {@code getFunctionFromDataSource}, declared outside this method.
 */
@Test
public void testSources() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    SourceFunction<Integer> noOpSource = new SourceFunction<Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            // intentionally empty
        }

        @Override
        public void cancel() {
            // intentionally empty
        }
    };

    // User-defined source: the very same function instance must come back.
    DataStreamSource<Integer> customSource = environment.addSource(noOpSource);
    customSource.addSink(new DiscardingSink<Integer>());
    assertEquals(noOpSource, getFunctionFromDataSource(customSource));

    // Generated sequence.
    DataStreamSource<Long> sequenceSource = environment.generateSequence(0, 2);
    assertTrue(getFunctionFromDataSource(sequenceSource) instanceof StatefulSequenceSource);

    // Explicit elements.
    DataStreamSource<Long> elementsSource = environment.fromElements(0L, 1L, 2L);
    assertTrue(getFunctionFromDataSource(elementsSource) instanceof FromElementsFunction);

    // Pre-built collection.
    List<Long> values = Arrays.asList(0L, 1L, 2L);
    DataStreamSource<Long> collectionSource = environment.fromCollection(values);
    assertTrue(getFunctionFromDataSource(collectionSource) instanceof FromElementsFunction);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FromElementsFunction(org.apache.flink.streaming.api.functions.source.FromElementsFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StatefulSequenceSource(org.apache.flink.streaming.api.functions.source.StatefulSequenceSource) Test(org.junit.Test)

Aggregations

SourceFunction (org.apache.flink.streaming.api.functions.source.SourceFunction)21 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)19 Test (org.junit.Test)15 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 ArrayList (java.util.ArrayList)5 FoldFunction (org.apache.flink.api.common.functions.FoldFunction)4 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)4 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)3 Collector (org.apache.flink.util.Collector)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 Properties (java.util.Properties)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 JoinFunction (org.apache.flink.api.common.functions.JoinFunction)2 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)2 SinkFunction (org.apache.flink.streaming.api.functions.sink.SinkFunction)2 SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation)2 StreamTransformation (org.apache.flink.streaming.api.transformations.StreamTransformation)2 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)2 AccumulatingProcessingTimeWindowOperator (org.apache.flink.streaming.runtime.operators.windowing.AccumulatingProcessingTimeWindowOperator)2