Search in sources :

Example 6 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class WindowFoldITCase method testFoldProcessAllWindow.

/**
 * Runs a fold followed by a {@code ProcessAllWindowFunction} over tumbling 3 ms event-time
 * all-windows and compares the sorted sink output with the expected strings.
 */
@Test
public void testFoldProcessAllWindow() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // Emits nine records: keys "a", "b", "a" in runs of three, carrying the values 0..8.
    SourceFunction<Tuple2<String, Integer>> finiteSource = new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            String[] keyPerRun = { "a", "b", "a" };
            for (int value = 0; value < 9; value++) {
                ctx.collect(Tuple2.of(keyPerRun[value / 3], value));
            }
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Accumulator layout: f0 = running sum of the values, f1 = "R:" followed by the keys seen.
    Tuple2<Integer, String> seed = Tuple2.of(0, "R:");
    FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>> sumAndConcat = new FoldFunction<Tuple2<String, Integer>, Tuple2<Integer, String>>() {

        @Override
        public Tuple2<Integer, String> fold(Tuple2<Integer, String> accumulator, Tuple2<String, Integer> value) throws Exception {
            accumulator.f1 = accumulator.f1 + value.f0;
            accumulator.f0 = accumulator.f0 + value.f1;
            return accumulator;
        }
    };

    // Swaps the accumulator fields and appends the position of the element within the window.
    ProcessAllWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, TimeWindow> indexer = new ProcessAllWindowFunction<Tuple2<Integer, String>, Tuple3<String, Integer, Integer>, TimeWindow>() {

        @Override
        public void process(Context context, Iterable<Tuple2<Integer, String>> elements, Collector<Tuple3<String, Integer, Integer>> out) throws Exception {
            int position = 0;
            for (Tuple2<Integer, String> folded : elements) {
                out.collect(new Tuple3<>(folded.f1, folded.f0, position));
                position++;
            }
        }
    };

    // Records every emitted tuple as its string form in the shared result list.
    SinkFunction<Tuple3<String, Integer, Integer>> recordingSink = new SinkFunction<Tuple3<String, Integer, Integer>>() {

        @Override
        public void invoke(Tuple3<String, Integer, Integer> value) throws Exception {
            testResults.add(value.toString());
        }
    };

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(finiteSource).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    source1
        .windowAll(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .fold(seed, sumAndConcat, indexer)
        .addSink(recordingSink);

    env.execute("Fold Process Window Test");

    // Both lists are sorted so the comparison does not depend on emission order.
    List<String> expectedResult = Arrays.asList("(R:aaa,3,0)", "(R:aaa,21,0)", "(R:bbb,12,0)");
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FoldFunction(org.apache.flink.api.common.functions.FoldFunction) ProcessAllWindowFunction(org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 7 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class SelfConnectionITCase method differentDataStreamDifferentChain.

/**
 * We connect two different data streams in different chains to a CoMap.
 * (This is not actually self-connect.)
 *
 * <p>The integer source is mapped once to strings ({@code "x " + value}) and once to longs
 * ({@code value + 1}); both keyed streams are connected and the CoMap output is collected
 * in a list sink. A failure of the job itself fails the test immediately with the original
 * exception attached as the cause.
 */
@Test
public void differentDataStreamDifferentChain() {
    TestListResultSink<String> resultSink = new TestListResultSink<String>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    // Chaining is disabled on the source so the two downstream branches are separate chains.
    DataStream<Integer> src = env.fromElements(1, 3, 5).disableChaining();
    DataStream<String> stringMap = src.flatMap(new FlatMapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("x " + value);
        }
    }).keyBy(new KeySelector<String, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer getKey(String value) throws Exception {
            return value.length();
        }
    });
    DataStream<Long> longMap = src.map(new MapFunction<Integer, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Long map(Integer value) throws Exception {
            return (long) (value + 1);
        }
    }).keyBy(new KeySelector<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Integer getKey(Long value) throws Exception {
            return value.intValue();
        }
    });
    stringMap.connect(longMap).map(new CoMapFunction<String, Long, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map1(String value) {
            return value;
        }

        @Override
        public String map2(Long value) {
            return value.toString();
        }
    }).addSink(resultSink);
    try {
        env.execute();
    } catch (Exception e) {
        // Do not swallow the failure: printing the stack trace and continuing would only
        // surface later as a misleading "lists differ" assertion error.
        throw new AssertionError("Job execution failed: " + e.getMessage(), e);
    }
    List<String> expected = Arrays.asList("x 1", "x 3", "x 5", "2", "4", "6");
    List<String> result = resultSink.getResult();
    // Sort both sides so the comparison is independent of the order in which records arrive.
    Collections.sort(expected);
    Collections.sort(result);
    assertEquals(expected, result);
}
Also used : CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 8 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class CoGroupJoinITCase method testCoGroup.

/**
 * Co-groups two finite sources over tumbling 3 ms event-time windows and compares the sorted
 * sink output, one string per co-group invocation, with the expected strings.
 */
@Test
public void testCoGroup() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // First input: keys "a", "b", "a" in runs of three, carrying the values 0..8.
    SourceFunction<Tuple2<String, Integer>> firstInput = new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            String[] keyPerRun = { "a", "b", "a" };
            for (int value = 0; value < 9; value++) {
                ctx.collect(Tuple2.of(keyPerRun[value / 3], value));
            }
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Second input: six records given as parallel key/value arrays.
    SourceFunction<Tuple2<String, Integer>> secondInput = new SourceFunction<Tuple2<String, Integer>>() {

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            String[] keys = { "a", "a", "b", "c", "c", "c" };
            int[] values = { 0, 1, 3, 6, 7, 8 };
            for (int idx = 0; idx < keys.length; idx++) {
                ctx.collect(Tuple2.of(keys[idx], values[idx]));
            }
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Renders one co-group as "F:<first side tuples> S:<second side tuples>".
    CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String> renderGroup = new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {

        @Override
        public void coGroup(Iterable<Tuple2<String, Integer>> first, Iterable<Tuple2<String, Integer>> second, Collector<String> out) throws Exception {
            StringBuilder rendered = new StringBuilder("F:");
            for (Tuple2<String, Integer> left : first) {
                rendered.append(left.toString());
            }
            rendered.append(" S:");
            for (Tuple2<String, Integer> right : second) {
                rendered.append(right.toString());
            }
            out.collect(rendered.toString());
        }
    };

    // Records every emitted string in the shared result list.
    SinkFunction<String> recordingSink = new SinkFunction<String>() {

        @Override
        public void invoke(String value) throws Exception {
            testResults.add(value);
        }
    };

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(firstInput).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    DataStream<Tuple2<String, Integer>> source2 = env.addSource(secondInput).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1.coGroup(source2)
        .where(new Tuple2KeyExtractor())
        .equalTo(new Tuple2KeyExtractor())
        .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
        .apply(renderGroup)
        .addSink(recordingSink);

    env.execute("CoGroup Test");

    // Both lists are sorted so the comparison does not depend on emission order.
    List<String> expectedResult = Arrays.asList(
        "F:(a,0)(a,1)(a,2) S:(a,0)(a,1)",
        "F:(b,3)(b,4)(b,5) S:(b,3)",
        "F:(a,6)(a,7)(a,8) S:",
        "F: S:(c,6)(c,7)(c,8)");
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 9 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class DataStreamTest method operatorTest.

/**
 * Builds a small topology (map, flatMap, union + filter, split/select, connect + coMap) and
 * checks, for each step, that the user function handed to the API is the one returned by
 * {@code getFunctionForDataStream} and that the expected edges / output selectors can be
 * looked up in the stream graph.
 *
 * <p>All user functions are no-op stubs; the job is never executed.
 */
@Test
public void operatorTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);
    // --- map ---
    MapFunction<Long, Integer> mapFunction = new MapFunction<Long, Integer>() {

        @Override
        public Integer map(Long value) throws Exception {
            return null;
        }
    };
    DataStream<Integer> map = src.map(mapFunction);
    map.addSink(new DiscardingSink<Integer>());
    assertEquals(mapFunction, getFunctionForDataStream(map));
    // --- flatMap ---
    FlatMapFunction<Long, Integer> flatMapFunction = new FlatMapFunction<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Long value, Collector<Integer> out) throws Exception {
        }
    };
    DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
    flatMap.addSink(new DiscardingSink<Integer>());
    assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
    // --- union of both streams followed by a filter ---
    FilterFunction<Integer> filterFunction = new FilterFunction<Integer>() {

        @Override
        public boolean filter(Integer value) throws Exception {
            return false;
        }
    };
    DataStream<Integer> unionFilter = map.union(flatMap).filter(filterFunction);
    unionFilter.addSink(new DiscardingSink<Integer>());
    assertEquals(filterFunction, getFunctionForDataStream(unionFilter));
    // Both union inputs must be connected to the filter.
    // NOTE(review): presumably getStreamEdges throws a RuntimeException when no edge exists - confirm.
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), unionFilter.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    // --- split / select ---
    OutputSelector<Integer> outputSelector = new OutputSelector<Integer>() {

        @Override
        public Iterable<String> select(Integer value) {
            return null;
        }
    };
    SplitStream<Integer> split = unionFilter.split(outputSelector);
    split.select("dummy").addSink(new DiscardingSink<Integer>());
    // The selector is expected on the node of the stream that was split.
    List<OutputSelector<?>> outputSelectors = env.getStreamGraph().getStreamNode(unionFilter.getId()).getOutputSelectors();
    assertEquals(1, outputSelectors.size());
    assertEquals(outputSelector, outputSelectors.get(0));
    DataStream<Integer> select = split.select("a");
    DataStreamSink<Integer> sink = select.print();
    // The edge from the split stream to the print sink must carry the selected name "a".
    StreamEdge splitEdge = env.getStreamGraph().getStreamEdges(unionFilter.getId(), sink.getTransformation().getId()).get(0);
    assertEquals("a", splitEdge.getSelectedNames().get(0));
    // --- connect + coMap ---
    ConnectedStreams<Integer, Integer> connect = map.connect(flatMap);
    CoMapFunction<Integer, Integer, String> coMapper = new CoMapFunction<Integer, Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map1(Integer value) {
            return null;
        }

        @Override
        public String map2(Integer value) {
            return null;
        }
    };
    DataStream<String> coMap = connect.map(coMapper);
    coMap.addSink(new DiscardingSink<String>());
    assertEquals(coMapper, getFunctionForDataStream(coMap));
    // Both connected inputs must be reachable from the coMap in the stream graph.
    try {
        env.getStreamGraph().getStreamEdges(map.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
    try {
        env.getStreamGraph().getStreamEdges(flatMap.getId(), coMap.getId());
    } catch (RuntimeException e) {
        fail(e.getMessage());
    }
}
Also used : FilterFunction(org.apache.flink.api.common.functions.FilterFunction) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 10 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class DataStreamTest method testKeyedProcessTranslation.

/**
 * Checks the translation of {@link KeyedStream#process(ProcessFunction)}: the resulting stream
 * must expose the very function that was passed in, and its operator must be a
 * {@code KeyedProcessOperator}.
 */
@Test
public void testKeyedProcessTranslation() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> sequence = environment.generateSequence(0, 0);

    // No-op function: only its identity and the operator it is wrapped in are inspected.
    ProcessFunction<Long, Integer> noOpFunction = new ProcessFunction<Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Long value, Context ctx, Collector<Integer> out) throws Exception {
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Integer> out) throws Exception {
        }
    };

    IdentityKeySelector<Long> byIdentity = new IdentityKeySelector<Long>();
    DataStream<Integer> result = sequence.keyBy(byIdentity).process(noOpFunction);
    result.addSink(new DiscardingSink<Integer>());

    assertEquals(noOpFunction, getFunctionForDataStream(result));
    assertTrue(getOperatorForDataStream(result) instanceof KeyedProcessOperator);
}
Also used : ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) Test(org.junit.Test)

Aggregations

Collector (org.apache.flink.util.Collector)50 Test (org.junit.Test)38 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)20 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)16 Configuration (org.apache.flink.configuration.Configuration)16 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)15 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)14 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)14 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)11 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)8 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)8 Plan (org.apache.flink.api.common.Plan)7 HashSet (java.util.HashSet)6 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)6 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)6 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)5 Map (java.util.Map)4 TaskInfo (org.apache.flink.api.common.TaskInfo)4 CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction)4