Search in sources :

Example 6 with CoGroupFunction

Use of org.apache.flink.api.common.functions.CoGroupFunction in the Apache Flink project.

Class CoGroupJoinITCase, method testCoGroup.

/**
 * Co-groups two finite sources over tumbling event-time windows of 3 milliseconds and compares
 * the strings emitted per co-group with a fixed expectation.
 *
 * <p>Each emitted string has the form {@code F:<first-side tuples> S:<second-side tuples>}.
 * Both the expected and the collected lists are sorted before the comparison, so the order in
 * which results reach the sink is not part of the assertion.
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testCoGroup() throws Exception {
    // Start from an empty result list; the sink below appends to it.
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // First input: keys "a" and "b".
    SourceFunction<Tuple2<String, Integer>> firstInput = new SourceFunction<Tuple2<String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> context) throws Exception {
            // source is finite, so it will have an implicit MAX
            // watermark when it finishes
            context.collect(Tuple2.of("a", 0));
            context.collect(Tuple2.of("a", 1));
            context.collect(Tuple2.of("a", 2));
            context.collect(Tuple2.of("b", 3));
            context.collect(Tuple2.of("b", 4));
            context.collect(Tuple2.of("b", 5));
            context.collect(Tuple2.of("a", 6));
            context.collect(Tuple2.of("a", 7));
            context.collect(Tuple2.of("a", 8));
        }

        @Override
        public void cancel() {
            // intentionally empty
        }
    };

    // Second input: keys "a", "b" and "c".
    SourceFunction<Tuple2<String, Integer>> secondInput = new SourceFunction<Tuple2<String, Integer>>() {

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> context) throws Exception {
            // source is finite, so it will have an implicit MAX
            // watermark when it finishes
            context.collect(Tuple2.of("a", 0));
            context.collect(Tuple2.of("a", 1));
            context.collect(Tuple2.of("b", 3));
            context.collect(Tuple2.of("c", 6));
            context.collect(Tuple2.of("c", 7));
            context.collect(Tuple2.of("c", 8));
        }

        @Override
        public void cancel() {
            // intentionally empty
        }
    };

    // Renders one co-group as "F:<first tuples> S:<second tuples>".
    CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String> renderGroups = new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {

        @Override
        public void coGroup(Iterable<Tuple2<String, Integer>> left, Iterable<Tuple2<String, Integer>> right, Collector<String> collector) throws Exception {
            StringBuilder rendered = new StringBuilder("F:");
            for (Tuple2<String, Integer> tuple : left) {
                rendered.append(tuple.toString());
            }
            rendered.append(" S:");
            for (Tuple2<String, Integer> tuple : right) {
                rendered.append(tuple.toString());
            }
            collector.collect(rendered.toString());
        }
    };

    // Stores every emitted string for the assertion at the end of the test.
    SinkFunction<String> collectingSink = new SinkFunction<String>() {

        @Override
        public void invoke(String element) throws Exception {
            testResults.add(element);
        }
    };

    DataStream<Tuple2<String, Integer>> source1 = env
            .addSource(firstInput)
            .assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());
    DataStream<Tuple2<String, Integer>> source2 = env
            .addSource(secondInput)
            .assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1.coGroup(source2)
            .where(new Tuple2KeyExtractor())
            .equalTo(new Tuple2KeyExtractor())
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .apply(renderGroups)
            .addSink(collectingSink);

    env.execute("CoGroup Test");

    List<String> expected = Arrays.asList(
            "F:(a,0)(a,1)(a,2) S:(a,0)(a,1)",
            "F:(b,3)(b,4)(b,5) S:(b,3)",
            "F:(a,6)(a,7)(a,8) S:",
            "F: S:(c,6)(c,7)(c,8)");

    // Compare as sorted lists so emission order does not matter.
    Collections.sort(expected);
    Collections.sort(testResults);
    Assert.assertEquals(expected, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 7 with CoGroupFunction

Use of org.apache.flink.api.common.functions.CoGroupFunction in the Apache Flink project.

Class CoGroupedStreamsTest, method setUp.

/**
 * Initialises the fixtures used by the tests: two string streams, a key selector that returns
 * its input unchanged, a tumbling event-time window assigner of one millisecond, and a co-group
 * function that emits an empty string on every invocation.
 */
@Before
public void setUp() {
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // Input streams.
    dataStream1 = environment.fromElements("a1", "a2", "a3");
    dataStream2 = environment.fromElements("a1", "a2");

    // Each element is its own key.
    keySelector = value -> {
        return value;
    };

    tsAssigner = TumblingEventTimeWindows.of(Time.milliseconds(1L));

    // Ignores both inputs and always emits the empty string.
    coGroupFunction = (CoGroupFunction<String, String, String>) (left, right, collector) -> {
        collector.collect("");
    };
}
Also used : BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Time(org.apache.flink.streaming.api.windowing.time.Time) KeySelector(org.apache.flink.api.java.functions.KeySelector) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) Test(org.junit.Test) Assert(org.junit.Assert) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Before(org.junit.Before)

Aggregations

CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction)7 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)5 Test (org.junit.Test)5 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)4 Collector (org.apache.flink.util.Collector)4 Plan (org.apache.flink.api.common.Plan)3 CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase)2 KeySelector (org.apache.flink.api.java.functions.KeySelector)2 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)2 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)1 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)1 Keys (org.apache.flink.api.common.operators.Keys)1 IncompatibleKeysException (org.apache.flink.api.common.operators.Keys.IncompatibleKeysException)1 Order (org.apache.flink.api.common.operators.Order)1 CoGroupRawOperatorBase (org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase)1 JoinOperatorBase (org.apache.flink.api.common.operators.base.JoinOperatorBase)1 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)1 Types (org.apache.flink.api.common.typeinfo.Types)1 DataSet (org.apache.flink.api.java.DataSet)1