Search in sources :

Example 1 with JoinFunction

use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.

the class CoGroupJoinITCase method testSelfJoin.

/**
 * Joins a finite stream with itself over tumbling event-time windows of 3 milliseconds and
 * verifies the emitted pairs.
 *
 * <p>Each joined pair is rendered as {@code first + ":" + second} and gathered in
 * {@code testResults}; the collected strings are then compared, order-insensitively, with the
 * expected pairings. Keys and timestamps come from {@code Tuple3KeyExtractor} and
 * {@code Tuple3TimestampExtractor}, which are defined outside this method (presumably keyed on
 * the first field and timestamped by the third; confirm against their definitions).
 *
 * @throws Exception if the job fails to execute
 */
@Test
public void testSelfJoin() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // Finite source: emits eight fixed records and then returns.
    SourceFunction<Tuple3<String, String, Integer>> elements = new SourceFunction<Tuple3<String, String, Integer>>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));
            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));
            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Renders a joined pair as "<first>:<second>".
    JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String> pairFormatter = new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {

        @Override
        public String join(Tuple3<String, String, Integer> left, Tuple3<String, String, Integer> right) throws Exception {
            return left + ":" + right;
        }
    };

    // Records every joined string so it can be inspected once the job has finished.
    SinkFunction<String> resultCollector = new SinkFunction<String>() {

        @Override
        public void invoke(String value) throws Exception {
            testResults.add(value);
        }
    };

    DataStream<Tuple3<String, String, Integer>> input = env
            .addSource(elements)
            .assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    // Both sides of the join are the same stream.
    input.join(input)
            .where(new Tuple3KeyExtractor())
            .equalTo(new Tuple3KeyExtractor())
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .apply(pairFormatter)
            .addSink(resultCollector);

    env.execute("Self-Join Test");

    List<String> expectedResult = Arrays.asList(
            "(a,x,0):(a,x,0)", "(a,x,0):(a,y,1)", "(a,x,0):(a,z,2)",
            "(a,y,1):(a,x,0)", "(a,y,1):(a,y,1)", "(a,y,1):(a,z,2)",
            "(a,z,2):(a,x,0)", "(a,z,2):(a,y,1)", "(a,z,2):(a,z,2)",
            "(b,u,3):(b,u,3)", "(b,u,3):(b,w,5)",
            "(b,w,5):(b,u,3)", "(b,w,5):(b,w,5)",
            "(a,i,6):(a,i,6)", "(a,i,6):(a,j,7)", "(a,i,6):(a,k,8)",
            "(a,j,7):(a,i,6)", "(a,j,7):(a,j,7)", "(a,j,7):(a,k,8)",
            "(a,k,8):(a,i,6)", "(a,k,8):(a,j,7)", "(a,k,8):(a,k,8)");

    // Sort both lists so the comparison does not depend on emission order.
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) JoinFunction(org.apache.flink.api.common.functions.JoinFunction) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 2 with JoinFunction

use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.

the class CoGroupConnectedComponentsITCase method testProgram.

// --------------------------------------------------------------------------------------------
//  The test program
// --------------------------------------------------------------------------------------------
/**
 * Builds and runs the "Workset Connected Components" batch job.
 *
 * <p>Vertices and edges are read from space-delimited CSV files at {@code verticesPath} and
 * {@code edgesPath}. Every vertex starts out paired with its own value as its id. A delta
 * iteration then joins the workset with the edges, co-groups the outcome with the solution set
 * through {@code MinIdAndUpdate} (defined outside this method), and finally writes the result
 * to {@code resultPath} as CSV.
 *
 * @throws Exception if the job fails to execute
 */
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> vertexInput = env.readCsvFile(verticesPath)
            .fieldDelimiter(" ")
            .types(Long.class)
            .name("Vertices");

    DataSet<Tuple2<Long, Long>> edgeInput = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .name("Edges");

    // Turns (vertex) into (vertex, vertex): each vertex starts with itself as its id.
    MapFunction<Tuple1<Long>, Tuple2<Long, Long>> selfIdAssigner = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    };

    // Pairs the second field of the right-hand record with the second field of the left-hand one.
    JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidateForwarder = new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

        @Override
        public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
            return new Tuple2<>(second.f1, first.f1);
        }
    };

    DataSet<Tuple2<Long, Long>> labeledVertices = vertexInput
            .map(selfIdAssigner)
            .name("Assign Vertex Ids");

    // The labeled vertices serve as both the receiver of iterateDelta and its first argument.
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaLoop =
            labeledVertices.iterateDelta(labeledVertices, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates = deltaLoop.getWorkset()
            .join(edgeInput)
            .where(0)
            .equalTo(0)
            .with(candidateForwarder)
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates = candidates
            .coGroup(deltaLoop.getSolutionSet())
            .where(0)
            .equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    // The same co-group output is passed for both arguments of closeWith.
    deltaLoop.closeWith(updates, updates)
            .writeAsCsv(resultPath, "\n", " ")
            .name("Result");

    env.execute("Workset Connected Components");
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JoinFunction(org.apache.flink.api.common.functions.JoinFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction)

Example 3 with JoinFunction

use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.

the class CoGroupJoinITCase method testJoin.

/**
 * Joins two finite streams over tumbling event-time windows of 3 milliseconds and verifies the
 * emitted pairs.
 *
 * <p>Each joined pair is rendered as {@code first + ":" + second} and gathered in
 * {@code testResults}; the collected strings are then compared, order-insensitively, with the
 * expected pairings. Keys and timestamps come from {@code Tuple3KeyExtractor} and
 * {@code Tuple3TimestampExtractor}, which are defined outside this method (presumably keyed on
 * the first field and timestamped by the third; confirm against their definitions).
 *
 * @throws Exception if the job fails to execute
 */
@Test
public void testJoin() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // Left-hand input: eight fixed records, then the source returns.
    SourceFunction<Tuple3<String, String, Integer>> leftElements = new SourceFunction<Tuple3<String, String, Integer>>() {

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));
            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));
            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Right-hand input: six fixed records, then the source returns.
    SourceFunction<Tuple3<String, String, Integer>> rightElements = new SourceFunction<Tuple3<String, String, Integer>>() {

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "u", 0));
            ctx.collect(Tuple3.of("a", "w", 1));
            ctx.collect(Tuple3.of("b", "i", 3));
            ctx.collect(Tuple3.of("b", "k", 5));
            ctx.collect(Tuple3.of("a", "x", 6));
            ctx.collect(Tuple3.of("a", "z", 8));
            // source is finite, so it will have an implicit MAX watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    };

    // Renders a joined pair as "<first>:<second>".
    JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String> pairFormatter = new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {

        @Override
        public String join(Tuple3<String, String, Integer> left, Tuple3<String, String, Integer> right) throws Exception {
            return left + ":" + right;
        }
    };

    // Records every joined string so it can be inspected once the job has finished.
    SinkFunction<String> resultCollector = new SinkFunction<String>() {

        @Override
        public void invoke(String value) throws Exception {
            testResults.add(value);
        }
    };

    DataStream<Tuple3<String, String, Integer>> source1 = env
            .addSource(leftElements)
            .assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    DataStream<Tuple3<String, String, Integer>> source2 = env
            .addSource(rightElements)
            .assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    source1.join(source2)
            .where(new Tuple3KeyExtractor())
            .equalTo(new Tuple3KeyExtractor())
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .apply(pairFormatter)
            .addSink(resultCollector);

    env.execute("Join Test");

    List<String> expectedResult = Arrays.asList(
            "(a,x,0):(a,u,0)", "(a,x,0):(a,w,1)",
            "(a,y,1):(a,u,0)", "(a,y,1):(a,w,1)",
            "(a,z,2):(a,u,0)", "(a,z,2):(a,w,1)",
            "(b,u,3):(b,i,3)", "(b,u,3):(b,k,5)",
            "(b,w,5):(b,i,3)", "(b,w,5):(b,k,5)",
            "(a,i,6):(a,x,6)", "(a,i,6):(a,z,8)",
            "(a,j,7):(a,x,6)", "(a,j,7):(a,z,8)",
            "(a,k,8):(a,x,6)", "(a,k,8):(a,z,8)");

    // Sort both lists so the comparison does not depend on emission order.
    Collections.sort(expectedResult);
    Collections.sort(testResults);
    Assert.assertEquals(expectedResult, testResults);
}
Also used : SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) JoinFunction(org.apache.flink.api.common.functions.JoinFunction) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

JoinFunction (org.apache.flink.api.common.functions.JoinFunction): 3; Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 2; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 2; SourceFunction (org.apache.flink.streaming.api.functions.source.SourceFunction): 2; Test (org.junit.Test): 2; MapFunction (org.apache.flink.api.common.functions.MapFunction): 1; ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 1; Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 1; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1