use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.
the class CoGroupJoinITCase method testSelfJoin.
@Test
public void testSelfJoin() throws Exception {
testResults = new ArrayList<>();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(1);
DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
private static final long serialVersionUID = 1L;
@Override
public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
ctx.collect(Tuple3.of("a", "x", 0));
ctx.collect(Tuple3.of("a", "y", 1));
ctx.collect(Tuple3.of("a", "z", 2));
ctx.collect(Tuple3.of("b", "u", 3));
ctx.collect(Tuple3.of("b", "w", 5));
ctx.collect(Tuple3.of("a", "i", 6));
ctx.collect(Tuple3.of("a", "j", 7));
ctx.collect(Tuple3.of("a", "k", 8));
// source is finite, so it will have an implicit MAX watermark when it finishes
}
@Override
public void cancel() {
}
}).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());
source1.join(source1).where(new Tuple3KeyExtractor()).equalTo(new Tuple3KeyExtractor()).window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS))).apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
@Override
public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
return first + ":" + second;
}
}).addSink(new SinkFunction<String>() {
@Override
public void invoke(String value) throws Exception {
testResults.add(value);
}
});
env.execute("Self-Join Test");
List<String> expectedResult = Arrays.asList("(a,x,0):(a,x,0)", "(a,x,0):(a,y,1)", "(a,x,0):(a,z,2)", "(a,y,1):(a,x,0)", "(a,y,1):(a,y,1)", "(a,y,1):(a,z,2)", "(a,z,2):(a,x,0)", "(a,z,2):(a,y,1)", "(a,z,2):(a,z,2)", "(b,u,3):(b,u,3)", "(b,u,3):(b,w,5)", "(b,w,5):(b,u,3)", "(b,w,5):(b,w,5)", "(a,i,6):(a,i,6)", "(a,i,6):(a,j,7)", "(a,i,6):(a,k,8)", "(a,j,7):(a,i,6)", "(a,j,7):(a,j,7)", "(a,j,7):(a,k,8)", "(a,k,8):(a,i,6)", "(a,k,8):(a,j,7)", "(a,k,8):(a,k,8)");
Collections.sort(expectedResult);
Collections.sort(testResults);
Assert.assertEquals(expectedResult, testResults);
}
use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.
the class CoGroupConnectedComponentsITCase method testProgram.
// --------------------------------------------------------------------------------------------
// The test program
// --------------------------------------------------------------------------------------------
@Override
protected void testProgram() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");
DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");
DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
@Override
public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
return new Tuple2<>(value.f0, value.f0);
}
}).name("Assign Vertex Ids");
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);
JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
@Override
public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
return new Tuple2<>(second.f1, first.f1);
}
}).name("Join Candidate Id With Neighbor");
CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors.coGroup(iteration.getSolutionSet()).where(0).equalTo(0).with(new MinIdAndUpdate()).name("min Id and Update");
iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");
env.execute("Workset Connected Components");
}
use of org.apache.flink.api.common.functions.JoinFunction in project flink by apache.
the class CoGroupJoinITCase method testJoin.
@Test
public void testJoin() throws Exception {
testResults = new ArrayList<>();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(1);
DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
@Override
public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
ctx.collect(Tuple3.of("a", "x", 0));
ctx.collect(Tuple3.of("a", "y", 1));
ctx.collect(Tuple3.of("a", "z", 2));
ctx.collect(Tuple3.of("b", "u", 3));
ctx.collect(Tuple3.of("b", "w", 5));
ctx.collect(Tuple3.of("a", "i", 6));
ctx.collect(Tuple3.of("a", "j", 7));
ctx.collect(Tuple3.of("a", "k", 8));
// source is finite, so it will have an implicit MAX watermark when it finishes
}
@Override
public void cancel() {
}
}).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());
DataStream<Tuple3<String, String, Integer>> source2 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
@Override
public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
ctx.collect(Tuple3.of("a", "u", 0));
ctx.collect(Tuple3.of("a", "w", 1));
ctx.collect(Tuple3.of("b", "i", 3));
ctx.collect(Tuple3.of("b", "k", 5));
ctx.collect(Tuple3.of("a", "x", 6));
ctx.collect(Tuple3.of("a", "z", 8));
// source is finite, so it will have an implicit MAX watermark when it finishes
}
@Override
public void cancel() {
}
}).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());
source1.join(source2).where(new Tuple3KeyExtractor()).equalTo(new Tuple3KeyExtractor()).window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS))).apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
@Override
public String join(Tuple3<String, String, Integer> first, Tuple3<String, String, Integer> second) throws Exception {
return first + ":" + second;
}
}).addSink(new SinkFunction<String>() {
@Override
public void invoke(String value) throws Exception {
testResults.add(value);
}
});
env.execute("Join Test");
List<String> expectedResult = Arrays.asList("(a,x,0):(a,u,0)", "(a,x,0):(a,w,1)", "(a,y,1):(a,u,0)", "(a,y,1):(a,w,1)", "(a,z,2):(a,u,0)", "(a,z,2):(a,w,1)", "(b,u,3):(b,i,3)", "(b,u,3):(b,k,5)", "(b,w,5):(b,i,3)", "(b,w,5):(b,k,5)", "(a,i,6):(a,x,6)", "(a,i,6):(a,z,8)", "(a,j,7):(a,x,6)", "(a,j,7):(a,z,8)", "(a,k,8):(a,x,6)", "(a,k,8):(a,z,8)");
Collections.sort(expectedResult);
Collections.sort(testResults);
Assert.assertEquals(expectedResult, testResults);
}
Aggregations