Example usage of org.apache.flink.streaming.api.functions.source.SourceFunction in the Apache Flink project: class CoGroupJoinITCase, method testCoGroup.
/**
 * Verifies that a windowed coGroup of two keyed streams groups elements per key and per
 * tumbling event-time window of 3 ms, and that keys present in only one input still
 * produce output (with an empty side for the other input).
 */
@Test
public void testCoGroup() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Parallelism 1 keeps element emission order deterministic within each source.
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> source1 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("a", 2));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("b", 4));
            ctx.collect(Tuple2.of("b", 5));
            ctx.collect(Tuple2.of("a", 6));
            ctx.collect(Tuple2.of("a", 7));
            ctx.collect(Tuple2.of("a", 8));

            // source is finite, so it will have an implicit MAX
            // watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    DataStream<Tuple2<String, Integer>> source2 = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
        // Flink functions are Serializable; declare an explicit id, consistent with source1.
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple2.of("a", 0));
            ctx.collect(Tuple2.of("a", 1));
            ctx.collect(Tuple2.of("b", 3));
            ctx.collect(Tuple2.of("c", 6));
            ctx.collect(Tuple2.of("c", 7));
            ctx.collect(Tuple2.of("c", 8));

            // source is finite, so it will have an implicit MAX
            // watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple2TimestampExtractor());

    source1.coGroup(source2)
            .where(new Tuple2KeyExtractor())
            .equalTo(new Tuple2KeyExtractor())
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .apply(new CoGroupFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void coGroup(Iterable<Tuple2<String, Integer>> first,
                        Iterable<Tuple2<String, Integer>> second,
                        Collector<String> out) throws Exception {
                    // Render both sides of the co-group as a single comparable string.
                    StringBuilder result = new StringBuilder();
                    result.append("F:");
                    for (Tuple2<String, Integer> t : first) {
                        result.append(t.toString());
                    }
                    result.append(" S:");
                    for (Tuple2<String, Integer> t : second) {
                        result.append(t.toString());
                    }
                    out.collect(result.toString());
                }
            })
            .addSink(new SinkFunction<String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void invoke(String value) throws Exception {
                    testResults.add(value);
                }
            });

    env.execute("CoGroup Test");

    List<String> expectedResult = Arrays.asList(
            "F:(a,0)(a,1)(a,2) S:(a,0)(a,1)",
            "F:(b,3)(b,4)(b,5) S:(b,3)",
            "F:(a,6)(a,7)(a,8) S:",
            "F: S:(c,6)(c,7)(c,8)");

    // Window firing order is not guaranteed, so compare order-insensitively.
    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
Example usage of org.apache.flink.streaming.api.functions.source.SourceFunction in the Apache Flink project: class CoGroupJoinITCase, method testSelfJoin.
/**
 * Verifies that a stream joined with itself over tumbling event-time windows of 3 ms
 * produces the full cross product of same-key elements within each window, including
 * each element paired with itself.
 */
@Test
public void testSelfJoin() throws Exception {
    testResults = new ArrayList<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Parallelism 1 keeps element emission order deterministic.
    env.setParallelism(1);

    DataStream<Tuple3<String, String, Integer>> source1 = env.addSource(new SourceFunction<Tuple3<String, String, Integer>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Tuple3<String, String, Integer>> ctx) throws Exception {
            ctx.collect(Tuple3.of("a", "x", 0));
            ctx.collect(Tuple3.of("a", "y", 1));
            ctx.collect(Tuple3.of("a", "z", 2));
            ctx.collect(Tuple3.of("b", "u", 3));
            ctx.collect(Tuple3.of("b", "w", 5));
            ctx.collect(Tuple3.of("a", "i", 6));
            ctx.collect(Tuple3.of("a", "j", 7));
            ctx.collect(Tuple3.of("a", "k", 8));

            // source is finite, so it will have an implicit MAX
            // watermark when it finishes
        }

        @Override
        public void cancel() {
        }
    }).assignTimestampsAndWatermarks(new Tuple3TimestampExtractor());

    source1.join(source1)
            .where(new Tuple3KeyExtractor())
            .equalTo(new Tuple3KeyExtractor())
            .window(TumblingEventTimeWindows.of(Time.of(3, TimeUnit.MILLISECONDS)))
            .apply(new JoinFunction<Tuple3<String, String, Integer>, Tuple3<String, String, Integer>, String>() {
                // Flink functions are Serializable; declare an explicit id, consistent with source1.
                private static final long serialVersionUID = 1L;

                @Override
                public String join(Tuple3<String, String, Integer> first,
                        Tuple3<String, String, Integer> second) throws Exception {
                    return first + ":" + second;
                }
            })
            .addSink(new SinkFunction<String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void invoke(String value) throws Exception {
                    testResults.add(value);
                }
            });

    env.execute("Self-Join Test");

    // Expected: full pairwise product per key per window (windows [0,3), [3,6), [6,9)).
    List<String> expectedResult = Arrays.asList(
            "(a,x,0):(a,x,0)", "(a,x,0):(a,y,1)", "(a,x,0):(a,z,2)",
            "(a,y,1):(a,x,0)", "(a,y,1):(a,y,1)", "(a,y,1):(a,z,2)",
            "(a,z,2):(a,x,0)", "(a,z,2):(a,y,1)", "(a,z,2):(a,z,2)",
            "(b,u,3):(b,u,3)", "(b,u,3):(b,w,5)",
            "(b,w,5):(b,u,3)", "(b,w,5):(b,w,5)",
            "(a,i,6):(a,i,6)", "(a,i,6):(a,j,7)", "(a,i,6):(a,k,8)",
            "(a,j,7):(a,i,6)", "(a,j,7):(a,j,7)", "(a,j,7):(a,k,8)",
            "(a,k,8):(a,i,6)", "(a,k,8):(a,j,7)", "(a,k,8):(a,k,8)");

    // Output order is not guaranteed, so compare order-insensitively.
    Collections.sort(expectedResult);
    Collections.sort(testResults);

    Assert.assertEquals(expectedResult, testResults);
}
Example usage of org.apache.flink.streaming.api.functions.source.SourceFunction in the Apache Flink project: class CheckpointExceptionHandlerConfigurationTest, method doTestPropagationFromCheckpointConfig.
/**
 * Builds a minimal source-to-sink topology with checkpointing enabled and the given
 * fail-on-checkpoint-error setting applied to the {@code CheckpointConfig}.
 *
 * <p>NOTE(review): this helper only constructs the topology; presumably the calling test
 * inspects the generated stream graph / job graph to assert that the setting propagated —
 * confirm against the caller.
 *
 * @param failTaskOnCheckpointErrors whether tasks should fail on checkpointing errors
 */
public void doTestPropagationFromCheckpointConfig(boolean failTaskOnCheckpointErrors) {
    StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
    streamExecutionEnvironment.setParallelism(1);
    streamExecutionEnvironment.getCheckpointConfig().setCheckpointInterval(1000);
    streamExecutionEnvironment.getCheckpointConfig().setFailOnCheckpointingErrors(failTaskOnCheckpointErrors);

    // Dummy no-op source and discarding sink — the topology only exists so that a
    // job graph can be generated from it.
    streamExecutionEnvironment.addSource(new SourceFunction<Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) {
        }

        @Override
        public void cancel() {
        }
    }).addSink(new DiscardingSink<>());
}
Example usage of org.apache.flink.streaming.api.functions.source.SourceFunction in the Apache Flink project: class StreamExecutionEnvironmentTest, method testParallelismBounds.
/**
 * Verifies the valid ranges for operator parallelism and max parallelism
 * (1 .. 2^15 = 32768), and that an operator without an explicit max parallelism picks up
 * the environment-level setting when the job graph is generated, while an explicitly set
 * operator max parallelism is not overridden.
 */
@Test
public void testParallelismBounds() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    };

    SingleOutputStreamOperator<Object> operator = env.addSource(srcFun).flatMap(new FlatMapFunction<Integer, Object>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<Object> out) throws Exception {
        }
    });

    // default value for max parallelism
    Assert.assertEquals(-1, operator.getTransformation().getMaxParallelism());

    // bounds for parallelism 1
    try {
        operator.setParallelism(0);
        Assert.fail("setParallelism(0) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // parallelism must be >= 1
    }

    // bounds for parallelism 2
    operator.setParallelism(1);
    Assert.assertEquals(1, operator.getParallelism());

    // bounds for parallelism 3
    operator.setParallelism(1 << 15);
    Assert.assertEquals(1 << 15, operator.getParallelism());

    // default value after generating
    env.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(-1, operator.getTransformation().getMaxParallelism());

    // configured value after generating
    env.setMaxParallelism(42);
    env.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(42, operator.getTransformation().getMaxParallelism());

    // bounds configured parallelism 1
    try {
        env.setMaxParallelism(0);
        Assert.fail("env.setMaxParallelism(0) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // max parallelism must be >= 1
    }

    // bounds configured parallelism 2
    try {
        env.setMaxParallelism(1 + (1 << 15));
        Assert.fail("env.setMaxParallelism(2^15 + 1) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // max parallelism must be <= 2^15
    }

    // bounds for max parallelism 1
    try {
        operator.setMaxParallelism(0);
        Assert.fail("operator.setMaxParallelism(0) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // max parallelism must be >= 1
    }

    // bounds for max parallelism 2
    try {
        operator.setMaxParallelism(1 + (1 << 15));
        Assert.fail("operator.setMaxParallelism(2^15 + 1) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // max parallelism must be <= 2^15
    }

    // bounds for max parallelism 3
    operator.setMaxParallelism(1);
    Assert.assertEquals(1, operator.getTransformation().getMaxParallelism());

    // bounds for max parallelism 4
    operator.setMaxParallelism(1 << 15);
    Assert.assertEquals(1 << 15, operator.getTransformation().getMaxParallelism());

    // override config: an explicitly set operator max parallelism survives job-graph generation
    env.getStreamGraph(false).getJobGraph();
    Assert.assertEquals(1 << 15, operator.getTransformation().getMaxParallelism());
}
Example usage of org.apache.flink.streaming.api.functions.source.SourceFunction in the Apache Flink project: class StreamExecutionEnvironmentTest, method testSources.
/**
 * Checks which source function implementation backs each way of creating a source:
 * a user-supplied function for {@code addSource}, {@code StatefulSequenceSource} for
 * {@code generateSequence}, and {@code FromElementsFunction} for both
 * {@code fromElements} and {@code fromCollection}.
 */
@Test
public void testSources() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    };

    // addSource must hand back exactly the function instance that was passed in.
    DataStreamSource<Integer> src1 = env.addSource(srcFun);
    src1.addSink(new DiscardingSink<Integer>());
    assertEquals(srcFun, getFunctionFromDataSource(src1));

    // generateSequence is backed by a StatefulSequenceSource.
    DataStreamSource<Long> src2 = env.generateSequence(0, 2);
    assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

    // fromElements is backed by a FromElementsFunction.
    DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
    assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

    // ... and so is fromCollection.
    List<Long> list = Arrays.asList(0L, 1L, 2L);
    DataStreamSource<Long> src4 = env.fromCollection(list);
    assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}
End of SourceFunction usage examples (Aggregations).