use of org.apache.flink.util.OutputTag in project flink by splunk.
the class SideOutputITCase method testLegacyKeyedCoProcessFunctionSideOutputWithMultipleConsumers.
/**
 * Test keyed CoProcessFunction side output with multiple consumers.
 *
 * <p>Two streams built from the same {@code elements} collection are keyed by value, connected
 * and processed by a {@code CoProcessFunction}:
 *
 * <ul>
 *   <li>values below 4 arriving on the first input go to the main output and to {@code side1};
 *   <li>values of 4 or more arriving on the second input go to the main output and to
 *       {@code side2}.
 * </ul>
 *
 * <p>Each side output and the main output is consumed by its own sink. The assertions expect
 * {@code elements} to hold the integers 1 through 5.
 */
@Test
public void testLegacyKeyedCoProcessFunctionSideOutputWithMultipleConsumers() throws Exception {
    // Anonymous subclasses keep the generic type information of the tags.
    final OutputTag<String> sideOutputTag1 = new OutputTag<String>("side1") {
    };
    final OutputTag<String> sideOutputTag2 = new OutputTag<String>("side2") {
    };

    TestListResultSink<String> sideOutputResultSink1 = new TestListResultSink<>();
    TestListResultSink<String> sideOutputResultSink2 = new TestListResultSink<>();
    TestListResultSink<Integer> resultSink = new TestListResultSink<>();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(3);

    DataStream<Integer> ds1 = see.fromCollection(elements);
    DataStream<Integer> ds2 = see.fromCollection(elements);

    SingleOutputStreamOperator<Integer> passThroughStream = ds1.keyBy(i -> i).connect(ds2.keyBy(i -> i)).process(new CoProcessFunction<Integer, Integer, Integer>() {
        // Declared explicitly, matching the other anonymous functions in these tests.
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement1(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            // First input: only the small values are forwarded.
            if (value < 4) {
                out.collect(value);
                ctx.output(sideOutputTag1, "sideout1-" + String.valueOf(value));
            }
        }

        @Override
        public void processElement2(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            // Second input: only the remaining (large) values are forwarded.
            if (value >= 4) {
                out.collect(value);
                ctx.output(sideOutputTag2, "sideout2-" + String.valueOf(value));
            }
        }
    });

    passThroughStream.getSideOutput(sideOutputTag1).addSink(sideOutputResultSink1);
    passThroughStream.getSideOutput(sideOutputTag2).addSink(sideOutputResultSink2);
    passThroughStream.addSink(resultSink);

    see.execute();

    assertEquals(Arrays.asList("sideout1-1", "sideout1-2", "sideout1-3"), sideOutputResultSink1.getSortedResult());
    assertEquals(Arrays.asList("sideout2-4", "sideout2-5"), sideOutputResultSink2.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), resultSink.getSortedResult());
}
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class SideOutputITCase method testSideOutputWithMultipleConsumersWithObjectReuse.
/**
 * Runs a pass-through {@code ProcessFunction} with object reuse enabled. Every input value is
 * emitted to the main output and, prefixed with {@code "sideout-"}, to a single side output.
 *
 * <p>The side output is requested twice and each request is wired to its own sink; both sinks
 * are expected to receive every side-output record.
 */
@Test
public void testSideOutputWithMultipleConsumersWithObjectReuse() throws Exception {
    final OutputTag<String> sideTag = new OutputTag<String>("side") {
    };

    TestListResultSink<String> firstSideSink = new TestListResultSink<>();
    TestListResultSink<String> secondSideSink = new TestListResultSink<>();
    TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(3);
    execEnv.getConfig().enableObjectReuse();

    DataStream<Integer> input = execEnv.fromCollection(elements);

    SingleOutputStreamOperator<Integer> passThrough = input.process(new ProcessFunction<Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            // Main output first, then the tagged copy.
            out.collect(value);
            ctx.output(sideTag, "sideout-" + value);
        }
    });

    // Two independent consumers of the same side output, plus one for the main output.
    passThrough.getSideOutput(sideTag).addSink(firstSideSink);
    passThrough.getSideOutput(sideTag).addSink(secondSideSink);
    passThrough.addSink(mainSink);

    execEnv.execute();

    assertEquals(
            Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
            firstSideSink.getSortedResult());
    assertEquals(
            Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
            secondSideSink.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), mainSink.getSortedResult());
}
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class SideOutputITCase method testProcessFunctionSideOutputWithWrongTag.
/**
 * Test ProcessFunction side outputs with wrong {@code OutputTag}.
 *
 * <p>The function emits to the {@code "other-side"} tag while the sink is attached to the
 * {@code "side"} tag, so the sink is expected to stay empty.
 */
@Test
public void testProcessFunctionSideOutputWithWrongTag() throws Exception {
    final OutputTag<String> consumedTag = new OutputTag<String>("side") {
    };
    final OutputTag<String> emittedTag = new OutputTag<String>("other-side") {
    };

    TestListResultSink<String> sideSink = new TestListResultSink<>();

    StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnv.setParallelism(3);

    DataStream<Integer> input = execEnv.fromCollection(elements);

    SingleOutputStreamOperator<Integer> processed = input.process(new ProcessFunction<Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            // Deliberately emits to the tag that nobody consumes.
            ctx.output(emittedTag, "sideout-" + value);
        }
    });

    // The consumer listens on a different tag than the one written to above.
    processed.getSideOutput(consumedTag).addSink(sideSink);

    execEnv.execute();

    assertEquals(Arrays.asList(), sideSink.getSortedResult());
}
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class SideOutputITCase method testWatermarkForwarding.
/**
 * Verify that watermarks are forwarded to all side outputs.
 *
 * <p>The source emits five timestamped records interleaved with the watermarks 0 and 2. A
 * pass-through {@code ProcessFunction} forwards every record to the main output and to the
 * {@code "side"} output; nothing is ever emitted to the {@code "other-side"} output. Each of
 * the three outputs is followed by a {@code WatermarkReifier} that turns records into
 * {@code "E:<value>"} strings and watermarks into {@code "WM:<timestamp>"} strings so that both
 * can be asserted on.
 *
 * <p>The assertions expect every watermark three times (presumably once per parallel subtask,
 * given the parallelism of 3) and a final {@code Long.MAX_VALUE} watermark (presumably emitted
 * when the source finishes).
 */
@Test
public void testWatermarkForwarding() throws Exception {
    final OutputTag<String> sideOutputTag1 = new OutputTag<String>("side") {
    };
    final OutputTag<String> sideOutputTag2 = new OutputTag<String>("other-side") {
    };

    TestListResultSink<String> sideOutputResultSink1 = new TestListResultSink<>();
    TestListResultSink<String> sideOutputResultSink2 = new TestListResultSink<>();
    TestListResultSink<String> resultSink = new TestListResultSink<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataStream<Integer> dataStream = env.addSource(new SourceFunction<Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            ctx.collectWithTimestamp(1, 0);
            ctx.emitWatermark(new Watermark(0));
            ctx.collectWithTimestamp(2, 1);
            ctx.collectWithTimestamp(5, 2);
            ctx.emitWatermark(new Watermark(2));
            ctx.collectWithTimestamp(3, 3);
            ctx.collectWithTimestamp(4, 4);
        }

        @Override
        public void cancel() {
            // Nothing to cancel: run() returns on its own after emitting a fixed sequence.
        }
    });

    SingleOutputStreamOperator<Integer> passThroughStream = dataStream.process(new ProcessFunction<Integer, Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            // Only the first tag receives records; the second one must still see watermarks.
            ctx.output(sideOutputTag1, "sideout-" + String.valueOf(value));
        }
    });

    // Makes watermarks observable by emitting them as ordinary string records.
    class WatermarkReifier extends AbstractStreamOperator<String> implements OneInputStreamOperator<String, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(StreamRecord<String> element) throws Exception {
            output.collect(new StreamRecord<>("E:" + element.getValue()));
        }

        @Override
        public void processWatermark(Watermark mark) throws Exception {
            super.processWatermark(mark);
            output.collect(new StreamRecord<>("WM:" + mark.getTimestamp()));
        }
    }

    passThroughStream.getSideOutput(sideOutputTag1).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(sideOutputResultSink1);
    passThroughStream.getSideOutput(sideOutputTag2).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(sideOutputResultSink2);
    passThroughStream.map(new MapFunction<Integer, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return value.toString();
        }
    }).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(resultSink);

    env.execute();

    // Side output that received records: records plus all watermarks.
    assertEquals(Arrays.asList("E:sideout-1", "E:sideout-2", "E:sideout-3", "E:sideout-4", "E:sideout-5", "WM:0", "WM:0", "WM:0", "WM:2", "WM:2", "WM:2", "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE), sideOutputResultSink1.getSortedResult());

    // Side output that never received a record: it must still see every watermark. This
    // assertion previously re-checked sideOutputResultSink1, leaving the second sink untested.
    assertEquals(Arrays.asList("WM:0", "WM:0", "WM:0", "WM:2", "WM:2", "WM:2", "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE), sideOutputResultSink2.getSortedResult());

    // Main output: records plus all watermarks.
    assertEquals(Arrays.asList("E:1", "E:2", "E:3", "E:4", "E:5", "WM:0", "WM:0", "WM:0", "WM:2", "WM:2", "WM:2", "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE, "WM:" + Long.MAX_VALUE), resultSink.getSortedResult());
}
use of org.apache.flink.util.OutputTag in project flink by splunk.
the class IterateITCase method testmultipleHeadsTailsSimple.
/**
 * Builds an iteration with four heads and three feedback inputs (one of them a side output)
 * and checks the structure of the generated stream graph and job graph.
 *
 * <p>Verified properties: there is exactly one iteration source/sink pair, the source has four
 * out edges and the sink three in edges, both have the same parallelism, selected edges use the
 * expected partitioner, and the iteration source and sink job vertices share a co-location
 * group.
 */
@Test
public void testmultipleHeadsTailsSimple() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(noOpIntMap).name("ParallelizeMapShuffle");
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("ParallelizeMapRebalance");

    IterativeStream<Integer> iter1 = source1.union(source2).iterate();

    DataStream<Integer> head1 = iter1.map(noOpIntMap).name("IterRebalanceMap").setParallelism(parallelism / 2);
    DataStream<Integer> head2 = iter1.map(noOpIntMap).name("IterForwardMap");
    // Two further heads that end in sinks; they only need to exist in the graph, so the
    // returned sink handles are not kept.
    iter1.map(noOpIntMap).setParallelism(parallelism / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    iter1.map(noOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());

    OutputTag<Integer> even = new OutputTag<Integer>("even") {
    };
    OutputTag<Integer> odd = new OutputTag<Integer>("odd") {
    };

    // Splits 1..5 into two side outputs; nothing is written to the main output.
    SingleOutputStreamOperator<Object> source3 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("EvenOddSourceMap").process(new ProcessFunction<Integer, Object>() {
        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value % 2 == 0) {
                ctx.output(even, value);
            } else {
                ctx.output(odd, value);
            }
        }
    });

    // Feedback consists of the "even" side output and the two repartitioned heads.
    iter1.closeWith(source3.getSideOutput(even).union(head1.rebalance().map(noOpIntMap).broadcast(), head2.shuffle()));

    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();

    assertEquals(1, graph.getIterationSourceSinkPairs().size());

    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;

    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());

    for (StreamEdge edge : itSource.getOutEdges()) {
        if (graph.getTargetVertex(edge).getOperatorName().equals("IterRebalanceMap")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else if (graph.getTargetVertex(edge).getOperatorName().equals("IterForwardMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }

    // NOTE(review): no operator in this method is named "ParallelizeMapForward" (source2 is
    // named "ParallelizeMapRebalance"), so that branch looks unreachable here - confirm which
    // name/partitioner was intended before changing the check.
    for (StreamEdge edge : itSink.getInEdges()) {
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapShuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapForward")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("EvenOddSourceMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }

    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }

    // Fail with an assertion rather than a NullPointerException when a vertex is missing.
    assertTrue(itSource1 != null);
    assertTrue(itSink1 != null);

    assertTrue(itSource1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Aggregations