use of org.apache.flink.util.OutputTag in project BigDataSourceCode by baolibin.
the class OperatorChain method createStreamOutput.
/**
 * Wraps the given record writer in a {@link RecordWriterOutput} for one outgoing edge.
 *
 * <p>The serializer is looked up in the upstream config: the side-output serializer when the
 * edge carries an {@link OutputTag}, the main-output serializer otherwise.
 *
 * @param recordWriter writer the resulting output emits to
 * @param edge edge whose output tag (possibly {@code null}) selects the serializer
 * @param upStreamConfig config the serializer is read from
 * @param taskEnvironment supplies the user class loader used for the serializer lookup
 * @return the output bound to {@code recordWriter}, the chosen serializer and the edge's tag
 */
private RecordWriterOutput<OUT> createStreamOutput(RecordWriter<SerializationDelegate<StreamRecord<OUT>>> recordWriter, StreamEdge edge, StreamConfig upStreamConfig, Environment taskEnvironment) {
    // A null tag means this edge is the main output rather than a side output.
    final OutputTag tag = edge.getOutputTag();
    final TypeSerializer serializer = (tag == null)
            ? upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader())
            : upStreamConfig.getTypeSerializerSideOut(tag, taskEnvironment.getUserClassLoader());
    return new RecordWriterOutput<>(recordWriter, serializer, tag, this);
}
use of org.apache.flink.util.OutputTag in project flink-ml by apache.
the class IterationConstructionTest method testReplayedIteration.
/**
 * Builds a bounded iteration with one variable stream and one replayed constant stream, using
 * the PER_ROUND operator life cycle, and verifies the resulting job graph: vertex names,
 * vertex parallelisms, and the co-location groups of selected vertices.
 */
@Test
public void testReplayedIteration() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Variable input, parallelism 2.
DataStream<Integer> variableSource = env.addSource(new DraftExecutionEnvironment.EmptySource<Integer>() {
}).setParallelism(2).name("Variable");
// Constant input, parallelism 3; it is passed to the iteration wrapped in replay(...).
DataStream<Integer> constantSource = env.addSource(new DraftExecutionEnvironment.EmptySource<Integer>() {
}).setParallelism(3).name("Constant");
DataStreamList result = Iterations.iterateBoundedStreamsUntilTermination(DataStreamList.of(variableSource), ReplayableDataStreamList.replay(constantSource), IterationConfig.newBuilder().setOperatorLifeCycle(IterationConfig.OperatorLifeCycle.PER_ROUND).build(), (variableStreams, dataStreams) -> {
// Iteration body: a no-op co-process function joining the variable and constant streams.
SingleOutputStreamOperator<Integer> processor = variableStreams.<Integer>get(0).connect(dataStreams.<Integer>get(0)).process(new CoProcessFunction<Integer, Integer, Integer>() {
@Override
public void processElement1(Integer value, Context ctx, Collector<Integer> out) throws Exception {
}
@Override
public void processElement2(Integer value, Context ctx, Collector<Integer> out) throws Exception {
}
}).name("Processor").setParallelism(4);
// Arguments in order: feedback stream, output stream (the "output" side output), termination criteria stream.
return new IterationBodyResult(DataStreamList.of(processor.map(x -> x).name("Feedback").setParallelism(2)), DataStreamList.of(processor.getSideOutput(new OutputTag<Integer>("output") {
})), processor.map(x -> x).name("Termination").setParallelism(5));
});
result.get(0).addSink(new DiscardingSink<>()).name("Sink").setParallelism(4);
// Expected vertex names in topological order. Each /* n */ marker sits at the end of the
// preceding line and gives the list index of the name on the line that follows it.
List<String> expectedVertexNames = Arrays.asList(/* 0 */
"Source: Variable -> input-Variable", /* 1 */
"Source: Constant -> input-Constant", /* 2 */
"Source: Termination -> input-Termination", /* 3 */
"head-Variable", /* 4 */
"Replayer-Constant", /* 5 */
"Processor", /* 6 */
"Feedback", /* 7 */
"tail-Feedback -> filter-tail", /* 8 */
"Termination", /* 9 */
"head-Termination", /* 10 */
"criteria-merge", /* 11 */
"tail-criteria-merge -> filter-tail", /* 12 */
"tail-map-SideOutput", /* 13 */
"output-SideOutput -> Sink: Sink");
// Expected parallelism of each vertex, index-aligned with expectedVertexNames.
List<Integer> expectedParallelisms = Arrays.asList(2, 3, 5, 2, 3, 4, 2, 2, 5, 5, 5, 5, 1, 4);
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
assertEquals(expectedVertexNames, vertices.stream().map(JobVertex::getName).collect(Collectors.toList()));
assertEquals(expectedParallelisms, vertices.stream().map(JobVertex::getParallelism).collect(Collectors.toList()));
// Both head vertices (indices 3 and 9) must have a co-location group ...
assertNotNull(vertices.get(3).getCoLocationGroup());
assertNotNull(vertices.get(9).getCoLocationGroup());
// ... and each head must share its group with the matching tail vertex (indices 7 and 11).
assertSame(vertices.get(3).getCoLocationGroup(), vertices.get(7).getCoLocationGroup());
assertSame(vertices.get(9).getCoLocationGroup(), vertices.get(11).getCoLocationGroup());
}
use of org.apache.flink.util.OutputTag in project flink-ml by apache.
the class BroadcastOutputTest method testBroadcastWithMixedOutputWithSideOutput.
/**
 * Runs a job in which the output of {@code TestBroadcastOperator} is consumed through its main
 * output and through three side outputs ("0", "1", "2"), each attached to its own
 * {@code CheckResultSink}; the sinks for "1" and "2" use parallelism 2 and 4 respectively.
 */
@Test
public void testBroadcastWithMixedOutputWithSideOutput() throws Exception {
    StreamExecutionEnvironment env = createTestEnvironment();
    SingleOutputStreamOperator<Integer> broadcast =
            env.addSource(new TestSource())
                    .transform("broadcast", TypeInformation.of(Integer.class), new TestBroadcastOperator());

    // Main output.
    broadcast.addSink(new CheckResultSink());

    // Side output "0": sink parallelism left untouched.
    OutputTag<Integer> firstTag = new OutputTag<Integer>("0") {
    };
    broadcast.getSideOutput(firstTag).addSink(new CheckResultSink());

    // Side output "1": sink parallelism 2.
    OutputTag<Integer> secondTag = new OutputTag<Integer>("1") {
    };
    broadcast.getSideOutput(secondTag).addSink(new CheckResultSink()).setParallelism(2);

    // Side output "2": sink parallelism 4.
    OutputTag<Integer> thirdTag = new OutputTag<Integer>("2") {
    };
    broadcast.getSideOutput(thirdTag).addSink(new CheckResultSink()).setParallelism(4);

    env.execute();
}
use of org.apache.flink.util.OutputTag in project flink-ml by apache.
the class BoundedAllRoundStreamIterationITCase method createVariableOnlyJobGraph.
/**
 * Builds the job graph of a bounded iteration with one variable stream and one non-replayed
 * constant stream, whose "output" side output is collected into {@code result}.
 *
 * @param numSources parallelism of both sources and of the final feedback map
 * @param numRecordsPerSource forwarded to {@code SequenceSource}
 * @param holdSource forwarded to {@code SequenceSource}
 * @param period forwarded to {@code SequenceSource}
 * @param sync forwarded to {@code TwoInputReduceAllRoundProcessFunction}
 * @param maxRound forwarded to {@code TwoInputReduceAllRoundProcessFunction}
 * @param terminationCriteriaRound when {@code null} no termination criteria stream is used;
 *     otherwise forwarded to {@code TerminateOnMaxIter}
 * @param terminationCriteriaFollowsConstantsStreams whether the termination criteria is derived
 *     from the constant stream ({@code true}) or from the reducer ({@code false})
 * @param result shared queue handed to the {@code CollectSink}
 * @return the job graph generated from the environment's stream graph
 */
private static JobGraph createVariableOnlyJobGraph(int numSources, int numRecordsPerSource, boolean holdSource, int period, boolean sync, int maxRound, @Nullable Integer terminationCriteriaRound, boolean terminationCriteriaFollowsConstantsStreams, SharedReference<BlockingQueue<OutputRecord<Integer>>> result) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<EpochRecord> variableSource =
            env.addSource(new DraftExecutionEnvironment.EmptySource<EpochRecord>() {
                    })
                    .setParallelism(numSources)
                    .name("Variable");
    DataStream<EpochRecord> constSource =
            env.addSource(new SequenceSource(numRecordsPerSource, holdSource, period))
                    .setParallelism(numSources)
                    .name("Constant");

    DataStreamList outputs = Iterations.iterateBoundedStreamsUntilTermination(
            DataStreamList.of(variableSource),
            ReplayableDataStreamList.notReplay(constSource),
            IterationConfig.newBuilder().build(),
            (variableStreams, dataStreams) -> {
                SingleOutputStreamOperator<EpochRecord> reducer =
                        variableStreams.<EpochRecord>get(0)
                                .connect(dataStreams.<EpochRecord>get(0))
                                .process(new TwoInputReduceAllRoundProcessFunction(sync, maxRound));

                // The three streams are created in the same order as the constructor
                // arguments below: feedback, then output, then termination criteria.
                DataStream<?> feedback =
                        reducer.partitionCustom((k, numPartitions) -> k % numPartitions, EpochRecord::getValue)
                                .map(x -> x)
                                .keyBy(EpochRecord::getValue)
                                .process(new StatefulProcessFunction<EpochRecord>() {
                                })
                                .setParallelism(4)
                                .map(new IncrementEpochMap())
                                .setParallelism(numSources);

                DataStream<OutputRecord<Integer>> iterationOutput =
                        reducer.getSideOutput(new OutputTag<OutputRecord<Integer>>("output") {
                        });

                DataStream<?> terminationCriteria = null;
                if (terminationCriteriaRound != null) {
                    DataStream<EpochRecord> criteriaInput =
                            terminationCriteriaFollowsConstantsStreams
                                    ? dataStreams.<EpochRecord>get(0)
                                    : reducer;
                    terminationCriteria = criteriaInput.flatMap(new TerminateOnMaxIter(terminationCriteriaRound));
                }

                return new IterationBodyResult(
                        DataStreamList.of(feedback),
                        DataStreamList.of(iterationOutput),
                        terminationCriteria);
            });

    outputs.<OutputRecord<Integer>>get(0).addSink(new CollectSink(result));
    return env.getStreamGraph().getJobGraph();
}
use of org.apache.flink.util.OutputTag in project flink by apache.
the class OperatorChain method createStreamOutput.
/**
 * Creates the {@link RecordWriterOutput} that emits records along the given edge.
 *
 * <p>The serializer is read from the upstream config: the side-output serializer when the edge
 * carries an {@link OutputTag}, the main-output serializer otherwise. The edge's partitioner is
 * configured with the writer's number of target key groups when it is a
 * {@link ConfigurableStreamPartitioner} and that number is positive.
 *
 * @param edge edge providing the output tag and the partitioner
 * @param upStreamConfig config providing the serializer and the buffer timeout
 * @param outputIndex index of the result partition writer to fetch from the environment
 * @param taskEnvironment supplies the user class loader, the writer and the metric group
 * @param taskName task name, used only in the debug log message
 * @param <T> record type written by the returned output
 * @return the output wrapping a newly created {@link StreamRecordWriter}
 */
private <T> RecordWriterOutput<T> createStreamOutput(StreamEdge edge, StreamConfig upStreamConfig, int outputIndex, Environment taskEnvironment, String taskName) {
    // OutputTag of the edge; null if this edge is not a side output.
    OutputTag sideOutputTag = edge.getOutputTag();
    TypeSerializer outSerializer;
    if (sideOutputTag != null) {
        // side output
        outSerializer = upStreamConfig.getTypeSerializerSideOut(sideOutputTag, taskEnvironment.getUserClassLoader());
    } else {
        // main output
        outSerializer = upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
    }

    @SuppressWarnings("unchecked")
    StreamPartitioner<T> outputPartitioner = (StreamPartitioner<T>) edge.getPartitioner();

    // The third placeholder was missing before, so the task name was silently dropped.
    LOG.debug("Using partitioner {} for output {} of task {}", outputPartitioner, outputIndex, taskName);

    ResultPartitionWriter bufferWriter = taskEnvironment.getWriter(outputIndex);

    // we initialize the partitioner here with the number of key groups (aka max. parallelism)
    if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
        int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
        if (0 < numKeyGroups) {
            ((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
        }
    }

    StreamRecordWriter<SerializationDelegate<StreamRecord<T>>> output = new StreamRecordWriter<>(bufferWriter, outputPartitioner, upStreamConfig.getBufferTimeout());
    output.setMetricGroup(taskEnvironment.getMetricGroup().getIOMetricGroup());

    return new RecordWriterOutput<>(output, outSerializer, sideOutputTag, this);
}
Aggregations