Search in sources:

Example 51 with OutputTag

use of org.apache.flink.util.OutputTag in project BigDataSourceCode by baolibin.

In the class OperatorChain, the method createStreamOutput:

/**
 * Creates the {@link RecordWriterOutput} for the given output edge.
 *
 * <p>If the edge carries an {@link OutputTag} it is a side output and the
 * side-output serializer from the upstream config is used; otherwise the
 * main-output serializer is used.
 *
 * @param recordWriter    writer that the created output emits serialized records to
 * @param edge            stream edge describing this output
 * @param upStreamConfig  configuration of the upstream operator, providing the serializers
 * @param taskEnvironment environment supplying the user-code class loader
 * @return a record-writer output wrapping the writer, serializer and (possibly null) side-output tag
 */
private RecordWriterOutput<OUT> createStreamOutput(RecordWriter<SerializationDelegate<StreamRecord<OUT>>> recordWriter, StreamEdge edge, StreamConfig upStreamConfig, Environment taskEnvironment) {
    // OutputTag is null if this edge is not a side output.
    OutputTag sideOutputTag = edge.getOutputTag();
    // Pick the side-output serializer when a tag is present, else the main-output serializer.
    TypeSerializer outSerializer = (sideOutputTag != null)
        ? upStreamConfig.getTypeSerializerSideOut(sideOutputTag, taskEnvironment.getUserClassLoader())
        : upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
    return new RecordWriterOutput<>(recordWriter, outSerializer, sideOutputTag, this);
}
Also used : TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) OutputTag(org.apache.flink.util.OutputTag) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput)

Example 52 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-ml by apache.

In the class IterationConstructionTest, the method testReplayedIteration:

@Test
public void testReplayedIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Two empty bounded sources: one feeds the iteration variable, one the replayed constant.
    DataStream<Integer> variables = env.addSource(new DraftExecutionEnvironment.EmptySource<Integer>() {
    }).setParallelism(2).name("Variable");
    DataStream<Integer> constants = env.addSource(new DraftExecutionEnvironment.EmptySource<Integer>() {
    }).setParallelism(3).name("Constant");

    DataStreamList iterationResult = Iterations.iterateBoundedStreamsUntilTermination(
            DataStreamList.of(variables),
            ReplayableDataStreamList.replay(constants),
            IterationConfig.newBuilder()
                    .setOperatorLifeCycle(IterationConfig.OperatorLifeCycle.PER_ROUND)
                    .build(),
            (variableStreams, dataStreams) -> {
                // No-op co-processing: this test only checks the translated topology.
                SingleOutputStreamOperator<Integer> body = variableStreams.<Integer>get(0)
                        .connect(dataStreams.<Integer>get(0))
                        .process(new CoProcessFunction<Integer, Integer, Integer>() {

                            @Override
                            public void processElement1(Integer value, Context ctx, Collector<Integer> out) throws Exception {
                            }

                            @Override
                            public void processElement2(Integer value, Context ctx, Collector<Integer> out) throws Exception {
                            }
                        })
                        .name("Processor")
                        .setParallelism(4);
                return new IterationBodyResult(
                        DataStreamList.of(body.map(x -> x).name("Feedback").setParallelism(2)),
                        DataStreamList.of(body.getSideOutput(new OutputTag<Integer>("output") {
                        })),
                        body.map(x -> x).name("Termination").setParallelism(5));
            });
    iterationResult.get(0).addSink(new DiscardingSink<>()).name("Sink").setParallelism(4);

    // Expected vertex names in topological order (index noted per entry).
    List<String> expectedNames = Arrays.asList(
            /* 0 */ "Source: Variable -> input-Variable",
            /* 1 */ "Source: Constant -> input-Constant",
            /* 2 */ "Source: Termination -> input-Termination",
            /* 3 */ "head-Variable",
            /* 4 */ "Replayer-Constant",
            /* 5 */ "Processor",
            /* 6 */ "Feedback",
            /* 7 */ "tail-Feedback -> filter-tail",
            /* 8 */ "Termination",
            /* 9 */ "head-Termination",
            /* 10 */ "criteria-merge",
            /* 11 */ "tail-criteria-merge -> filter-tail",
            /* 12 */ "tail-map-SideOutput",
            /* 13 */ "output-SideOutput -> Sink: Sink");
    List<Integer> expectedDops = Arrays.asList(2, 3, 5, 2, 3, 4, 2, 2, 5, 5, 5, 5, 1, 4);

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertEquals(expectedNames, sortedVertices.stream().map(JobVertex::getName).collect(Collectors.toList()));
    assertEquals(expectedDops, sortedVertices.stream().map(JobVertex::getParallelism).collect(Collectors.toList()));

    // Head and tail vertices of the same feedback edge share a co-location group.
    assertNotNull(sortedVertices.get(3).getCoLocationGroup());
    assertNotNull(sortedVertices.get(9).getCoLocationGroup());
    assertSame(sortedVertices.get(3).getCoLocationGroup(), sortedVertices.get(7).getCoLocationGroup());
    assertSame(sortedVertices.get(9).getCoLocationGroup(), sortedVertices.get(11).getCoLocationGroup());
}
Also used : Arrays(java.util.Arrays) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) CoProcessFunction(org.apache.flink.streaming.api.functions.co.CoProcessFunction) OutputTag(org.apache.flink.util.OutputTag) Test(org.junit.Test) Collectors(java.util.stream.Collectors) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Assert.assertSame(org.junit.Assert.assertSame) List(java.util.List) DraftExecutionEnvironment(org.apache.flink.iteration.compile.DraftExecutionEnvironment) Collector(org.apache.flink.util.Collector) TestLogger(org.apache.flink.util.TestLogger) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) DraftExecutionEnvironment(org.apache.flink.iteration.compile.DraftExecutionEnvironment) Test(org.junit.Test)

Example 53 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-ml by apache.

In the class BroadcastOutputTest, the method testBroadcastWithMixedOutputWithSideOutput:

@Test
public void testBroadcastWithMixedOutputWithSideOutput() throws Exception {
    StreamExecutionEnvironment env = createTestEnvironment();

    // Operator under test; presumably emits to the main output and to side outputs "0", "1", "2".
    SingleOutputStreamOperator<Integer> broadcastStream = env.addSource(new TestSource())
            .transform("broadcast", TypeInformation.of(Integer.class), new TestBroadcastOperator());

    // Verify the main output with the default sink parallelism.
    broadcastStream.addSink(new CheckResultSink());

    // Verify each side output, with differing sink parallelisms for "1" and "2".
    broadcastStream.getSideOutput(new OutputTag<Integer>("0") {
    }).addSink(new CheckResultSink());
    broadcastStream.getSideOutput(new OutputTag<Integer>("1") {
    }).addSink(new CheckResultSink()).setParallelism(2);
    broadcastStream.getSideOutput(new OutputTag<Integer>("2") {
    }).addSink(new CheckResultSink()).setParallelism(4);

    env.execute();
}
Also used : OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 54 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-ml by apache.

In the class BoundedAllRoundStreamIterationITCase, the method createVariableOnlyJobGraph:

/**
 * Builds a job graph for a bounded iteration whose feedback loop carries only the variable
 * stream; the constant stream is passed in as a non-replayed data stream.
 *
 * @param numSources parallelism of both the variable and the constant source
 * @param numRecordsPerSource records emitted per constant-source subtask
 * @param holdSource passed to {@code SequenceSource}; presumably keeps the source alive after emitting — confirm against SequenceSource
 * @param period passed to {@code SequenceSource}; presumably an emission delay — confirm against SequenceSource
 * @param sync whether the two-input reduce function operates synchronously (see TwoInputReduceAllRoundProcessFunction)
 * @param maxRound maximum iteration round handed to the reduce function
 * @param terminationCriteriaRound if non-null, terminate via {@code TerminateOnMaxIter} after this many rounds; if null, no termination-criteria stream is used
 * @param terminationCriteriaFollowsConstantsStreams if true the termination criteria is derived from the constant stream, otherwise from the reducer output
 * @param result shared queue that the sink writes the iteration's side-output records into
 * @return the compiled {@link JobGraph}
 */
private static JobGraph createVariableOnlyJobGraph(int numSources, int numRecordsPerSource, boolean holdSource, int period, boolean sync, int maxRound, @Nullable Integer terminationCriteriaRound, boolean terminationCriteriaFollowsConstantsStreams, SharedReference<BlockingQueue<OutputRecord<Integer>>> result) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    // Variable stream starts empty; all its records come from the feedback edge.
    DataStream<EpochRecord> variableSource = env.addSource(new DraftExecutionEnvironment.EmptySource<EpochRecord>() {
    }).setParallelism(numSources).name("Variable");
    DataStream<EpochRecord> constSource = env.addSource(new SequenceSource(numRecordsPerSource, holdSource, period)).setParallelism(numSources).name("Constant");
    // Iteration body: reduce both inputs, then feed the reduced stream back through a custom
    // partition -> keyed stateful process (parallelism 4) -> epoch increment (parallelism numSources).
    DataStreamList outputs = Iterations.iterateBoundedStreamsUntilTermination(DataStreamList.of(variableSource), ReplayableDataStreamList.notReplay(constSource), IterationConfig.newBuilder().build(), (variableStreams, dataStreams) -> {
        SingleOutputStreamOperator<EpochRecord> reducer = variableStreams.<EpochRecord>get(0).connect(dataStreams.<EpochRecord>get(0)).process(new TwoInputReduceAllRoundProcessFunction(sync, maxRound));
        // Outputs: feedback = transformed reducer stream; final output = reducer's "output" side
        // output; termination (optional) = TerminateOnMaxIter over either the constant stream or
        // the reducer, depending on terminationCriteriaFollowsConstantsStreams.
        return new IterationBodyResult(DataStreamList.of(reducer.partitionCustom((k, numPartitions) -> k % numPartitions, EpochRecord::getValue).map(x -> x).keyBy(EpochRecord::getValue).process(new StatefulProcessFunction<EpochRecord>() {
        }).setParallelism(4).map(new IncrementEpochMap()).setParallelism(numSources)), DataStreamList.of(reducer.getSideOutput(new OutputTag<OutputRecord<Integer>>("output") {
        })), terminationCriteriaRound == null ? null : (terminationCriteriaFollowsConstantsStreams ? dataStreams.<EpochRecord>get(0) : reducer).flatMap(new TerminateOnMaxIter(terminationCriteriaRound)));
    });
    // Collect the iteration's only output stream into the shared result queue.
    outputs.<OutputRecord<Integer>>get(0).addSink(new CollectSink(result));
    return env.getStreamGraph().getJobGraph();
}
Also used : UnboundedStreamIterationITCase.verifyResult(org.apache.flink.test.iteration.UnboundedStreamIterationITCase.verifyResult) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SharedObjects(org.apache.flink.testutils.junit.SharedObjects) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SequenceSource(org.apache.flink.test.iteration.operators.SequenceSource) DataStreamList(org.apache.flink.iteration.DataStreamList) EpochRecord(org.apache.flink.test.iteration.operators.EpochRecord) After(org.junit.After) Map(java.util.Map) CollectSink(org.apache.flink.test.iteration.operators.CollectSink) TestLogger(org.apache.flink.util.TestLogger) ReplayableDataStreamList(org.apache.flink.iteration.ReplayableDataStreamList) MiniCluster(org.apache.flink.runtime.minicluster.MiniCluster) TwoInputReduceAllRoundProcessFunction(org.apache.flink.test.iteration.operators.TwoInputReduceAllRoundProcessFunction) SharedReference(org.apache.flink.testutils.junit.SharedReference) Nullable(javax.annotation.Nullable) UnboundedStreamIterationITCase.computeRoundStat(org.apache.flink.test.iteration.UnboundedStreamIterationITCase.computeRoundStat) Before(org.junit.Before) UnboundedStreamIterationITCase.createMiniClusterConfiguration(org.apache.flink.test.iteration.UnboundedStreamIterationITCase.createMiniClusterConfiguration) Iterations(org.apache.flink.iteration.Iterations) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) IterationBodyResult(org.apache.flink.iteration.IterationBodyResult) OutputTag(org.apache.flink.util.OutputTag) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) IncrementEpochMap(org.apache.flink.test.iteration.operators.IncrementEpochMap) TerminateOnMaxIter(org.apache.flink.ml.common.iteration.TerminateOnMaxIter) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) DataStream(org.apache.flink.streaming.api.datastream.DataStream) IterationBody(org.apache.flink.iteration.IterationBody) DraftExecutionEnvironment(org.apache.flink.iteration.compile.DraftExecutionEnvironment) Rule(org.junit.Rule) OutputRecord(org.apache.flink.test.iteration.operators.OutputRecord) IterationConfig(org.apache.flink.iteration.IterationConfig) StatefulProcessFunction(org.apache.flink.test.iteration.operators.StatefulProcessFunction) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CollectSink(org.apache.flink.test.iteration.operators.CollectSink) EpochRecord(org.apache.flink.test.iteration.operators.EpochRecord) DataStreamList(org.apache.flink.iteration.DataStreamList) ReplayableDataStreamList(org.apache.flink.iteration.ReplayableDataStreamList) OutputRecord(org.apache.flink.test.iteration.operators.OutputRecord) StatefulProcessFunction(org.apache.flink.test.iteration.operators.StatefulProcessFunction) TerminateOnMaxIter(org.apache.flink.ml.common.iteration.TerminateOnMaxIter) TwoInputReduceAllRoundProcessFunction(org.apache.flink.test.iteration.operators.TwoInputReduceAllRoundProcessFunction) SequenceSource(org.apache.flink.test.iteration.operators.SequenceSource) IncrementEpochMap(org.apache.flink.test.iteration.operators.IncrementEpochMap) IterationBodyResult(org.apache.flink.iteration.IterationBodyResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) DraftExecutionEnvironment(org.apache.flink.iteration.compile.DraftExecutionEnvironment)

Example 55 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

In the class OperatorChain, the method createStreamOutput:

/**
 * Creates the {@link RecordWriterOutput} for the given output edge.
 *
 * <p>If the edge carries an {@link OutputTag} it is a side output and the side-output
 * serializer is used; otherwise the main-output serializer is used. The partitioner is
 * taken from the edge and, if it is key-group aware, configured with the number of
 * target key groups (i.e. the max parallelism) of the result partition.
 *
 * @param edge            stream edge describing this output
 * @param upStreamConfig  configuration of the upstream operator, providing the serializers and buffer timeout
 * @param outputIndex     index of this output in the task's result-partition writers
 * @param taskEnvironment environment supplying the class loader, writers and metric group
 * @param taskName        name of the owning task, used for logging
 * @return a record-writer output wrapping the created writer, serializer and (possibly null) side-output tag
 */
private <T> RecordWriterOutput<T> createStreamOutput(StreamEdge edge, StreamConfig upStreamConfig, int outputIndex, Environment taskEnvironment, String taskName) {
    // OutputTag is null if this edge is not a side output.
    OutputTag sideOutputTag = edge.getOutputTag();
    // Pick the side-output serializer when a tag is present, else the main-output serializer.
    TypeSerializer outSerializer = (sideOutputTag != null)
        ? upStreamConfig.getTypeSerializerSideOut(sideOutputTag, taskEnvironment.getUserClassLoader())
        : upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
    @SuppressWarnings("unchecked") StreamPartitioner<T> outputPartitioner = (StreamPartitioner<T>) edge.getPartitioner();
    // Fixed: the format string was missing the third placeholder, silently dropping taskName.
    LOG.debug("Using partitioner {} for output {} of task {}", outputPartitioner, outputIndex, taskName);
    ResultPartitionWriter bufferWriter = taskEnvironment.getWriter(outputIndex);
    // we initialize the partitioner here with the number of key groups (aka max. parallelism)
    if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
        int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
        if (0 < numKeyGroups) {
            ((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
        }
    }
    StreamRecordWriter<SerializationDelegate<StreamRecord<T>>> output = new StreamRecordWriter<>(bufferWriter, outputPartitioner, upStreamConfig.getBufferTimeout());
    output.setMetricGroup(taskEnvironment.getMetricGroup().getIOMetricGroup());
    return new RecordWriterOutput<>(output, outSerializer, sideOutputTag, this);
}
Also used : ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) StreamRecordWriter(org.apache.flink.streaming.runtime.io.StreamRecordWriter) ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) OutputTag(org.apache.flink.util.OutputTag)

Aggregations

OutputTag (org.apache.flink.util.OutputTag)111 Test (org.junit.Test)97 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)86 TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink)57 ExpectedException (org.junit.rules.ExpectedException)57 List (java.util.List)24 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)24 ArrayList (java.util.ArrayList)20 SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator)18 HashMap (java.util.HashMap)17 DataStream (org.apache.flink.streaming.api.datastream.DataStream)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)16 Collector (org.apache.flink.util.Collector)16 Arrays (java.util.Arrays)15 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)15 Assert.assertEquals (org.junit.Assert.assertEquals)13 Objects (java.util.Objects)12 Optional (java.util.Optional)12 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)12 Map (java.util.Map)10