Search in sources :

Example 6 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

the class SideOutputITCase method testProcessFunctionSideOutput.

/**
 * Runs a pass-through {@link ProcessFunction} that forwards each element to the main output
 * and, for the same element, emits a {@code "sideout-"}-prefixed string to a side output.
 * Both the main sink and the side-output sink are then compared against the expected values.
 */
@Test
public void testProcessFunctionSideOutput() throws Exception {
    final OutputTag<String> sideTag = new OutputTag<String>("side") {
    };

    TestListResultSink<String> sideSink = new TestListResultSink<>();
    TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataStream<Integer> source = env.fromCollection(elements);

    SingleOutputStreamOperator<Integer> passThrough =
            source.process(new ProcessFunction<Integer, Integer>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void processElement(Integer element, Context context, Collector<Integer> collector) throws Exception {
                    // Main output: the element unchanged.
                    collector.collect(element);
                    // Side output: a tagged string derived from the same element.
                    context.output(sideTag, "sideout-" + element);
                }
            });

    DataStream<String> sideStream = passThrough.getSideOutput(sideTag);
    sideStream.addSink(sideSink);
    passThrough.addSink(mainSink);

    env.execute();

    assertEquals(
            Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"),
            sideSink.getSortedResult());
    assertEquals(
            Arrays.asList(1, 2, 3, 4, 5),
            mainSink.getSortedResult());
}
Also used : ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 7 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

the class SideOutputITCase method testKeyedWindowLateArrivingEvents.

/**
 * Applies a keyed time window with an allowed lateness of 2 ms and routes late elements to a
 * side output via {@code sideOutputLateData}. The window function emits {@code "<key>-<value>"}
 * for every element in a window; the main and late sinks are then checked.
 */
@Test
public void testKeyedWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> windowSink = new TestListResultSink<>();
    TestListResultSink<Integer> lateSink = new TestListResultSink<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Integer> source = env.fromCollection(elements);

    OutputTag<Integer> lateTag = new OutputTag<Integer>("late") {
    };

    DataStream<Integer> timestamped = source.assignTimestampsAndWatermarks(new TestWatermarkAssigner());

    SingleOutputStreamOperator<String> windowed = timestamped
            .keyBy(new TestKeySelector())
            .timeWindow(Time.milliseconds(1), Time.milliseconds(1))
            .allowedLateness(Time.milliseconds(2))
            .sideOutputLateData(lateTag)
            .apply(new WindowFunction<Integer, String, Integer, TimeWindow>() {

                private static final long serialVersionUID = 1L;

                @Override
                public void apply(Integer key, TimeWindow window, Iterable<Integer> input, Collector<String> out) throws Exception {
                    // One "<key>-<value>" record per element in the window.
                    for (Integer element : input) {
                        out.collect(key + "-" + element);
                    }
                }
            });

    windowed.addSink(windowSink);
    DataStream<Integer> lateStream = windowed.getSideOutput(lateTag);
    lateStream.addSink(lateSink);

    env.execute();

    assertEquals(
            Arrays.asList("1-1", "2-2", "4-4", "5-5"),
            windowSink.getSortedResult());
    assertEquals(
            Collections.singletonList(3),
            lateSink.getSortedResult());
}
Also used : TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 8 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

the class SideOutputITCase method testWatermarkForwarding.

/**
 * Verify that watermarks are forwarded to all side outputs.
 *
 * <p>A custom source emits five timestamped elements and two explicit watermarks (0 and 2).
 * A pass-through {@link ProcessFunction} forwards each element to the main output and emits a
 * {@code "sideout-"}-prefixed string to {@code sideOutputTag1}; nothing is ever emitted to
 * {@code sideOutputTag2}. Each of the three resulting streams is run through a
 * {@code WatermarkReifier}, which turns watermarks into {@code "WM:<timestamp>"} records so
 * that they can be asserted on in the sinks.
 */
@Test
public void testWatermarkForwarding() throws Exception {
    final OutputTag<String> sideOutputTag1 = new OutputTag<String>("side") {
    };
    final OutputTag<String> sideOutputTag2 = new OutputTag<String>("other-side") {
    };
    TestListResultSink<String> sideOutputResultSink1 = new TestListResultSink<>();
    TestListResultSink<String> sideOutputResultSink2 = new TestListResultSink<>();
    TestListResultSink<String> resultSink = new TestListResultSink<>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(3);
    DataStream<Integer> dataStream = env.addSource(new SourceFunction<Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
            // Elements interleaved with two explicit watermarks (0 and 2).
            ctx.collectWithTimestamp(1, 0);
            ctx.emitWatermark(new Watermark(0));
            ctx.collectWithTimestamp(2, 1);
            ctx.collectWithTimestamp(5, 2);
            ctx.emitWatermark(new Watermark(2));
            ctx.collectWithTimestamp(3, 3);
            ctx.collectWithTimestamp(4, 4);
        }

        @Override
        public void cancel() {
        }
    });
    SingleOutputStreamOperator<Integer> passThroughStream = dataStream.process(new ProcessFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            // Only sideOutputTag1 receives records; sideOutputTag2 stays record-free on purpose.
            ctx.output(sideOutputTag1, "sideout-" + String.valueOf(value));
        }
    });
    /**
     * Operator that makes watermarks observable: elements are re-emitted with an "E:" prefix
     * and every watermark is additionally emitted as a "WM:<timestamp>" record.
     */
    class WatermarkReifier extends AbstractStreamOperator<String> implements OneInputStreamOperator<String, String> {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(StreamRecord<String> element) throws Exception {
            output.collect(new StreamRecord<>("E:" + element.getValue()));
        }

        @Override
        public void processWatermark(Watermark mark) throws Exception {
            super.processWatermark(mark);
            output.collect(new StreamRecord<>("WM:" + mark.getTimestamp()));
        }
    }
    passThroughStream.getSideOutput(sideOutputTag1).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(sideOutputResultSink1);
    passThroughStream.getSideOutput(sideOutputTag2).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(sideOutputResultSink2);
    passThroughStream.map(new MapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String map(Integer value) throws Exception {
            return value.toString();
        }
    }).transform("ReifyWatermarks", BasicTypeInfo.STRING_TYPE_INFO, new WatermarkReifier()).addSink(resultSink);
    env.execute();
    assertEquals(Arrays.asList("E:sideout-1", "E:sideout-2", "E:sideout-3", "E:sideout-4", "E:sideout-5", "WM:0", "WM:2", "WM:" + Long.MAX_VALUE), sideOutputResultSink1.getSortedResult());
    // The second side output never receives a record (see processElement above), so the only
    // entries expected in its sink are the reified watermarks. Previously this assertion
    // re-checked sideOutputResultSink1, leaving the second side output untested.
    assertEquals(Arrays.asList("WM:0", "WM:2", "WM:" + Long.MAX_VALUE), sideOutputResultSink2.getSortedResult());
    assertEquals(Arrays.asList("E:1", "E:2", "E:3", "E:4", "E:5", "WM:0", "WM:2", "WM:" + Long.MAX_VALUE), resultSink.getSortedResult());
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 9 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

the class SideOutputITCase method testAllWindowLateArrivingEvents.

/**
 * Test the late-arriving-events side output of a non-keyed (all) window.
 *
 * <p>The input is windowed with {@code timeWindowAll} and late elements are routed to a side
 * output via {@code sideOutputLateData}. The late stream is mapped to {@code "late-<value>"}
 * strings and collected in a sink, which is then compared against the expected late elements.
 */
@Test
public void testAllWindowLateArrivingEvents() throws Exception {
    TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    DataStream<Integer> dataStream = see.fromCollection(elements);
    OutputTag<Integer> lateDataTag = new OutputTag<Integer>("late") {
    };
    SingleOutputStreamOperator<Integer> windowOperator = dataStream.assignTimestampsAndWatermarks(new TestWatermarkAssigner()).timeWindowAll(Time.milliseconds(1), Time.milliseconds(1)).sideOutputLateData(lateDataTag).apply(new AllWindowFunction<Integer, Integer, TimeWindow>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void apply(TimeWindow window, Iterable<Integer> values, Collector<Integer> out) throws Exception {
            // Pass every element of the window through unchanged.
            for (Integer val : values) {
                out.collect(val);
            }
        }
    });
    windowOperator.getSideOutput(lateDataTag).flatMap(new FlatMapFunction<Integer, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(Integer value, Collector<String> out) throws Exception {
            out.collect("late-" + String.valueOf(value));
        }
    }).addSink(sideOutputResultSink);
    see.execute();
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
    // messages report the right side as "expected".
    assertEquals(Arrays.asList("late-3", "late-4"), sideOutputResultSink.getSortedResult());
}
Also used : TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 10 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by apache.

the class OperatorChain method createStreamOutput.

/**
 * Creates the {@link RecordWriterOutput} for one outgoing edge of this task.
 *
 * <p>The serializer is chosen depending on whether the edge carries a side output (the edge
 * has an {@link OutputTag}) or the main output. If the edge's partitioner is a
 * {@link ConfigurableStreamPartitioner}, it is configured with the writer's number of target
 * key groups when that number is positive.
 *
 * @param edge the outgoing stream edge; supplies the output tag and the partitioner
 * @param upStreamConfig configuration of the upstream operator; supplies serializers and the buffer timeout
 * @param outputIndex index of the result partition writer to use in {@code taskEnvironment}
 * @param taskEnvironment environment providing the user class loader, writers and metric group
 * @param taskName name of the task, used for logging only
 * @return the output that writes records for the given edge
 */
private <T> RecordWriterOutput<T> createStreamOutput(StreamEdge edge, StreamConfig upStreamConfig, int outputIndex, Environment taskEnvironment, String taskName) {
    // OutputTag, null if this edge is not a side output
    OutputTag sideOutputTag = edge.getOutputTag();
    TypeSerializer outSerializer;
    if (sideOutputTag != null) {
        // side output
        outSerializer = upStreamConfig.getTypeSerializerSideOut(sideOutputTag, taskEnvironment.getUserClassLoader());
    } else {
        // main output
        outSerializer = upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
    }
    @SuppressWarnings("unchecked") StreamPartitioner<T> outputPartitioner = (StreamPartitioner<T>) edge.getPartitioner();
    // Three placeholders for three arguments; the task name was previously dropped from the
    // message because its placeholder was missing.
    LOG.debug("Using partitioner {} for output {} of task {}", outputPartitioner, outputIndex, taskName);
    ResultPartitionWriter bufferWriter = taskEnvironment.getWriter(outputIndex);
    // we initialize the partitioner here with the number of key groups (aka max. parallelism)
    if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
        int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
        if (0 < numKeyGroups) {
            ((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
        }
    }
    StreamRecordWriter<SerializationDelegate<StreamRecord<T>>> output = new StreamRecordWriter<>(bufferWriter, outputPartitioner, upStreamConfig.getBufferTimeout());
    output.setMetricGroup(taskEnvironment.getMetricGroup().getIOMetricGroup());
    return new RecordWriterOutput<>(output, outSerializer, sideOutputTag, this);
}
Also used : ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) StreamRecordWriter(org.apache.flink.streaming.runtime.io.StreamRecordWriter) ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) OutputTag(org.apache.flink.util.OutputTag)

Aggregations

OutputTag (org.apache.flink.util.OutputTag)10 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)9 TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink)9 Test (org.junit.Test)9 ExpectedException (org.junit.rules.ExpectedException)9 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)2 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)1 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)1 KeySelector (org.apache.flink.api.java.functions.KeySelector)1 ResultPartitionWriter (org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter)1 SerializationDelegate (org.apache.flink.runtime.plugable.SerializationDelegate)1 AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator)1 OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator)1 Watermark (org.apache.flink.streaming.api.watermark.Watermark)1 RecordWriterOutput (org.apache.flink.streaming.runtime.io.RecordWriterOutput)1 StreamRecordWriter (org.apache.flink.streaming.runtime.io.StreamRecordWriter)1 ConfigurableStreamPartitioner (org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner)1 StreamPartitioner (org.apache.flink.streaming.runtime.partitioner.StreamPartitioner)1 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)1 Collector (org.apache.flink.util.Collector)1