Search in sources :

Example 86 with OutputTag

use of org.apache.flink.util.OutputTag in project flink by splunk.

the class OperatorChain method createStreamOutput.

/**
 * Creates the {@link RecordWriterOutput} for a single outgoing edge and registers it with
 * {@code closer}.
 *
 * <p>The serializer is read from {@code upStreamConfig}: the main-output serializer when the edge
 * has no output tag, the side-output serializer for that tag otherwise.
 *
 * @param recordWriter writer handed to the new output
 * @param edge edge whose output tag and unaligned-checkpoint support configure the output
 * @param upStreamConfig configuration the serializer is looked up in
 * @param taskEnvironment source of the user-code class loader used for the serializer lookup
 * @return the output instance returned by {@code closer.register(...)}
 */
private RecordWriterOutput<OUT> createStreamOutput(RecordWriter<SerializationDelegate<StreamRecord<OUT>>> recordWriter, StreamEdge edge, StreamConfig upStreamConfig, Environment taskEnvironment) {
    // A null tag means the edge is not a side output.
    final OutputTag sideOutputTag = edge.getOutputTag();
    final ClassLoader userCodeClassLoader = taskEnvironment.getUserCodeClassLoader().asClassLoader();

    final TypeSerializer outSerializer;
    if (sideOutputTag == null) {
        // main output
        outSerializer = upStreamConfig.getTypeSerializerOut(userCodeClassLoader);
    } else {
        // side output
        outSerializer = upStreamConfig.getTypeSerializerSideOut(sideOutputTag, userCodeClassLoader);
    }

    final RecordWriterOutput<OUT> streamOutput = new RecordWriterOutput<OUT>(recordWriter, outSerializer, sideOutputTag, edge.supportsUnalignedCheckpoints());
    return closer.register(streamOutput);
}
Also used : TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) OutputTag(org.apache.flink.util.OutputTag)

Example 87 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.

the class SortingBoundedInputITCase method testBatchExecutionWithTimersOneInput.

/**
 * Runs a keyed, timer-driven process function over a bounded input in BATCH runtime mode and
 * checks both its main output and its "late_elements" side output.
 *
 * <p>Each input tuple is (key, timestamp). The function counts elements per key in buckets of ten
 * timestamp units and emits one (bucketEnd, key, count) tuple from each timer. The test expects
 * the side output for late elements to stay empty and the per-key timestamps seen by
 * {@code processElement} to be non-decreasing.
 */
@Test
public void testBatchExecutionWithTimersOneInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    // Switch the job to BATCH runtime mode.
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    // Timestamps come from the second tuple field; the watermark generator is defined outside this
    // method (presumably it emits watermarks after timestamps 4 and 14, going by its name).
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
    // NOTE(review): the trailing "late element" markers below look displaced by code reformatting;
    // confirm against the upstream source which elements they were meant to annotate.
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4), // late element
    Tuple2.of(2, 3), // late element
    Tuple2.of(1, 2), Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14), // late element
    Tuple2.of(1, 11)).assignTimestampsAndWatermarks(watermarkStrategy);
    // Side output that receives every element whose timestamp is behind the current watermark.
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    // Keep only the key (f0) as the record value and key the stream by that same value.
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements.map(element -> element.f0).keyBy(element -> element).process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        // Maps a timer timestamp (bucket end) to the number of elements counted for it.
        private MapState<Long, Integer> countState;

        // Timestamp of the last non-late element, used to assert ordering.
        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            // Smallest multiple of ten strictly greater than the timestamp (e.g. 3 -> 10, 13 -> 20).
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            // The timer is registered for late and on-time elements alike.
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                // Element is behind the watermark: route it to the side output instead of counting it.
                ctx.output(lateElements, value);
            } else {
                // Elements must arrive with non-decreasing timestamps; a missing previous value counts as 0.
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                // Increment the count of the bucket this element falls into.
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            // Emit (bucketEnd, key, count) and drop the bucket from state.
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go in infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    // Collect the side output first, then the main output.
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    // No element is expected to be treated as late in this run.
    assertTrue(lateRecordsCollected.isEmpty());
    // Expected tuples are (bucketEnd, key, count), listed key by key.
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 4), Tuple3.of(20L, 1, 3), Tuple3.of(10L, 2, 2), Tuple3.of(20L, 2, 1))));
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) WatermarkGenerator(org.apache.flink.api.common.eventtime.WatermarkGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) KeyedCoProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction) Random(java.util.Random) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) SplittableIterator(org.apache.flink.util.SplittableIterator) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ExecutionOptions(org.apache.flink.configuration.ExecutionOptions) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) DataStreamUtils(org.apache.flink.streaming.api.datastream.DataStreamUtils) Set(java.util.Set) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) OutputTag(org.apache.flink.util.OutputTag) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) Objects(java.util.Objects) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) Watermark(org.apache.flink.api.common.eventtime.Watermark) Optional(java.util.Optional) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) 
AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) HashSet(java.util.HashSet) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Collector(org.apache.flink.util.Collector) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Iterator(java.util.Iterator) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) WatermarkOutput(org.apache.flink.api.common.eventtime.WatermarkOutput) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Consumer(java.util.function.Consumer) MapState(org.apache.flink.api.common.state.MapState) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Input(org.apache.flink.streaming.api.operators.Input) Configuration(org.apache.flink.configuration.Configuration) OutputTag(org.apache.flink.util.OutputTag) Tuple2(org.apache.flink.api.java.tuple.Tuple2) 
Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 88 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.

the class SideOutputITCase method testDifferentSideOutputTypes.

/**
 * Checks that one process function can emit to two side outputs of different element types
 * (String and Integer) in addition to its main output, with object reuse enabled.
 */
@Test
public void testDifferentSideOutputTypes() throws Exception {
    // Anonymous subclasses keep the generic element type available to the tag.
    final OutputTag<String> stringTag = new OutputTag<String>("string") {
    };
    final OutputTag<Integer> intTag = new OutputTag<Integer>("int") {
    };

    TestListResultSink<String> stringSideSink = new TestListResultSink<>();
    TestListResultSink<Integer> intSideSink = new TestListResultSink<>();
    TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    env.setParallelism(3);

    // Forwards each element unchanged and additionally writes one record to each side output.
    ProcessFunction<Integer, Integer> fanOut = new ProcessFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            ctx.output(stringTag, "sideout-" + value);
            ctx.output(intTag, 13);
        }
    };

    DataStream<Integer> source = env.fromCollection(elements);
    SingleOutputStreamOperator<Integer> passThrough = source.process(fanOut);

    DataStream<String> stringSide = passThrough.getSideOutput(stringTag);
    stringSide.addSink(stringSideSink);
    DataStream<Integer> intSide = passThrough.getSideOutput(intTag);
    intSide.addSink(intSideSink);
    passThrough.addSink(mainSink);

    env.execute();

    assertEquals(Arrays.asList("sideout-1", "sideout-2", "sideout-3", "sideout-4", "sideout-5"), stringSideSink.getSortedResult());
    assertEquals(Arrays.asList(13, 13, 13, 13, 13), intSideSink.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), mainSink.getSortedResult());
}
Also used : ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 89 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.

the class SideOutputITCase method testSideOutputNameClash.

/**
 * Checks that requesting a second side output whose tag reuses the id "side" with a different
 * element type is rejected with an {@link UnsupportedOperationException}.
 */
@Test
public void testSideOutputNameClash() throws Exception {
    // Two tags with the same id but different element types.
    final OutputTag<String> stringTag = new OutputTag<String>("side") {
    };
    final OutputTag<Integer> clashingIntTag = new OutputTag<Integer>("side") {
    };

    TestListResultSink<String> stringSideSink = new TestListResultSink<>();
    TestListResultSink<Integer> intSideSink = new TestListResultSink<>();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(3);

    // Forwards each element and writes to both tags.
    ProcessFunction<Integer, Integer> fanOut = new ProcessFunction<Integer, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            out.collect(value);
            ctx.output(stringTag, "sideout-" + value);
            ctx.output(clashingIntTag, 13);
        }
    };

    DataStream<Integer> source = see.fromCollection(elements);
    SingleOutputStreamOperator<Integer> passThrough = source.process(fanOut);

    // The first request for the "side" output is accepted.
    DataStream<String> stringSide = passThrough.getSideOutput(stringTag);
    stringSide.addSink(stringSideSink);

    // The exception is expected only from the statements that follow.
    expectedException.expect(UnsupportedOperationException.class);
    passThrough.getSideOutput(clashingIntTag).addSink(intSideSink);
}
Also used : ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 90 with OutputTag

use of org.apache.flink.util.OutputTag in project flink-mirror by flink-ci.

the class SideOutputITCase method testLegacyKeyedCoProcessFunctionSideOutput.

/**
 * Tests side output from a {@code CoProcessFunction} applied to two connected keyed streams.
 *
 * <p>The first input forwards values below 3 and the second input forwards values of 3 and above;
 * every forwarded value is also written to the "side" output with an input-specific prefix.
 */
@Test
public void testLegacyKeyedCoProcessFunctionSideOutput() throws Exception {
    final OutputTag<String> sideTag = new OutputTag<String>("side") {
    };

    TestListResultSink<String> sideSink = new TestListResultSink<>();
    TestListResultSink<Integer> mainSink = new TestListResultSink<>();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(3);

    DataStream<Integer> firstInput = see.fromCollection(elements);
    DataStream<Integer> secondInput = see.fromCollection(elements);

    // Both inputs are keyed by the element value itself before being connected.
    SingleOutputStreamOperator<Integer> passThrough = firstInput.keyBy(i -> i).connect(secondInput.keyBy(i -> i)).process(new CoProcessFunction<Integer, Integer, Integer>() {

        @Override
        public void processElement1(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            // First input only handles values below 3.
            if (value >= 3) {
                return;
            }
            out.collect(value);
            ctx.output(sideTag, "sideout1-" + value);
        }

        @Override
        public void processElement2(Integer value, Context ctx, Collector<Integer> out) throws Exception {
            // Second input only handles values of 3 and above.
            if (value < 3) {
                return;
            }
            out.collect(value);
            ctx.output(sideTag, "sideout2-" + value);
        }
    });

    DataStream<String> sideStream = passThrough.getSideOutput(sideTag);
    sideStream.addSink(sideSink);
    passThrough.addSink(mainSink);

    see.execute();

    assertEquals(Arrays.asList("sideout1-1", "sideout1-2", "sideout2-3", "sideout2-4", "sideout2-5"), sideSink.getSortedResult());
    assertEquals(Arrays.asList(1, 2, 3, 4, 5), mainSink.getSortedResult());
}
Also used : ExpectedException(org.junit.rules.ExpectedException) TestListResultSink(org.apache.flink.test.streaming.runtime.util.TestListResultSink) OutputTag(org.apache.flink.util.OutputTag) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

OutputTag (org.apache.flink.util.OutputTag)111 Test (org.junit.Test)97 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)86 TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink)57 ExpectedException (org.junit.rules.ExpectedException)57 List (java.util.List)24 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)24 ArrayList (java.util.ArrayList)20 SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator)18 HashMap (java.util.HashMap)17 DataStream (org.apache.flink.streaming.api.datastream.DataStream)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)16 Collector (org.apache.flink.util.Collector)16 Arrays (java.util.Arrays)15 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)15 Assert.assertEquals (org.junit.Assert.assertEquals)13 Objects (java.util.Objects)12 Optional (java.util.Optional)12 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)12 Map (java.util.Map)10