Search in sources :

Example 1 with KeyedStream

Use of org.apache.flink.streaming.api.datastream.KeyedStream in the Apache Flink project.

From the class SortingBoundedInputITCase, method testThreeInputOperator.

/**
 * Runs a three-input keyed operator with the runtime mode set to BATCH.
 *
 * <p>Three keyed inputs are built from separate {@code InputGenerator} instances and wired into
 * one {@link KeyedMultipleInputTransformation}. The {@code Long} values emitted by that
 * transformation are collected and summed; the sum must equal three times the per-input record
 * count.
 */
@Test
public void testThreeInputOperator() {
    final long recordsPerInput = 500_000;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // Every input gets its own generator and is keyed on the tuple's first field.
    DataStream<Tuple2<Integer, byte[]>> rawFirst =
            env.fromParallelCollection(
                    new InputGenerator(recordsPerInput),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> keyedFirst = rawFirst.keyBy(tuple -> tuple.f0);

    DataStream<Tuple2<Integer, byte[]>> rawSecond =
            env.fromParallelCollection(
                    new InputGenerator(recordsPerInput),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> keyedSecond = rawSecond.keyBy(tuple -> tuple.f0);

    DataStream<Tuple2<Integer, byte[]>> rawThird =
            env.fromParallelCollection(
                    new InputGenerator(recordsPerInput),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> keyedThird = rawThird.keyBy(tuple -> tuple.f0);

    // Assemble the multiple-input transformation by hand and register each keyed input
    // together with its key selector.
    KeyedMultipleInputTransformation<Long> threeInputTransformation =
            new KeyedMultipleInputTransformation<>(
                    "Asserting operator",
                    new AssertingThreeInputOperatorFactory(),
                    BasicTypeInfo.LONG_TYPE_INFO,
                    -1,
                    BasicTypeInfo.INT_TYPE_INFO);
    threeInputTransformation.addInput(keyedFirst.getTransformation(), keyedFirst.getKeySelector());
    threeInputTransformation.addInput(keyedSecond.getTransformation(), keyedSecond.getKeySelector());
    threeInputTransformation.addInput(keyedThird.getTransformation(), keyedThird.getKeySelector());
    env.addOperator(threeInputTransformation);

    // Collect everything the transformation emits and add it up.
    DataStream<Long> emitted = new DataStream<>(env, threeInputTransformation);
    List<Long> collected = CollectionUtil.iteratorToList(DataStreamUtils.collect(emitted));
    long total = collected.stream().mapToLong(Long::longValue).sum();

    final long expectedTotal = recordsPerInput * 3;
    assertThat(total, equalTo(expectedTotal));
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) WatermarkGenerator(org.apache.flink.api.common.eventtime.WatermarkGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) KeyedCoProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction) Random(java.util.Random) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) SplittableIterator(org.apache.flink.util.SplittableIterator) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ExecutionOptions(org.apache.flink.configuration.ExecutionOptions) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) DataStreamUtils(org.apache.flink.streaming.api.datastream.DataStreamUtils) Set(java.util.Set) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) OutputTag(org.apache.flink.util.OutputTag) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) Objects(java.util.Objects) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) Watermark(org.apache.flink.api.common.eventtime.Watermark) Optional(java.util.Optional) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) 
AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) HashSet(java.util.HashSet) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Collector(org.apache.flink.util.Collector) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Iterator(java.util.Iterator) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) WatermarkOutput(org.apache.flink.api.common.eventtime.WatermarkOutput) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Consumer(java.util.function.Consumer) MapState(org.apache.flink.api.common.state.MapState) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Input(org.apache.flink.streaming.api.operators.Input) Configuration(org.apache.flink.configuration.Configuration) 
KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) DataStream(org.apache.flink.streaming.api.datastream.DataStream) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 2 with KeyedStream

Use of org.apache.flink.streaming.api.datastream.KeyedStream in the Apache Flink project.

From the class DataStreamTest, method testKeyedConnectedStreamsType.

/**
 * Connects two integer streams, keys both sides by identity, and asserts that each input of the
 * resulting {@link ConnectedStreams} can be cast to a {@link KeyedStream} whose key type equals
 * {@code Types.INT}.
 */
@Test
public void testKeyedConnectedStreamsType() {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Integer> left = environment.fromElements(1, 2);
    DataStreamSource<Integer> right = environment.fromElements(1, 2);

    ConnectedStreams<Integer, Integer> keyedConnection =
            left.connect(right).keyBy(value -> value, value -> value);

    // An input that is not a KeyedStream would fail right here with a ClassCastException.
    KeyedStream<?, ?> keyedLeft = (KeyedStream<?, ?>) keyedConnection.getFirstInput();
    KeyedStream<?, ?> keyedRight = (KeyedStream<?, ?>) keyedConnection.getSecondInput();

    assertThat(keyedLeft.getKeyType(), equalTo(Types.INT));
    assertThat(keyedRight.getKeyType(), equalTo(Types.INT));
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) Test(org.junit.Test)

Example 3 with KeyedStream

Use of org.apache.flink.streaming.api.datastream.KeyedStream in the Apache Flink project.

From the class DataStreamTest, method testFailedTranslationOnKeyed.

/**
 * Verifies that a {@link KeyedStream} connected to a broadcast stream rejects a plain {@link
 * BroadcastProcessFunction}: the {@code process} call is expected to throw an {@link
 * IllegalArgumentException}, because a {@link KeyedBroadcastProcessFunction} has to be supplied
 * for keyed input.
 */
@Test
public void testFailedTranslationOnKeyed() {
    final MapStateDescriptor<Long, String> broadcastDescriptor =
            new MapStateDescriptor<>(
                    "broadcast", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Keyed side: the element value itself is used as the timestamp and as the key.
    final CustomWmEmitter<Long> longTimestamps =
            new CustomWmEmitter<Long>() {

                @Override
                public long extractTimestamp(Long element, long previousElementTimestamp) {
                    return element;
                }
            };
    final DataStream<Long> srcOne =
            env.generateSequence(0L, 5L)
                    .assignTimestampsAndWatermarks(longTimestamps)
                    .keyBy((KeySelector<Long, Long>) value -> value);

    // Broadcast side: the timestamp is the number that follows the ':' in each element.
    final CustomWmEmitter<String> suffixTimestamps =
            new CustomWmEmitter<String>() {

                @Override
                public long extractTimestamp(String element, long previousElementTimestamp) {
                    final String[] parts = element.split(":");
                    return Long.parseLong(parts[1]);
                }
            };
    final DataStream<String> srcTwo =
            env.fromElements("Test:0", "Test:1", "Test:2", "Test:3", "Test:4", "Test:5")
                    .assignTimestampsAndWatermarks(suffixTimestamps);

    BroadcastStream<String> broadcast = srcTwo.broadcast(broadcastDescriptor);
    BroadcastConnectedStream<Long, String> bcStream = srcOne.connect(broadcast);

    // A non-keyed process function with empty callbacks; only its type matters to this test.
    final BroadcastProcessFunction<Long, String, String> nonKeyedFunction =
            new BroadcastProcessFunction<Long, String, String>() {

                @Override
                public void processBroadcastElement(String value, Context ctx, Collector<String> out)
                        throws Exception {
                    // intentionally empty
                }

                @Override
                public void processElement(Long value, ReadOnlyContext ctx, Collector<String> out)
                        throws Exception {
                    // intentionally empty
                }
            };

    // Arm the rule immediately before the call that is expected to fail.
    expectedException.expect(IllegalArgumentException.class);
    bcStream.process(nonKeyedFunction);
}
Also used : Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) PurgingTrigger(org.apache.flink.streaming.api.windowing.triggers.PurgingTrigger) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) CustomPartitionerWrapper(org.apache.flink.streaming.runtime.partitioner.CustomPartitionerWrapper) Duration(java.time.Duration) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assert.fail(org.junit.Assert.fail) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Method(java.lang.reflect.Method) GlobalWindow(org.apache.flink.streaming.api.windowing.windows.GlobalWindow) StringStartsWith(org.hamcrest.core.StringStartsWith) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) KeySelector(org.apache.flink.api.java.functions.KeySelector) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) 
SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) List(java.util.List) TypeExtractor(org.apache.flink.api.java.typeutils.TypeExtractor) BroadcastConnectedStream(org.apache.flink.streaming.api.datastream.BroadcastConnectedStream) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) Assert.assertFalse(org.junit.Assert.assertFalse) AllWindowFunction(org.apache.flink.streaming.api.functions.windowing.AllWindowFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CountTrigger(org.apache.flink.streaming.api.windowing.triggers.CountTrigger) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) Partitioner(org.apache.flink.api.common.functions.Partitioner) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) 
StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) Collector(org.apache.flink.util.Collector) ProcessOperator(org.apache.flink.streaming.api.operators.ProcessOperator) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) ExpectedException(org.junit.rules.ExpectedException) Nullable(javax.annotation.Nullable) Types(org.apache.flink.api.common.typeinfo.Types) DataStreamSink(org.apache.flink.streaming.api.datastream.DataStreamSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) Time(org.apache.flink.streaming.api.windowing.time.Time) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) LegacyKeyedProcessOperator(org.apache.flink.streaming.api.operators.LegacyKeyedProcessOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) KeyGroupStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner) Rule(org.junit.Rule) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) EnumTypeInfo(org.apache.flink.api.java.typeutils.EnumTypeInfo) Assert(org.junit.Assert) GlobalWindows(org.apache.flink.streaming.api.windowing.assigners.GlobalWindows) Assert.assertEquals(org.junit.Assert.assertEquals) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) 
StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

KeyedStream (org.apache.flink.streaming.api.datastream.KeyedStream)3 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)3 List (java.util.List)2 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)2 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)2 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)2 PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)2 DataStream (org.apache.flink.streaming.api.datastream.DataStream)2 DataStreamSource (org.apache.flink.streaming.api.datastream.DataStreamSource)2 SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator)2 KeyedProcessFunction (org.apache.flink.streaming.api.functions.KeyedProcessFunction)2 Test (org.junit.Test)2 Method (java.lang.reflect.Method)1 Duration (java.time.Duration)1 Arrays (java.util.Arrays)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 Objects (java.util.Objects)1