Search in sources :

Example 1 with KeyedMultipleInputTransformation

Use of org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation in the Apache Flink project.

From the class SortingBoundedInputITCase, method testThreeInputOperator.

/**
 * Builds three keyed inputs in BATCH runtime mode, feeds them into a keyed multiple-input
 * transformation backed by {@code AssertingThreeInputOperatorFactory}, and checks that the sum of
 * the {@code Long} values the operator emits equals three times the per-input record count.
 */
@Test
public void testThreeInputOperator() {
    final long recordsPerInput = 500_000;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // All three inputs are built the same way and keyed by the integer field (f0) of the tuple.
    KeyedStream<Tuple2<Integer, byte[]>, Object> firstInput =
            env.fromParallelCollection(
                            new InputGenerator(recordsPerInput),
                            new TupleTypeInfo<>(
                                    BasicTypeInfo.INT_TYPE_INFO,
                                    PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO))
                    .keyBy(el -> el.f0);
    KeyedStream<Tuple2<Integer, byte[]>, Object> secondInput =
            env.fromParallelCollection(
                            new InputGenerator(recordsPerInput),
                            new TupleTypeInfo<>(
                                    BasicTypeInfo.INT_TYPE_INFO,
                                    PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO))
                    .keyBy(el -> el.f0);
    KeyedStream<Tuple2<Integer, byte[]>, Object> thirdInput =
            env.fromParallelCollection(
                            new InputGenerator(recordsPerInput),
                            new TupleTypeInfo<>(
                                    BasicTypeInfo.INT_TYPE_INFO,
                                    PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO))
                    .keyBy(el -> el.f0);

    // Output type is Long, state key type is Integer; -1 is passed as the parallelism argument.
    KeyedMultipleInputTransformation<Long> asserting =
            new KeyedMultipleInputTransformation<>(
                    "Asserting operator",
                    new AssertingThreeInputOperatorFactory(),
                    BasicTypeInfo.LONG_TYPE_INFO,
                    -1,
                    BasicTypeInfo.INT_TYPE_INFO);
    asserting.addInput(firstInput.getTransformation(), firstInput.getKeySelector());
    asserting.addInput(secondInput.getTransformation(), secondInput.getKeySelector());
    asserting.addInput(thirdInput.getTransformation(), thirdInput.getKeySelector());
    env.addOperator(asserting);

    DataStream<Long> counts = new DataStream<>(env, asserting);
    List<Long> emitted = CollectionUtil.iteratorToList(DataStreamUtils.collect(counts));
    long total = emitted.stream().mapToLong(Long::longValue).sum();

    assertThat(total, equalTo(recordsPerInput * 3));
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) WatermarkGenerator(org.apache.flink.api.common.eventtime.WatermarkGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) KeyedCoProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction) Random(java.util.Random) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) SplittableIterator(org.apache.flink.util.SplittableIterator) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ExecutionOptions(org.apache.flink.configuration.ExecutionOptions) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) DataStreamUtils(org.apache.flink.streaming.api.datastream.DataStreamUtils) Set(java.util.Set) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) OutputTag(org.apache.flink.util.OutputTag) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) Objects(java.util.Objects) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) Watermark(org.apache.flink.api.common.eventtime.Watermark) Optional(java.util.Optional) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) 
AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) HashSet(java.util.HashSet) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Collector(org.apache.flink.util.Collector) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Iterator(java.util.Iterator) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) WatermarkOutput(org.apache.flink.api.common.eventtime.WatermarkOutput) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Consumer(java.util.function.Consumer) MapState(org.apache.flink.api.common.state.MapState) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Input(org.apache.flink.streaming.api.operators.Input) Configuration(org.apache.flink.configuration.Configuration) 
KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) DataStream(org.apache.flink.streaming.api.datastream.DataStream) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 2 with KeyedMultipleInputTransformation

Use of org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation in the Apache Flink project.

From the class StreamGraphGeneratorBatchExecutionTest, method testInputSelectableMultiInputTransformation.

/**
 * Expects stream-graph generation in batch mode to fail with an {@link IllegalStateException}
 * when a keyed multiple-input transformation uses a {@code MultipleInputOperatorFactory} created
 * with its boolean argument set to {@code true}.
 */
@Test
public void testInputSelectableMultiInputTransformation() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Integer> firstSource = env.fromElements(1, 2);
    DataStreamSource<Integer> secondSource = env.fromElements(1, 2);
    DataStreamSource<Integer> thirdSource = env.fromElements(1, 2);

    // Three inputs; the 'true' flag presumably marks the operator as InputSelectable
    // (the expected error message below refers to such operators).
    MultipleInputOperatorFactory operatorFactory = new MultipleInputOperatorFactory(3, true);
    KeyedMultipleInputTransformation<Integer> transformation =
            new KeyedMultipleInputTransformation<>(
                    "operator",
                    operatorFactory,
                    BasicTypeInfo.INT_TYPE_INFO,
                    1,
                    BasicTypeInfo.INT_TYPE_INFO);

    // Every input is keyed by the element itself.
    transformation.addInput(firstSource.getTransformation(), e -> e);
    transformation.addInput(secondSource.getTransformation(), e -> e);
    transformation.addInput(thirdSource.getTransformation(), e -> e);

    DataStreamSink<Integer> sink =
            new MultipleConnectedStreams(env)
                    .transform(transformation)
                    .addSink(new DiscardingSink<>());

    // Register the expectation only now, so that it applies to graph generation below.
    expectedException.expect(IllegalStateException.class);
    expectedException.expectMessage(
            "Batch state backend and sorting inputs are not supported in graphs with an InputSelectable operator.");
    getStreamGraphInBatchMode(sink);
}
Also used : KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 3 with KeyedMultipleInputTransformation

Use of org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation in the Apache Flink project.

From the class StreamGraphGeneratorBatchExecutionTest, method testMultiInputTransformation.

/**
 * Generates a batch-mode stream graph for a keyed multiple-input transformation with three keyed
 * inputs and verifies the settings applied to the operator node and to the graph: every input
 * requires sorting, the chaining strategy is HEAD, the state backend is a
 * {@code BatchExecutionStateBackend}, and a timer service provider is set.
 */
@Test
public void testMultiInputTransformation() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Integer> elements1 = env.fromElements(1, 2);
    DataStreamSource<Integer> elements2 = env.fromElements(1, 2);
    DataStreamSource<Integer> elements3 = env.fromElements(1, 2);
    // The factory is created with 'false' here, in contrast to the input-selectable variant of
    // this test, so the local is named accordingly.
    MultipleInputOperatorFactory nonSelectableOperator = new MultipleInputOperatorFactory(3, false);
    KeyedMultipleInputTransformation<Integer> multipleInputTransformation = new KeyedMultipleInputTransformation<>("operator", nonSelectableOperator, BasicTypeInfo.INT_TYPE_INFO, 1, BasicTypeInfo.INT_TYPE_INFO);
    multipleInputTransformation.addInput(elements1.getTransformation(), e -> e);
    multipleInputTransformation.addInput(elements2.getTransformation(), e -> e);
    multipleInputTransformation.addInput(elements3.getTransformation(), e -> e);
    DataStreamSink<Integer> sink = new MultipleConnectedStreams(env).transform(multipleInputTransformation).addSink(new DiscardingSink<>());
    StreamGraph graph = getStreamGraphInBatchMode(sink);
    StreamNode operatorNode = graph.getStreamNode(multipleInputTransformation.getId());
    // All three keyed inputs are checked, not only the first two.
    assertThat(operatorNode.getInputRequirements().get(0), equalTo(StreamConfig.InputRequirement.SORTED));
    assertThat(operatorNode.getInputRequirements().get(1), equalTo(StreamConfig.InputRequirement.SORTED));
    assertThat(operatorNode.getInputRequirements().get(2), equalTo(StreamConfig.InputRequirement.SORTED));
    assertThat(operatorNode.getOperatorFactory().getChainingStrategy(), equalTo(ChainingStrategy.HEAD));
    assertThat(graph.getStateBackend(), instanceOf(BatchExecutionStateBackend.class));
    // the provider is passed as a lambda therefore we cannot assert the class of the provider
    assertThat(graph.getTimerServiceProvider(), notNullValue());
}
Also used : BatchExecutionStateBackend(org.apache.flink.streaming.api.operators.sorted.state.BatchExecutionStateBackend) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 4 with KeyedMultipleInputTransformation

Use of org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation in the Apache Flink project.

From the class MultiInputTransformationTranslator, method translateInternal.

/**
 * Adds the given multiple-input transformation to the stream graph as a multiple-input operator,
 * sets its parallelism and max parallelism, registers the state key selectors and key serializer
 * when the transformation is keyed, and connects every upstream node to it with an edge.
 *
 * @param transformation the multiple-input transformation to translate; must not be null and
 *     must have at least one input
 * @param context the translation context providing the stream graph, the slot sharing group and
 *     the stream node ids of already translated inputs; must not be null
 * @return a singleton collection containing the id of {@code transformation}
 */
private Collection<Integer> translateInternal(final AbstractMultipleInputTransformation<OUT> transformation, final Context context) {
    checkNotNull(transformation);
    checkNotNull(context);

    final List<Transformation<?>> inputs = transformation.getInputs();
    checkArgument(!inputs.isEmpty(), "Empty inputs for MultipleInputTransformation. Did you forget to add inputs?");
    MultipleInputSelectionHandler.checkSupportedInputCount(inputs.size());

    final StreamGraph graph = context.getStreamGraph();
    final String sharingGroup = context.getSlotSharingGroup();
    final int id = transformation.getId();
    final ExecutionConfig executionConfig = graph.getExecutionConfig();

    graph.addMultipleInputOperator(
            id,
            sharingGroup,
            transformation.getCoLocationGroupKey(),
            transformation.getOperatorFactory(),
            transformation.getInputTypes(),
            transformation.getOutputType(),
            transformation.getName());

    // Use the environment-wide parallelism when the transformation does not define its own.
    final int parallelism;
    if (transformation.getParallelism() != ExecutionConfig.PARALLELISM_DEFAULT) {
        parallelism = transformation.getParallelism();
    } else {
        parallelism = executionConfig.getParallelism();
    }
    graph.setParallelism(id, parallelism);
    graph.setMaxParallelism(id, transformation.getMaxParallelism());

    if (transformation instanceof KeyedMultipleInputTransformation) {
        KeyedMultipleInputTransformation<OUT> keyed = (KeyedMultipleInputTransformation<OUT>) transformation;
        TypeSerializer<?> keySerializer = keyed.getStateKeyType().createSerializer(executionConfig);
        graph.setMultipleInputStateKey(id, keyed.getStateKeySelectors(), keySerializer);
    }

    // Edges carry the 1-based position of the input within the transformation.
    int inputNumber = 1;
    for (final Transformation<?> input : inputs) {
        for (Integer upstreamId : context.getStreamNodeIds(input)) {
            graph.addEdge(upstreamId, id, inputNumber);
        }
        inputNumber++;
    }

    return Collections.singleton(id);
}
Also used : MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AbstractMultipleInputTransformation(org.apache.flink.streaming.api.transformations.AbstractMultipleInputTransformation) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph)

Example 5 with KeyedMultipleInputTransformation

Use of org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation in the Apache Flink project.

From the class DataStreamBatchExecutionITCase, method batchMixedKeyedAndNonKeyedMultiInputOperator.

/**
 * Runs, in BATCH runtime mode, a keyed multiple-input transformation that combines one keyed
 * input with two broadcast inputs, then compares the collected output with the expected list of
 * strings.
 *
 * @throws Exception if executing the job or closing the result iterator fails
 */
@Test
public void batchMixedKeyedAndNonKeyedMultiInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);

    // Timestamps for every input are taken from the integer field (f1) of the tuple.
    DataStream<Tuple2<String, Integer>> firstBroadcast =
            env.fromElements(Tuple2.of("bc3", 3), Tuple2.of("bc2", 2))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1))
                    .broadcast();
    DataStream<Tuple2<String, Integer>> secondBroadcast =
            env.fromElements(Tuple2.of("bc1", 1))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1))
                    .broadcast();
    DataStream<Tuple2<String, Integer>> keyedInput =
            env.fromElements(
                            Tuple2.of("regular1", 1),
                            Tuple2.of("regular1", 2),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 4),
                            Tuple2.of("regular2", 3),
                            Tuple2.of("regular2", 5),
                            Tuple2.of("regular1", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1))
                    .keyBy(input -> input.f0);

    KeyedMultipleInputTransformation<String> transformation =
            new KeyedMultipleInputTransformation<>(
                    "operator",
                    mixedInputsOperatorFactory,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    1,
                    BasicTypeInfo.STRING_TYPE_INFO);
    // Only the regular input gets a key selector; both broadcast inputs are added with null.
    transformation.addInput(keyedInput.getTransformation(), input -> ((Tuple2<String, Integer>) input).f0);
    transformation.addInput(firstBroadcast.getTransformation(), null);
    transformation.addInput(secondBroadcast.getTransformation(), null);

    DataStream<String> result = new MultipleConnectedStreams(env).transform(transformation);

    List<String> expected =
            Arrays.asList(
                    "(regular1,1): [bc3, bc2, bc1]",
                    "(regular1,2): [bc3, bc2, bc1]",
                    "(regular1,3): [bc3, bc2, bc1]",
                    "(regular1,3): [bc3, bc2, bc1]",
                    "(regular1,4): [bc3, bc2, bc1]",
                    "(regular2,3): [bc3, bc2, bc1]",
                    "(regular2,5): [bc3, bc2, bc1]");

    try (CloseableIterator<String> output = result.executeAndCollect()) {
        List<String> actual = CollectionUtil.iteratorToList(output);
        assertThat(actual, equalTo(expected));
    }
}
Also used : KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) Tuple2(org.apache.flink.api.java.tuple.Tuple2) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

KeyedMultipleInputTransformation (org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation)6 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)5 Test (org.junit.Test)5 MultipleConnectedStreams (org.apache.flink.streaming.api.datastream.MultipleConnectedStreams)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 Arrays (java.util.Arrays)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 Random (java.util.Random)1 Set (java.util.Set)1 Consumer (java.util.function.Consumer)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1 RuntimeExecutionMode (org.apache.flink.api.common.RuntimeExecutionMode)1 Watermark (org.apache.flink.api.common.eventtime.Watermark)1 WatermarkGenerator (org.apache.flink.api.common.eventtime.WatermarkGenerator)1 WatermarkOutput (org.apache.flink.api.common.eventtime.WatermarkOutput)1 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)1