Example 21 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

From class StreamGraphCoLocationConstraintTest, method testSettingCoLocationConstraint:

@Test
public void testSettingCoLocationConstraint() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(7);
    // set up the test program
    DataStream<Long> source = env.generateSequence(1L, 10_000_000);
    source.getTransformation().setCoLocationGroupKey("group1");
    DataStream<Long> step1 = source.keyBy(v -> v).map(v -> v);
    step1.getTransformation().setCoLocationGroupKey("group2");
    DataStream<Long> step2 = step1.keyBy(v -> v).map(v -> v);
    step2.getTransformation().setCoLocationGroupKey("group1");
    DataStreamSink<Long> result = step2.keyBy(v -> v).addSink(new DiscardingSink<>());
    result.getTransformation().setCoLocationGroupKey("group2");
    // get the graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    assertEquals(4, jobGraph.getNumberOfVertices());
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    for (JobVertex vertex : vertices) {
        assertNotNull(vertex.getCoLocationGroup());
    }
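    // Topological order of the 4 vertices: source ("group1"), first map ("group2"),
    // second map ("group1"), sink ("group2") — hence the pairings asserted below.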
    assertEquals(vertices.get(0).getCoLocationGroup(), vertices.get(2).getCoLocationGroup());
    assertEquals(vertices.get(1).getCoLocationGroup(), vertices.get(3).getCoLocationGroup());
}
Also used: DataStream (org.apache.flink.streaming.api.datastream.DataStream), DataStreamSink (org.apache.flink.streaming.api.datastream.DataStreamSink), List (java.util.List), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), DiscardingSink (org.apache.flink.streaming.api.functions.sink.DiscardingSink), Assert.assertNotNull (org.junit.Assert.assertNotNull), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), Test (org.junit.Test), Assert.fail (org.junit.Assert.fail), Assert.assertEquals (org.junit.Assert.assertEquals), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 22 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

From class StreamGraphCoLocationConstraintTest, method testCoLocateDifferenSharingGroups:

@Test
public void testCoLocateDifferenSharingGroups() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(7);
    // set up the test program
    DataStream<Long> source = env.generateSequence(1L, 10_000_000);
    source.getTransformation().setSlotSharingGroup("ssg1");
    source.getTransformation().setCoLocationGroupKey("co1");
    DataStream<Long> step1 = source.keyBy(v -> v).map(v -> v);
    step1.getTransformation().setSlotSharingGroup("ssg2");
    step1.getTransformation().setCoLocationGroupKey("co2");
    DataStream<Long> step2 = step1.keyBy(v -> v).map(v -> v);
    step2.getTransformation().setSlotSharingGroup("ssg3");
    step2.getTransformation().setCoLocationGroupKey("co1");
    DataStreamSink<Long> result = step2.keyBy(v -> v).addSink(new DiscardingSink<>());
    result.getTransformation().setSlotSharingGroup("ssg4");
    result.getTransformation().setCoLocationGroupKey("co2");
    // get the graph
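    // Translation must fail: the co-location groups "co1" and "co2" each span two
    // different slot sharing groups ("ssg1"/"ssg3" and "ssg2"/"ssg4"), which is not allowed.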
    try {
        env.getStreamGraph().getJobGraph();
        fail("exception expected");
    } catch (IllegalStateException ignored) {
    }
}
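As the expected IllegalStateException shows, a co-location group may not span different slot sharing groups. For contrast, here is a minimal sketch of a variant that does translate, keeping all co-located operators in one sharing group (the names "shared" and "co1" are illustrative, not taken from the Flink tests):

@Test
public void coLocateWithinOneSharingGroup() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(7);
    DataStream<Long> source = env.generateSequence(1L, 10_000_000);
    // Both co-located transformations are placed in the same slot sharing group ...
    source.getTransformation().setSlotSharingGroup("shared");
    source.getTransformation().setCoLocationGroupKey("co1");
    DataStream<Long> step1 = source.keyBy(v -> v).map(v -> v);
    step1.getTransformation().setSlotSharingGroup("shared");
    step1.getTransformation().setCoLocationGroupKey("co1");
    step1.addSink(new DiscardingSink<>());
    // ... so translating the StreamGraph into a JobGraph succeeds.
    env.getStreamGraph().getJobGraph();
}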
Also used: DataStream (org.apache.flink.streaming.api.datastream.DataStream), DataStreamSink (org.apache.flink.streaming.api.datastream.DataStreamSink), List (java.util.List), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), DiscardingSink (org.apache.flink.streaming.api.functions.sink.DiscardingSink), Assert.assertNotNull (org.junit.Assert.assertNotNull), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), Test (org.junit.Test), Assert.fail (org.junit.Assert.fail), Assert.assertEquals (org.junit.Assert.assertEquals), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 23 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

From class StreamExecLegacyTableSourceScan, method createConversionTransformationIfNeeded:

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> createConversionTransformationIfNeeded(
        StreamExecutionEnvironment streamExecEnv,
        ExecNodeConfig config,
        Transformation<?> sourceTransform,
        @Nullable RexNode rowtimeExpression) {
    final RowType outputType = (RowType) getOutputType();
    final Transformation<RowData> transformation;
    final int[] fieldIndexes = computeIndexMapping(true);
    if (needInternalConversion(fieldIndexes)) {
        final String extractElement, resetElement;
        if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
            String elementTerm = OperatorCodeGenerator.ELEMENT();
            extractElement = String.format("ctx.%s = %s;", elementTerm, elementTerm);
            resetElement = String.format("ctx.%s = null;", elementTerm);
        } else {
            extractElement = "";
            resetElement = "";
        }
        final CodeGeneratorContext ctx = new CodeGeneratorContext(config.getTableConfig())
                .setOperatorBaseClass(TableStreamOperator.class);
        // The produced type may not carry the precision the user defined in the DDL, because it
        // may have been converted from a legacy type. Fix the precision using the logical schema
        // from the DDL; code generation requires the correct precision of input fields.
        final DataType fixedProducedDataType =
                TableSourceUtil.fixPrecisionForProducedDataType(tableSource, outputType);
        transformation = ScanUtil.convertToInternalRow(
                ctx,
                (Transformation<Object>) sourceTransform,
                fieldIndexes,
                fixedProducedDataType,
                outputType,
                qualifiedName,
                (detailName, simplifyName) -> createFormattedTransformationName(detailName, simplifyName, config),
                (description) -> createFormattedTransformationDescription(description, config),
                JavaScalaConversionUtil.toScala(Optional.ofNullable(rowtimeExpression)),
                extractElement,
                resetElement);
    } else {
        transformation = (Transformation<RowData>) sourceTransform;
    }
    final RelDataType relDataType = FlinkTypeFactory.INSTANCE().buildRelNodeRowType(outputType);
    final DataStream<RowData> ingestedTable = new DataStream<>(streamExecEnv, transformation);
    final Optional<RowtimeAttributeDescriptor> rowtimeDesc = JavaScalaConversionUtil.toJava(
            TableSourceUtil.getRowtimeAttributeDescriptor(tableSource, relDataType));
    // No need to generate watermarks if no rowtime attribute is specified.
    final DataStream<RowData> withWatermarks = rowtimeDesc.map(desc -> {
        int rowtimeFieldIdx = relDataType.getFieldNames().indexOf(desc.getAttributeName());
        WatermarkStrategy strategy = desc.getWatermarkStrategy();
        if (strategy instanceof PeriodicWatermarkAssigner) {
            PeriodicWatermarkAssignerWrapper watermarkGenerator = new PeriodicWatermarkAssignerWrapper(
                    (PeriodicWatermarkAssigner) strategy, rowtimeFieldIdx);
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else if (strategy instanceof PunctuatedWatermarkAssigner) {
            PunctuatedWatermarkAssignerWrapper watermarkGenerator = new PunctuatedWatermarkAssignerWrapper(
                    (PunctuatedWatermarkAssigner) strategy, rowtimeFieldIdx, tableSource.getProducedDataType());
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else {
            // Watermarks are already provided by the underlying DataStream.
            return ingestedTable;
        }
    }).orElse(ingestedTable);
    return withWatermarks.getTransformation();
}
Also used: TableStreamOperator (org.apache.flink.table.runtime.operators.TableStreamOperator), DataType (org.apache.flink.table.types.DataType), TableSourceUtil (org.apache.flink.table.planner.sources.TableSourceUtil), RowtimeAttributeDescriptor (org.apache.flink.table.sources.RowtimeAttributeDescriptor), TableSource (org.apache.flink.table.sources.TableSource), PeriodicWatermarkAssigner (org.apache.flink.table.sources.wmstrategies.PeriodicWatermarkAssigner), FlinkTypeFactory (org.apache.flink.table.planner.calcite.FlinkTypeFactory), RowType (org.apache.flink.table.types.logical.RowType), ExecNode (org.apache.flink.table.planner.plan.nodes.exec.ExecNode), ScanUtil (org.apache.flink.table.planner.plan.utils.ScanUtil), RexNode (org.apache.calcite.rex.RexNode), InputFormat (org.apache.flink.api.common.io.InputFormat), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext), Nullable (javax.annotation.Nullable), RelDataType (org.apache.calcite.rel.type.RelDataType), ExecNodeContext (org.apache.flink.table.planner.plan.nodes.exec.ExecNodeContext), RowData (org.apache.flink.table.data.RowData), InputSplit (org.apache.flink.core.io.InputSplit), ExecNodeConfig (org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig), WatermarkStrategy (org.apache.flink.table.sources.wmstrategies.WatermarkStrategy), PunctuatedWatermarkAssigner (org.apache.flink.table.sources.wmstrategies.PunctuatedWatermarkAssigner), StreamTableSource (org.apache.flink.table.sources.StreamTableSource), DataStream (org.apache.flink.streaming.api.datastream.DataStream), OperatorCodeGenerator (org.apache.flink.table.planner.codegen.OperatorCodeGenerator), CommonExecLegacyTableSourceScan (org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecLegacyTableSourceScan), List (java.util.List), PunctuatedWatermarkAssignerWrapper (org.apache.flink.table.runtime.operators.wmassigners.PunctuatedWatermarkAssignerWrapper), JavaScalaConversionUtil (org.apache.flink.table.planner.utils.JavaScalaConversionUtil), Optional (java.util.Optional), Transformation (org.apache.flink.api.dag.Transformation), PeriodicWatermarkAssignerWrapper (org.apache.flink.table.runtime.operators.wmassigners.PeriodicWatermarkAssignerWrapper), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
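Example 23 relies on a pattern worth isolating: an existing Transformation is wrapped in a new DataStream to gain access to the fluent API, and getTransformation() hands the result back once the stream operations are applied. Below is a minimal, self-contained sketch of that round trip; it uses the modern org.apache.flink.api.common.eventtime.WatermarkStrategy instead of the legacy wmstrategies classes above, and all variable names are illustrative:

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransformationRoundTrip {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Any previously built transformation can serve as the input.
        Transformation<Long> input = env.generateSequence(1L, 100L).getTransformation();
        // Wrap the transformation in a DataStream to use the fluent API ...
        DataStream<Long> wrapped = new DataStream<>(env, input);
        DataStream<Long> withWatermarks = wrapped.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Long>forMonotonousTimestamps()
                        .withTimestampAssigner((value, ts) -> value));
        // ... and unwrap it again wherever a Transformation is expected.
        Transformation<Long> output = withWatermarks.getTransformation();
    }
}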

Example 24 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

From class DataStreamBatchExecutionITCase, method batchKeyedNonKeyedTwoInputOperator:

/**
 * Verifies that all regular input is processed before keyed input.
 *
 * <p>Here, the first input is keyed while the second input is not keyed.
 */
@Test
public void batchKeyedNonKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> keyedInput = env
            .fromElements(Tuple2.of("regular2", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 2), Tuple2.of("regular2", 1))
            .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                    .withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env
            .fromElements(Tuple2.of("regular4", 4), Tuple2.of("regular3", 3), Tuple2.of("regular3", 2), Tuple2.of("regular4", 1))
            .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                    .withTimestampAssigner((in, ts) -> in.f1));
    DataStream<String> result = keyedInput.keyBy(in -> in.f0)
            .connect(regularInput)
            .transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());
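    // BATCH execution consumes the non-keyed (second) input completely before the
    // keyed (first) input; the keyed records then arrive grouped by key, as asserted below.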
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList(
                "(regular4,4)", "(regular3,3)", "(regular3,2)", "(regular4,1)",
                "(regular1,2)", "(regular1,3)", "(regular2,1)", "(regular2,4)")));
    }
}
Also used: BroadcastState (org.apache.flink.api.common.state.BroadcastState), Arrays (java.util.Arrays), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), BroadcastStream (org.apache.flink.streaming.api.datastream.BroadcastStream), MultipleConnectedStreams (org.apache.flink.streaming.api.datastream.MultipleConnectedStreams), CollectionUtil.iteratorToList (org.apache.flink.util.CollectionUtil.iteratorToList), CoreMatchers.equalTo (org.hamcrest.CoreMatchers.equalTo), AbstractStreamOperatorV2 (org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2), StringSerializer (org.apache.flink.api.common.typeutils.base.StringSerializer), RestartStrategies (org.apache.flink.api.common.restartstrategy.RestartStrategies), MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor), DataStreamSource (org.apache.flink.streaming.api.datastream.DataStreamSource), KeyedBroadcastProcessFunction (org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction), MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration), AbstractInput (org.apache.flink.streaming.api.operators.AbstractInput), BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo), Assert.assertThat (org.junit.Assert.assertThat), ListState (org.apache.flink.api.common.state.ListState), ReadOnlyBroadcastState (org.apache.flink.api.common.state.ReadOnlyBroadcastState), AbstractStreamOperatorFactory (org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory), StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction), Collector (org.apache.flink.util.Collector), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), ClassRule (org.junit.ClassRule), MiniClusterWithClientResource (org.apache.flink.test.util.MiniClusterWithClientResource), TwoInputStreamOperator (org.apache.flink.streaming.api.operators.TwoInputStreamOperator), TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation), ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor), SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator), KeyedMultipleInputTransformation (org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation), WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy), KeyedStream (org.apache.flink.streaming.api.datastream.KeyedStream), StreamOperatorParameters (org.apache.flink.streaming.api.operators.StreamOperatorParameters), Test (org.junit.Test), CollectionUtil (org.apache.flink.util.CollectionUtil), AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator), DataStream (org.apache.flink.streaming.api.datastream.DataStream), StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator), MultipleInputStreamOperator (org.apache.flink.streaming.api.operators.MultipleInputStreamOperator), CloseableIterator (org.apache.flink.util.CloseableIterator), List (java.util.List), Matchers.containsInAnyOrder (org.hamcrest.Matchers.containsInAnyOrder), BroadcastProcessFunction (org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction), RuntimeExecutionMode (org.apache.flink.api.common.RuntimeExecutionMode), Time (org.apache.flink.api.common.time.Time), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Input (org.apache.flink.streaming.api.operators.Input)

Example 25 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

From class DataStreamBatchExecutionITCase, method batchMixedKeyedAndNonKeyedTwoInputOperator:

@Test
public void batchMixedKeyedAndNonKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env
            .fromElements(Tuple2.of("bc3", 3), Tuple2.of("bc2", 2), Tuple2.of("bc1", 1))
            .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                    .withTimestampAssigner((in, ts) -> in.f1))
            .broadcast();
    DataStream<Tuple2<String, Integer>> regularInput = env
            .fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4),
                    Tuple2.of("regular2", 3), Tuple2.of("regular2", 5), Tuple2.of("regular1", 3))
            .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                    .withTimestampAssigner((in, ts) -> in.f1))
            .keyBy(input -> input.f0);
    TwoInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>, String> twoInputTransformation =
            new TwoInputTransformation<>(regularInput.getTransformation(), bcInput.getTransformation(), "operator",
                    new TestMixedTwoInputOperator(), BasicTypeInfo.STRING_TYPE_INFO, 1);
    twoInputTransformation.setStateKeyType(BasicTypeInfo.STRING_TYPE_INFO);
    twoInputTransformation.setStateKeySelectors(input -> input.f0, null);
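    // Only the first (keyed) input is given a state key selector; passing null for the
    // second selector leaves the broadcast side non-keyed.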
    DataStream<String> result = new DataStream<>(env, twoInputTransformation);
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList(
                "(regular1,1): [bc3, bc2, bc1]", "(regular1,2): [bc3, bc2, bc1]",
                "(regular1,3): [bc3, bc2, bc1]", "(regular1,3): [bc3, bc2, bc1]",
                "(regular1,4): [bc3, bc2, bc1]", "(regular2,3): [bc3, bc2, bc1]",
                "(regular2,5): [bc3, bc2, bc1]")));
    }
}
Also used: DataStream (org.apache.flink.streaming.api.datastream.DataStream), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), Test (org.junit.Test)

Aggregations

DataStream (org.apache.flink.streaming.api.datastream.DataStream): 87
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 78
Test (org.junit.Test): 70
List (java.util.List): 62
Collector (org.apache.flink.util.Collector): 60
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 50
SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator): 48
Arrays (java.util.Arrays): 46
ArrayList (java.util.ArrayList): 40
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 40
Assert.assertEquals (org.junit.Assert.assertEquals): 38
WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy): 36
Configuration (org.apache.flink.configuration.Configuration): 36
Assert.assertTrue (org.junit.Assert.assertTrue): 33
BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo): 32
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 32
Types (org.apache.flink.api.common.typeinfo.Types): 31
Assert (org.junit.Assert): 31
ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction): 29
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 29