Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class StreamGraphCoLocationConstraintTest, method testSettingCoLocationConstraint.
@Test
public void testSettingCoLocationConstraint() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(7);
    // set up the test program
    DataStream<Long> source = env.generateSequence(1L, 10_000_000);
    source.getTransformation().setCoLocationGroupKey("group1");
    DataStream<Long> step1 = source.keyBy(v -> v).map(v -> v);
    step1.getTransformation().setCoLocationGroupKey("group2");
    DataStream<Long> step2 = step1.keyBy(v -> v).map(v -> v);
    step2.getTransformation().setCoLocationGroupKey("group1");
    DataStreamSink<Long> result = step2.keyBy(v -> v).addSink(new DiscardingSink<>());
    result.getTransformation().setCoLocationGroupKey("group2");
    // get the graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    assertEquals(4, jobGraph.getNumberOfVertices());
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    for (JobVertex vertex : vertices) {
        assertNotNull(vertex.getCoLocationGroup());
    }
    assertEquals(vertices.get(0).getCoLocationGroup(), vertices.get(2).getCoLocationGroup());
    assertEquals(vertices.get(1).getCoLocationGroup(), vertices.get(3).getCoLocationGroup());
}
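Because each keyBy boundary prevents operator chaining, the program above translates into exactly four JobGraph vertices (source, two maps, sink); vertices 0 and 2 carry the key "group1" and vertices 1 and 3 carry "group2", which is what the final two assertions check. Note that slot sharing groups have a public setter on the operator itself, while co-location group keys are only reachable through the underlying Transformation. A minimal, illustrative sketch (the names and the mini-job are not part of the test above):

DataStream<Long> src = env.generateSequence(1L, 1000L);
src.getTransformation().setCoLocationGroupKey("group1");
// slotSharingGroup(...) is public DataStream API; setCoLocationGroupKey(...) exists only on the Transformation.
DataStream<Long> mapped = src.keyBy(v -> v).map(v -> v).slotSharingGroup("ssgA");
mapped.getTransformation().setCoLocationGroupKey("group1");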
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class StreamGraphCoLocationConstraintTest, method testCoLocateDifferenSharingGroups.
@Test
public void testCoLocateDifferenSharingGroups() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(7);
    // set up the test program
    DataStream<Long> source = env.generateSequence(1L, 10_000_000);
    source.getTransformation().setSlotSharingGroup("ssg1");
    source.getTransformation().setCoLocationGroupKey("co1");
    DataStream<Long> step1 = source.keyBy(v -> v).map(v -> v);
    step1.getTransformation().setSlotSharingGroup("ssg2");
    step1.getTransformation().setCoLocationGroupKey("co2");
    DataStream<Long> step2 = step1.keyBy(v -> v).map(v -> v);
    step2.getTransformation().setSlotSharingGroup("ssg3");
    step2.getTransformation().setCoLocationGroupKey("co1");
    DataStreamSink<Long> result = step2.keyBy(v -> v).addSink(new DiscardingSink<>());
    result.getTransformation().setSlotSharingGroup("ssg4");
    result.getTransformation().setCoLocationGroupKey("co2");
    // get the graph
    try {
        env.getStreamGraph().getJobGraph();
        fail("exception expected");
    } catch (IllegalStateException ignored) {
    }
}
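The translation to a JobGraph rejects co-locating operators that sit in different slot sharing groups ("co1" spans "ssg1"/"ssg3", "co2" spans "ssg2"/"ssg4"), so building the graph here is expected to fail with an IllegalStateException. The same expectation could be written more compactly with assertThrows, assuming JUnit 4.13+ (org.junit.Assert.assertThrows) is available; a sketch:

assertThrows(IllegalStateException.class, () -> env.getStreamGraph().getJobGraph());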
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class StreamExecLegacyTableSourceScan, method createConversionTransformationIfNeeded.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> createConversionTransformationIfNeeded(
        StreamExecutionEnvironment streamExecEnv,
        ExecNodeConfig config,
        Transformation<?> sourceTransform,
        @Nullable RexNode rowtimeExpression) {
    final RowType outputType = (RowType) getOutputType();
    final Transformation<RowData> transformation;
    final int[] fieldIndexes = computeIndexMapping(true);
    if (needInternalConversion(fieldIndexes)) {
        final String extractElement, resetElement;
        if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
            String elementTerm = OperatorCodeGenerator.ELEMENT();
            extractElement = String.format("ctx.%s = %s;", elementTerm, elementTerm);
            resetElement = String.format("ctx.%s = null;", elementTerm);
        } else {
            extractElement = "";
            resetElement = "";
        }
        final CodeGeneratorContext ctx =
                new CodeGeneratorContext(config.getTableConfig())
                        .setOperatorBaseClass(TableStreamOperator.class);
        // The produced type may not carry the correct precision the user defined in the DDL,
        // because it may have been converted from a legacy type. Fix the precision using the
        // logical schema from the DDL; code generation requires the correct precision of the
        // input fields.
        final DataType fixedProducedDataType =
                TableSourceUtil.fixPrecisionForProducedDataType(tableSource, outputType);
        transformation = ScanUtil.convertToInternalRow(
                ctx,
                (Transformation<Object>) sourceTransform,
                fieldIndexes,
                fixedProducedDataType,
                outputType,
                qualifiedName,
                (detailName, simplifyName) ->
                        createFormattedTransformationName(detailName, simplifyName, config),
                (description) -> createFormattedTransformationDescription(description, config),
                JavaScalaConversionUtil.toScala(Optional.ofNullable(rowtimeExpression)),
                extractElement,
                resetElement);
    } else {
        transformation = (Transformation<RowData>) sourceTransform;
    }
    final RelDataType relDataType = FlinkTypeFactory.INSTANCE().buildRelNodeRowType(outputType);
    final DataStream<RowData> ingestedTable = new DataStream<>(streamExecEnv, transformation);
    final Optional<RowtimeAttributeDescriptor> rowtimeDesc =
            JavaScalaConversionUtil.toJava(
                    TableSourceUtil.getRowtimeAttributeDescriptor(tableSource, relDataType));
    final DataStream<RowData> withWatermarks = rowtimeDesc.map(desc -> {
        int rowtimeFieldIdx = relDataType.getFieldNames().indexOf(desc.getAttributeName());
        WatermarkStrategy strategy = desc.getWatermarkStrategy();
        if (strategy instanceof PeriodicWatermarkAssigner) {
            PeriodicWatermarkAssignerWrapper watermarkGenerator =
                    new PeriodicWatermarkAssignerWrapper(
                            (PeriodicWatermarkAssigner) strategy, rowtimeFieldIdx);
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else if (strategy instanceof PunctuatedWatermarkAssigner) {
            PunctuatedWatermarkAssignerWrapper watermarkGenerator =
                    new PunctuatedWatermarkAssignerWrapper(
                            (PunctuatedWatermarkAssigner) strategy,
                            rowtimeFieldIdx,
                            tableSource.getProducedDataType());
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else {
            // Watermarks are already provided by the underlying DataStream.
            return ingestedTable;
        }
    }).orElse(ingestedTable); // No need to generate watermarks if no rowtime attribute is specified.
    return withWatermarks.getTransformation();
}
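The two wrapper classes adapt the legacy table-layer watermark assigners (PeriodicWatermarkAssigner / PunctuatedWatermarkAssigner) to DataStream's assignTimestampsAndWatermarks; the WatermarkStrategy read from the rowtime descriptor here is the legacy table-source strategy type, not the DataStream-API WatermarkStrategy used in the batch examples further down. For comparison only, a hedged sketch of the non-legacy DataStream form, assuming the rowtime value is an ascending epoch-millis long stored at position rowtimeFieldIdx:

DataStream<RowData> withEventTime = ingestedTable.assignTimestampsAndWatermarks(
        org.apache.flink.api.common.eventtime.WatermarkStrategy
                .<RowData>forMonotonousTimestamps()
                .withTimestampAssigner((row, ts) -> row.getLong(rowtimeFieldIdx)));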
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamBatchExecutionITCase, method batchKeyedNonKeyedTwoInputOperator.
/**
* Verifies that all regular input is processed before keyed input.
*
* <p>Here, the first input is keyed while the second input is not keyed.
*/
@Test
public void batchKeyedNonKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> keyedInput =
            env.fromElements(Tuple2.of("regular2", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 2), Tuple2.of("regular2", 1))
                    .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput =
            env.fromElements(Tuple2.of("regular4", 4), Tuple2.of("regular3", 3), Tuple2.of("regular3", 2), Tuple2.of("regular4", 1))
                    .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1));
    DataStream<String> result = keyedInput.keyBy(in -> in.f0)
            .connect(regularInput)
            .transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList(
                "(regular4,4)", "(regular3,3)", "(regular3,2)", "(regular4,1)",
                "(regular1,2)", "(regular1,3)", "(regular2,1)", "(regular2,4)")));
    }
}
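TwoInputIdentityOperator is defined elsewhere in DataStreamBatchExecutionITCase and is not shown on this page. A plausible minimal sketch, assuming it simply forwards both inputs as their String form (which is what the expected output suggests); it needs org.apache.flink.streaming.api.operators.AbstractStreamOperator, org.apache.flink.streaming.api.operators.TwoInputStreamOperator and org.apache.flink.streaming.runtime.streamrecord.StreamRecord:

// Hypothetical sketch; the operator actually used by the test is not shown here.
public static class TwoInputIdentityOperator extends AbstractStreamOperator<String>
        implements TwoInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    @Override
    public void processElement1(StreamRecord<Tuple2<String, Integer>> element) {
        // keyed input: forward as String
        output.collect(new StreamRecord<>(element.getValue().toString()));
    }

    @Override
    public void processElement2(StreamRecord<Tuple2<String, Integer>> element) {
        // non-keyed input: forward as String
        output.collect(new StreamRecord<>(element.getValue().toString()));
    }
}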
Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.
The class DataStreamBatchExecutionITCase, method batchMixedKeyedAndNonKeyedTwoInputOperator.
@Test
public void batchMixedKeyedAndNonKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput =
            env.fromElements(Tuple2.of("bc3", 3), Tuple2.of("bc2", 2), Tuple2.of("bc1", 1))
                    .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1))
                    .broadcast();
    DataStream<Tuple2<String, Integer>> regularInput =
            env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 4), Tuple2.of("regular2", 3), Tuple2.of("regular2", 5), Tuple2.of("regular1", 3))
                    .assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1))
                    .keyBy(input -> input.f0);
    TwoInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>, String> twoInputTransformation =
            new TwoInputTransformation<>(regularInput.getTransformation(), bcInput.getTransformation(),
                    "operator", new TestMixedTwoInputOperator(), BasicTypeInfo.STRING_TYPE_INFO, 1);
    twoInputTransformation.setStateKeyType(BasicTypeInfo.STRING_TYPE_INFO);
    twoInputTransformation.setStateKeySelectors(input -> input.f0, null);
    DataStream<String> result = new DataStream<>(env, twoInputTransformation);
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList(
                "(regular1,1): [bc3, bc2, bc1]", "(regular1,2): [bc3, bc2, bc1]", "(regular1,3): [bc3, bc2, bc1]",
                "(regular1,3): [bc3, bc2, bc1]", "(regular1,4): [bc3, bc2, bc1]", "(regular2,3): [bc3, bc2, bc1]",
                "(regular2,5): [bc3, bc2, bc1]")));
    }
}
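TestMixedTwoInputOperator is likewise not shown on this page. Judging from the expected output, it buffers the broadcast side and emits each keyed element together with the buffered broadcast keys; a hypothetical sketch under that assumption (BATCH mode processes the non-keyed/broadcast input completely before the keyed input, so the buffer is complete by the time keyed elements arrive):

// Hypothetical sketch; the operator actually used by the test is not shown here.
public static class TestMixedTwoInputOperator extends AbstractStreamOperator<String>
        implements TwoInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    private final List<String> broadcastKeys = new ArrayList<>();

    @Override
    public void processElement1(StreamRecord<Tuple2<String, Integer>> element) {
        // keyed input: emit the element together with everything seen on the broadcast side
        output.collect(new StreamRecord<>(element.getValue() + ": " + broadcastKeys));
    }

    @Override
    public void processElement2(StreamRecord<Tuple2<String, Integer>> element) {
        // broadcast input: remember the key
        broadcastKeys.add(element.getValue().f0);
    }
}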