Example usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
From the class StreamExecDataStreamScan, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // The underlying transformation of the wrapped DataStream is the input to this scan.
    final Transformation<?> inputTransform = dataStream.getTransformation();
    // Optional expression that derives the rowtime attribute from the input record.
    final Optional<RexNode> rowtimeExpression = getRowtimeExpression(planner.getRelBuilder());

    // Fast path: no rowtime extraction needed and the physical representation already
    // matches — reuse the input transformation directly.
    if (!rowtimeExpression.isPresent() && !ScanUtil.needsConversion(sourceType)) {
        return (Transformation<RowData>) inputTransform;
    }

    // Generated code snippets that stash/clear the current element on the operator
    // context, only required when a time-attribute field is present.
    final String extractCode;
    final String resetCode;
    if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
        final String element = OperatorCodeGenerator.ELEMENT();
        extractCode = String.format("ctx.%s = %s;", element, element);
        resetCode = String.format("ctx.%s = null;", element);
    } else {
        extractCode = "";
        resetCode = "";
    }

    final CodeGeneratorContext codegenCtx =
            new CodeGeneratorContext(config.getTableConfig())
                    .setOperatorBaseClass(TableStreamOperator.class);
    return ScanUtil.convertToInternalRow(
            codegenCtx,
            (Transformation<Object>) inputTransform,
            fieldIndexes,
            sourceType,
            (RowType) getOutputType(),
            qualifiedName,
            (detail, simple) -> createFormattedTransformationName(detail, simple, config),
            (description) -> createFormattedTransformationDescription(description, config),
            JavaScalaConversionUtil.toScala(rowtimeExpression),
            extractCode,
            resetCode);
}
Example usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
From the class KafkaDynamicTableFactoryTest, method assertKafkaSource.
/** Asserts that the given scan provider is backed by a {@link KafkaSource} and returns it. */
private KafkaSource<?> assertKafkaSource(ScanTableSource.ScanRuntimeProvider provider) {
    // The runtime provider must expose its implementation as a DataStream scan.
    assertThat(provider).isInstanceOf(DataStreamScanProvider.class);
    final DataStreamScanProvider scanProvider = (DataStreamScanProvider) provider;

    // Materialize the data stream against a local environment and inspect the
    // transformation it was built from.
    final Transformation<RowData> transformation =
            scanProvider
                    .produceDataStream(
                            n -> Optional.empty(),
                            StreamExecutionEnvironment.createLocalEnvironment())
                    .getTransformation();
    assertThat(transformation).isInstanceOf(SourceTransformation.class);

    final SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState> sourceTransformation =
            (SourceTransformation<RowData, KafkaPartitionSplit, KafkaSourceEnumState>) transformation;
    assertThat(sourceTransformation.getSource()).isInstanceOf(KafkaSource.class);
    return (KafkaSource<?>) sourceTransformation.getSource();
}
Example usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
From the class PythonOperatorChainingOptimizerTest, method testChainingNonKeyedOperators.
@Test
public void testChainingNonKeyedOperators() {
    // Two non-keyed Python process operators wired back-to-back: source -> f1 -> f2.
    PythonProcessOperator<?, ?> firstOperator =
            createProcessOperator("f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> secondOperator =
            createProcessOperator("f2", Types.STRING(), Types.INT());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> firstTransform =
            new OneInputTransformation(source, "Process1", firstOperator, firstOperator.getProducedType(), 2);
    Transformation<?> secondTransform =
            new OneInputTransformation(firstTransform, "process2", secondOperator, secondOperator.getProducedType(), 2);

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source);
    transformations.add(firstTransform);
    transformations.add(secondTransform);

    List<Transformation<?>> optimized = PythonOperatorChainingOptimizer.optimize(transformations);

    // Both process operators should be fused into a single chained transformation.
    assertEquals(2, optimized.size());
    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) optimized.get(1);
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(secondOperator.getProducedType(), chained.getOutputType());

    OneInputStreamOperator<?, ?> chainedOperator = chained.getOperator();
    assertTrue(chainedOperator instanceof PythonProcessOperator);
    // Expected chained function order: "f2", "f1".
    validateChainedPythonFunctions(
            ((PythonProcessOperator<?, ?>) chainedOperator).getPythonFunctionInfo(), "f2", "f1");
}
Example usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
From the class PythonOperatorChainingOptimizerTest, method testChainedTransformationPropertiesCorrectlySet.
@Test
public void testChainedTransformationPropertiesCorrectlySet() {
    PythonKeyedProcessOperator<?> keyedOperator =
            createKeyedProcessOperator("f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOperator =
            createProcessOperator("f2", Types.STRING(), Types.STRING());

    Transformation<?> source = mock(SourceTransformation.class);

    // Configure a rich set of properties on the keyed transformation so we can verify
    // they survive chaining.
    OneInputTransformation<?, ?> keyedTransform =
            new OneInputTransformation(source, "keyedProcess", keyedOperator, keyedOperator.getProducedType(), 2);
    keyedTransform.setUid("uid");
    keyedTransform.setSlotSharingGroup("group");
    keyedTransform.setCoLocationGroupKey("col");
    keyedTransform.setMaxParallelism(64);
    keyedTransform.declareManagedMemoryUseCaseAtOperatorScope(ManagedMemoryUseCase.OPERATOR, 5);
    keyedTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    keyedTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.STATE_BACKEND);
    keyedTransform.setBufferTimeout(1000L);
    keyedTransform.setChainingStrategy(ChainingStrategy.HEAD);

    // The downstream transformation carries overlapping and differing properties.
    Transformation<?> processTransform =
            new OneInputTransformation(keyedTransform, "process", processOperator, processOperator.getProducedType(), 2);
    processTransform.setSlotSharingGroup("group");
    processTransform.declareManagedMemoryUseCaseAtOperatorScope(ManagedMemoryUseCase.OPERATOR, 10);
    processTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    processTransform.setMaxParallelism(64);
    processTransform.setBufferTimeout(500L);

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source);
    transformations.add(keyedTransform);
    transformations.add(processTransform);

    List<Transformation<?>> optimized = PythonOperatorChainingOptimizer.optimize(transformations);
    assertEquals(2, optimized.size());

    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) optimized.get(1);
    assertEquals(2, chained.getParallelism());
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(processOperator.getProducedType(), chained.getOutputType());
    assertEquals(keyedTransform.getUid(), chained.getUid());
    assertEquals("group", chained.getSlotSharingGroup().get().getName());
    assertEquals("col", chained.getCoLocationGroupKey());
    assertEquals(64, chained.getMaxParallelism());
    // The smaller buffer timeout (500 ms) is expected on the chained result.
    assertEquals(500L, chained.getBufferTimeout());
    // Operator-scope managed memory weights are summed: 5 + 10.
    assertEquals(15, (int) chained.getManagedMemoryOperatorScopeUseCaseWeights().getOrDefault(ManagedMemoryUseCase.OPERATOR, 0));
    assertEquals(ChainingStrategy.HEAD, chained.getOperatorFactory().getChainingStrategy());
    // Slot-scope use cases from both transformations are unioned.
    assertTrue(chained.getManagedMemorySlotScopeUseCases().contains(ManagedMemoryUseCase.PYTHON));
    assertTrue(chained.getManagedMemorySlotScopeUseCases().contains(ManagedMemoryUseCase.STATE_BACKEND));

    OneInputStreamOperator<?, ?> chainedOperator = chained.getOperator();
    assertTrue(chainedOperator instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedProcessOperator<?>) chainedOperator).getPythonFunctionInfo(), "f2", "f1");
}
Example usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
From the class PythonOperatorChainingOptimizerTest, method testChainingTwoInputOperators.
@Test
public void testChainingTwoInputOperators() {
    // Pipeline: (source1, source2) -> f1 (keyed co-process) -> f2 -> f3 -> f4 (keyed) -> f5.
    PythonKeyedCoProcessOperator<?> coProcessOperator =
            createCoKeyedProcessOperator(
                    "f1",
                    new RowTypeInfo(Types.INT(), Types.STRING()),
                    new RowTypeInfo(Types.INT(), Types.INT()),
                    Types.STRING());
    PythonProcessOperator<?, ?> processOperatorA =
            createProcessOperator("f2", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOperatorB =
            createProcessOperator("f3", new RowTypeInfo(Types.INT(), Types.INT()), Types.LONG());
    PythonKeyedProcessOperator<?> keyedOperator =
            createKeyedProcessOperator("f4", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOperatorC =
            createProcessOperator("f5", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());

    Transformation<?> source1 = mock(SourceTransformation.class);
    Transformation<?> source2 = mock(SourceTransformation.class);
    TwoInputTransformation<?, ?, ?> coProcessTransform =
            new TwoInputTransformation(source1, source2, "keyedCoProcess", coProcessOperator, coProcessOperator.getProducedType(), 2);
    Transformation<?> processTransformA =
            new OneInputTransformation(coProcessTransform, "process", processOperatorA, processOperatorA.getProducedType(), 2);
    Transformation<?> processTransformB =
            new OneInputTransformation(processTransformA, "process", processOperatorB, processOperatorB.getProducedType(), 2);
    OneInputTransformation<?, ?> keyedTransform =
            new OneInputTransformation(processTransformB, "keyedProcess", keyedOperator, keyedOperator.getProducedType(), 2);
    Transformation<?> processTransformC =
            new OneInputTransformation(keyedTransform, "process", processOperatorC, processOperatorC.getProducedType(), 2);

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source1);
    transformations.add(source2);
    transformations.add(coProcessTransform);
    transformations.add(processTransformA);
    transformations.add(processTransformB);
    transformations.add(keyedTransform);
    transformations.add(processTransformC);

    List<Transformation<?>> optimized = PythonOperatorChainingOptimizer.optimize(transformations);
    // Two sources plus two chained groups: (f1,f2,f3) and (f4,f5).
    assertEquals(4, optimized.size());

    TwoInputTransformation<?, ?, ?> chainedGroup1 = (TwoInputTransformation<?, ?, ?>) optimized.get(2);
    assertEquals(source1.getOutputType(), chainedGroup1.getInputType1());
    assertEquals(source2.getOutputType(), chainedGroup1.getInputType2());
    assertEquals(processOperatorB.getProducedType(), chainedGroup1.getOutputType());

    OneInputTransformation<?, ?> chainedGroup2 = (OneInputTransformation<?, ?>) optimized.get(3);
    assertEquals(processOperatorB.getProducedType(), chainedGroup2.getInputType());
    assertEquals(processOperatorC.getProducedType(), chainedGroup2.getOutputType());

    TwoInputStreamOperator<?, ?, ?> chainedOperator1 = chainedGroup1.getOperator();
    assertTrue(chainedOperator1 instanceof PythonKeyedCoProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedCoProcessOperator<?>) chainedOperator1).getPythonFunctionInfo(), "f3", "f2", "f1");

    OneInputStreamOperator<?, ?> chainedOperator2 = chainedGroup2.getOperator();
    assertTrue(chainedOperator2 instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedProcessOperator<?>) chainedOperator2).getPythonFunctionInfo(), "f5", "f4");
}
Aggregations