Search in sources :

Example 51 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

the class PythonOperatorChainingOptimizerTest method testMultipleChainedOperators.

// Builds the pipeline source -> f1 (keyed) -> f2 -> f3 -> f4 (keyed) -> f5 and expects the
// optimizer to return three transformations: the source, one chained transformation covering
// f1..f3, and one chained transformation covering f4..f5.
@Test
public void testMultipleChainedOperators() {
    // Operators: f1 and f4 are keyed process operators, f2/f3/f5 are plain process operators.
    PythonKeyedProcessOperator<?> keyedOpF1 = createKeyedProcessOperator("f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOpF2 = createProcessOperator("f2", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOpF3 = createProcessOperator("f3", new RowTypeInfo(Types.INT(), Types.INT()), Types.LONG());
    PythonKeyedProcessOperator<?> keyedOpF4 = createKeyedProcessOperator("f4", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOpF5 = createProcessOperator("f5", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());

    // Transformations, each one consuming the previous one, all with parallelism 2.
    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedF1 = new OneInputTransformation(source, "keyedProcess", keyedOpF1, keyedOpF1.getProducedType(), 2);
    Transformation<?> processF2 = new OneInputTransformation(keyedF1, "process", processOpF2, processOpF2.getProducedType(), 2);
    Transformation<?> processF3 = new OneInputTransformation(processF2, "process", processOpF3, processOpF3.getProducedType(), 2);
    OneInputTransformation<?, ?> keyedF4 = new OneInputTransformation(processF3, "keyedProcess", keyedOpF4, keyedOpF4.getProducedType(), 2);
    Transformation<?> processF5 = new OneInputTransformation(keyedF4, "process", processOpF5, processOpF5.getProducedType(), 2);

    List<Transformation<?>> pipeline = new ArrayList<>();
    pipeline.add(source);
    pipeline.add(keyedF1);
    pipeline.add(processF2);
    pipeline.add(processF3);
    pipeline.add(keyedF4);
    pipeline.add(processF5);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(pipeline);
    assertEquals(3, result.size());

    // First chain: spans from the source's output type to f3's produced type.
    OneInputTransformation<?, ?> firstChain = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(source.getOutputType(), firstChain.getInputType());
    assertEquals(processOpF3.getProducedType(), firstChain.getOutputType());

    // Second chain: picks up at f3's produced type and ends at f5's produced type.
    OneInputTransformation<?, ?> secondChain = (OneInputTransformation<?, ?>) result.get(2);
    assertEquals(processOpF3.getProducedType(), secondChain.getInputType());
    assertEquals(processOpF5.getProducedType(), secondChain.getOutputType());

    // Both chained operators must be keyed process operators carrying the merged functions.
    OneInputStreamOperator<?, ?> firstOperator = firstChain.getOperator();
    assertTrue(firstOperator instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(((PythonKeyedProcessOperator<?>) firstOperator).getPythonFunctionInfo(), "f3", "f2", "f1");

    OneInputStreamOperator<?, ?> secondOperator = secondChain.getOperator();
    assertTrue(secondOperator instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(((PythonKeyedProcessOperator<?>) secondOperator).getPythonFunctionInfo(), "f5", "f4");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 52 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

the class PythonOperatorChainingOptimizerTest method testChainingUnorderedTransformations.

// Hands the optimizer the chain source -> f1 (keyed) -> f2 -> f3 with the downstream
// transformations listed in reverse order, and expects a single chained transformation
// behind the source.
@Test
public void testChainingUnorderedTransformations() {
    PythonKeyedProcessOperator<?> keyedOpF1 = createKeyedProcessOperator("f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> processOpF2 = createProcessOperator("f2", Types.STRING(), Types.LONG());
    PythonProcessOperator<?, ?> processOpF3 = createProcessOperator("f3", Types.LONG(), Types.INT());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedF1 = new OneInputTransformation(source, "keyedProcess", keyedOpF1, keyedOpF1.getProducedType(), 2);
    Transformation<?> processF2 = new OneInputTransformation(keyedF1, "process", processOpF2, processOpF2.getProducedType(), 2);
    Transformation<?> processF3 = new OneInputTransformation(processF2, "process", processOpF3, processOpF3.getProducedType(), 2);

    // Deliberately not in topological order: f3 and f2 are listed before f1.
    List<Transformation<?>> shuffled = new ArrayList<>();
    shuffled.add(source);
    shuffled.add(processF3);
    shuffled.add(processF2);
    shuffled.add(keyedF1);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(shuffled);
    assertEquals(2, result.size());

    // The single chain spans from the source's output type to f3's produced type.
    OneInputTransformation<?, ?> chain = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(source.getOutputType(), chain.getInputType());
    assertEquals(processOpF3.getProducedType(), chain.getOutputType());

    OneInputStreamOperator<?, ?> operator = chain.getOperator();
    assertTrue(operator instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(((PythonKeyedProcessOperator<?>) operator).getPythonFunctionInfo(), "f3", "f2", "f1");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 53 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

the class PythonConfigUtil method configPythonOperator.

/**
 * Visits every transformation held by {@code env}, aligns it, and for each Python operator
 * declares the Python managed-memory use case and refreshes the operator's configuration.
 *
 * <p>The transformation list is obtained reflectively from the {@code transformations} field of
 * {@link StreamExecutionEnvironment}.
 *
 * @param env the execution environment whose transformations are configured
 * @throws IllegalAccessException if the reflective field read is denied
 * @throws InvocationTargetException declared by the signature; propagated from helpers, if any
 * @throws NoSuchFieldException if the {@code transformations} field does not exist
 */
@SuppressWarnings("unchecked")
public static void configPythonOperator(StreamExecutionEnvironment env) throws IllegalAccessException, InvocationTargetException, NoSuchFieldException {
    final Configuration envConfig = getEnvConfigWithDependencies(env);

    final Field field = StreamExecutionEnvironment.class.getDeclaredField("transformations");
    field.setAccessible(true);
    final List<Transformation<?>> registered = (List<Transformation<?>>) field.get(env);

    for (Transformation<?> current : registered) {
        // Alignment applies to every transformation, Python or not.
        alignTransformation(current);
        if (!isPythonOperator(current)) {
            continue;
        }

        // Mark the transformation as a Python operator for managed-memory purposes.
        current.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);

        final AbstractPythonFunctionOperator<?> operator = getPythonOperator(current);
        if (operator == null) {
            continue;
        }

        // Combine the operator's existing configuration with the environment-level one so that
        // dependency related settings are up to date.
        final Configuration previous = operator.getConfiguration();
        operator.setConfiguration(generateNewPythonConfig(previous, envConfig));
    }
}
Also used : Field(java.lang.reflect.Field) AbstractMultipleInputTransformation(org.apache.flink.streaming.api.transformations.AbstractMultipleInputTransformation) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) Configuration(org.apache.flink.configuration.Configuration) List(java.util.List)

Example 54 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

the class PythonOperatorChainingOptimizer method replaceInput.

/**
 * Rewires {@code transformation} so that it consumes {@code newInput} where it previously
 * consumed {@code oldInput}. The input reference(s) are overwritten reflectively.
 *
 * <p>Supported shapes:
 *
 * <ul>
 *   <li>single-input transformations: the {@code input} field is overwritten;
 *   <li>{@code TwoInputTransformation}: {@code input1} is overwritten when it is {@code oldInput}
 *       (identity comparison), otherwise {@code input2};
 *   <li>union / multiple-input transformations: the entry equal to {@code oldInput} in the
 *       {@code inputs} list is replaced <em>at its original position</em>, so the ordering of
 *       the remaining inputs is preserved; if no entry matches, {@code newInput} is appended;
 *   <li>broadcast-state transformations: {@code regularInput} is overwritten when it is
 *       {@code oldInput} (identity comparison), otherwise {@code broadcastInput}.
 * </ul>
 *
 * @param transformation the transformation whose input is replaced
 * @param oldInput the input currently referenced by {@code transformation}
 * @param newInput the input that should be referenced instead
 * @throws RuntimeException if the transformation type is not supported, or if the reflective
 *     field access fails
 */
private static void replaceInput(Transformation<?> transformation, Transformation<?> oldInput, Transformation<?> newInput) {
    try {
        if (transformation instanceof OneInputTransformation || transformation instanceof FeedbackTransformation || transformation instanceof SideOutputTransformation || transformation instanceof ReduceTransformation || transformation instanceof SinkTransformation || transformation instanceof LegacySinkTransformation || transformation instanceof TimestampsAndWatermarksTransformation || transformation instanceof PartitionTransformation) {
            setInputFieldInHierarchy(transformation, "input", newInput);
        } else if (transformation instanceof TwoInputTransformation) {
            final String fieldName;
            if (((TwoInputTransformation<?, ?, ?>) transformation).getInput1() == oldInput) {
                fieldName = "input1";
            } else {
                fieldName = "input2";
            }
            setInputFieldInHierarchy(transformation, fieldName, newInput);
        } else if (transformation instanceof UnionTransformation || transformation instanceof AbstractMultipleInputTransformation) {
            List<Transformation<?>> newInputs = Lists.newArrayList();
            newInputs.addAll(transformation.getInputs());
            // Swap in place: removing and re-appending would move the replaced input to the end
            // of the list and shift the position of every input that followed it.
            final int position = newInputs.indexOf(oldInput);
            if (position >= 0) {
                newInputs.set(position, newInput);
            } else {
                newInputs.add(newInput);
            }
            setInputFieldInHierarchy(transformation, "inputs", newInputs);
        } else if (transformation instanceof AbstractBroadcastStateTransformation) {
            final String fieldName;
            if (((AbstractBroadcastStateTransformation<?, ?, ?>) transformation).getRegularInput() == oldInput) {
                fieldName = "regularInput";
            } else {
                fieldName = "broadcastInput";
            }
            setInputFieldInHierarchy(transformation, fieldName, newInput);
        } else {
            throw new RuntimeException("Unsupported transformation: " + transformation);
        }
    } catch (NoSuchFieldException | IllegalAccessException e) {
        // This should never happen
        throw new RuntimeException(e);
    }
}

/**
 * Assigns {@code value} to the field named {@code fieldName} on {@code target}, looking the field
 * up on the runtime class first and then on each superclass in turn.
 *
 * <p>{@link Class#getDeclaredField(String)} does not return inherited fields, so a lookup limited
 * to {@code target.getClass()} fails for any subclass that inherits the field from a parent.
 *
 * @throws NoSuchFieldException if no class in the hierarchy declares {@code fieldName}
 * @throws IllegalAccessException if the field cannot be written
 */
private static void setInputFieldInHierarchy(Transformation<?> target, String fieldName, Object value) throws NoSuchFieldException, IllegalAccessException {
    for (Class<?> type = target.getClass(); type != null; type = type.getSuperclass()) {
        final Field field;
        try {
            field = type.getDeclaredField(fieldName);
        } catch (NoSuchFieldException ignored) {
            // Not declared at this level; continue with the parent class.
            continue;
        }
        field.setAccessible(true);
        field.set(target, value);
        return;
    }
    throw new NoSuchFieldException(fieldName + " (searched the class hierarchy of " + target.getClass().getName() + ")");
}
Also used : FeedbackTransformation(org.apache.flink.streaming.api.transformations.FeedbackTransformation) ReduceTransformation(org.apache.flink.streaming.api.transformations.ReduceTransformation) TimestampsAndWatermarksTransformation(org.apache.flink.streaming.api.transformations.TimestampsAndWatermarksTransformation) AbstractMultipleInputTransformation(org.apache.flink.streaming.api.transformations.AbstractMultipleInputTransformation) PhysicalTransformation(org.apache.flink.streaming.api.transformations.PhysicalTransformation) SinkTransformation(org.apache.flink.streaming.api.transformations.SinkTransformation) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) UnionTransformation(org.apache.flink.streaming.api.transformations.UnionTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) SideOutputTransformation(org.apache.flink.streaming.api.transformations.SideOutputTransformation) LegacySinkTransformation(org.apache.flink.streaming.api.transformations.LegacySinkTransformation) Transformation(org.apache.flink.api.dag.Transformation) AbstractBroadcastStateTransformation(org.apache.flink.streaming.api.transformations.AbstractBroadcastStateTransformation) AbstractMultipleInputTransformation(org.apache.flink.streaming.api.transformations.AbstractMultipleInputTransformation) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SideOutputTransformation(org.apache.flink.streaming.api.transformations.SideOutputTransformation) Field(java.lang.reflect.Field) TimestampsAndWatermarksTransformation(org.apache.flink.streaming.api.transformations.TimestampsAndWatermarksTransformation) AbstractBroadcastStateTransformation(org.apache.flink.streaming.api.transformations.AbstractBroadcastStateTransformation) 
LegacySinkTransformation(org.apache.flink.streaming.api.transformations.LegacySinkTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) UnionTransformation(org.apache.flink.streaming.api.transformations.UnionTransformation) SinkTransformation(org.apache.flink.streaming.api.transformations.SinkTransformation) LegacySinkTransformation(org.apache.flink.streaming.api.transformations.LegacySinkTransformation) ReduceTransformation(org.apache.flink.streaming.api.transformations.ReduceTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) FeedbackTransformation(org.apache.flink.streaming.api.transformations.FeedbackTransformation)

Example 55 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

the class AbstractOneInputTransformationTranslator method translateInternal.

/**
 * Registers {@code transformation} as an operator node in the stream graph obtained from
 * {@code context}, configures its state key (when a key selector is given), parallelism and max
 * parallelism, and connects it to the stream nodes of its single input transformation.
 *
 * @param transformation the transformation being translated
 * @param operatorFactory factory of the operator added to the graph
 * @param inputType type information of the operator's input
 * @param stateKeySelector key selector for keyed state, or {@code null} if none is set
 * @param stateKeyType type of the state key; dereferenced only when {@code stateKeySelector} is
 *     non-null
 * @param context translation context supplying the stream graph, slot sharing group and the
 *     stream node ids of already translated transformations
 * @return a singleton collection holding the id of {@code transformation}
 */
protected Collection<Integer> translateInternal(final Transformation<OUT> transformation, final StreamOperatorFactory<OUT> operatorFactory, final TypeInformation<IN> inputType, @Nullable final KeySelector<IN, ?> stateKeySelector, @Nullable final TypeInformation<?> stateKeyType, final Context context) {
    checkNotNull(transformation);
    checkNotNull(operatorFactory);
    checkNotNull(inputType);
    checkNotNull(context);

    final StreamGraph graph = context.getStreamGraph();
    final String sharingGroup = context.getSlotSharingGroup();
    final int nodeId = transformation.getId();
    final ExecutionConfig config = graph.getExecutionConfig();

    graph.addOperator(nodeId, sharingGroup, transformation.getCoLocationGroupKey(), operatorFactory, inputType, transformation.getOutputType(), transformation.getName());

    if (stateKeySelector != null) {
        final TypeSerializer<?> keySerializer = stateKeyType.createSerializer(config);
        graph.setOneInputStateKey(nodeId, stateKeySelector, keySerializer);
    }

    // Use the execution config's parallelism when the transformation carries the default marker.
    int parallelism = transformation.getParallelism();
    if (parallelism == ExecutionConfig.PARALLELISM_DEFAULT) {
        parallelism = config.getParallelism();
    }
    graph.setParallelism(nodeId, parallelism);
    graph.setMaxParallelism(nodeId, transformation.getMaxParallelism());

    final List<Transformation<?>> parents = transformation.getInputs();
    checkState(parents.size() == 1, "Expected exactly one input transformation but found " + parents.size());

    // Add an edge from each stream node of the single parent into this node.
    final Transformation<?> parent = parents.get(0);
    for (Integer upstreamId : context.getStreamNodeIds(parent)) {
        graph.addEdge(upstreamId, nodeId, 0);
    }
    return Collections.singleton(nodeId);
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig)

Aggregations

Transformation (org.apache.flink.api.dag.Transformation): 98; RowData (org.apache.flink.table.data.RowData): 69; ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 53; RowType (org.apache.flink.table.types.logical.RowType): 50; OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 45; TableException (org.apache.flink.table.api.TableException): 28; RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 28; ArrayList (java.util.ArrayList): 25; CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext): 21; Configuration (org.apache.flink.configuration.Configuration): 19; TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation): 18; List (java.util.List): 17; PartitionTransformation (org.apache.flink.streaming.api.transformations.PartitionTransformation): 17; AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 17; LogicalType (org.apache.flink.table.types.logical.LogicalType): 16; Test (org.junit.Test): 16; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 13; SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation): 13; Arrays (java.util.Arrays): 11; Collections (java.util.Collections): 10