Search in sources :

Example 1 with PythonFunction

use of org.apache.flink.table.functions.python.PythonFunction in project flink by apache.

the class CommonPythonUtil method extractPythonAggregateFunctionInfosFromAggregateCall.

public static Tuple2<int[], PythonFunctionInfo[]> extractPythonAggregateFunctionInfosFromAggregateCall(AggregateCall[] aggCalls) {
    Map<Integer, Integer> inputNodes = new LinkedHashMap<>();
    List<PythonFunctionInfo> pythonFunctionInfos = new ArrayList<>();
    for (AggregateCall aggregateCall : aggCalls) {
        List<Integer> inputs = new ArrayList<>();
        List<Integer> argList = aggregateCall.getArgList();
        for (Integer arg : argList) {
            if (inputNodes.containsKey(arg)) {
                inputs.add(inputNodes.get(arg));
            } else {
                Integer inputOffset = inputNodes.size();
                inputs.add(inputOffset);
                inputNodes.put(arg, inputOffset);
            }
        }
        PythonFunction pythonFunction = null;
        SqlAggFunction aggregateFunction = aggregateCall.getAggregation();
        if (aggregateFunction instanceof AggSqlFunction) {
            pythonFunction = (PythonFunction) ((AggSqlFunction) aggregateFunction).aggregateFunction();
        } else if (aggregateFunction instanceof BridgingSqlAggFunction) {
            pythonFunction = (PythonFunction) ((BridgingSqlAggFunction) aggregateFunction).getDefinition();
        }
        PythonFunctionInfo pythonFunctionInfo = new PythonAggregateFunctionInfo(pythonFunction, inputs.toArray(), aggregateCall.filterArg, aggregateCall.isDistinct());
        pythonFunctionInfos.add(pythonFunctionInfo);
    }
    int[] udafInputOffsets = inputNodes.keySet().stream().mapToInt(i -> i).toArray();
    return Tuple2.of(udafInputOffsets, pythonFunctionInfos.toArray(new PythonFunctionInfo[0]));
}
Also used : TypeInference(org.apache.flink.table.types.inference.TypeInference) MapView(org.apache.flink.table.api.dataview.MapView) SumAggFunction(org.apache.flink.table.planner.functions.aggfunctions.SumAggFunction) DataType(org.apache.flink.table.types.DataType) Sum0AggFunction(org.apache.flink.table.planner.functions.aggfunctions.Sum0AggFunction) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) StructuredType(org.apache.flink.table.types.logical.StructuredType) LastValueWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.LastValueWithRetractAggFunction) ListViewSpec(org.apache.flink.table.runtime.dataview.ListViewSpec) CountAggFunction(org.apache.flink.table.planner.functions.aggfunctions.CountAggFunction) BigDecimal(java.math.BigDecimal) PythonFunction(org.apache.flink.table.functions.python.PythonFunction) RexNode(org.apache.calcite.rex.RexNode) MaxAggFunction(org.apache.flink.table.planner.functions.aggfunctions.MaxAggFunction) Map(java.util.Map) FirstValueAggFunction(org.apache.flink.table.runtime.functions.aggregate.FirstValueAggFunction) Method(java.lang.reflect.Method) TableSqlFunction(org.apache.flink.table.planner.functions.utils.TableSqlFunction) BuiltInPythonAggregateFunction(org.apache.flink.table.functions.python.BuiltInPythonAggregateFunction) TableConfig(org.apache.flink.table.api.TableConfig) ListAggWsWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.ListAggWsWithRetractAggFunction) RexLiteral(org.apache.calcite.rex.RexLiteral) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) MapViewSpec(org.apache.flink.table.runtime.dataview.MapViewSpec) UserDefinedFunction(org.apache.flink.table.functions.UserDefinedFunction) Count1AggFunction(org.apache.flink.table.planner.functions.aggfunctions.Count1AggFunction) FirstValueWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.FirstValueWithRetractAggFunction) InvocationTargetException(java.lang.reflect.InvocationTargetException) Objects(java.util.Objects) AggSqlFunction(org.apache.flink.table.planner.functions.utils.AggSqlFunction) List(java.util.List) ListAggFunction(org.apache.flink.table.planner.functions.aggfunctions.ListAggFunction) BridgingSqlAggFunction(org.apache.flink.table.planner.functions.bridging.BridgingSqlAggFunction) LogicalType(org.apache.flink.table.types.logical.LogicalType) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) RexCall(org.apache.calcite.rex.RexCall) IntStream(java.util.stream.IntStream) MinAggFunction(org.apache.flink.table.planner.functions.aggfunctions.MinAggFunction) ListAggWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.ListAggWithRetractAggFunction) DummyStreamExecutionEnvironment(org.apache.flink.table.planner.utils.DummyStreamExecutionEnvironment) MinWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.MinWithRetractAggFunction) RowType(org.apache.flink.table.types.logical.RowType) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) DataView(org.apache.flink.table.api.dataview.DataView) FieldsDataType(org.apache.flink.table.types.FieldsDataType) ConfigOption(org.apache.flink.configuration.ConfigOption) SqlOperator(org.apache.calcite.sql.SqlOperator) ListView(org.apache.flink.table.api.dataview.ListView) AggregateInfo(org.apache.flink.table.planner.plan.utils.AggregateInfo) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) FunctionDefinition(org.apache.flink.table.functions.FunctionDefinition) AvgAggFunction(org.apache.flink.table.planner.functions.aggfunctions.AvgAggFunction) MaxWithRetractAggFunction(org.apache.flink.table.runtime.functions.aggregate.MaxWithRetractAggFunction) Configuration(org.apache.flink.configuration.Configuration) TableException(org.apache.flink.table.api.TableException) SumWithRetractAggFunction(org.apache.flink.table.planner.functions.aggfunctions.SumWithRetractAggFunction) PythonFunctionInfo(org.apache.flink.table.functions.python.PythonFunctionInfo) Field(java.lang.reflect.Field) LastValueAggFunction(org.apache.flink.table.runtime.functions.aggregate.LastValueAggFunction) BridgingSqlFunction(org.apache.flink.table.planner.functions.bridging.BridgingSqlFunction) ScalarSqlFunction(org.apache.flink.table.planner.functions.utils.ScalarSqlFunction) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) PythonFunctionInfo(org.apache.flink.table.functions.python.PythonFunctionInfo) ArrayList(java.util.ArrayList) BridgingSqlAggFunction(org.apache.flink.table.planner.functions.bridging.BridgingSqlAggFunction) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) BridgingSqlAggFunction(org.apache.flink.table.planner.functions.bridging.BridgingSqlAggFunction) LinkedHashMap(java.util.LinkedHashMap) AggregateCall(org.apache.calcite.rel.core.AggregateCall) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) PythonFunction(org.apache.flink.table.functions.python.PythonFunction) AggSqlFunction(org.apache.flink.table.planner.functions.utils.AggSqlFunction)

Example 2 with PythonFunction

use of org.apache.flink.table.functions.python.PythonFunction in project flink by apache.

the class CommonPythonUtil method extractPythonAggregateFunctionInfos.

public static Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extractPythonAggregateFunctionInfos(AggregateInfoList pythonAggregateInfoList, AggregateCall[] aggCalls) {
    List<PythonAggregateFunctionInfo> pythonAggregateFunctionInfoList = new ArrayList<>();
    List<DataViewSpec[]> dataViewSpecList = new ArrayList<>();
    AggregateInfo[] aggInfos = pythonAggregateInfoList.aggInfos();
    for (int i = 0; i < aggInfos.length; i++) {
        AggregateInfo aggInfo = aggInfos[i];
        UserDefinedFunction function = aggInfo.function();
        if (function instanceof PythonFunction) {
            pythonAggregateFunctionInfoList.add(new PythonAggregateFunctionInfo((PythonFunction) function, Arrays.stream(aggInfo.argIndexes()).boxed().toArray(), aggCalls[i].filterArg, aggCalls[i].isDistinct()));
            TypeInference typeInference = function.getTypeInference(null);
            dataViewSpecList.add(extractDataViewSpecs(i, typeInference.getAccumulatorTypeStrategy().get().inferType(null).get()));
        } else {
            int filterArg = -1;
            boolean distinct = false;
            if (i < aggCalls.length) {
                filterArg = aggCalls[i].filterArg;
                distinct = aggCalls[i].isDistinct();
            }
            pythonAggregateFunctionInfoList.add(new PythonAggregateFunctionInfo(getBuiltInPythonAggregateFunction(function), Arrays.stream(aggInfo.argIndexes()).boxed().toArray(), filterArg, distinct));
            // The data views of the built in Python Aggregate Function are different from Java
            // side, we will create the spec at Python side.
            dataViewSpecList.add(new DataViewSpec[0]);
        }
    }
    return Tuple2.of(pythonAggregateFunctionInfoList.toArray(new PythonAggregateFunctionInfo[0]), dataViewSpecList.toArray(new DataViewSpec[0][0]));
}
Also used : TypeInference(org.apache.flink.table.types.inference.TypeInference) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) UserDefinedFunction(org.apache.flink.table.functions.UserDefinedFunction) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) ArrayList(java.util.ArrayList) PythonFunction(org.apache.flink.table.functions.python.PythonFunction) AggregateInfo(org.apache.flink.table.planner.plan.utils.AggregateInfo)

Aggregations

ArrayList (java.util.ArrayList)2 UserDefinedFunction (org.apache.flink.table.functions.UserDefinedFunction)2 PythonAggregateFunctionInfo (org.apache.flink.table.functions.python.PythonAggregateFunctionInfo)2 PythonFunction (org.apache.flink.table.functions.python.PythonFunction)2 Field (java.lang.reflect.Field)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 BigDecimal (java.math.BigDecimal)1 Arrays (java.util.Arrays)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 IntStream (java.util.stream.IntStream)1 AggregateCall (org.apache.calcite.rel.core.AggregateCall)1 RexCall (org.apache.calcite.rex.RexCall)1 RexLiteral (org.apache.calcite.rex.RexLiteral)1 RexNode (org.apache.calcite.rex.RexNode)1 SqlAggFunction (org.apache.calcite.sql.SqlAggFunction)1 SqlOperator (org.apache.calcite.sql.SqlOperator)1