Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class CommonPythonUtil, method extractPythonAggregateFunctionInfosFromAggregateCall:
public static Tuple2<int[], PythonFunctionInfo[]> extractPythonAggregateFunctionInfosFromAggregateCall(
        AggregateCall[] aggCalls) {
    Map<Integer, Integer> inputNodes = new LinkedHashMap<>();
    List<PythonFunctionInfo> pythonFunctionInfos = new ArrayList<>();
    for (AggregateCall aggregateCall : aggCalls) {
        List<Integer> inputs = new ArrayList<>();
        List<Integer> argList = aggregateCall.getArgList();
        for (Integer arg : argList) {
            if (inputNodes.containsKey(arg)) {
                inputs.add(inputNodes.get(arg));
            } else {
                Integer inputOffset = inputNodes.size();
                inputs.add(inputOffset);
                inputNodes.put(arg, inputOffset);
            }
        }
        PythonFunction pythonFunction = null;
        SqlAggFunction aggregateFunction = aggregateCall.getAggregation();
        if (aggregateFunction instanceof AggSqlFunction) {
            pythonFunction =
                    (PythonFunction) ((AggSqlFunction) aggregateFunction).aggregateFunction();
        } else if (aggregateFunction instanceof BridgingSqlAggFunction) {
            pythonFunction =
                    (PythonFunction) ((BridgingSqlAggFunction) aggregateFunction).getDefinition();
        }
        PythonFunctionInfo pythonFunctionInfo =
                new PythonAggregateFunctionInfo(
                        pythonFunction,
                        inputs.toArray(),
                        aggregateCall.filterArg,
                        aggregateCall.isDistinct());
        pythonFunctionInfos.add(pythonFunctionInfo);
    }
    int[] udafInputOffsets = inputNodes.keySet().stream().mapToInt(i -> i).toArray();
    return Tuple2.of(udafInputOffsets, pythonFunctionInfos.toArray(new PythonFunctionInfo[0]));
}
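The LinkedHashMap assigns each distinct argument index one compact offset and reuses it across aggregate calls; its keys, in insertion order, become the returned udafInputOffsets. Below is a minimal standalone sketch of just that deduplication step, not tied to the Flink planner types (the class name UdafInputOffsetDemo and the sample argument lists are made up for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class UdafInputOffsetDemo {
    public static void main(String[] args) {
        // e.g. two aggregate calls whose argument lists are [1, 3] and [3, 5]
        List<List<Integer>> argLists =
                Arrays.asList(Arrays.asList(1, 3), Arrays.asList(3, 5));

        Map<Integer, Integer> inputNodes = new LinkedHashMap<>();
        for (List<Integer> argList : argLists) {
            List<Integer> inputs = new ArrayList<>();
            for (Integer arg : argList) {
                // reuse the existing offset for a field, or assign the next free one
                inputs.add(inputNodes.computeIfAbsent(arg, k -> inputNodes.size()));
            }
            System.out.println("per-call offsets: " + inputs);
        }

        int[] udafInputOffsets = inputNodes.keySet().stream().mapToInt(i -> i).toArray();
        System.out.println("udafInputOffsets: " + Arrays.toString(udafInputOffsets));
        // prints per-call offsets [0, 1] and [1, 2], udafInputOffsets [1, 3, 5]
    }
}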
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class PythonStreamGroupWindowAggregateOperatorTest, method getTestOperator:
@Override
OneInputStreamOperator getTestOperator(Configuration config) {
    long size = 10000L;
    long slide = 5000L;
    SlidingWindowAssigner windowAssigner =
            SlidingWindowAssigner.of(Duration.ofMillis(size), Duration.ofMillis(slide))
                    .withEventTime();
    WindowReference windowRef = new WindowReference("w$", new TimestampType(3));
    return new PassThroughPythonStreamGroupWindowAggregateOperator(
            config, getInputType(), getOutputType(),
            new PythonAggregateFunctionInfo[] {
                new PythonAggregateFunctionInfo(
                        PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE,
                        new Integer[] {2}, -1, false)
            },
            getGrouping(), -1, false, false, 3,
            windowAssigner, FlinkFnApi.GroupWindow.WindowType.SLIDING_GROUP_WINDOW,
            true, true, size, slide, 0L, 0L,
            new NamedWindowProperty[] {
                new NamedWindowProperty("start", new WindowStart(null)),
                new NamedWindowProperty("end", new WindowEnd(null))
            },
            UTC_ZONE_ID);
}
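Reading this test call together with the call in CommonPythonUtil above, the PythonAggregateFunctionInfo constructor takes the Python function, the argument offsets or literals (here a single input at offset 2), the index of the FILTER argument (-1 meaning no filter), and a distinct flag (false here).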
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();
    final boolean countStarInserted = aggInfoList.countStarInserted();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonAggregateFunctionOperator(
                    pythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    inputCountIndex,
                    countStarInserted);
    // partitioned aggregation
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
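The warning at the top fires when a grouped aggregation has no state retention configured. A minimal sketch of setting that interval from user code, assuming a recent Flink version where TableConfig#setIdleStateRetention(Duration) is available:

import java.time.Duration;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class IdleStateRetentionExample {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
        // State of keys that stay idle for longer than one hour may be cleaned up,
        // so the group aggregation no longer accumulates state without bound.
        tEnv.getConfig().setIdleStateRetention(Duration.ofHours(1));
    }
}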
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupWindowAggregate, method getGeneralPythonStreamGroupWindowAggregateFunctionOperator:
@SuppressWarnings({"unchecked", "rawtypes"})
private OneInputStreamOperator<RowData, RowData> getGeneralPythonStreamGroupWindowAggregateFunctionOperator(
        Configuration config,
        RowType inputType,
        RowType outputType,
        WindowAssigner<?> windowAssigner,
        PythonAggregateFunctionInfo[] aggregateFunctions,
        DataViewSpec[][] dataViewSpecs,
        int inputTimeFieldIndex,
        int indexOfCountStar,
        boolean generateUpdateBefore,
        boolean countStarInserted,
        long allowance,
        ZoneId shiftTimeZone) {
    Class clazz =
            CommonPythonUtil.loadClass(
                    GENERAL_STREAM_PYTHON_GROUP_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    boolean isRowTime = AggregateUtil.isRowtimeAttribute(window.timeAttribute());
    try {
        if (window instanceof TumblingGroupWindow) {
            ValueLiteralExpression size = ((TumblingGroupWindow) window).size();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_TUMBLING_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, boolean.class, long.class,
                            long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.hasTimeIntervalType(size),
                            AggregateUtil.toDuration(size).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SlidingGroupWindow) {
            ValueLiteralExpression size = ((SlidingGroupWindow) window).size();
            ValueLiteralExpression slide = ((SlidingGroupWindow) window).slide();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SLIDING_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, boolean.class, long.class,
                            long.class, long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.hasTimeIntervalType(size),
                            AggregateUtil.toDuration(size).toMillis(),
                            AggregateUtil.toDuration(slide).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SessionGroupWindow) {
            ValueLiteralExpression gap = ((SessionGroupWindow) window).gap();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SESSION_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, long.class, long.class,
                            NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.toDuration(gap).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        }
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException(
                "Python PythonStreamGroupWindowAggregateOperator constructed failed.", e);
    }
    throw new RuntimeException(String.format("Unsupported LogicWindow Type %s", window));
}
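The operator is built through reflection (CommonPythonUtil.loadClass followed by getMethod and invoke(null, ...)) rather than a direct constructor call, because the operator class is resolved by name only at runtime. A minimal, self-contained illustration of that static-factory-via-reflection pattern; the class and method used here (Integer.valueOf) are stand-ins chosen only so the demo runs:

import java.lang.reflect.Method;

public class ReflectiveFactoryDemo {
    public static void main(String[] args) throws Exception {
        // Resolve a class by its fully qualified name at runtime.
        Class<?> clazz = Class.forName("java.lang.Integer");
        // Look up a static factory method by name and parameter types.
        Method create = clazz.getMethod("valueOf", String.class);
        // The first argument to invoke is the receiver; null for a static method.
        Object result = create.invoke(null, "42");
        System.out.println(result); // prints 42
    }
}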
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupTableAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    int inputCountIndex = aggInfoList.getIndexOfCountStar();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    OneInputStreamOperator<RowData, RowData> pythonOperator =
            getPythonTableAggregateFunctionOperator(
                    pythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    generateUpdateBefore,
                    inputCountIndex);
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    pythonOperator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
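Whether the Python worker shares Flink's managed memory is a configuration decision that isPythonWorkerUsingManagedMemory reads from the merged Python config; only when it is enabled does the transformation declare the PYTHON managed memory use case at slot scope. A small hedged sketch of flipping that switch, assuming the documented PyFlink option key python.fn-execution.memory.managed (default true):

import org.apache.flink.configuration.Configuration;

public class PythonManagedMemoryConfig {
    public static void main(String[] args) {
        Configuration pythonConfig = new Configuration();
        // Assumed option key per the PyFlink documentation; when set to false,
        // the Python worker memory is not taken from Flink managed memory.
        pythonConfig.setString("python.fn-execution.memory.managed", "false");
        System.out.println(pythonConfig.getString("python.fn-execution.memory.managed", "true"));
    }
}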