Search in sources :

Example 1 with DataViewSpec

use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.

the class DataViewUtils method extractDataViews.

/**
 * Searches for data views in the data type of an accumulator and extracts them.
 */
public static List<DataViewSpec> extractDataViews(int aggIndex, DataType accumulatorDataType) {
    final LogicalType accumulatorType = accumulatorDataType.getLogicalType();
    if (!accumulatorType.is(ROW) && !accumulatorType.is(STRUCTURED_TYPE)) {
        return Collections.emptyList();
    }
    final List<String> fieldNames = getFieldNames(accumulatorType);
    final List<DataType> fieldDataTypes = accumulatorDataType.getChildren();
    final List<DataViewSpec> specs = new ArrayList<>();
    for (int fieldIndex = 0; fieldIndex < fieldDataTypes.size(); fieldIndex++) {
        final DataType fieldDataType = fieldDataTypes.get(fieldIndex);
        final LogicalType fieldType = fieldDataType.getLogicalType();
        if (isDataView(fieldType, ListView.class)) {
            specs.add(new ListViewSpec(createStateId(aggIndex, fieldNames.get(fieldIndex)), fieldIndex, fieldDataType.getChildren().get(0)));
        } else if (isDataView(fieldType, MapView.class)) {
            specs.add(new MapViewSpec(createStateId(aggIndex, fieldNames.get(fieldIndex)), fieldIndex, fieldDataType.getChildren().get(0), false));
        }
        if (fieldType.getChildren().stream().anyMatch(c -> hasNested(c, t -> isDataView(t, DataView.class)))) {
            throw new TableException("Data views are only supported in the first level of a composite accumulator type.");
        }
    }
    return specs;
}
Also used : MapView(org.apache.flink.table.api.dataview.MapView) DataType(org.apache.flink.table.types.DataType) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) StructuredType(org.apache.flink.table.types.logical.StructuredType) ListViewSpec(org.apache.flink.table.runtime.dataview.ListViewSpec) Function(java.util.function.Function) ArrayList(java.util.ArrayList) RawType(org.apache.flink.table.types.logical.RawType) DataView(org.apache.flink.table.api.dataview.DataView) LogicalTypeChecks.hasNested(org.apache.flink.table.types.logical.utils.LogicalTypeChecks.hasNested) ListView(org.apache.flink.table.api.dataview.ListView) LazyBinaryFormat(org.apache.flink.table.data.binary.LazyBinaryFormat) ROW(org.apache.flink.table.types.logical.LogicalTypeRoot.ROW) DataTypeUtils(org.apache.flink.table.types.utils.DataTypeUtils) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) NullSerializer(org.apache.flink.table.dataview.NullSerializer) LogicalTypeChecks.getFieldNames(org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getFieldNames) TableException(org.apache.flink.table.api.TableException) MapViewSpec(org.apache.flink.table.runtime.dataview.MapViewSpec) DataTypes(org.apache.flink.table.api.DataTypes) TypeTransformation(org.apache.flink.table.types.inference.TypeTransformation) List(java.util.List) STRUCTURED_TYPE(org.apache.flink.table.types.logical.LogicalTypeRoot.STRUCTURED_TYPE) LogicalType(org.apache.flink.table.types.logical.LogicalType) Internal(org.apache.flink.annotation.Internal) ExternalSerializer(org.apache.flink.table.runtime.typeutils.ExternalSerializer) Collections(java.util.Collections) TableException(org.apache.flink.table.api.TableException) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) ArrayList(java.util.ArrayList) LogicalType(org.apache.flink.table.types.logical.LogicalType) MapViewSpec(org.apache.flink.table.runtime.dataview.MapViewSpec) ListViewSpec(org.apache.flink.table.runtime.dataview.ListViewSpec) DataType(org.apache.flink.table.types.DataType) MapView(org.apache.flink.table.api.dataview.MapView)

Example 2 with DataViewSpec

use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.

the class StreamExecPythonGroupAggregate method translateToPlanInternal.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList = AggregateUtil.transformToStreamAggregateInfoList(inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, // isStateBackendDataViews
    true, // needDistinctInfo
    true);
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();
    final boolean countStarInserted = aggInfoList.countStarInserted();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs = CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig = CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputStreamOperator<RowData, RowData> operator = getPythonAggregateFunctionOperator(pythonConfig, inputRowType, InternalTypeInfo.of(getOutputType()).toRowType(), pythonFunctionInfos, dataViewSpecs, config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(), inputCountIndex, countStarInserted);
    // partitioned aggregation
    OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationName(config), createTransformationDescription(config), operator, InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Example 3 with DataViewSpec

use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.

the class StreamExecPythonGroupWindowAggregate method getGeneralPythonStreamGroupWindowAggregateFunctionOperator.

@SuppressWarnings({ "unchecked", "rawtypes" })
private OneInputStreamOperator<RowData, RowData> getGeneralPythonStreamGroupWindowAggregateFunctionOperator(Configuration config, RowType inputType, RowType outputType, WindowAssigner<?> windowAssigner, PythonAggregateFunctionInfo[] aggregateFunctions, DataViewSpec[][] dataViewSpecs, int inputTimeFieldIndex, int indexOfCountStar, boolean generateUpdateBefore, boolean countStarInserted, long allowance, ZoneId shiftTimeZone) {
    Class clazz = CommonPythonUtil.loadClass(GENERAL_STREAM_PYTHON_GROUP_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    boolean isRowTime = AggregateUtil.isRowtimeAttribute(window.timeAttribute());
    try {
        if (window instanceof TumblingGroupWindow) {
            ValueLiteralExpression size = ((TumblingGroupWindow) window).size();
            Method create = clazz.getMethod(GENERAL_STREAM_PYTHON_CREATE_TUMBLING_GROUP_WINDOW_METHOD, Configuration.class, RowType.class, RowType.class, PythonAggregateFunctionInfo[].class, DataViewSpec[][].class, int[].class, int.class, boolean.class, boolean.class, int.class, WindowAssigner.class, boolean.class, boolean.class, long.class, long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>) create.invoke(null, config, inputType, outputType, aggregateFunctions, dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore, countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime, AggregateUtil.hasTimeIntervalType(size), AggregateUtil.toDuration(size).toMillis(), allowance, namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SlidingGroupWindow) {
            ValueLiteralExpression size = ((SlidingGroupWindow) window).size();
            ValueLiteralExpression slide = ((SlidingGroupWindow) window).slide();
            Method create = clazz.getMethod(GENERAL_STREAM_PYTHON_CREATE_SLIDING_GROUP_WINDOW_METHOD, Configuration.class, RowType.class, RowType.class, PythonAggregateFunctionInfo[].class, DataViewSpec[][].class, int[].class, int.class, boolean.class, boolean.class, int.class, WindowAssigner.class, boolean.class, boolean.class, long.class, long.class, long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>) create.invoke(null, config, inputType, outputType, aggregateFunctions, dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore, countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime, AggregateUtil.hasTimeIntervalType(size), AggregateUtil.toDuration(size).toMillis(), AggregateUtil.toDuration(slide).toMillis(), allowance, namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SessionGroupWindow) {
            ValueLiteralExpression gap = ((SessionGroupWindow) window).gap();
            Method create = clazz.getMethod(GENERAL_STREAM_PYTHON_CREATE_SESSION_GROUP_WINDOW_METHOD, Configuration.class, RowType.class, RowType.class, PythonAggregateFunctionInfo[].class, DataViewSpec[][].class, int[].class, int.class, boolean.class, boolean.class, int.class, WindowAssigner.class, boolean.class, long.class, long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>) create.invoke(null, config, inputType, outputType, aggregateFunctions, dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore, countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime, AggregateUtil.toDuration(gap).toMillis(), allowance, namedWindowProperties, shiftTimeZone);
        }
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException("Python PythonStreamGroupWindowAggregateOperator constructed failed.", e);
    }
    throw new RuntimeException(String.format("Unsupported LogicWindow Type %s", window));
}
Also used : NamedWindowProperty(org.apache.flink.table.runtime.groupwindow.NamedWindowProperty) TableException(org.apache.flink.table.api.TableException) ValueLiteralExpression(org.apache.flink.table.expressions.ValueLiteralExpression) Configuration(org.apache.flink.configuration.Configuration) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) ZoneId(java.time.ZoneId) RowType(org.apache.flink.table.types.logical.RowType) Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) SlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SlidingWindowAssigner) CountTumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountTumblingWindowAssigner) WindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.WindowAssigner) TumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.TumblingWindowAssigner) CountSlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountSlidingWindowAssigner) SessionWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SessionWindowAssigner) RowData(org.apache.flink.table.data.RowData) TumblingGroupWindow(org.apache.flink.table.planner.plan.logical.TumblingGroupWindow) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) SlidingGroupWindow(org.apache.flink.table.planner.plan.logical.SlidingGroupWindow) SessionGroupWindow(org.apache.flink.table.planner.plan.logical.SessionGroupWindow)

Example 4 with DataViewSpec

use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.

the class StreamExecPythonGroupTableAggregate method translateToPlanInternal.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList = AggregateUtil.transformToStreamAggregateInfoList(inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, // isStateBackendDataViews
    true, // needDistinctInfo
    true);
    int inputCountIndex = aggInfoList.getIndexOfCountStar();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs = CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig = CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    OneInputStreamOperator<RowData, RowData> pythonOperator = getPythonTableAggregateFunctionOperator(pythonConfig, inputRowType, InternalTypeInfo.of(getOutputType()).toRowType(), pythonFunctionInfos, dataViewSpecs, config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(), generateUpdateBefore, inputCountIndex);
    OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationName(config), createTransformationDescription(config), pythonOperator, InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Example 5 with DataViewSpec

use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.

the class CommonPythonUtil method extractPythonAggregateFunctionInfos.

public static Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extractPythonAggregateFunctionInfos(AggregateInfoList pythonAggregateInfoList, AggregateCall[] aggCalls) {
    List<PythonAggregateFunctionInfo> pythonAggregateFunctionInfoList = new ArrayList<>();
    List<DataViewSpec[]> dataViewSpecList = new ArrayList<>();
    AggregateInfo[] aggInfos = pythonAggregateInfoList.aggInfos();
    for (int i = 0; i < aggInfos.length; i++) {
        AggregateInfo aggInfo = aggInfos[i];
        UserDefinedFunction function = aggInfo.function();
        if (function instanceof PythonFunction) {
            pythonAggregateFunctionInfoList.add(new PythonAggregateFunctionInfo((PythonFunction) function, Arrays.stream(aggInfo.argIndexes()).boxed().toArray(), aggCalls[i].filterArg, aggCalls[i].isDistinct()));
            TypeInference typeInference = function.getTypeInference(null);
            dataViewSpecList.add(extractDataViewSpecs(i, typeInference.getAccumulatorTypeStrategy().get().inferType(null).get()));
        } else {
            int filterArg = -1;
            boolean distinct = false;
            if (i < aggCalls.length) {
                filterArg = aggCalls[i].filterArg;
                distinct = aggCalls[i].isDistinct();
            }
            pythonAggregateFunctionInfoList.add(new PythonAggregateFunctionInfo(getBuiltInPythonAggregateFunction(function), Arrays.stream(aggInfo.argIndexes()).boxed().toArray(), filterArg, distinct));
            // The data views of the built in Python Aggregate Function are different from Java
            // side, we will create the spec at Python side.
            dataViewSpecList.add(new DataViewSpec[0]);
        }
    }
    return Tuple2.of(pythonAggregateFunctionInfoList.toArray(new PythonAggregateFunctionInfo[0]), dataViewSpecList.toArray(new DataViewSpec[0][0]));
}
Also used : TypeInference(org.apache.flink.table.types.inference.TypeInference) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) UserDefinedFunction(org.apache.flink.table.functions.UserDefinedFunction) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) ArrayList(java.util.ArrayList) PythonFunction(org.apache.flink.table.functions.python.PythonFunction) AggregateInfo(org.apache.flink.table.planner.plan.utils.AggregateInfo)

Aggregations

DataViewSpec (org.apache.flink.table.runtime.dataview.DataViewSpec)6 PythonAggregateFunctionInfo (org.apache.flink.table.functions.python.PythonAggregateFunctionInfo)4 RowType (org.apache.flink.table.types.logical.RowType)4 Configuration (org.apache.flink.configuration.Configuration)3 TableException (org.apache.flink.table.api.TableException)3 RowData (org.apache.flink.table.data.RowData)3 ArrayList (java.util.ArrayList)2 Transformation (org.apache.flink.api.dag.Transformation)2 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)2 MapView (org.apache.flink.table.api.dataview.MapView)2 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)2 AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList)2 ListViewSpec (org.apache.flink.table.runtime.dataview.ListViewSpec)2 MapViewSpec (org.apache.flink.table.runtime.dataview.MapViewSpec)2 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)2 DataType (org.apache.flink.table.types.DataType)2 LogicalType (org.apache.flink.table.types.logical.LogicalType)2 StructuredType (org.apache.flink.table.types.logical.StructuredType)2 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1