Search in sources :

Example 16 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecPythonGroupWindowAggregate method translateToPlanInternal.

/**
 * Builds the keyed transformation that runs this Python group window aggregation.
 *
 * <p>The input edge is translated first. If at least one aggregate call is a general Python
 * aggregate, the general Python window transformation is created; otherwise the Pandas one is
 * used. The resulting transformation is keyed by the {@code grouping} fields of the input row.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder and
 *     the execution environment
 * @param config node configuration (state retention time, local time zone, table config)
 * @return the transformation with its state key selector and state key type set
 * @throws TableException if the window uses a rowtime attribute whose field index cannot be
 *     found in the input row
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only tumbling and sliding windows are inspected for a row-interval (count based) size;
    // every other window kind is treated as not count based.
    final boolean countBased =
            window instanceof TumblingGroupWindow
                    ? hasRowIntervalType(((TumblingGroupWindow) window).size())
                    : window instanceof SlidingGroupWindow
                            && hasRowIntervalType(((SlidingGroupWindow) window).size());
    if (countBased && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state."
                        + " Please provide a query configuration with valid retention interval to"
                        + " prevent excessive state size. You may specify a retention time of 0 to"
                        + " not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inRowType = (RowType) edge.getOutputType();
    final RowType outRowType = InternalTypeInfo.of(getOutputType()).toRowType();

    // -1 marks "no rowtime field"; it is only replaced when the window uses a rowtime attribute.
    int rowtimeIndex = -1;
    if (isRowtimeAttribute(window.timeAttribute())) {
        rowtimeIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inRowType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (rowtimeIndex < 0) {
            throw new TableException(
                    "Group window must defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    }

    final ZoneId zone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());
    final Tuple2<WindowAssigner<?>, Trigger<?>> assignerAndTrigger =
            generateWindowAssignerAndTrigger();
    final WindowAssigner<?> assigner = assignerAndTrigger.f0;
    final Trigger<?> windowTrigger = assignerAndTrigger.f1;
    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final boolean hasGeneralUdaf =
            Arrays.stream(aggCalls)
                    .anyMatch(
                            call ->
                                    PythonUtil.isPythonAggregate(
                                            call, PythonFunctionKind.GENERAL));
    final WindowEmitStrategy emit = WindowEmitStrategy.apply(config, window);

    final OneInputTransformation<RowData, RowData> result;
    if (!hasGeneralUdaf) {
        // No general Python UDAF involved: use the Pandas based window operator.
        result =
                createPandasPythonStreamWindowGroupOneInputTransformation(
                        upstream,
                        inRowType,
                        outRowType,
                        rowtimeIndex,
                        assigner,
                        windowTrigger,
                        emit.getAllowLateness(),
                        mergedPythonConfig,
                        config,
                        zone);
    } else {
        // Every aggregate call shares the node-level retraction flag.
        final boolean[] retractFlags = new boolean[aggCalls.length];
        Arrays.fill(retractFlags, needRetraction);
        final AggregateInfoList aggInfos =
                transformToStreamAggregateInfoList(
                        inRowType,
                        JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                        retractFlags,
                        needRetraction,
                        true,
                        true);
        result =
                createGeneralPythonStreamWindowGroupOneInputTransformation(
                        upstream,
                        inRowType,
                        outRowType,
                        rowtimeIndex,
                        assigner,
                        aggInfos,
                        emit.getAllowLateness(),
                        mergedPythonConfig,
                        zone);
    }

    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        result.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // Key the operator state by the grouping fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) TableException(org.apache.flink.table.api.TableException) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) AggregateUtil.transformToStreamAggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateUtil.transformToStreamAggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) SlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SlidingWindowAssigner) CountTumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountTumblingWindowAssigner) WindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.WindowAssigner) TumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.TumblingWindowAssigner) CountSlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountSlidingWindowAssigner) SessionWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SessionWindowAssigner) TumblingGroupWindow(org.apache.flink.table.planner.plan.logical.TumblingGroupWindow) Trigger(org.apache.flink.table.runtime.operators.window.triggers.Trigger) WindowEmitStrategy(org.apache.flink.table.planner.plan.utils.WindowEmitStrategy) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) SlidingGroupWindow(org.apache.flink.table.planner.plan.logical.SlidingGroupWindow)

Example 17 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecLocalWindowAggregate method translateToPlanInternal.

/**
 * Builds the transformation for the local window aggregate of this node.
 *
 * <p>A slice assigner and an aggregate handler are derived from the {@code windowing}
 * definition and the aggregate calls, then wrapped into a {@code LocalSlicingWindowAggOperator}
 * that buffers records per key through a {@code RecordsWindowBuffer} local factory.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration (local time zone, transformation metadata)
 * @return a one-input transformation that keeps the parallelism of its input
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inRowType = (RowType) edge.getOutputType();

    final ZoneId zone =
            TimeWindowUtil.getShiftTimeZone(
                    windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner slices = createSliceAssigner(windowing, zone);

    final AggregateInfoList aggInfos =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false /* isStateBackendDataViews */);
    final GeneratedNamespaceAggsHandleFunction<Long> aggsHandler =
            createAggsHandler(
                    slices,
                    aggInfos,
                    config,
                    planner.getRelBuilder(),
                    inRowType.getChildren(),
                    zone);

    // Serializers for the buffered keys (grouping fields) and values (whole input rows).
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inRowType));
    final PagedTypeSerializer<RowData> keySerializer =
            (PagedTypeSerializer<RowData>) keySelector.getProducedType().toSerializer();
    final AbstractRowDataSerializer<RowData> valueSerializer = new RowDataSerializer(inRowType);

    final OneInputStreamOperator<RowData, RowData> operator =
            new LocalSlicingWindowAggOperator(
                    keySelector,
                    slices,
                    new RecordsWindowBuffer.LocalFactory(
                            keySerializer,
                            valueSerializer,
                            new LocalAggCombiner.Factory(aggsHandler)),
                    zone);

    // Request only half of the window-aggregate memory ratio here so that the chained head
    // operator can have more memory.
    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationMeta(LOCAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
            SimpleOperatorFactory.of(operator),
            InternalTypeInfo.of(getOutputType()),
            upstream.getParallelism(),
            WINDOW_AGG_MEMORY_RATIO / 2);
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) LocalAggCombiner(org.apache.flink.table.runtime.operators.aggregate.window.combines.LocalAggCombiner) RowType(org.apache.flink.table.types.logical.RowType) WindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.WindowBuffer) RecordsWindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.RecordsWindowBuffer) RowData(org.apache.flink.table.data.RowData) PagedTypeSerializer(org.apache.flink.table.runtime.typeutils.PagedTypeSerializer) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) LocalSlicingWindowAggOperator(org.apache.flink.table.runtime.operators.aggregate.window.LocalSlicingWindowAggOperator) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer) AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)

Example 18 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecOverAggregate method createUnboundedOverProcessFunction.

/**
 * Creates the process function that evaluates the final aggregate values of an unbounded OVER
 * window.
 *
 * <p>A non-negative {@code rowTimeIdx} selects a row-time function (ROWS or RANGE variant,
 * depending on {@code isRowsClause}); a negative one selects the processing-time function.
 *
 * @param ctx code generator context
 * @param aggCalls physical calls to aggregate functions and their output field names
 * @param constants the constants in aggregate parameters, such as sum(1)
 * @param aggInputRowType physical type of the input row which consists of input and constants
 * @param inputRowType physical type of the input row which only consists of input
 * @param rowTimeIdx index of the rowtime field, or a negative value for processing time
 * @param isRowsClause whether the OVER clause is a ROWS clause; only consulted for row time
 * @param config node configuration supplying the state retention times
 * @param relBuilder rel builder handed to the aggregate handler code generator
 * @return the keyed process function for the requested time mode and clause
 */
private KeyedProcessFunction<RowData, RowData, RowData> createUnboundedOverProcessFunction(CodeGeneratorContext ctx, List<AggregateCall> aggCalls, List<RexLiteral> constants, RowType aggInputRowType, RowType inputRowType, int rowTimeIdx, boolean isRowsClause, ExecNodeConfig config, RelBuilder relBuilder) {
    // The freshly allocated boolean array marks every aggregate call as "no retraction".
    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    aggInputRowType, // inputSchema.relDataType
                    JavaScalaConversionUtil.toScala(aggCalls),
                    new boolean[aggCalls.size()],
                    false, // needRetraction
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    final LogicalType[] inputFieldTypes =
            inputRowType.getChildren().toArray(new LogicalType[0]);

    // Generate an accumulate-only aggregate handler over the plain input fields.
    final GeneratedAggsHandleFunction aggsHandler =
            new AggsHandlerCodeGenerator(
                            ctx,
                            relBuilder,
                            JavaScalaConversionUtil.toScala(Arrays.asList(inputFieldTypes)),
                            false /* copyInputField */)
                    .needAccumulate()
                    .withConstants(JavaScalaConversionUtil.toScala(constants))
                    .generateAggsHandler("UnboundedOverAggregateHelper", aggInfos);

    final LogicalType[] accTypes =
            Arrays.stream(aggInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);

    if (rowTimeIdx < 0) {
        // Processing time: a single function serves the unbounded window.
        return new ProcTimeUnboundedPrecedingFunction<>(
                config.getStateRetentionTime(),
                config.getMaxIdleStateRetentionTime(),
                aggsHandler,
                accTypes);
    }
    if (isRowsClause) {
        // ROWS unbounded over process function
        return new RowTimeRowsUnboundedPrecedingFunction<>(
                config.getStateRetentionTime(),
                config.getMaxIdleStateRetentionTime(),
                aggsHandler,
                accTypes,
                inputFieldTypes,
                rowTimeIdx);
    }
    // RANGE unbounded over process function
    return new RowTimeRangeUnboundedPrecedingFunction<>(
            config.getStateRetentionTime(),
            config.getMaxIdleStateRetentionTime(),
            aggsHandler,
            accTypes,
            inputFieldTypes,
            rowTimeIdx);
}
Also used : RowTimeRangeUnboundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.RowTimeRangeUnboundedPrecedingFunction) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) LogicalType(org.apache.flink.table.types.logical.LogicalType) AggsHandlerCodeGenerator(org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator) GeneratedAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction) RowTimeRowsUnboundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.RowTimeRowsUnboundedPrecedingFunction) ProcTimeUnboundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.ProcTimeUnboundedPrecedingFunction)

Example 19 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecOverAggregate method createBoundedOverProcessFunction.

/**
 * Creates the process function that evaluates the final aggregate values of a bounded OVER
 * window, for both ROWS and RANGE clauses.
 *
 * <p>The function is picked from two switches: row time versus processing time
 * ({@code rowTimeIdx >= 0} means row time) and ROWS versus RANGE ({@code isRowsClause}). Only
 * the ROWS variants receive the state retention times from {@code config}.
 *
 * @param ctx code generator context
 * @param aggCalls physical calls to aggregate functions and their output field names
 * @param constants the constants in aggregate parameters, such as sum(1)
 * @param aggInputType physical type of the input row which consists of input and constants
 * @param inputType physical type of the input row which only consists of input
 * @param rowTimeIdx index of the rowtime field, or a negative value for processing time
 * @param isRowsClause whether the OVER clause is a ROWS clause
 * @param precedingOffset preceding bound of the window, passed through to the created function
 * @param config node configuration supplying the state retention times
 * @param relBuilder rel builder handed to the aggregate handler code generator
 * @return the keyed process function for the requested time mode and clause
 */
private KeyedProcessFunction<RowData, RowData, RowData> createBoundedOverProcessFunction(CodeGeneratorContext ctx, List<AggregateCall> aggCalls, List<RexLiteral> constants, RowType aggInputType, RowType inputType, int rowTimeIdx, boolean isRowsClause, long precedingOffset, ExecNodeConfig config, RelBuilder relBuilder) {
    // Every aggregate call is flagged as needing retraction.
    final boolean[] retractFlags = new boolean[aggCalls.size()];
    Arrays.fill(retractFlags, true);

    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    aggInputType, // inputSchema.relDataType
                    JavaScalaConversionUtil.toScala(aggCalls),
                    retractFlags,
                    true, // needInputCount
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    final LogicalType[] inputFieldTypes = inputType.getChildren().toArray(new LogicalType[0]);

    // Generate an aggregate handler that supports both retract and accumulate.
    final GeneratedAggsHandleFunction aggsHandler =
            new AggsHandlerCodeGenerator(
                            ctx,
                            relBuilder,
                            JavaScalaConversionUtil.toScala(Arrays.asList(inputFieldTypes)),
                            false /* copyInputField */)
                    .needRetract()
                    .needAccumulate()
                    .withConstants(JavaScalaConversionUtil.toScala(constants))
                    .generateAggsHandler("BoundedOverAggregateHelper", aggInfos);

    final LogicalType[] accTypes =
            Arrays.stream(aggInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);

    final boolean rowTime = rowTimeIdx >= 0;
    if (rowTime && isRowsClause) {
        return new RowTimeRowsBoundedPrecedingFunction<>(
                config.getStateRetentionTime(),
                config.getMaxIdleStateRetentionTime(),
                aggsHandler,
                accTypes,
                inputFieldTypes,
                precedingOffset,
                rowTimeIdx);
    }
    if (rowTime) {
        return new RowTimeRangeBoundedPrecedingFunction<>(
                aggsHandler, accTypes, inputFieldTypes, precedingOffset, rowTimeIdx);
    }
    if (isRowsClause) {
        return new ProcTimeRowsBoundedPrecedingFunction<>(
                config.getStateRetentionTime(),
                config.getMaxIdleStateRetentionTime(),
                aggsHandler,
                accTypes,
                inputFieldTypes,
                precedingOffset);
    }
    return new ProcTimeRangeBoundedPrecedingFunction<>(
            aggsHandler, accTypes, inputFieldTypes, precedingOffset);
}
Also used : ProcTimeRowsBoundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.ProcTimeRowsBoundedPrecedingFunction) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) RowTimeRangeBoundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.RowTimeRangeBoundedPrecedingFunction) ProcTimeRangeBoundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.ProcTimeRangeBoundedPrecedingFunction) RowTimeRowsBoundedPrecedingFunction(org.apache.flink.table.runtime.operators.over.RowTimeRowsBoundedPrecedingFunction) LogicalType(org.apache.flink.table.types.logical.LogicalType) AggsHandlerCodeGenerator(org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator) GeneratedAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction)

Example 20 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecPythonGroupTableAggregate method translateToPlanInternal.

/**
 * Builds the keyed transformation that runs this Python group table aggregation.
 *
 * <p>A warning is logged when grouping keys exist and the configured state retention time is
 * negative. The Python function infos and data view specs extracted from the aggregate info
 * list are passed to the Python table aggregate operator, and the resulting transformation is
 * keyed by the {@code grouping} fields of the input row.
 *
 * @param planner planner used to translate the input edge and to obtain the execution
 *     environment
 * @param config node configuration (state retention times, table config, transformation name)
 * @return the transformation with its state key selector and state key type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inRowType = (RowType) edge.getOutputType();

    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int countStarIndex = aggInfos.getIndexOfCountStar();

    // f0 holds the Python aggregate function infos, f1 the matching data view specs.
    final Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extracted =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfos, aggCalls);
    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());

    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonTableAggregateFunctionOperator(
                    mergedPythonConfig,
                    inRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    extracted.f0,
                    extracted.f1,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    generateUpdateBefore,
                    countStarIndex);

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        result.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // Key the operator state by the grouping fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Aggregations

AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 21; Transformation (org.apache.flink.api.dag.Transformation): 17; RowData (org.apache.flink.table.data.RowData): 17; ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 17; RowType (org.apache.flink.table.types.logical.RowType): 16; RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 12; OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 10; CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext): 10; AggsHandlerCodeGenerator (org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator): 8; GeneratedAggsHandleFunction (org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction): 8; LogicalType (org.apache.flink.table.types.logical.LogicalType): 8; ZoneId (java.time.ZoneId): 5; OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator): 4; TableException (org.apache.flink.table.api.TableException): 4; Configuration (org.apache.flink.configuration.Configuration): 3; EqualiserCodeGenerator (org.apache.flink.table.planner.codegen.EqualiserCodeGenerator): 3; GeneratedRecordEqualiser (org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser): 3; ArrayList (java.util.ArrayList): 2; AggregateCall (org.apache.calcite.rel.core.AggregateCall): 2; FieldReferenceExpression (org.apache.flink.table.expressions.FieldReferenceExpression): 2