Search in sources :

Example 11 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class BatchExecSortAggregate method translateToPlanInternal.

/**
 * Translates this sort-based batch aggregate node into a one-input {@link Transformation}.
 *
 * <p>The single input edge is translated first. An {@link AggregateInfoList} is then derived
 * from {@code aggInputRowType} and {@code aggCalls}, and the operator code is generated: the
 * keyed generator is used when {@code grouping} is non-empty, the key-less generator
 * otherwise. The resulting transformation keeps the parallelism of the input transformation.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration providing the table config and the transformation
 *     name/description
 * @return the transformation wrapping the generated aggregate operator
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inType = (RowType) edge.getOutputType();
    final RowType outType = (RowType) getOutputType();
    final CodeGeneratorContext codeGenCtx = new CodeGeneratorContext(config.getTableConfig());
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToBatchAggregateInfoList(
                    aggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    null, // aggCallNeedRetractions
                    null); // orderKeyIndexes

    final GeneratedOperator<OneInputStreamOperator<RowData, RowData>> generated;
    if (grouping.length > 0) {
        // At least one grouping key: generate the keyed sort aggregate.
        generated =
                SortAggCodeGenerator.genWithKeys(
                        codeGenCtx,
                        planner.getRelBuilder(),
                        aggInfoList,
                        inType,
                        outType,
                        grouping,
                        auxGrouping,
                        isMerge,
                        isFinal);
    } else {
        // No grouping keys: generate the key-less aggregate.
        generated =
                AggWithoutKeysCodeGenerator.genWithoutKeys(
                        codeGenCtx,
                        planner.getRelBuilder(),
                        aggInfoList,
                        inType,
                        outType,
                        isMerge,
                        isFinal,
                        "NoGrouping");
    }

    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationName(config),
            createTransformationDescription(config),
            new CodeGenOperatorFactory<>(generated),
            InternalTypeInfo.of(outType),
            upstream.getParallelism());
}
Also used : RowData(org.apache.flink.table.data.RowData) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) RowType(org.apache.flink.table.types.logical.RowType)

Example 12 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class BatchExecSortWindowAggregate method translateToPlanInternal.

/**
 * Translates this sort-based batch window aggregate node into a one-input
 * {@link Transformation}.
 *
 * <p>The single input edge is translated, an {@link AggregateInfoList} is derived from
 * {@code aggInputRowType} and {@code aggCalls}, and a {@link SortWindowCodeGenerator} is
 * configured with the window definition and the buffer size limit read from
 * {@code TABLE_EXEC_WINDOW_AGG_BUFFER_SIZE_LIMIT}. The keyed variant of the operator is
 * generated when {@code grouping} is non-empty, the key-less variant otherwise. The resulting
 * transformation keeps the parallelism of the input transformation.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration providing the table config, the buffer size option and the
 *     transformation name/description
 * @return the transformation wrapping the generated window aggregate operator
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToBatchAggregateInfoList(
                    aggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    null, // aggCallNeedRetractions
                    null); // orderKeyIndexes
    final int bufferLimit =
            config.get(ExecutionConfigOptions.TABLE_EXEC_WINDOW_AGG_BUFFER_SIZE_LIMIT);
    // f0 holds the window size, f1 the slide size.
    final Tuple2<Long, Long> sizeAndSlide = WindowCodeGenerator.getWindowDef(window);

    final SortWindowCodeGenerator codeGenerator =
            new SortWindowCodeGenerator(
                    new CodeGeneratorContext(config.getTableConfig()),
                    planner.getRelBuilder(),
                    window,
                    inputTimeFieldIndex,
                    inputTimeIsDate,
                    JavaScalaConversionUtil.toScala(Arrays.asList(namedWindowProperties)),
                    aggInfoList,
                    (RowType) edge.getOutputType(),
                    (RowType) getOutputType(),
                    bufferLimit,
                    0L, // windowStart
                    sizeAndSlide.f0,
                    sizeAndSlide.f1,
                    grouping,
                    auxGrouping,
                    enableAssignPane,
                    isMerge,
                    isFinal);

    final GeneratedOperator<OneInputStreamOperator<RowData, RowData>> generated;
    if (grouping.length > 0) {
        generated = codeGenerator.genWithKeys();
    } else {
        generated = codeGenerator.genWithoutKeys();
    }

    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationName(config),
            createTransformationDescription(config),
            new CodeGenOperatorFactory<>(generated),
            InternalTypeInfo.of(getOutputType()),
            upstream.getParallelism());
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowData(org.apache.flink.table.data.RowData) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) SortWindowCodeGenerator(org.apache.flink.table.planner.codegen.agg.batch.SortWindowCodeGenerator)

Example 13 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class BatchExecOverAggregate method translateToPlanInternal.

/**
 * Translates this batch over-aggregate node into a one-input {@link Transformation}.
 *
 * <p>Depending on {@code needBufferData()}, either a {@link BufferDataOverWindowOperator}
 * (built from the over-window frames, with managed memory taken from
 * {@code TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY}) or a {@link NonBufferOverWindowOperator}
 * (one generated aggregate handler per group, no managed memory) is created. The resulting
 * transformation keeps the parallelism of the input transformation.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration providing the table config, the memory option and the
 *     transformation name/description
 * @return the transformation wrapping the over-window operator
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inputType = (RowType) edge.getOutputType();

    // The generated comparator only has to tell partitions apart, so the sort direction
    // (ASC or DESC) of the partition fields is irrelevant here.
    // TODO just replace comparator to equaliser
    final int[] partitionKeys = overSpec.getPartition().getFieldIndices();
    final GeneratedRecordComparator partitionComparator =
            ComparatorCodeGenerator.gen(
                    config.getTableConfig(),
                    "SortComparator",
                    inputType,
                    SortUtil.getAscendingSortSpec(partitionKeys));

    // Aggregates read from the input type extended with constants, not the plain input type.
    final RowType aggInputType = getInputTypeWithConstants();

    // The over operator could support different order-by keys as long as the collation is
    // satisfied. Currently it requires that all order keys (combined with the partition keys)
    // are the same, while the order-by keys themselves may differ, e.g.:
    // select *, sum(b) over partition by a order by a, count(c) over partition by a from T
    // Any group could therefore be used; the last one is taken to stay consistent with the
    // rule.
    final SortSpec orderSpec =
            overSpec.getGroups().get(overSpec.getGroups().size() - 1).getSort();

    final TableStreamOperator<RowData> operator;
    final long managedMemory;
    if (needBufferData()) {
        // The operator has to cache data: build it from the over-window frames.
        final List<OverWindowFrame> frames =
                createOverWindowFrames(
                        planner.getRelBuilder(), config, inputType, orderSpec, aggInputType);
        final OverWindowFrame[] frameArray = frames.toArray(new OverWindowFrame[0]);
        final boolean allFieldsFixedLength =
                inputType.getChildren().stream().allMatch(BinaryRowData::isInFixedLengthPart);
        operator =
                new BufferDataOverWindowOperator(
                        frameArray, partitionComparator, allFieldsFixedLength);
        managedMemory =
                config.get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY)
                        .getBytes();
    } else {
        // The operator needn't cache data: one aggregate handler per group is enough.
        final int groupCount = overSpec.getGroups().size();
        final GeneratedAggsHandleFunction[] handlers =
                new GeneratedAggsHandleFunction[groupCount];
        // Entries stay false unless set in the loop below.
        final boolean[] resetFlags = new boolean[groupCount];
        for (int idx = 0; idx < groupCount; idx++) {
            final GroupSpec groupSpec = overSpec.getGroups().get(idx);
            final AggregateInfoList groupAggInfos =
                    AggregateUtil.transformToBatchAggregateInfoList(
                            aggInputType,
                            JavaScalaConversionUtil.toScala(groupSpec.getAggCalls()),
                            null, // aggCallNeedRetractions
                            orderSpec.getFieldIndices());
            final AggsHandlerCodeGenerator handlerGenerator =
                    new AggsHandlerCodeGenerator(
                            new CodeGeneratorContext(config.getTableConfig()),
                            planner.getRelBuilder(),
                            JavaScalaConversionUtil.toScala(inputType.getChildren()),
                            false); // copyInputField
            // over agg code gen must pass the constants
            handlers[idx] =
                    handlerGenerator
                            .needAccumulate()
                            .withConstants(JavaScalaConversionUtil.toScala(getConstants()))
                            .generateAggsHandler("BoundedOverAggregateHelper", groupAggInfos);
            // Reset only for ROW mode frames whose lower and upper bounds are both the
            // current row; the bounds are inspected only when the mode matches.
            if (inferGroupMode(groupSpec) == OverWindowMode.ROW) {
                resetFlags[idx] =
                        groupSpec.getLowerBound().isCurrentRow()
                                && groupSpec.getUpperBound().isCurrentRow();
            }
        }
        operator = new NonBufferOverWindowOperator(handlers, partitionComparator, resetFlags);
        managedMemory = 0L;
    }

    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationName(config),
            createTransformationDescription(config),
            SimpleOperatorFactory.of(operator),
            InternalTypeInfo.of(getOutputType()),
            upstream.getParallelism(),
            managedMemory);
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowType(org.apache.flink.table.types.logical.RowType) AggsHandlerCodeGenerator(org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator) GeneratedAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction) BufferDataOverWindowOperator(org.apache.flink.table.runtime.operators.over.BufferDataOverWindowOperator) NonBufferOverWindowOperator(org.apache.flink.table.runtime.operators.over.NonBufferOverWindowOperator) UnboundedOverWindowFrame(org.apache.flink.table.runtime.operators.over.frame.UnboundedOverWindowFrame) OverWindowFrame(org.apache.flink.table.runtime.operators.over.frame.OverWindowFrame) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GroupSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.OverSpec.GroupSpec) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)

Example 14 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class AggregatePushDownSpec method buildAggregateExpressions.

/**
 * Builds the list of {@link AggregateExpression}s for the given aggregate calls.
 *
 * <p>Each aggregate's arguments become {@link FieldReferenceExpression}s pointing into
 * {@code inputType}. An aggregate whose function is an {@link AvgAggFunction} contributes two
 * expressions, one for the derived sum0 function and one for the derived count function, in
 * that order; every other aggregate contributes exactly one expression.
 *
 * @param inputType row type whose fields the aggregate arguments refer to
 * @param aggregateCalls aggregate calls to convert
 * @return the aggregate expressions, or {@link Collections#emptyList()} when there are no
 *     aggregate infos
 */
private static List<AggregateExpression> buildAggregateExpressions(RowType inputType, List<AggregateCall> aggregateCalls) {
    final AggregateInfoList infoList =
            AggregateUtil.transformToBatchAggregateInfoList(
                    inputType, JavaScalaConversionUtil.toScala(aggregateCalls), null, null);
    final AggregateInfo[] infos = infoList.aggInfos();
    if (infos.length == 0) {
        // no agg function need to be pushed down
        return Collections.emptyList();
    }

    final List<AggregateExpression> result = new ArrayList<>();
    for (AggregateInfo info : infos) {
        // Turn every argument index into a reference to the matching input field.
        final int[] argIndexes = info.argIndexes();
        final List<FieldReferenceExpression> args = new ArrayList<>(1);
        for (int k = 0; k < argIndexes.length; k++) {
            final int fieldIdx = argIndexes[k];
            final DataType fieldType =
                    TypeConversions.fromLogicalToDataType(
                            inputType.getFields().get(fieldIdx).getType());
            args.add(
                    new FieldReferenceExpression(
                            inputType.getFieldNames().get(fieldIdx), fieldType, 0, fieldIdx));
        }

        final AggregateCall call = info.agg();
        final DataType resultType = info.externalResultType();

        if (!(info.function() instanceof AvgAggFunction)) {
            result.add(
                    new AggregateExpression(
                            info.function(),
                            args,
                            null,
                            resultType,
                            call.isDistinct(),
                            call.isApproximate(),
                            call.ignoreNulls()));
            continue;
        }

        // AVG is represented by its derived sum0 and count functions: sum0 first, then count.
        final Tuple2<Sum0AggFunction, CountAggFunction> sumAndCount =
                AggregateUtil.deriveSumAndCountFromAvg((AvgAggFunction) info.function());
        result.add(
                new AggregateExpression(
                        sumAndCount._1(),
                        args,
                        null,
                        resultType,
                        call.isDistinct(),
                        call.isApproximate(),
                        call.ignoreNulls()));
        result.add(
                new AggregateExpression(
                        sumAndCount._2(),
                        args,
                        null,
                        resultType,
                        call.isDistinct(),
                        call.isApproximate(),
                        call.ignoreNulls()));
    }
    return result;
}
Also used : AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) CountAggFunction(org.apache.flink.table.planner.functions.aggfunctions.CountAggFunction) ArrayList(java.util.ArrayList) FieldReferenceExpression(org.apache.flink.table.expressions.FieldReferenceExpression) AggregateExpression(org.apache.flink.table.expressions.AggregateExpression) AvgAggFunction(org.apache.flink.table.planner.functions.aggfunctions.AvgAggFunction) DataType(org.apache.flink.table.types.DataType) AggregateInfo(org.apache.flink.table.planner.plan.utils.AggregateInfo) Sum0AggFunction(org.apache.flink.table.planner.functions.aggfunctions.Sum0AggFunction)

Example 15 with AggregateInfoList

use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.

the class StreamExecPythonGroupAggregate method translateToPlanInternal.

/**
 * Translates this streaming Python group aggregate node into a keyed one-input
 * {@link Transformation}.
 *
 * <p>A warning is logged when there are grouping keys and the configured state retention time
 * is negative. The single input edge is then translated, the stream {@link AggregateInfoList}
 * is derived, the Python function infos and data view specs are extracted from it, and the
 * Python aggregate operator is created. The transformation keeps the input's parallelism,
 * declares the Python managed-memory use case when the Python worker uses managed memory, and
 * is keyed by the grouping fields.
 *
 * @param planner planner used to translate the input edge and to obtain the execution
 *     environment
 * @param config node configuration providing the table config, state retention settings and
 *     the transformation name/description
 * @return the keyed transformation running the Python aggregate operator
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        final String missingRetentionWarning =
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.";
        LOG.warn(missingRetentionWarning);
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inType = (RowType) edge.getOutputType();

    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int countStarIndex = aggInfos.getIndexOfCountStar();
    final boolean hasInsertedCountStar = aggInfos.countStarInserted();

    // f0 holds the Python function infos, f1 the data view specs.
    final Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> infosAndViews =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfos, aggCalls);
    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());

    final OneInputStreamOperator<RowData, RowData> pythonOperator =
            getPythonAggregateFunctionOperator(
                    mergedPythonConfig,
                    inType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    infosAndViews.f0,
                    infosAndViews.f1,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    countStarIndex,
                    hasInsertedCountStar);

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    pythonOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        result.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // The state is keyed by the grouping fields: set both the key selector and the key type.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Aggregations

AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList)21 Transformation (org.apache.flink.api.dag.Transformation)17 RowData (org.apache.flink.table.data.RowData)17 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)17 RowType (org.apache.flink.table.types.logical.RowType)16 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)12 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)10 CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext)10 AggsHandlerCodeGenerator (org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator)8 GeneratedAggsHandleFunction (org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction)8 LogicalType (org.apache.flink.table.types.logical.LogicalType)8 ZoneId (java.time.ZoneId)5 OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator)4 TableException (org.apache.flink.table.api.TableException)4 Configuration (org.apache.flink.configuration.Configuration)3 EqualiserCodeGenerator (org.apache.flink.table.planner.codegen.EqualiserCodeGenerator)3 GeneratedRecordEqualiser (org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser)3 ArrayList (java.util.ArrayList)2 AggregateCall (org.apache.calcite.rel.core.AggregateCall)2 FieldReferenceExpression (org.apache.flink.table.expressions.FieldReferenceExpression)2