Use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.
From the class StreamExecPythonGroupWindowAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final boolean isCountWindow;
    if (window instanceof TumblingGroupWindow) {
        isCountWindow = hasRowIntervalType(((TumblingGroupWindow) window).size());
    } else if (window instanceof SlidingGroupWindow) {
        isCountWindow = hasRowIntervalType(((SlidingGroupWindow) window).size());
    } else {
        isCountWindow = false;
    }
    if (isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state."
                        + " Please provide a query configuration with valid retention interval to"
                        + " prevent excessive state size. You may specify a retention time of 0 to"
                        + " not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
    final int inputTimeFieldIndex;
    if (isRowtimeAttribute(window.timeAttribute())) {
        inputTimeFieldIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType),
                        planner.getRelBuilder(), window.timeAttribute());
        if (inputTimeFieldIndex < 0) {
            throw new TableException(
                    "Group window must be defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    } else {
        inputTimeFieldIndex = -1;
    }
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());
    Tuple2<WindowAssigner<?>, Trigger<?>> windowAssignerAndTrigger =
            generateWindowAssignerAndTrigger();
    WindowAssigner<?> windowAssigner = windowAssignerAndTrigger.f0;
    Trigger<?> trigger = windowAssignerAndTrigger.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    boolean isGeneralPythonUDAF =
            Arrays.stream(aggCalls)
                    .anyMatch(x -> PythonUtil.isPythonAggregate(x, PythonFunctionKind.GENERAL));
    OneInputTransformation<RowData, RowData> transform;
    WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
    if (isGeneralPythonUDAF) {
        final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length];
        Arrays.fill(aggCallNeedRetractions, needRetraction);
        final AggregateInfoList aggInfoList =
                transformToStreamAggregateInfoList(
                        inputRowType,
                        JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                        aggCallNeedRetractions, needRetraction,
                        true,  // isStateBackendDataViews
                        true); // needDistinctInfo
        transform =
                createGeneralPythonStreamWindowGroupOneInputTransformation(
                        inputTransform, inputRowType, outputRowType, inputTimeFieldIndex,
                        windowAssigner, aggInfoList, emitStrategy.getAllowLateness(),
                        pythonConfig, shiftTimeZone);
    } else {
        transform =
                createPandasPythonStreamWindowGroupOneInputTransformation(
                        inputTransform, inputRowType, outputRowType, inputTimeFieldIndex,
                        windowAssigner, trigger, emitStrategy.getAllowLateness(),
                        pythonConfig, config, shiftTimeZone);
    }
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
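The branch above hinges on a single predicate: whether any aggregate call resolves to a general (non-Pandas) Python UDAF. Below is a minimal standalone sketch of that detection, reusing the same PythonUtil helper the method calls; the import path of PythonUtil is an assumption based on this listing.

import java.util.Arrays;

import org.apache.calcite.rel.core.AggregateCall;
import org.apache.flink.table.functions.python.PythonFunctionKind;
import org.apache.flink.table.planner.plan.utils.PythonUtil; // assumed location

final class PythonUdafDetection {
    // True when at least one aggregate call is a general Python UDAF; the exec
    // node above then takes the non-Pandas transformation path.
    static boolean containsGeneralPythonUdaf(AggregateCall[] aggCalls) {
        return Arrays.stream(aggCalls)
                .anyMatch(call -> PythonUtil.isPythonAggregate(call, PythonFunctionKind.GENERAL));
    }
}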
Use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.
From the class StreamExecLocalWindowAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);
    final AggregateInfoList aggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false); // isStateBackendDataViews
    final GeneratedNamespaceAggsHandleFunction<Long> generatedAggsHandler =
            createAggsHandler(sliceAssigner, aggInfoList, config, planner.getRelBuilder(),
                    inputRowType.getChildren(), shiftTimeZone);
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    PagedTypeSerializer<RowData> keySer =
            (PagedTypeSerializer<RowData>) selector.getProducedType().toSerializer();
    AbstractRowDataSerializer<RowData> valueSer = new RowDataSerializer(inputRowType);
    WindowBuffer.LocalFactory bufferFactory =
            new RecordsWindowBuffer.LocalFactory(
                    keySer, valueSer, new LocalAggCombiner.Factory(generatedAggsHandler));
    final OneInputStreamOperator<RowData, RowData> localAggOperator =
            new LocalSlicingWindowAggOperator(selector, sliceAssigner, bufferFactory, shiftTimeZone);
    return ExecNodeUtil.createOneInputTransformation(
            inputTransform, createTransformationMeta(LOCAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
            SimpleOperatorFactory.of(localAggOperator), InternalTypeInfo.of(getOutputType()),
            inputTransform.getParallelism(),
            // use less memory here so that the chained head operator can have more memory
            WINDOW_AGG_MEMORY_RATIO / 2);
}
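A recurring consumer pattern for an AggregateInfoList is flattening its accumulator data types into a LogicalType[] for the runtime operator; the OVER-aggregate helpers later in this listing do exactly this inline. A minimal standalone sketch, assuming the LogicalTypeDataTypeConverter from org.apache.flink.table.runtime.types used further below:

import java.util.Arrays;

import org.apache.flink.table.planner.plan.utils.AggregateInfoList;
import org.apache.flink.table.runtime.types.LogicalTypeDataTypeConverter;
import org.apache.flink.table.types.logical.LogicalType;

final class AccTypeFlattening {
    // Converts the accumulator DataTypes carried by the AggregateInfoList into
    // the LogicalType[] shape that generated aggs handlers and runtime
    // functions expect.
    static LogicalType[] flattenAccTypes(AggregateInfoList aggInfoList) {
        return Arrays.stream(aggInfoList.getAccTypes())
                .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                .toArray(LogicalType[]::new);
    }
}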
Use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.
From the class StreamExecOverAggregate, method createUnboundedOverProcessFunction:
/**
 * Creates a ProcessFunction for an unbounded OVER window to evaluate the final aggregate value.
 *
 * @param ctx code generator context
 * @param aggCalls physical calls to aggregate functions and their output field names
 * @param constants the constants in the aggregate parameters, such as sum(1)
 * @param aggInputRowType physical type of the input row, consisting of the input fields and the constants
 * @param inputRowType physical type of the input row, consisting of the input fields only
 * @param rowTimeIdx the index of the rowtime field, or -1 in case of processing time
 * @param isRowsClause whether the OVER window is defined with a ROWS clause (as opposed to RANGE)
 * @param config the configuration of this exec node
 * @param relBuilder the relational expression builder passed to the code generator
 */
private KeyedProcessFunction<RowData, RowData, RowData> createUnboundedOverProcessFunction(
        CodeGeneratorContext ctx, List<AggregateCall> aggCalls, List<RexLiteral> constants,
        RowType aggInputRowType, RowType inputRowType, int rowTimeIdx, boolean isRowsClause,
        ExecNodeConfig config, RelBuilder relBuilder) {
    AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    aggInputRowType, // inputSchema.relDataType
                    JavaScalaConversionUtil.toScala(aggCalls),
                    new boolean[aggCalls.size()], // aggCallNeedRetractions
                    false, // needRetraction
                    true,  // isStateBackendDataViews
                    true); // needDistinctInfo
    LogicalType[] fieldTypes = inputRowType.getChildren().toArray(new LogicalType[0]);
    AggsHandlerCodeGenerator generator =
            new AggsHandlerCodeGenerator(ctx, relBuilder,
                    JavaScalaConversionUtil.toScala(Arrays.asList(fieldTypes)),
                    false); // copyInputField
    GeneratedAggsHandleFunction genAggsHandler =
            generator.needAccumulate()
                    .withConstants(JavaScalaConversionUtil.toScala(constants))
                    .generateAggsHandler("UnboundedOverAggregateHelper", aggInfoList);
    LogicalType[] flattenAccTypes =
            Arrays.stream(aggInfoList.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    if (rowTimeIdx >= 0) {
        if (isRowsClause) {
            // ROWS unbounded over process function
            return new RowTimeRowsUnboundedPrecedingFunction<>(
                    config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(),
                    genAggsHandler, flattenAccTypes, fieldTypes, rowTimeIdx);
        } else {
            // RANGE unbounded over process function
            return new RowTimeRangeUnboundedPrecedingFunction<>(
                    config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(),
                    genAggsHandler, flattenAccTypes, fieldTypes, rowTimeIdx);
        }
    } else {
        return new ProcTimeUnboundedPrecedingFunction<>(
                config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(),
                genAggsHandler, flattenAccTypes);
    }
}
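The key difference from the bounded variant below lies in the retraction mask passed to transformToStreamAggregateInfoList: an unbounded frame never evicts rows, so every flag stays false and the code generator is never asked for retract support, while a bounded frame must retract rows that leave the frame. A small sketch of the two masks (plain Java, mirroring the two methods):

import java.util.Arrays;

final class RetractionMasks {
    // Unbounded OVER: a fresh boolean[] is all-false, so no aggregate call
    // needs retraction (and the code generator skips needRetract()).
    static boolean[] unboundedMask(int aggCallCount) {
        return new boolean[aggCallCount];
    }

    // Bounded OVER: rows eventually leave the frame, so every aggregate call
    // must support retraction, matching Arrays.fill(..., true) below.
    static boolean[] boundedMask(int aggCallCount) {
        boolean[] mask = new boolean[aggCallCount];
        Arrays.fill(mask, true);
        return mask;
    }
}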
Use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.
From the class StreamExecOverAggregate, method createBoundedOverProcessFunction:
/**
 * Creates a ProcessFunction for a bounded OVER window (ROWS or RANGE clause) to evaluate the
 * final aggregate value.
 *
 * @param ctx code generator context
 * @param aggCalls physical calls to aggregate functions and their output field names
 * @param constants the constants in the aggregate parameters, such as sum(1)
 * @param aggInputType physical type of the input row, consisting of the input fields and the constants
 * @param inputType physical type of the input row, consisting of the input fields only
 * @param rowTimeIdx the index of the rowtime field, or -1 in case of processing time
 * @param isRowsClause whether the OVER window is defined with a ROWS clause (as opposed to RANGE)
 * @param precedingOffset the preceding bound of the window frame: a row count for ROWS frames, a time interval for RANGE frames
 * @param config the configuration of this exec node
 * @param relBuilder the relational expression builder passed to the code generator
 */
private KeyedProcessFunction<RowData, RowData, RowData> createBoundedOverProcessFunction(
        CodeGeneratorContext ctx, List<AggregateCall> aggCalls, List<RexLiteral> constants,
        RowType aggInputType, RowType inputType, int rowTimeIdx, boolean isRowsClause,
        long precedingOffset, ExecNodeConfig config, RelBuilder relBuilder) {
    boolean[] aggCallNeedRetractions = new boolean[aggCalls.size()];
    Arrays.fill(aggCallNeedRetractions, true);
    AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    aggInputType, // inputSchema.relDataType
                    JavaScalaConversionUtil.toScala(aggCalls),
                    aggCallNeedRetractions,
                    true,  // needInputCount
                    true,  // isStateBackendDataViews
                    true); // needDistinctInfo
    LogicalType[] fieldTypes = inputType.getChildren().toArray(new LogicalType[0]);
    AggsHandlerCodeGenerator generator =
            new AggsHandlerCodeGenerator(ctx, relBuilder,
                    JavaScalaConversionUtil.toScala(Arrays.asList(fieldTypes)),
                    false); // copyInputField
    GeneratedAggsHandleFunction genAggsHandler =
            generator.needRetract()
                    .needAccumulate()
                    .withConstants(JavaScalaConversionUtil.toScala(constants))
                    .generateAggsHandler("BoundedOverAggregateHelper", aggInfoList);
    LogicalType[] flattenAccTypes =
            Arrays.stream(aggInfoList.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    if (rowTimeIdx >= 0) {
        if (isRowsClause) {
            return new RowTimeRowsBoundedPrecedingFunction<>(
                    config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(),
                    genAggsHandler, flattenAccTypes, fieldTypes, precedingOffset, rowTimeIdx);
        } else {
            return new RowTimeRangeBoundedPrecedingFunction<>(
                    genAggsHandler, flattenAccTypes, fieldTypes, precedingOffset, rowTimeIdx);
        }
    } else {
        if (isRowsClause) {
            return new ProcTimeRowsBoundedPrecedingFunction<>(
                    config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(),
                    genAggsHandler, flattenAccTypes, fieldTypes, precedingOffset);
        } else {
            return new ProcTimeRangeBoundedPrecedingFunction<>(
                    genAggsHandler, flattenAccTypes, fieldTypes, precedingOffset);
        }
    }
}
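The four return branches map the combination of time attribute and frame type onto four runtime functions. As a quick reference, here is a hypothetical helper that restates the dispatch; the real selection is inlined in the method above:

final class OverFunctionChoice {
    enum Variant { ROWTIME_ROWS, ROWTIME_RANGE, PROCTIME_ROWS, PROCTIME_RANGE }

    // Mirrors the branch structure of createBoundedOverProcessFunction:
    // rowTimeIdx >= 0 selects the event-time functions, isRowsClause selects
    // ROWS semantics over RANGE semantics.
    static Variant choose(int rowTimeIdx, boolean isRowsClause) {
        if (rowTimeIdx >= 0) {
            return isRowsClause ? Variant.ROWTIME_ROWS : Variant.ROWTIME_RANGE;
        }
        return isRowsClause ? Variant.PROCTIME_ROWS : Variant.PROCTIME_RANGE;
    }
}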
Use of org.apache.flink.table.planner.plan.utils.AggregateInfoList in project flink by apache.
From the class StreamExecPythonGroupTableAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions, needRetraction,
                    true,  // isStateBackendDataViews
                    true); // needDistinctInfo
    int inputCountIndex = aggInfoList.getIndexOfCountStar();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    OneInputStreamOperator<RowData, RowData> pythonOperator =
            getPythonTableAggregateFunctionOperator(
                    pythonConfig, inputRowType, InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos, dataViewSpecs, config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(), generateUpdateBefore, inputCountIndex);
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform, createTransformationName(config),
                    createTransformationDescription(config), pythonOperator,
                    InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
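Both Python exec nodes in this listing end with the same managed-memory handshake: when the Python worker is configured to use managed memory, the transformation declares the PYTHON use case at slot scope so the scheduler reserves memory for the worker process. A minimal sketch of that shared tail; the boolean is assumed to come from CommonPythonUtil.isPythonWorkerUsingManagedMemory as above:

import org.apache.flink.api.dag.Transformation;
import org.apache.flink.core.memory.ManagedMemoryUseCase;

final class PythonManagedMemory {
    // Declares the PYTHON managed-memory use case only when the Python worker
    // actually runs with managed memory, matching the guard used above.
    static void declareIfNeeded(Transformation<?> transform, boolean workerUsesManagedMemory) {
        if (workerUsesManagedMemory) {
            transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
        }
    }
}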