Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class CommonPythonUtil, method extractPythonAggregateFunctionInfosFromAggregateCall:
public static Tuple2<int[], PythonFunctionInfo[]> extractPythonAggregateFunctionInfosFromAggregateCall(
        AggregateCall[] aggCalls) {
    Map<Integer, Integer> inputNodes = new LinkedHashMap<>();
    List<PythonFunctionInfo> pythonFunctionInfos = new ArrayList<>();
    for (AggregateCall aggregateCall : aggCalls) {
        List<Integer> inputs = new ArrayList<>();
        List<Integer> argList = aggregateCall.getArgList();
        for (Integer arg : argList) {
            if (inputNodes.containsKey(arg)) {
                inputs.add(inputNodes.get(arg));
            } else {
                Integer inputOffset = inputNodes.size();
                inputs.add(inputOffset);
                inputNodes.put(arg, inputOffset);
            }
        }
        PythonFunction pythonFunction = null;
        SqlAggFunction aggregateFunction = aggregateCall.getAggregation();
        if (aggregateFunction instanceof AggSqlFunction) {
            pythonFunction =
                    (PythonFunction) ((AggSqlFunction) aggregateFunction).aggregateFunction();
        } else if (aggregateFunction instanceof BridgingSqlAggFunction) {
            pythonFunction =
                    (PythonFunction) ((BridgingSqlAggFunction) aggregateFunction).getDefinition();
        }
        PythonFunctionInfo pythonFunctionInfo =
                new PythonAggregateFunctionInfo(
                        pythonFunction,
                        inputs.toArray(),
                        aggregateCall.filterArg,
                        aggregateCall.isDistinct());
        pythonFunctionInfos.add(pythonFunctionInfo);
    }
    int[] udafInputOffsets = inputNodes.keySet().stream().mapToInt(i -> i).toArray();
    return Tuple2.of(udafInputOffsets, pythonFunctionInfos.toArray(new PythonFunctionInfo[0]));
}
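The LinkedHashMap assigns each distinct argument index one compact offset and reuses it across aggregate calls; its keys, in insertion order, become the returned udafInputOffsets. Below is a minimal standalone sketch of just that deduplication step, not tied to the Flink planner types (the class name UdafInputOffsetDemo and the sample argument lists are made up for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class UdafInputOffsetDemo {
    public static void main(String[] args) {
        // e.g. two aggregate calls whose argument lists are [1, 3] and [3, 5]
        List<List<Integer>> argLists =
                Arrays.asList(Arrays.asList(1, 3), Arrays.asList(3, 5));

        Map<Integer, Integer> inputNodes = new LinkedHashMap<>();
        for (List<Integer> argList : argLists) {
            List<Integer> inputs = new ArrayList<>();
            for (Integer arg : argList) {
                // reuse the existing offset for a field, or assign the next free one
                inputs.add(inputNodes.computeIfAbsent(arg, k -> inputNodes.size()));
            }
            System.out.println("per-call offsets: " + inputs);
        }

        int[] udafInputOffsets = inputNodes.keySet().stream().mapToInt(i -> i).toArray();
        System.out.println("udafInputOffsets: " + Arrays.toString(udafInputOffsets));
        // prints per-call offsets [0, 1] and [1, 2], udafInputOffsets [1, 3, 5]
    }
}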
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class PythonStreamGroupWindowAggregateOperatorTest, method getTestOperator:
@Override
OneInputStreamOperator getTestOperator(Configuration config) {
    long size = 10000L;
    long slide = 5000L;
    SlidingWindowAssigner windowAssigner =
            SlidingWindowAssigner.of(Duration.ofMillis(size), Duration.ofMillis(slide))
                    .withEventTime();
    WindowReference windowRef = new WindowReference("w$", new TimestampType(3));
    return new PassThroughPythonStreamGroupWindowAggregateOperator(
            config, getInputType(), getOutputType(),
            new PythonAggregateFunctionInfo[] {
                new PythonAggregateFunctionInfo(
                        PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE,
                        new Integer[] {2}, -1, false)
            },
            getGrouping(), -1, false, false, 3,
            windowAssigner, FlinkFnApi.GroupWindow.WindowType.SLIDING_GROUP_WINDOW,
            true, true, size, slide, 0L, 0L,
            new NamedWindowProperty[] {
                new NamedWindowProperty("start", new WindowStart(null)),
                new NamedWindowProperty("end", new WindowEnd(null))
            },
            UTC_ZONE_ID);
}
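Reading this test call together with the call in CommonPythonUtil above, the PythonAggregateFunctionInfo constructor takes the Python function, the argument offsets or literals (here a single input at offset 2), the index of the FILTER argument (-1 meaning no filter), and a distinct flag (false here).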
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();
    final boolean countStarInserted = aggInfoList.countStarInserted();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonAggregateFunctionOperator(
                    pythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    inputCountIndex,
                    countStarInserted);
    // partitioned aggregation
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
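The warning at the top fires when a grouped aggregation has no state retention configured. A minimal sketch of setting that interval from user code, assuming a recent Flink version where TableConfig#setIdleStateRetention(Duration) is available:

import java.time.Duration;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class IdleStateRetentionExample {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
        // State of keys that stay idle for longer than one hour may be cleaned up,
        // so the group aggregation no longer accumulates state without bound.
        tEnv.getConfig().setIdleStateRetention(Duration.ofHours(1));
    }
}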
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupWindowAggregate, method getGeneralPythonStreamGroupWindowAggregateFunctionOperator:
@SuppressWarnings({"unchecked", "rawtypes"})
private OneInputStreamOperator<RowData, RowData> getGeneralPythonStreamGroupWindowAggregateFunctionOperator(
        Configuration config,
        RowType inputType,
        RowType outputType,
        WindowAssigner<?> windowAssigner,
        PythonAggregateFunctionInfo[] aggregateFunctions,
        DataViewSpec[][] dataViewSpecs,
        int inputTimeFieldIndex,
        int indexOfCountStar,
        boolean generateUpdateBefore,
        boolean countStarInserted,
        long allowance,
        ZoneId shiftTimeZone) {
    Class clazz =
            CommonPythonUtil.loadClass(
                    GENERAL_STREAM_PYTHON_GROUP_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    boolean isRowTime = AggregateUtil.isRowtimeAttribute(window.timeAttribute());
    try {
        if (window instanceof TumblingGroupWindow) {
            ValueLiteralExpression size = ((TumblingGroupWindow) window).size();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_TUMBLING_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, boolean.class, long.class,
                            long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.hasTimeIntervalType(size),
                            AggregateUtil.toDuration(size).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SlidingGroupWindow) {
            ValueLiteralExpression size = ((SlidingGroupWindow) window).size();
            ValueLiteralExpression slide = ((SlidingGroupWindow) window).slide();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SLIDING_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, boolean.class, long.class,
                            long.class, long.class, NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.hasTimeIntervalType(size),
                            AggregateUtil.toDuration(size).toMillis(),
                            AggregateUtil.toDuration(slide).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        } else if (window instanceof SessionGroupWindow) {
            ValueLiteralExpression gap = ((SessionGroupWindow) window).gap();
            Method create =
                    clazz.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SESSION_GROUP_WINDOW_METHOD,
                            Configuration.class, RowType.class, RowType.class,
                            PythonAggregateFunctionInfo[].class, DataViewSpec[][].class,
                            int[].class, int.class, boolean.class, boolean.class, int.class,
                            WindowAssigner.class, boolean.class, long.class, long.class,
                            NamedWindowProperty[].class, ZoneId.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    create.invoke(
                            null, config, inputType, outputType, aggregateFunctions,
                            dataViewSpecs, grouping, indexOfCountStar, generateUpdateBefore,
                            countStarInserted, inputTimeFieldIndex, windowAssigner, isRowTime,
                            AggregateUtil.toDuration(gap).toMillis(), allowance,
                            namedWindowProperties, shiftTimeZone);
        }
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException(
                "Python PythonStreamGroupWindowAggregateOperator constructed failed.", e);
    }
    throw new RuntimeException(String.format("Unsupported LogicWindow Type %s", window));
}
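The operator is built through reflection (CommonPythonUtil.loadClass followed by getMethod and invoke(null, ...)) rather than a direct constructor call, because the operator class is resolved by name only at runtime. A minimal, self-contained illustration of that static-factory-via-reflection pattern; the class and method used here (Integer.valueOf) are stand-ins chosen only so the demo runs:

import java.lang.reflect.Method;

public class ReflectiveFactoryDemo {
    public static void main(String[] args) throws Exception {
        // Resolve a class by its fully qualified name at runtime.
        Class<?> clazz = Class.forName("java.lang.Integer");
        // Look up a static factory method by name and parameter types.
        Method create = clazz.getMethod("valueOf", String.class);
        // The first argument to invoke is the receiver; null for a static method.
        Object result = create.invoke(null, "42");
        System.out.println(result); // prints 42
    }
}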
Use of org.apache.flink.table.functions.python.PythonAggregateFunctionInfo in project flink by apache.
The class StreamExecPythonGroupTableAggregate, method translateToPlanInternal:
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    int inputCountIndex = aggInfoList.getIndexOfCountStar();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    OneInputStreamOperator<RowData, RowData> pythonOperator =
            getPythonTableAggregateFunctionOperator(
                    pythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    generateUpdateBefore,
                    inputCountIndex);
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    pythonOperator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
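Whether the Python worker shares Flink's managed memory is a configuration decision that isPythonWorkerUsingManagedMemory reads from the merged Python config; only when it is enabled does the transformation declare the PYTHON managed memory use case at slot scope. A small hedged sketch of flipping that switch, assuming the documented PyFlink option key python.fn-execution.memory.managed (default true):

import org.apache.flink.configuration.Configuration;

public class PythonManagedMemoryConfig {
    public static void main(String[] args) {
        Configuration pythonConfig = new Configuration();
        // Assumed option key per the PyFlink documentation; when set to false,
        // the Python worker memory is not taken from Flink managed memory.
        pythonConfig.setString("python.fn-execution.memory.managed", "false");
        System.out.println(pythonConfig.getString("python.fn-execution.memory.managed", "true"));
    }
}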