use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class AbstractPythonScalarFunctionOperator method getUserDefinedFunctionsProto.
/**
 * Builds the protobuf message describing the Python user-defined functions to be executed.
 *
 * <p>The message contains one UDF entry per element of {@code scalarFunctions}, followed by
 * the metric and profile flags read from {@code pythonConfig}.
 *
 * @return the assembled {@link FlinkFnApi.UserDefinedFunctions} message
 */
@Override
public FlinkFnApi.UserDefinedFunctions getUserDefinedFunctionsProto() {
    final FlinkFnApi.UserDefinedFunctions.Builder protoBuilder =
            FlinkFnApi.UserDefinedFunctions.newBuilder();

    // Append the proto form of every scalar function, in iteration order.
    for (PythonFunctionInfo functionInfo : scalarFunctions) {
        protoBuilder.addUdfs(ProtoUtils.getUserDefinedFunctionProto(functionInfo));
    }

    // Copy the runtime flags from the Python configuration and finish the message.
    return protoBuilder
            .setMetricEnabled(pythonConfig.isMetricEnabled())
            .setProfileEnabled(pythonConfig.isProfileEnabled())
            .build();
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class AbstractBatchArrowPythonAggregateFunctionOperatorTest method getTestHarness.
/**
 * Creates a one-input test harness wrapping the operator returned by {@code getTestOperator}.
 *
 * <p>The operator is built with a single dummy Python function, and the harness is given a
 * managed-memory fraction of 0.5 for the PYTHON use case before {@code setup} is called with
 * a serializer for the output type.
 *
 * @param config configuration forwarded to {@code getTestOperator}
 * @return the harness, already set up with the output serializer
 * @throws Exception if creating the harness fails
 */
public OneInputStreamOperatorTestHarness<RowData, RowData> getTestHarness(Configuration config) throws Exception {
    final RowType inputType = getInputType();
    final RowType outputType = getOutputType();

    // One dummy function whose input array is {0}.
    final PythonFunctionInfo dummyFunctionInfo =
            new PythonFunctionInfo(
                    PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE,
                    new Integer[] {0});
    final PythonFunctionInfo[] functionInfos = new PythonFunctionInfo[] {dummyFunctionInfo};

    AbstractArrowPythonAggregateFunctionOperator operator =
            getTestOperator(
                    config, functionInfos, inputType, outputType, new int[] {0}, new int[] {2});

    OneInputStreamOperatorTestHarness<RowData, RowData> harness =
            new OneInputStreamOperatorTestHarness<>(operator);
    harness.getStreamConfig()
            .setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.PYTHON, 0.5);
    harness.setup(new RowDataSerializer(outputType));
    return harness;
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class AbstractStreamArrowPythonAggregateFunctionOperatorTest method getTestHarness.
/**
 * Creates a keyed one-input test harness wrapping the operator returned by
 * {@code getTestOperator}.
 *
 * <p>Records are keyed by a row-data key selector built over field index 0 of the input type.
 * The harness is given a managed-memory fraction of 0.5 for the PYTHON use case before
 * {@code setup} is called with a serializer for the output type.
 *
 * @param config configuration forwarded to {@code getTestOperator}
 * @return the keyed harness, already set up with the output serializer
 * @throws Exception if creating the harness fails
 */
public OneInputStreamOperatorTestHarness<RowData, RowData> getTestHarness(Configuration config) throws Exception {
    final RowType inputType = getInputType();
    final RowType outputType = getOutputType();

    // One dummy function whose input array is {0}.
    final PythonFunctionInfo[] functionInfos =
            new PythonFunctionInfo[] {
                new PythonFunctionInfo(
                        PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE,
                        new Integer[] {0})
            };
    AbstractArrowPythonAggregateFunctionOperator operator =
            getTestOperator(
                    config, functionInfos, inputType, outputType, new int[] {0}, new int[] {2});

    // Key selector over field 0; its produced type is also the harness key type.
    final int[] keyFields = new int[] {0};
    RowDataKeySelector rowKeySelector =
            KeySelectorUtil.getRowDataSelector(keyFields, InternalTypeInfo.of(getInputType()));

    OneInputStreamOperatorTestHarness<RowData, RowData> harness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    operator, rowKeySelector, rowKeySelector.getProducedType());
    harness.getStreamConfig()
            .setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.PYTHON, 0.5);
    harness.setup(new RowDataSerializer(outputType));
    return harness;
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class CommonPythonUtil method extractPythonAggregateFunctionInfosFromAggregateCall.
/**
 * Converts aggregate calls into Python aggregate function infos and collects the distinct
 * argument indexes they reference.
 *
 * <p>Each distinct argument index is assigned an offset equal to the number of distinct
 * arguments seen before it. The inputs of every resulting function info are expressed in
 * those offsets rather than in the original argument indexes.
 *
 * @param aggCalls the aggregate calls to convert
 * @return a tuple whose first field holds the distinct argument indexes in first-seen order
 *     and whose second field holds one function info per aggregate call, in call order
 */
public static Tuple2<int[], PythonFunctionInfo[]> extractPythonAggregateFunctionInfosFromAggregateCall(AggregateCall[] aggCalls) {
    // Original argument index -> assigned offset. Insertion order is preserved so the key set
    // can later be read back in first-seen order.
    final Map<Integer, Integer> offsetByArg = new LinkedHashMap<>();
    final List<PythonFunctionInfo> functionInfos = new ArrayList<>();

    for (AggregateCall call : aggCalls) {
        final List<Integer> remappedInputs = new ArrayList<>();
        for (Integer arg : call.getArgList()) {
            Integer offset = offsetByArg.get(arg);
            if (offset == null) {
                // First occurrence of this argument: give it the next free offset.
                offset = offsetByArg.size();
                offsetByArg.put(arg, offset);
            }
            remappedInputs.add(offset);
        }

        // Unwrap the Python function from the SQL aggregate wrapper. It stays null when the
        // aggregation is neither of the two handled wrapper types.
        final SqlAggFunction sqlAggFunction = call.getAggregation();
        final PythonFunction pythonFunction;
        if (sqlAggFunction instanceof AggSqlFunction) {
            pythonFunction =
                    (PythonFunction) ((AggSqlFunction) sqlAggFunction).aggregateFunction();
        } else if (sqlAggFunction instanceof BridgingSqlAggFunction) {
            pythonFunction =
                    (PythonFunction) ((BridgingSqlAggFunction) sqlAggFunction).getDefinition();
        } else {
            pythonFunction = null;
        }

        functionInfos.add(
                new PythonAggregateFunctionInfo(
                        pythonFunction,
                        remappedInputs.toArray(),
                        call.filterArg,
                        call.isDistinct()));
    }

    final int[] udafInputOffsets =
            offsetByArg.keySet().stream().mapToInt(Integer::intValue).toArray();
    final PythonFunctionInfo[] functionInfoArray =
            functionInfos.toArray(new PythonFunctionInfo[0]);
    return Tuple2.of(udafInputOffsets, functionInfoArray);
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class StreamExecPythonOverAggregate method getPythonOverWindowAggregateFunctionOperator.
/**
 * Reflectively instantiates the Arrow Python over-window aggregate operator matching the
 * window definition.
 *
 * <p>The operator class is chosen from two flags: {@code isRowsClause} selects the ROWS or
 * RANGE variant, and {@code rowTiemIdx != -1} selects the row-time variant over the proc-time
 * one. The ROWS constructors also receive the two idle-state retention times; the RANGE
 * constructors do not.
 *
 * @param config exec node config whose table config is used for code generation
 * @param pythonConfig configuration passed to the operator constructor
 * @param inputRowType row type of the operator input
 * @param outputRowType row type of the operator output
 * @param rowTiemIdx row-time field index, or {@code -1} to select the proc-time operator
 * @param lowerBoundary lower boundary value passed to the operator constructor
 * @param isRowsClause whether to build the ROWS variant rather than the RANGE variant
 * @param udafInputOffsets input field indexes projected into the function input row
 * @param pythonFunctionInfos Python functions passed to the operator constructor
 * @param minIdleStateRetentionTime passed to the ROWS operator constructor only
 * @param maxIdleStateRetentionTime passed to the ROWS operator constructor only
 * @return the instantiated operator
 * @throws TableException if the constructor cannot be found or invoked
 */
@SuppressWarnings("unchecked")
private OneInputStreamOperator<RowData, RowData> getPythonOverWindowAggregateFunctionOperator(ExecNodeConfig config, Configuration pythonConfig, RowType inputRowType, RowType outputRowType, int rowTiemIdx, long lowerBoundary, boolean isRowsClause, int[] udafInputOffsets, PythonFunctionInfo[] pythonFunctionInfos, long minIdleStateRetentionTime, long maxIdleStateRetentionTime) {
    // Function input: the input row projected onto the UDAF argument fields.
    final RowType udfInputType =
            (RowType) Projection.of(udafInputOffsets).project(inputRowType);
    // Function output: the output fields that follow the forwarded input fields.
    final RowType udfOutputType =
            (RowType)
                    Projection.range(
                                    inputRowType.getFieldCount(),
                                    outputRowType.getFieldCount())
                            .project(outputRowType);
    final GeneratedProjection udafInputProjection =
            ProjectionCodeGenerator.generateProjection(
                    CodeGeneratorContext.apply(config.getTableConfig()),
                    "UdafInputProjection",
                    inputRowType,
                    udfInputType,
                    udafInputOffsets);

    final boolean hasRowTime = rowTiemIdx != -1;

    if (!isRowsClause) {
        // RANGE window: the constructor takes no idle-state retention arguments.
        final String rangeOperatorName =
                hasRowTime
                        ? ARROW_PYTHON_OVER_WINDOW_RANGE_ROW_TIME_AGGREGATE_FUNCTION_OPERATOR_NAME
                        : ARROW_PYTHON_OVER_WINDOW_RANGE_PROC_TIME_AGGREGATE_FUNCTION_OPERATOR_NAME;
        final Class<?> rangeOperatorClass = CommonPythonUtil.loadClass(rangeOperatorName);
        try {
            Constructor<?> rangeCtor =
                    rangeOperatorClass.getConstructor(
                            Configuration.class,
                            PythonFunctionInfo[].class,
                            RowType.class,
                            RowType.class,
                            RowType.class,
                            int.class,
                            long.class,
                            GeneratedProjection.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    rangeCtor.newInstance(
                            pythonConfig,
                            pythonFunctionInfos,
                            inputRowType,
                            udfInputType,
                            udfOutputType,
                            rowTiemIdx,
                            lowerBoundary,
                            udafInputProjection);
        } catch (NoSuchMethodException
                | InstantiationException
                | IllegalAccessException
                | InvocationTargetException e) {
            throw new TableException("Python Arrow Over Range Window Function Operator constructed failed.", e);
        }
    }

    // ROWS window: the constructor additionally takes the min/max idle-state retention times.
    final String rowsOperatorName =
            hasRowTime
                    ? ARROW_PYTHON_OVER_WINDOW_ROWS_ROW_TIME_AGGREGATE_FUNCTION_OPERATOR_NAME
                    : ARROW_PYTHON_OVER_WINDOW_ROWS_PROC_TIME_AGGREGATE_FUNCTION_OPERATOR_NAME;
    final Class<?> rowsOperatorClass = CommonPythonUtil.loadClass(rowsOperatorName);
    try {
        Constructor<?> rowsCtor =
                rowsOperatorClass.getConstructor(
                        Configuration.class,
                        long.class,
                        long.class,
                        PythonFunctionInfo[].class,
                        RowType.class,
                        RowType.class,
                        RowType.class,
                        int.class,
                        long.class,
                        GeneratedProjection.class);
        return (OneInputStreamOperator<RowData, RowData>)
                rowsCtor.newInstance(
                        pythonConfig,
                        minIdleStateRetentionTime,
                        maxIdleStateRetentionTime,
                        pythonFunctionInfos,
                        inputRowType,
                        udfInputType,
                        udfOutputType,
                        rowTiemIdx,
                        lowerBoundary,
                        udafInputProjection);
    } catch (NoSuchMethodException
            | InstantiationException
            | IllegalAccessException
            | InvocationTargetException e) {
        throw new TableException("Python Arrow Over Rows Window Function Operator constructed failed.", e);
    }
}
Aggregations