use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class BatchExecPythonOverAggregate method getPythonOverWindowAggregateFunctionOperator.
/**
 * Reflectively creates the Arrow-based Python over-window aggregate operator.
 *
 * <p>The operator class is loaded by name through {@link CommonPythonUtil#loadClass} and
 * instantiated through its public 14-argument constructor. The UDAF input type is the projection of
 * {@code udafInputOffsets} onto the input row; the UDAF output type is made of the output fields
 * that follow the input fields. Sort information is taken from the last group of the over spec,
 * and only its first sort field and first ordering flag are handed to the operator.
 *
 * @param config node configuration used to create the code generator contexts
 * @param pythonConfig configuration passed to the operator constructor
 * @param inputRowType row type of the incoming records
 * @param outputRowType row type of the emitted records
 * @param isRangeWindows per-window flags passed through to the operator
 * @param udafInputOffsets input field indices consumed by the Python functions
 * @param pythonFunctionInfos the Python functions to execute
 * @return the instantiated operator
 * @throws TableException if the constructor cannot be found or invoked
 */
@SuppressWarnings("unchecked")
private OneInputStreamOperator<RowData, RowData> getPythonOverWindowAggregateFunctionOperator(
        ExecNodeConfig config,
        Configuration pythonConfig,
        RowType inputRowType,
        RowType outputRowType,
        boolean[] isRangeWindows,
        int[] udafInputOffsets,
        PythonFunctionInfo[] pythonFunctionInfos) {
    final Class<?> operatorClass =
            CommonPythonUtil.loadClass(ARROW_PYTHON_OVER_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    final RowType udfInputType = (RowType) Projection.of(udafInputOffsets).project(inputRowType);
    final RowType udfOutputType =
            (RowType)
                    Projection.range(inputRowType.getFieldCount(), outputRowType.getFieldCount())
                            .project(outputRowType);
    final PartitionSpec partitionSpec = overSpec.getPartition();
    final List<OverSpec.GroupSpec> groupSpecs = overSpec.getGroups();
    // The sort spec of the last group is the one handed to the operator.
    final SortSpec sortSpec = groupSpecs.get(groupSpecs.size() - 1).getSort();

    try {
        final Class<?>[] ctorSignature = {
            Configuration.class,
            PythonFunctionInfo[].class,
            RowType.class,
            RowType.class,
            RowType.class,
            long[].class,
            long[].class,
            boolean[].class,
            int[].class,
            int.class,
            boolean.class,
            GeneratedProjection.class,
            GeneratedProjection.class,
            GeneratedProjection.class
        };
        final Constructor<?> constructor = operatorClass.getConstructor(ctorSignature);

        // Arguments are evaluated only after the constructor lookup succeeded, in signature order.
        final Object[] ctorArgs = {
            pythonConfig,
            pythonFunctionInfos,
            inputRowType,
            udfInputType,
            udfOutputType,
            lowerBoundary.stream().mapToLong(bound -> bound).toArray(),
            upperBoundary.stream().mapToLong(bound -> bound).toArray(),
            isRangeWindows,
            aggWindowIndex.stream().mapToInt(index -> index).toArray(),
            sortSpec.getFieldIndices()[0],
            sortSpec.getAscendingOrders()[0],
            ProjectionCodeGenerator.generateProjection(
                    CodeGeneratorContext.apply(config.getTableConfig()),
                    "UdafInputProjection",
                    inputRowType,
                    udfInputType,
                    udafInputOffsets),
            ProjectionCodeGenerator.generateProjection(
                    CodeGeneratorContext.apply(config.getTableConfig()),
                    "GroupKey",
                    inputRowType,
                    (RowType) Projection.of(partitionSpec.getFieldIndices()).project(inputRowType),
                    partitionSpec.getFieldIndices()),
            ProjectionCodeGenerator.generateProjection(
                    CodeGeneratorContext.apply(config.getTableConfig()),
                    "GroupSet",
                    inputRowType,
                    (RowType) Projection.of(partitionSpec.getFieldIndices()).project(inputRowType),
                    partitionSpec.getFieldIndices())
        };
        return (OneInputStreamOperator<RowData, RowData>) constructor.newInstance(ctorArgs);
    } catch (NoSuchMethodException
            | InstantiationException
            | IllegalAccessException
            | InvocationTargetException e) {
        throw new TableException(
                "Python BatchArrowPythonOverWindowAggregateFunctionOperator constructed failed.", e);
    }
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class CommonExecPythonCorrelate method extractPythonTableFunctionInfo.
/**
 * Builds the {@link PythonFunctionInfo} for this node's {@code invocation} together with the
 * input offsets it refers to.
 *
 * <p>An empty, insertion-ordered map is handed to {@link CommonPythonUtil#createPythonFunctionInfo}
 * and read back afterwards; only keys that are {@link RexInputRef}s contribute an offset, in the
 * map's iteration order.
 *
 * @return a tuple of (input field indices, Python table function info)
 */
private Tuple2<int[], PythonFunctionInfo> extractPythonTableFunctionInfo() {
    final LinkedHashMap<RexNode, Integer> collectedInputs = new LinkedHashMap<>();
    final PythonFunctionInfo functionInfo =
            CommonPythonUtil.createPythonFunctionInfo(invocation, collectedInputs);
    final int[] inputRefOffsets =
            collectedInputs.keySet().stream()
                    .filter(RexInputRef.class::isInstance)
                    .map(RexInputRef.class::cast)
                    .mapToInt(RexInputRef::getIndex)
                    .toArray();
    return Tuple2.of(inputRefOffsets, functionInfo);
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class BatchExecPythonGroupWindowAggregate method createPythonOneInputTransformation.
/**
 * Creates the one-input transformation that runs the Python group window aggregate operator.
 *
 * <p>Each named window property is encoded as an int code: {@code 0} for {@link WindowStart},
 * {@code 1} for {@link WindowEnd} and {@code 2} for {@link RowtimeAttribute}. The Python function
 * infos and their input offsets are extracted from {@code aggCalls}, the operator is created from
 * them, and the result is wrapped in a transformation that keeps the input's parallelism.
 *
 * @param inputTransform upstream transformation
 * @param inputRowType row type of the incoming records
 * @param outputRowType row type of the emitted records
 * @param maxLimitSize passed through to the operator factory
 * @param windowSize passed through to the operator factory
 * @param slideSize passed through to the operator factory
 * @param pythonConfig configuration passed to the operator factory
 * @param config node configuration used for naming and operator creation
 * @return the transformation wrapping the Python operator
 * @throws TableException if a named window property is of an unexpected kind
 */
private OneInputTransformation<RowData, RowData> createPythonOneInputTransformation(
        Transformation<RowData> inputTransform,
        RowType inputRowType,
        RowType outputRowType,
        int maxLimitSize,
        long windowSize,
        long slideSize,
        Configuration pythonConfig,
        ExecNodeConfig config) {
    final int[] namePropertyTypeArray = new int[namedWindowProperties.length];
    for (int i = 0; i < namePropertyTypeArray.length; i++) {
        final WindowProperty windowProperty = namedWindowProperties[i].getProperty();
        final int propertyCode;
        if (windowProperty instanceof WindowStart) {
            propertyCode = 0;
        } else if (windowProperty instanceof WindowEnd) {
            propertyCode = 1;
        } else if (windowProperty instanceof RowtimeAttribute) {
            propertyCode = 2;
        } else {
            throw new TableException("Unexpected property " + windowProperty);
        }
        namePropertyTypeArray[i] = propertyCode;
    }

    final Tuple2<int[], PythonFunctionInfo[]> extracted =
            CommonPythonUtil.extractPythonAggregateFunctionInfosFromAggregateCall(aggCalls);

    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonGroupWindowAggregateFunctionOperator(
                    config,
                    pythonConfig,
                    inputRowType,
                    outputRowType,
                    maxLimitSize,
                    windowSize,
                    slideSize,
                    namePropertyTypeArray,
                    extracted.f0,
                    extracted.f1);

    return ExecNodeUtil.createOneInputTransformation(
            inputTransform,
            createTransformationName(config),
            createTransformationDescription(config),
            operator,
            InternalTypeInfo.of(outputRowType),
            inputTransform.getParallelism());
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class CommonExecPythonCalc method getPythonScalarFunctionOperator.
/**
 * Reflectively creates the operator that executes Python scalar functions.
 *
 * <p>The operator class is chosen as follows: the Arrow operator when {@code isArrow} is set,
 * otherwise the process-mode operator when the Python worker runs in process mode, otherwise the
 * embedded operator. The constructor signature is selected to match that choice: the Arrow and
 * process-mode operators are built with two generated projections (UDF input and forwarded
 * fields), while the embedded operator takes the raw UDF input offsets and, only when there are
 * forwarded fields, a forwarded-field projection.
 *
 * @param config node configuration used to create the code generator contexts
 * @param pythonConfig configuration passed to the operator constructor
 * @param inputRowTypeInfo type information of the incoming records
 * @param outputRowTypeInfo type information of the emitted records
 * @param udfInputOffsets input field indices consumed by the Python functions
 * @param pythonFunctionInfos the Python functions to execute
 * @param forwardedFields input field indices forwarded unchanged to the output
 * @param isArrow whether the Arrow operator should be used
 * @return the instantiated operator
 * @throws TableException if the operator cannot be constructed
 */
@SuppressWarnings("unchecked")
private OneInputStreamOperator<RowData, RowData> getPythonScalarFunctionOperator(
        ExecNodeConfig config,
        Configuration pythonConfig,
        InternalTypeInfo<RowData> inputRowTypeInfo,
        InternalTypeInfo<RowData> outputRowTypeInfo,
        int[] udfInputOffsets,
        PythonFunctionInfo[] pythonFunctionInfos,
        int[] forwardedFields,
        boolean isArrow) {
    final Class<?> clazz;
    final boolean isInProcessMode = CommonPythonUtil.isPythonWorkerInProcessMode(pythonConfig);
    if (isArrow) {
        clazz = CommonPythonUtil.loadClass(ARROW_PYTHON_SCALAR_FUNCTION_OPERATOR_NAME);
    } else if (isInProcessMode) {
        clazz = CommonPythonUtil.loadClass(PYTHON_SCALAR_FUNCTION_OPERATOR_NAME);
    } else {
        clazz = CommonPythonUtil.loadClass(EMBEDDED_PYTHON_SCALAR_FUNCTION_OPERATOR_NAME);
    }

    // The Arrow operator is loaded regardless of the execution mode, so the constructor lookup
    // must follow the class choice rather than the execution mode alone. Previously an Arrow
    // operator outside process mode was asked for the embedded operator's constructors.
    // NOTE(review): assumes the Arrow operator only offers the projection-based constructor,
    // as used for it in process mode - confirm against the operator class.
    final boolean useProjectionConstructor = isArrow || isInProcessMode;

    final RowType inputType = inputRowTypeInfo.toRowType();
    final RowType outputType = outputRowTypeInfo.toRowType();
    final RowType udfInputType = (RowType) Projection.of(udfInputOffsets).project(inputType);
    final RowType forwardedFieldType = (RowType) Projection.of(forwardedFields).project(inputType);
    // The UDF results are the output fields that follow the forwarded fields.
    final RowType udfOutputType =
            (RowType)
                    Projection.range(forwardedFields.length, outputType.getFieldCount())
                            .project(outputType);
    try {
        if (useProjectionConstructor) {
            Constructor<?> ctor =
                    clazz.getConstructor(
                            Configuration.class,
                            PythonFunctionInfo[].class,
                            RowType.class,
                            RowType.class,
                            RowType.class,
                            GeneratedProjection.class,
                            GeneratedProjection.class);
            return (OneInputStreamOperator<RowData, RowData>)
                    ctor.newInstance(
                            pythonConfig,
                            pythonFunctionInfos,
                            inputType,
                            udfInputType,
                            udfOutputType,
                            ProjectionCodeGenerator.generateProjection(
                                    CodeGeneratorContext.apply(config.getTableConfig()),
                                    "UdfInputProjection",
                                    inputType,
                                    udfInputType,
                                    udfInputOffsets),
                            ProjectionCodeGenerator.generateProjection(
                                    CodeGeneratorContext.apply(config.getTableConfig()),
                                    "ForwardedFieldProjection",
                                    inputType,
                                    forwardedFieldType,
                                    forwardedFields));
        } else {
            if (forwardedFields.length > 0) {
                Constructor<?> ctor =
                        clazz.getConstructor(
                                Configuration.class,
                                PythonFunctionInfo[].class,
                                RowType.class,
                                RowType.class,
                                RowType.class,
                                int[].class,
                                GeneratedProjection.class);
                return (OneInputStreamOperator<RowData, RowData>)
                        ctor.newInstance(
                                pythonConfig,
                                pythonFunctionInfos,
                                inputType,
                                udfInputType,
                                udfOutputType,
                                udfInputOffsets,
                                ProjectionCodeGenerator.generateProjection(
                                        CodeGeneratorContext.apply(config.getTableConfig()),
                                        "ForwardedFieldProjection",
                                        inputType,
                                        forwardedFieldType,
                                        forwardedFields));
            } else {
                // Without forwarded fields no projection needs to be generated.
                Constructor<?> ctor =
                        clazz.getConstructor(
                                Configuration.class,
                                PythonFunctionInfo[].class,
                                RowType.class,
                                RowType.class,
                                RowType.class,
                                int[].class);
                return (OneInputStreamOperator<RowData, RowData>)
                        ctor.newInstance(
                                pythonConfig,
                                pythonFunctionInfos,
                                inputType,
                                udfInputType,
                                udfOutputType,
                                udfInputOffsets);
            }
        }
    } catch (Exception e) {
        // Kept broad on purpose: code generation failures are wrapped as well as reflective ones.
        throw new TableException("Python Scalar Function Operator constructed failed.", e);
    }
}
use of org.apache.flink.table.functions.python.PythonFunctionInfo in project flink by apache.
the class EmbeddedPythonScalarFunctionOperator method getUserDefinedFunctionsProto.
/**
 * Assembles the protobuf message describing the Python functions run by this operator.
 *
 * <p>One UDF proto is added per entry of {@code scalarFunctions}, in iteration order; the metric
 * and profile flags are copied from {@code pythonConfig}.
 *
 * @return the built {@link FlinkFnApi.UserDefinedFunctions} message
 */
@Override
public FlinkFnApi.UserDefinedFunctions getUserDefinedFunctionsProto() {
    final FlinkFnApi.UserDefinedFunctions.Builder protoBuilder =
            FlinkFnApi.UserDefinedFunctions.newBuilder();
    for (PythonFunctionInfo scalarFunction : scalarFunctions) {
        protoBuilder.addUdfs(ProtoUtils.getUserDefinedFunctionProto(scalarFunction));
    }
    return protoBuilder
            .setMetricEnabled(pythonConfig.isMetricEnabled())
            .setProfileEnabled(pythonConfig.isProfileEnabled())
            .build();
}
Aggregations