use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.
the class DataViewUtils method extractDataViews.
/**
 * Collects the data view specifications declared by the top-level fields of an accumulator.
 *
 * <p>Only accumulators whose logical type is {@code ROW} or {@code STRUCTURED_TYPE} are
 * inspected; any other accumulator type yields an empty list. Every top-level field that is a
 * {@link ListView} or a {@link MapView} contributes one spec, in field order.
 *
 * @param aggIndex index of the aggregate, used when creating the state id of each spec
 * @param accumulatorDataType data type of the accumulator to inspect
 * @return the extracted specs, possibly empty
 * @throws TableException if a data view is found below the first level of the accumulator type
 */
public static List<DataViewSpec> extractDataViews(int aggIndex, DataType accumulatorDataType) {
    final LogicalType accType = accumulatorDataType.getLogicalType();
    final boolean isComposite = accType.is(ROW) || accType.is(STRUCTURED_TYPE);
    if (!isComposite) {
        return Collections.emptyList();
    }

    final List<String> names = getFieldNames(accType);
    final List<DataType> children = accumulatorDataType.getChildren();
    final List<DataViewSpec> result = new ArrayList<>();

    int pos = 0;
    while (pos < children.size()) {
        final DataType childDataType = children.get(pos);
        final LogicalType childType = childDataType.getLogicalType();

        if (isDataView(childType, ListView.class)) {
            final String stateId = createStateId(aggIndex, names.get(pos));
            final DataType firstChild = childDataType.getChildren().get(0);
            result.add(new ListViewSpec(stateId, pos, firstChild));
        } else if (isDataView(childType, MapView.class)) {
            final String stateId = createStateId(aggIndex, names.get(pos));
            final DataType firstChild = childDataType.getChildren().get(0);
            result.add(new MapViewSpec(stateId, pos, firstChild, false));
        }

        // Reject any data view that sits deeper than the first level of the accumulator.
        for (LogicalType nested : childType.getChildren()) {
            if (hasNested(nested, t -> isDataView(t, DataView.class))) {
                throw new TableException("Data views are only supported in the first level of a composite accumulator type.");
            }
        }
        pos++;
    }
    return result;
}
use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.
the class StreamExecPythonGroupAggregate method translateToPlanInternal.
/**
 * Builds the transformation that runs this node's Python aggregate functions over the input,
 * keyed by the {@code grouping} fields.
 *
 * <p>Logs a warning when grouping keys exist but the configured state retention time is
 * negative. If the merged Python configuration reports that the Python worker uses managed
 * memory, the {@code PYTHON} managed-memory use case is declared at slot scope.
 *
 * @param planner planner used to translate the input edge and to obtain the execution environment
 * @param config node configuration supplying the table config and state retention settings
 * @return the one-input transformation with its state key selector and key type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final boolean hasGroupingKeys = grouping.length > 0;
    if (hasGroupingKeys && config.getStateRetentionTime() < 0) {
        LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final AggregateInfoList aggregates =
            AggregateUtil.transformToStreamAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int countStarIndex = aggregates.getIndexOfCountStar();
    final boolean hasInsertedCountStar = aggregates.countStarInserted();

    final Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extracted =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggregates, aggCalls);
    final PythonAggregateFunctionInfo[] functionInfos = extracted.f0;
    final DataViewSpec[][] viewSpecs = extracted.f1;

    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
    final OneInputStreamOperator<RowData, RowData> aggOperator =
            getPythonAggregateFunctionOperator(
                    mergedPythonConfig,
                    upstreamRowType,
                    outputRowType,
                    functionInfos,
                    viewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    countStarIndex,
                    hasInsertedCountStar);

    // Partitioned aggregation: the result keeps the parallelism of the input transformation.
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    aggOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        result.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // Keyed state needs both the key selector and the type of the key it produces.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.
the class StreamExecPythonGroupWindowAggregate method getGeneralPythonStreamGroupWindowAggregateFunctionOperator.
/**
 * Reflectively creates the general Python stream group-window aggregate operator that matches
 * the kind of {@code window} held by this node.
 *
 * <p>The operator class is loaded by name and one of its static factory methods (tumbling,
 * sliding or session) is looked up and invoked, depending on whether {@code window} is a
 * {@link TumblingGroupWindow}, {@link SlidingGroupWindow} or {@link SessionGroupWindow}.
 *
 * @return the operator returned by the invoked factory method
 * @throws TableException if the factory method cannot be found, accessed or invoked
 * @throws RuntimeException if {@code window} is none of the three handled window kinds
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private OneInputStreamOperator<RowData, RowData> getGeneralPythonStreamGroupWindowAggregateFunctionOperator(Configuration config, RowType inputType, RowType outputType, WindowAssigner<?> windowAssigner, PythonAggregateFunctionInfo[] aggregateFunctions, DataViewSpec[][] dataViewSpecs, int inputTimeFieldIndex, int indexOfCountStar, boolean generateUpdateBefore, boolean countStarInserted, long allowance, ZoneId shiftTimeZone) {
    final Class operatorClass =
            CommonPythonUtil.loadClass(GENERAL_STREAM_PYTHON_GROUP_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    final boolean rowTime = AggregateUtil.isRowtimeAttribute(window.timeAttribute());
    try {
        if (window instanceof TumblingGroupWindow) {
            final ValueLiteralExpression windowSize = ((TumblingGroupWindow) window).size();
            final Method factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_TUMBLING_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            boolean.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            final boolean sizeIsTimeInterval = AggregateUtil.hasTimeIntervalType(windowSize);
            final long sizeMillis = AggregateUtil.toDuration(windowSize).toMillis();
            final Object created =
                    factory.invoke(
                            null,
                            config,
                            inputType,
                            outputType,
                            aggregateFunctions,
                            dataViewSpecs,
                            grouping,
                            indexOfCountStar,
                            generateUpdateBefore,
                            countStarInserted,
                            inputTimeFieldIndex,
                            windowAssigner,
                            rowTime,
                            sizeIsTimeInterval,
                            sizeMillis,
                            allowance,
                            namedWindowProperties,
                            shiftTimeZone);
            return (OneInputStreamOperator<RowData, RowData>) created;
        }
        if (window instanceof SlidingGroupWindow) {
            final ValueLiteralExpression windowSize = ((SlidingGroupWindow) window).size();
            final ValueLiteralExpression windowSlide = ((SlidingGroupWindow) window).slide();
            final Method factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SLIDING_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            boolean.class,
                            long.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            final boolean sizeIsTimeInterval = AggregateUtil.hasTimeIntervalType(windowSize);
            final long sizeMillis = AggregateUtil.toDuration(windowSize).toMillis();
            final long slideMillis = AggregateUtil.toDuration(windowSlide).toMillis();
            final Object created =
                    factory.invoke(
                            null,
                            config,
                            inputType,
                            outputType,
                            aggregateFunctions,
                            dataViewSpecs,
                            grouping,
                            indexOfCountStar,
                            generateUpdateBefore,
                            countStarInserted,
                            inputTimeFieldIndex,
                            windowAssigner,
                            rowTime,
                            sizeIsTimeInterval,
                            sizeMillis,
                            slideMillis,
                            allowance,
                            namedWindowProperties,
                            shiftTimeZone);
            return (OneInputStreamOperator<RowData, RowData>) created;
        }
        if (window instanceof SessionGroupWindow) {
            final ValueLiteralExpression sessionGap = ((SessionGroupWindow) window).gap();
            final Method factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SESSION_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            final long gapMillis = AggregateUtil.toDuration(sessionGap).toMillis();
            final Object created =
                    factory.invoke(
                            null,
                            config,
                            inputType,
                            outputType,
                            aggregateFunctions,
                            dataViewSpecs,
                            grouping,
                            indexOfCountStar,
                            generateUpdateBefore,
                            countStarInserted,
                            inputTimeFieldIndex,
                            windowAssigner,
                            rowTime,
                            gapMillis,
                            allowance,
                            namedWindowProperties,
                            shiftTimeZone);
            return (OneInputStreamOperator<RowData, RowData>) created;
        }
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException("Python PythonStreamGroupWindowAggregateOperator constructed failed.", e);
    }
    // Reached only when the window is none of the kinds handled above.
    throw new RuntimeException(String.format("Unsupported LogicWindow Type %s", window));
}
use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.
the class StreamExecPythonGroupTableAggregate method translateToPlanInternal.
/**
 * Builds the transformation that runs this node's Python table aggregate functions over the
 * input, keyed by the {@code grouping} fields.
 *
 * <p>Logs a warning when grouping keys exist but the configured state retention time is
 * negative. If the merged Python configuration reports that the Python worker uses managed
 * memory, the {@code PYTHON} managed-memory use case is declared at slot scope.
 *
 * @param planner planner used to translate the input edge and to obtain the execution environment
 * @param config node configuration supplying the table config and state retention settings
 * @return the one-input transformation with its state key selector and key type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final boolean hasGroupingKeys = grouping.length > 0;
    if (hasGroupingKeys && config.getStateRetentionTime() < 0) {
        LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final AggregateInfoList aggregates =
            AggregateUtil.transformToStreamAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int countStarIndex = aggregates.getIndexOfCountStar();

    final Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extracted =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggregates, aggCalls);
    final PythonAggregateFunctionInfo[] functionInfos = extracted.f0;
    final DataViewSpec[][] viewSpecs = extracted.f1;

    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
    final OneInputStreamOperator<RowData, RowData> tableAggOperator =
            getPythonTableAggregateFunctionOperator(
                    mergedPythonConfig,
                    upstreamRowType,
                    outputRowType,
                    functionInfos,
                    viewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    generateUpdateBefore,
                    countStarIndex);

    // The result keeps the parallelism of the input transformation.
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    tableAggOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        result.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // Keyed state needs both the key selector and the type of the key it produces.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
use of org.apache.flink.table.runtime.dataview.DataViewSpec in project flink by apache.
the class CommonPythonUtil method extractPythonAggregateFunctionInfos.
/**
 * Converts every aggregate of the given list into a {@link PythonAggregateFunctionInfo} and
 * collects, per aggregate, the data view specs to be used for it.
 *
 * <p>For an aggregate whose function is a {@link PythonFunction}, the filter argument and the
 * distinct flag are read from {@code aggCalls[i]}, and the data view specs are extracted from
 * the accumulator type obtained through the function's type inference. Any other function is
 * mapped through {@code getBuiltInPythonAggregateFunction}; its filter argument and distinct
 * flag default to {@code -1} and {@code false} when {@code aggCalls} has no entry at that
 * index, and it gets an empty spec array.
 *
 * @param pythonAggregateInfoList aggregate infos to convert
 * @param aggCalls aggregate calls, indexed in parallel with the aggregate infos
 * @return a tuple of the function infos and the per-aggregate data view specs, both in
 *     aggregate order
 */
public static Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> extractPythonAggregateFunctionInfos(AggregateInfoList pythonAggregateInfoList, AggregateCall[] aggCalls) {
    final List<PythonAggregateFunctionInfo> functionInfos = new ArrayList<>();
    final List<DataViewSpec[]> specsPerAggregate = new ArrayList<>();

    final AggregateInfo[] infos = pythonAggregateInfoList.aggInfos();
    int i = 0;
    for (AggregateInfo info : infos) {
        final UserDefinedFunction udf = info.function();
        if (!(udf instanceof PythonFunction)) {
            final boolean hasAggCall = i < aggCalls.length;
            final int filterArg = hasAggCall ? aggCalls[i].filterArg : -1;
            final boolean distinct = hasAggCall ? aggCalls[i].isDistinct() : false;
            functionInfos.add(
                    new PythonAggregateFunctionInfo(
                            getBuiltInPythonAggregateFunction(udf),
                            Arrays.stream(info.argIndexes()).boxed().toArray(),
                            filterArg,
                            distinct));
            // The data views of the built in Python Aggregate Function are different from
            // Java side, we will create the spec at Python side.
            specsPerAggregate.add(new DataViewSpec[0]);
        } else {
            functionInfos.add(
                    new PythonAggregateFunctionInfo(
                            (PythonFunction) udf,
                            Arrays.stream(info.argIndexes()).boxed().toArray(),
                            aggCalls[i].filterArg,
                            aggCalls[i].isDistinct()));
            final TypeInference inference = udf.getTypeInference(null);
            specsPerAggregate.add(
                    extractDataViewSpecs(
                            i,
                            inference.getAccumulatorTypeStrategy().get().inferType(null).get()));
        }
        i++;
    }

    final PythonAggregateFunctionInfo[] infoArray =
            functionInfos.toArray(new PythonAggregateFunctionInfo[0]);
    final DataViewSpec[][] specArray = specsPerAggregate.toArray(new DataViewSpec[0][0]);
    return Tuple2.of(infoArray, specArray);
}
Aggregations