use of org.apache.flink.table.planner.plan.logical.TumblingGroupWindow in project flink by apache.
the class StreamExecGroupWindowAggregate method createWindowOperator.
/**
 * Assembles the {@code WindowOperator} for the group window stored in the {@code window} field.
 *
 * <p>The builder is seeded with the input fields, the shift time zone and the input count index,
 * and is then configured according to the concrete window kind:
 *
 * <ul>
 *   <li>tumbling / sliding: processing-time or event-time when the size is a time interval, or a
 *       count window when the time field is a proctime attribute and the size is a row interval;
 *   <li>session: processing-time or event-time, using the window gap.
 * </ul>
 *
 * <p>When the emit strategy derived from {@code config} produces updates, the builder is also
 * given the strategy's trigger and allowed lateness. Finally the aggregate handler is attached
 * and the operator is built.
 *
 * @throws UnsupportedOperationException if the time attribute / interval combination of the
 *     window is not one of the handled cases
 * @throws TableException if the window kind or the class of {@code aggsHandler} is not handled
 */
private WindowOperator<?, ?> createWindowOperator(ReadableConfig config, GeneratedClass<?> aggsHandler, GeneratedRecordEqualiser recordEqualiser, LogicalType[] accTypes, LogicalType[] windowPropertyTypes, LogicalType[] aggValueTypes, LogicalType[] inputFields, int timeFieldIndex, ZoneId shiftTimeZone, int inputCountIndex) {
    WindowOperatorBuilder operatorBuilder =
            WindowOperatorBuilder.builder()
                    .withInputFields(inputFields)
                    .withShiftTimezone(shiftTimeZone)
                    .withInputCountIndex(inputCountIndex);

    if (window instanceof TumblingGroupWindow) {
        final TumblingGroupWindow tumbling = (TumblingGroupWindow) window;
        final FieldReferenceExpression timeAttr = tumbling.timeField();
        final ValueLiteralExpression windowSize = tumbling.size();
        if (isProctimeAttribute(timeAttr) && hasTimeIntervalType(windowSize)) {
            operatorBuilder = operatorBuilder.tumble(toDuration(windowSize)).withProcessingTime();
        } else if (isRowtimeAttribute(timeAttr) && hasTimeIntervalType(windowSize)) {
            operatorBuilder =
                    operatorBuilder.tumble(toDuration(windowSize)).withEventTime(timeFieldIndex);
        } else if (isProctimeAttribute(timeAttr) && hasRowIntervalType(windowSize)) {
            operatorBuilder = operatorBuilder.countWindow(toLong(windowSize));
        } else {
            // None of the handled time-attribute / interval combinations matched.
            throw new UnsupportedOperationException("Event-time grouping windows on row intervals are currently not supported.");
        }
    } else if (window instanceof SlidingGroupWindow) {
        final SlidingGroupWindow sliding = (SlidingGroupWindow) window;
        final FieldReferenceExpression timeAttr = sliding.timeField();
        final ValueLiteralExpression windowSize = sliding.size();
        final ValueLiteralExpression windowSlide = sliding.slide();
        if (isProctimeAttribute(timeAttr) && hasTimeIntervalType(windowSize)) {
            operatorBuilder =
                    operatorBuilder
                            .sliding(toDuration(windowSize), toDuration(windowSlide))
                            .withProcessingTime();
        } else if (isRowtimeAttribute(timeAttr) && hasTimeIntervalType(windowSize)) {
            operatorBuilder =
                    operatorBuilder
                            .sliding(toDuration(windowSize), toDuration(windowSlide))
                            .withEventTime(timeFieldIndex);
        } else if (isProctimeAttribute(timeAttr) && hasRowIntervalType(windowSize)) {
            operatorBuilder = operatorBuilder.countWindow(toLong(windowSize), toLong(windowSlide));
        } else {
            // None of the handled time-attribute / interval combinations matched.
            throw new UnsupportedOperationException("Event-time grouping windows on row intervals are currently not supported.");
        }
    } else if (window instanceof SessionGroupWindow) {
        final SessionGroupWindow session = (SessionGroupWindow) window;
        final FieldReferenceExpression timeAttr = session.timeField();
        final ValueLiteralExpression sessionGap = session.gap();
        if (isProctimeAttribute(timeAttr)) {
            operatorBuilder = operatorBuilder.session(toDuration(sessionGap)).withProcessingTime();
        } else if (isRowtimeAttribute(timeAttr)) {
            operatorBuilder =
                    operatorBuilder.session(toDuration(sessionGap)).withEventTime(timeFieldIndex);
        } else {
            // The time field is neither a proctime nor a rowtime attribute.
            throw new UnsupportedOperationException("This should not happen.");
        }
    } else {
        throw new TableException("Unsupported window: " + window.toString());
    }

    final WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
    if (emitStrategy.produceUpdates()) {
        // The operator will emit retractions, so install the strategy's trigger and lateness.
        operatorBuilder
                .produceUpdates()
                .triggering(emitStrategy.getTrigger())
                .withAllowedLateness(Duration.ofMillis(emitStrategy.getAllowLateness()));
    }

    // Attach the aggregate handler; the builder overload depends on the handler's class.
    if (aggsHandler instanceof GeneratedNamespaceAggsHandleFunction) {
        final GeneratedNamespaceAggsHandleFunction<?> aggHandler =
                (GeneratedNamespaceAggsHandleFunction<?>) aggsHandler;
        return operatorBuilder
                .aggregate(aggHandler, recordEqualiser, accTypes, aggValueTypes, windowPropertyTypes)
                .build();
    }
    if (aggsHandler instanceof GeneratedNamespaceTableAggsHandleFunction) {
        final GeneratedNamespaceTableAggsHandleFunction<?> tableAggHandler =
                (GeneratedNamespaceTableAggsHandleFunction<?>) aggsHandler;
        return operatorBuilder
                .aggregate(tableAggHandler, accTypes, aggValueTypes, windowPropertyTypes)
                .build();
    }
    throw new TableException("Unsupported agg handler class: " + aggsHandler.getClass().getSimpleName());
}
use of org.apache.flink.table.planner.plan.logical.TumblingGroupWindow in project flink by apache.
the class BatchPhysicalPythonWindowAggregateRule method onMatch.
/**
 * Rewrites the matched {@code FlinkLogicalWindowAggregate} into a
 * {@code BatchPhysicalPythonGroupWindowAggregate}.
 *
 * <p>The input is converted to the batch physical convention with a hash distribution on the
 * group keys (or a singleton distribution when there are none) and with the collation returned
 * by {@code createRelCollation} for the group keys and the time field.
 *
 * @param call the rule call whose first operand is the logical window aggregate
 * @throws TableException if the window does not pass the support check below
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final FlinkLogicalWindowAggregate logicalAgg = call.rel(0);
    final RelNode logicalInput = logicalAgg.getInput();
    final LogicalWindow groupWindow = logicalAgg.getWindow();

    // Accepted: tumbling/sliding windows whose size is a time interval, and session windows.
    // Everything else (e.g. count-based tumbling/sliding windows) is rejected.
    final boolean supportedWindow =
            (groupWindow instanceof TumblingGroupWindow
                            && AggregateUtil.hasTimeIntervalType(
                                    ((TumblingGroupWindow) groupWindow).size()))
                    || (groupWindow instanceof SlidingGroupWindow
                            && AggregateUtil.hasTimeIntervalType(
                                    ((SlidingGroupWindow) groupWindow).size()))
                    || groupWindow instanceof SessionGroupWindow;
    if (!supportedWindow) {
        throw new TableException("Window " + groupWindow + " is not supported right now.");
    }

    final int[] groupKeys = logicalAgg.getGroupSet().toArray();

    // Separate auxiliary group columns from the real aggregate calls.
    Tuple2<int[], Seq<AggregateCall>> auxGroupAndCalls = AggregateUtil.checkAndSplitAggCalls(logicalAgg);
    final int[] auxGroupKeys = auxGroupAndCalls._1;
    final Seq<AggregateCall> aggCalls = auxGroupAndCalls._2;

    Tuple3<int[][], DataType[][], UserDefinedFunction[]> bufferTypesAndFunctions =
            AggregateUtil.transformToBatchAggregateFunctions(
                    FlinkTypeFactory.toLogicalRowType(logicalInput.getRowType()), aggCalls, null);
    final UserDefinedFunction[] aggFunctions = bufferTypesAndFunctions._3();

    // Locate the window's time attribute in the input row and note whether it is a DATE column.
    final int timeIndex =
            AggregateUtil.timeFieldIndex(
                    logicalInput.getRowType(), call.builder(), groupWindow.timeAttribute());
    final RelDataType timeFieldType =
            logicalInput.getRowType().getFieldList().get(timeIndex).getType();
    final boolean timeIsDate = timeFieldType.getSqlTypeName() == SqlTypeName.DATE;

    // Traits required from the converted input: convention, distribution, then collation.
    RelTraitSet inputTraits = logicalAgg.getTraitSet().replace(FlinkConventions.BATCH_PHYSICAL());
    inputTraits =
            groupKeys.length == 0
                    ? inputTraits.replace(FlinkRelDistribution.SINGLETON())
                    : inputTraits.replace(FlinkRelDistribution.hash(groupKeys, false));
    final RelCollation requiredCollation = createRelCollation(groupKeys, timeIndex);
    inputTraits = inputTraits.replace(requiredCollation);
    final RelNode physicalInput = RelOptRule.convert(logicalInput, inputTraits);

    // Traits of the produced node: the aggregate's own traits in the batch physical convention.
    final RelTraitSet outputTraits =
            logicalAgg.getTraitSet().replace(FlinkConventions.BATCH_PHYSICAL());
    final BatchPhysicalPythonGroupWindowAggregate physicalAgg =
            new BatchPhysicalPythonGroupWindowAggregate(
                    logicalAgg.getCluster(),
                    outputTraits,
                    physicalInput,
                    logicalAgg.getRowType(),
                    physicalInput.getRowType(),
                    groupKeys,
                    auxGroupKeys,
                    aggCalls,
                    aggFunctions,
                    groupWindow,
                    timeIndex,
                    timeIsDate,
                    logicalAgg.getNamedProperties());
    call.transformTo(physicalAgg);
}
use of org.apache.flink.table.planner.plan.logical.TumblingGroupWindow in project flink by apache.
the class StreamExecPythonGroupWindowAggregate method getGeneralPythonStreamGroupWindowAggregateFunctionOperator.
/**
 * Creates the general Python stream group-window aggregate operator via reflection.
 *
 * <p>The operator class is loaded by name through {@code CommonPythonUtil.loadClass}. Depending
 * on whether the {@code window} field is a tumbling, sliding or session window, the matching
 * factory method is looked up on that class and invoked with a {@code null} receiver. The
 * {@code grouping} and {@code namedWindowProperties} fields are passed alongside the arguments
 * of this method.
 *
 * @throws TableException if the factory method cannot be found, accessed, or throws
 * @throws RuntimeException if the window is none of the three handled kinds
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private OneInputStreamOperator<RowData, RowData> getGeneralPythonStreamGroupWindowAggregateFunctionOperator(Configuration config, RowType inputType, RowType outputType, WindowAssigner<?> windowAssigner, PythonAggregateFunctionInfo[] aggregateFunctions, DataViewSpec[][] dataViewSpecs, int inputTimeFieldIndex, int indexOfCountStar, boolean generateUpdateBefore, boolean countStarInserted, long allowance, ZoneId shiftTimeZone) {
    final Class<?> operatorClass =
            CommonPythonUtil.loadClass(GENERAL_STREAM_PYTHON_GROUP_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);
    final boolean isRowTime = AggregateUtil.isRowtimeAttribute(window.timeAttribute());
    try {
        // Each branch resolves the factory method first and only then evaluates its arguments.
        final Method factory;
        final Object[] factoryArgs;
        if (window instanceof TumblingGroupWindow) {
            final ValueLiteralExpression windowSize = ((TumblingGroupWindow) window).size();
            factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_TUMBLING_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            boolean.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            factoryArgs =
                    new Object[] {
                        config,
                        inputType,
                        outputType,
                        aggregateFunctions,
                        dataViewSpecs,
                        grouping,
                        indexOfCountStar,
                        generateUpdateBefore,
                        countStarInserted,
                        inputTimeFieldIndex,
                        windowAssigner,
                        isRowTime,
                        AggregateUtil.hasTimeIntervalType(windowSize),
                        AggregateUtil.toDuration(windowSize).toMillis(),
                        allowance,
                        namedWindowProperties,
                        shiftTimeZone
                    };
        } else if (window instanceof SlidingGroupWindow) {
            final ValueLiteralExpression windowSize = ((SlidingGroupWindow) window).size();
            final ValueLiteralExpression windowSlide = ((SlidingGroupWindow) window).slide();
            factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SLIDING_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            boolean.class,
                            long.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            factoryArgs =
                    new Object[] {
                        config,
                        inputType,
                        outputType,
                        aggregateFunctions,
                        dataViewSpecs,
                        grouping,
                        indexOfCountStar,
                        generateUpdateBefore,
                        countStarInserted,
                        inputTimeFieldIndex,
                        windowAssigner,
                        isRowTime,
                        AggregateUtil.hasTimeIntervalType(windowSize),
                        AggregateUtil.toDuration(windowSize).toMillis(),
                        AggregateUtil.toDuration(windowSlide).toMillis(),
                        allowance,
                        namedWindowProperties,
                        shiftTimeZone
                    };
        } else if (window instanceof SessionGroupWindow) {
            final ValueLiteralExpression sessionGap = ((SessionGroupWindow) window).gap();
            factory =
                    operatorClass.getMethod(
                            GENERAL_STREAM_PYTHON_CREATE_SESSION_GROUP_WINDOW_METHOD,
                            Configuration.class,
                            RowType.class,
                            RowType.class,
                            PythonAggregateFunctionInfo[].class,
                            DataViewSpec[][].class,
                            int[].class,
                            int.class,
                            boolean.class,
                            boolean.class,
                            int.class,
                            WindowAssigner.class,
                            boolean.class,
                            long.class,
                            long.class,
                            NamedWindowProperty[].class,
                            ZoneId.class);
            factoryArgs =
                    new Object[] {
                        config,
                        inputType,
                        outputType,
                        aggregateFunctions,
                        dataViewSpecs,
                        grouping,
                        indexOfCountStar,
                        generateUpdateBefore,
                        countStarInserted,
                        inputTimeFieldIndex,
                        windowAssigner,
                        isRowTime,
                        AggregateUtil.toDuration(sessionGap).toMillis(),
                        allowance,
                        namedWindowProperties,
                        shiftTimeZone
                    };
        } else {
            // Not one of the caught reflection exceptions, so this propagates unchanged.
            throw new RuntimeException(String.format("Unsupported LogicWindow Type %s", window));
        }
        return (OneInputStreamOperator<RowData, RowData>) factory.invoke(null, factoryArgs);
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException("Python PythonStreamGroupWindowAggregateOperator constructed failed.", e);
    }
}
use of org.apache.flink.table.planner.plan.logical.TumblingGroupWindow in project flink by apache.
the class StreamExecPythonGroupWindowAggregate method translateToPlanInternal.
/**
 * Translates this node into a {@code Transformation} that runs a Python group window aggregate.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>log a warning for grouped count windows when the state retention time is negative;
 *   <li>translate the single input edge and derive the input / output row types;
 *   <li>resolve the index of the time field (only for rowtime attributes, otherwise {@code -1});
 *   <li>build either the general Python UDAF transformation or the Pandas one, depending on
 *       whether any aggregate call is a general Python aggregate;
 *   <li>declare Python managed memory if the merged Python config asks for it, and set the state
 *       key selector and key type from the {@code grouping} field.
 * </ol>
 *
 * @throws TableException if the window is defined on a rowtime attribute that cannot be found
 *     in the input row type
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only tumbling and sliding windows can be count windows (row-interval size).
    final boolean isCountWindow =
            window instanceof TumblingGroupWindow
                    ? hasRowIntervalType(((TumblingGroupWindow) window).size())
                    : window instanceof SlidingGroupWindow
                            && hasRowIntervalType(((SlidingGroupWindow) window).size());
    final boolean warnAboutRetention =
            isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0;
    if (warnAboutRetention) {
        LOGGER.warn("No state retention interval configured for a query which accumulates state." + " Please provide a query configuration with valid retention interval to" + " prevent excessive state size. You may specify a retention time of 0 to" + " not clean up the state.");
    }

    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();

    // The time field index is only looked up for rowtime attributes; -1 otherwise.
    int resolvedTimeIndex = -1;
    if (isRowtimeAttribute(window.timeAttribute())) {
        resolvedTimeIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (resolvedTimeIndex < 0) {
            throw new TableException("Group window must defined on a time attribute, " + "but the time attribute can't be found.\n" + "This should never happen. Please file an issue.");
        }
    }
    final int inputTimeFieldIndex = resolvedTimeIndex;

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());

    final Tuple2<WindowAssigner<?>, Trigger<?>> assignerAndTrigger = generateWindowAssignerAndTrigger();
    final WindowAssigner<?> windowAssigner = assignerAndTrigger.f0;
    final Trigger<?> trigger = assignerAndTrigger.f1;

    final Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final boolean isGeneralPythonUDAF =
            Arrays.stream(aggCalls)
                    .anyMatch(aggCall -> PythonUtil.isPythonAggregate(aggCall, PythonFunctionKind.GENERAL));
    final WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);

    final OneInputTransformation<RowData, RowData> transform;
    if (!isGeneralPythonUDAF) {
        // No general Python aggregate among the calls: take the Pandas path.
        transform =
                createPandasPythonStreamWindowGroupOneInputTransformation(
                        upstream,
                        inputRowType,
                        outputRowType,
                        inputTimeFieldIndex,
                        windowAssigner,
                        trigger,
                        emitStrategy.getAllowLateness(),
                        pythonConfig,
                        config,
                        shiftTimeZone);
    } else {
        // Every aggregate call gets the same retraction flag as the node itself.
        final boolean[] retractionFlags = new boolean[aggCalls.length];
        Arrays.fill(retractionFlags, needRetraction);
        final AggregateInfoList aggInfoList =
                transformToStreamAggregateInfoList(
                        inputRowType,
                        JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                        retractionFlags,
                        needRetraction,
                        true,
                        true);
        transform =
                createGeneralPythonStreamWindowGroupOneInputTransformation(
                        upstream,
                        inputRowType,
                        outputRowType,
                        inputTimeFieldIndex,
                        windowAssigner,
                        aggInfoList,
                        emitStrategy.getAllowLateness(),
                        pythonConfig,
                        shiftTimeZone);
    }

    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // Key the operator's state by the grouping columns of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(keySelector);
    transform.setStateKeyType(keySelector.getProducedType());
    return transform;
}
Aggregations