Search in sources :

Example 1 with WindowEmitStrategy

use of org.apache.flink.table.planner.plan.utils.WindowEmitStrategy in project flink by apache.

the class StreamPhysicalPythonGroupWindowAggregateRule method convert.

@Override
public RelNode convert(RelNode rel) {
    FlinkLogicalWindowAggregate agg = (FlinkLogicalWindowAggregate) rel;
    LogicalWindow window = agg.getWindow();
    List<AggregateCall> aggCalls = agg.getAggCallList();
    boolean isPandasPythonUDAF = aggCalls.stream().anyMatch(x -> PythonUtil.isPythonAggregate(x, PythonFunctionKind.PANDAS));
    if (isPandasPythonUDAF && window instanceof SessionGroupWindow) {
        throw new TableException("Session Group Window is currently not supported for Pandas UDAF.");
    }
    RelNode input = agg.getInput();
    RelOptCluster cluster = rel.getCluster();
    FlinkRelDistribution requiredDistribution;
    if (agg.getGroupCount() != 0) {
        requiredDistribution = FlinkRelDistribution.hash(agg.getGroupSet().asList(), true);
    } else {
        requiredDistribution = FlinkRelDistribution.SINGLETON();
    }
    RelTraitSet requiredTraitSet = input.getTraitSet().replace(FlinkConventions.STREAM_PHYSICAL()).replace(requiredDistribution);
    RelTraitSet providedTraitSet = rel.getTraitSet().replace(FlinkConventions.STREAM_PHYSICAL());
    RelNode newInput = RelOptRule.convert(input, requiredTraitSet);
    ReadableConfig config = ShortcutUtils.unwrapTableConfig(rel);
    WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, agg.getWindow());
    if (emitStrategy.produceUpdates()) {
        throw new TableException("Python Group Window Aggregate Function is currently not supported for early fired or lately fired.");
    }
    return new StreamPhysicalPythonGroupWindowAggregate(cluster, providedTraitSet, newInput, rel.getRowType(), agg.getGroupSet().toArray(), JavaScalaConversionUtil.toScala(aggCalls), agg.getWindow(), agg.getNamedProperties(), emitStrategy);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) StreamPhysicalPythonGroupWindowAggregate(org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalPythonGroupWindowAggregate) TableException(org.apache.flink.table.api.TableException) RelTraitSet(org.apache.calcite.plan.RelTraitSet) AggregateCall(org.apache.calcite.rel.core.AggregateCall) FlinkRelDistribution(org.apache.flink.table.planner.plan.trait.FlinkRelDistribution) ReadableConfig(org.apache.flink.configuration.ReadableConfig) LogicalWindow(org.apache.flink.table.planner.plan.logical.LogicalWindow) RelNode(org.apache.calcite.rel.RelNode) WindowEmitStrategy(org.apache.flink.table.planner.plan.utils.WindowEmitStrategy) FlinkLogicalWindowAggregate(org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalWindowAggregate) SessionGroupWindow(org.apache.flink.table.planner.plan.logical.SessionGroupWindow)

Example 2 with WindowEmitStrategy

use of org.apache.flink.table.planner.plan.utils.WindowEmitStrategy in project flink by apache.

the class StreamExecGroupWindowAggregate method createWindowOperator.

private WindowOperator<?, ?> createWindowOperator(ReadableConfig config, GeneratedClass<?> aggsHandler, GeneratedRecordEqualiser recordEqualiser, LogicalType[] accTypes, LogicalType[] windowPropertyTypes, LogicalType[] aggValueTypes, LogicalType[] inputFields, int timeFieldIndex, ZoneId shiftTimeZone, int inputCountIndex) {
    WindowOperatorBuilder builder = WindowOperatorBuilder.builder().withInputFields(inputFields).withShiftTimezone(shiftTimeZone).withInputCountIndex(inputCountIndex);
    if (window instanceof TumblingGroupWindow) {
        TumblingGroupWindow tumblingWindow = (TumblingGroupWindow) window;
        FieldReferenceExpression timeField = tumblingWindow.timeField();
        ValueLiteralExpression size = tumblingWindow.size();
        if (isProctimeAttribute(timeField) && hasTimeIntervalType(size)) {
            builder = builder.tumble(toDuration(size)).withProcessingTime();
        } else if (isRowtimeAttribute(timeField) && hasTimeIntervalType(size)) {
            builder = builder.tumble(toDuration(size)).withEventTime(timeFieldIndex);
        } else if (isProctimeAttribute(timeField) && hasRowIntervalType(size)) {
            builder = builder.countWindow(toLong(size));
        } else {
            // ProcessingTimeTumblingGroupWindow
            throw new UnsupportedOperationException("Event-time grouping windows on row intervals are currently not supported.");
        }
    } else if (window instanceof SlidingGroupWindow) {
        SlidingGroupWindow slidingWindow = (SlidingGroupWindow) window;
        FieldReferenceExpression timeField = slidingWindow.timeField();
        ValueLiteralExpression size = slidingWindow.size();
        ValueLiteralExpression slide = slidingWindow.slide();
        if (isProctimeAttribute(timeField) && hasTimeIntervalType(size)) {
            builder = builder.sliding(toDuration(size), toDuration(slide)).withProcessingTime();
        } else if (isRowtimeAttribute(timeField) && hasTimeIntervalType(size)) {
            builder = builder.sliding(toDuration(size), toDuration(slide)).withEventTime(timeFieldIndex);
        } else if (isProctimeAttribute(timeField) && hasRowIntervalType(size)) {
            builder = builder.countWindow(toLong(size), toLong(slide));
        } else {
            // ProcessingTimeTumblingGroupWindow
            throw new UnsupportedOperationException("Event-time grouping windows on row intervals are currently not supported.");
        }
    } else if (window instanceof SessionGroupWindow) {
        SessionGroupWindow sessionWindow = (SessionGroupWindow) window;
        FieldReferenceExpression timeField = sessionWindow.timeField();
        ValueLiteralExpression gap = sessionWindow.gap();
        if (isProctimeAttribute(timeField)) {
            builder = builder.session(toDuration(gap)).withProcessingTime();
        } else if (isRowtimeAttribute(timeField)) {
            builder = builder.session(toDuration(gap)).withEventTime(timeFieldIndex);
        } else {
            throw new UnsupportedOperationException("This should not happen.");
        }
    } else {
        throw new TableException("Unsupported window: " + window.toString());
    }
    WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
    if (emitStrategy.produceUpdates()) {
        // mark this operator will send retraction and set new trigger
        builder.produceUpdates().triggering(emitStrategy.getTrigger()).withAllowedLateness(Duration.ofMillis(emitStrategy.getAllowLateness()));
    }
    if (aggsHandler instanceof GeneratedNamespaceAggsHandleFunction) {
        return builder.aggregate((GeneratedNamespaceAggsHandleFunction<?>) aggsHandler, recordEqualiser, accTypes, aggValueTypes, windowPropertyTypes).build();
    } else if (aggsHandler instanceof GeneratedNamespaceTableAggsHandleFunction) {
        return builder.aggregate((GeneratedNamespaceTableAggsHandleFunction<?>) aggsHandler, accTypes, aggValueTypes, windowPropertyTypes).build();
    } else {
        throw new TableException("Unsupported agg handler class: " + aggsHandler.getClass().getSimpleName());
    }
}
Also used : TableException(org.apache.flink.table.api.TableException) TumblingGroupWindow(org.apache.flink.table.planner.plan.logical.TumblingGroupWindow) ValueLiteralExpression(org.apache.flink.table.expressions.ValueLiteralExpression) WindowEmitStrategy(org.apache.flink.table.planner.plan.utils.WindowEmitStrategy) GeneratedNamespaceAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedNamespaceAggsHandleFunction) FieldReferenceExpression(org.apache.flink.table.expressions.FieldReferenceExpression) WindowOperatorBuilder(org.apache.flink.table.runtime.operators.window.WindowOperatorBuilder) GeneratedNamespaceTableAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedNamespaceTableAggsHandleFunction) SlidingGroupWindow(org.apache.flink.table.planner.plan.logical.SlidingGroupWindow) SessionGroupWindow(org.apache.flink.table.planner.plan.logical.SessionGroupWindow)

Example 3 with WindowEmitStrategy

use of org.apache.flink.table.planner.plan.utils.WindowEmitStrategy in project flink by apache.

the class StreamExecPythonGroupWindowAggregate method translateToPlanInternal.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final boolean isCountWindow;
    if (window instanceof TumblingGroupWindow) {
        isCountWindow = hasRowIntervalType(((TumblingGroupWindow) window).size());
    } else if (window instanceof SlidingGroupWindow) {
        isCountWindow = hasRowIntervalType(((SlidingGroupWindow) window).size());
    } else {
        isCountWindow = false;
    }
    if (isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn("No state retention interval configured for a query which accumulates state." + " Please provide a query configuration with valid retention interval to" + " prevent excessive state size. You may specify a retention time of 0 to" + " not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
    final int inputTimeFieldIndex;
    if (isRowtimeAttribute(window.timeAttribute())) {
        inputTimeFieldIndex = timeFieldIndex(FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType), planner.getRelBuilder(), window.timeAttribute());
        if (inputTimeFieldIndex < 0) {
            throw new TableException("Group window must defined on a time attribute, " + "but the time attribute can't be found.\n" + "This should never happen. Please file an issue.");
        }
    } else {
        inputTimeFieldIndex = -1;
    }
    final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(window.timeAttribute().getOutputDataType().getLogicalType(), config.getLocalTimeZone());
    Tuple2<WindowAssigner<?>, Trigger<?>> windowAssignerAndTrigger = generateWindowAssignerAndTrigger();
    WindowAssigner<?> windowAssigner = windowAssignerAndTrigger.f0;
    Trigger<?> trigger = windowAssignerAndTrigger.f1;
    Configuration pythonConfig = CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    boolean isGeneralPythonUDAF = Arrays.stream(aggCalls).anyMatch(x -> PythonUtil.isPythonAggregate(x, PythonFunctionKind.GENERAL));
    OneInputTransformation<RowData, RowData> transform;
    WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
    if (isGeneralPythonUDAF) {
        final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length];
        Arrays.fill(aggCallNeedRetractions, needRetraction);
        final AggregateInfoList aggInfoList = transformToStreamAggregateInfoList(inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, true, true);
        transform = createGeneralPythonStreamWindowGroupOneInputTransformation(inputTransform, inputRowType, outputRowType, inputTimeFieldIndex, windowAssigner, aggInfoList, emitStrategy.getAllowLateness(), pythonConfig, shiftTimeZone);
    } else {
        transform = createPandasPythonStreamWindowGroupOneInputTransformation(inputTransform, inputRowType, outputRowType, inputTimeFieldIndex, windowAssigner, trigger, emitStrategy.getAllowLateness(), pythonConfig, config, shiftTimeZone);
    }
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) TableException(org.apache.flink.table.api.TableException) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) AggregateUtil.transformToStreamAggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateUtil.transformToStreamAggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) SlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SlidingWindowAssigner) CountTumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountTumblingWindowAssigner) WindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.WindowAssigner) TumblingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.TumblingWindowAssigner) CountSlidingWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.CountSlidingWindowAssigner) SessionWindowAssigner(org.apache.flink.table.runtime.operators.window.assigners.SessionWindowAssigner) TumblingGroupWindow(org.apache.flink.table.planner.plan.logical.TumblingGroupWindow) Trigger(org.apache.flink.table.runtime.operators.window.triggers.Trigger) WindowEmitStrategy(org.apache.flink.table.planner.plan.utils.WindowEmitStrategy) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) SlidingGroupWindow(org.apache.flink.table.planner.plan.logical.SlidingGroupWindow)

Aggregations

TableException (org.apache.flink.table.api.TableException)3 WindowEmitStrategy (org.apache.flink.table.planner.plan.utils.WindowEmitStrategy)3 SessionGroupWindow (org.apache.flink.table.planner.plan.logical.SessionGroupWindow)2 SlidingGroupWindow (org.apache.flink.table.planner.plan.logical.SlidingGroupWindow)2 TumblingGroupWindow (org.apache.flink.table.planner.plan.logical.TumblingGroupWindow)2 ZoneId (java.time.ZoneId)1 RelOptCluster (org.apache.calcite.plan.RelOptCluster)1 RelTraitSet (org.apache.calcite.plan.RelTraitSet)1 RelNode (org.apache.calcite.rel.RelNode)1 AggregateCall (org.apache.calcite.rel.core.AggregateCall)1 Transformation (org.apache.flink.api.dag.Transformation)1 Configuration (org.apache.flink.configuration.Configuration)1 ReadableConfig (org.apache.flink.configuration.ReadableConfig)1 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)1 RowData (org.apache.flink.table.data.RowData)1 FieldReferenceExpression (org.apache.flink.table.expressions.FieldReferenceExpression)1 ValueLiteralExpression (org.apache.flink.table.expressions.ValueLiteralExpression)1 LogicalWindow (org.apache.flink.table.planner.plan.logical.LogicalWindow)1 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)1 FlinkLogicalWindowAggregate (org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalWindowAggregate)1