Search in sources :

Example 16 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The method translateToPlanInternal of the class StreamExecPythonOverAggregate.

/**
 * Translates this Python OVER aggregate node into a keyed one-input transformation.
 *
 * <p>Before anything is built, the OVER specification is validated: there must be at most one
 * window group, it must be ordered ascending on exactly one field, that field must be a rowtime
 * or proctime attribute, the lower bound must not be UNBOUNDED PRECEDING, the upper bound must
 * be CURRENT ROW, and the boundary value must not be a {@link BigDecimal}.
 *
 * @param planner planner used to translate the input edge and to obtain the execution
 *     environment for the merged Python configuration
 * @param config node configuration providing state retention settings and the table config
 * @return the transformation, with state key selector and key type derived from the partition
 *     keys of the OVER specification
 * @throws TableException if any of the validations described above fails
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // --- validate the window definition -------------------------------------------------
    if (overSpec.getGroups().size() > 1) {
        throw new TableException("All aggregates must be computed on the same window.");
    }
    final OverSpec.GroupSpec windowGroup = overSpec.getGroups().get(0);

    final int[] sortFields = windowGroup.getSort().getFieldIndices();
    final boolean[] sortAscending = windowGroup.getSort().getAscendingOrders();
    if (sortFields.length != 1 || sortAscending.length != 1) {
        throw new TableException("The window can only be ordered by a single time column.");
    }
    if (!sortAscending[0]) {
        throw new TableException("The window can only be ordered in ASCENDING mode.");
    }

    // Warn (but do not fail) when a partitioned query has a negative state retention time.
    final int[] partitionFields = overSpec.getPartition().getFieldIndices();
    if (partitionFields.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent "
                        + "excessive state size. You may specify a retention time of 0 to not clean up the state.");
    }

    // --- translate the input and inspect the ordering field ------------------------------
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();
    final int sortField = sortFields[0];
    final LogicalType sortFieldType = upstreamRowType.getFields().get(sortField).getType();

    // The ordering field must be a time attribute; -1 marks processing time.
    final boolean orderedByRowtime = isRowtimeAttribute(sortFieldType);
    if (!orderedByRowtime && !isProctimeAttribute(sortFieldType)) {
        throw new TableException("OVER windows' ordering in stream mode must be defined on a time attribute.");
    }
    final int rowTimeIdx = orderedByRowtime ? sortField : -1;

    // --- validate the window bounds ------------------------------------------------------
    if (windowGroup.getLowerBound().isPreceding() && windowGroup.getLowerBound().isUnbounded()) {
        throw new TableException("Python UDAF is not supported to be used in UNBOUNDED PRECEDING OVER windows.");
    }
    if (!windowGroup.getUpperBound().isCurrentRow()) {
        throw new TableException("Python UDAF is not supported to be used in UNBOUNDED FOLLOWING OVER windows.");
    }
    final Object lowerBoundary = OverAggregateUtil.getBoundary(overSpec, windowGroup.getLowerBound());
    if (lowerBoundary instanceof BigDecimal) {
        throw new TableException("the specific value is decimal which haven not supported yet.");
    }
    // The boundary is negated to obtain the preceding offset handed to the operator.
    final long precedingOffset = -((long) lowerBoundary);

    // --- build the Python transformation -------------------------------------------------
    final Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputTransformation<RowData, RowData> pythonTransform =
            createPythonOneInputTransformation(
                    upstream,
                    upstreamRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    rowTimeIdx,
                    windowGroup.getAggCalls().toArray(new AggregateCall[0]),
                    precedingOffset,
                    windowGroup.isRows(),
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    pythonConfig,
                    config);
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        pythonTransform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // State is keyed by the partition fields of the OVER clause.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(partitionFields, InternalTypeInfo.of(upstreamRowType));
    pythonTransform.setStateKeySelector(keySelector);
    pythonTransform.setStateKeyType(keySelector.getProducedType());
    return pythonTransform;
}
Also used : TableException(org.apache.flink.table.api.TableException) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) OverSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.OverSpec) BigDecimal(java.math.BigDecimal) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Example 17 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The method translateToPlanInternal of the class StreamExecTemporalSort.

/**
 * Translates this temporal sort node into a transformation, dispatching on the kind of time
 * attribute found in the first sort field.
 *
 * @param planner planner used to translate the input edge
 * @param config node configuration forwarded to the rowtime/proctime sort factories
 * @return the result of {@code createSortRowTime} when the first sort field is a rowtime
 *     attribute, or of {@code createSortProcTime} when it is a proctime attribute
 * @throws TableException if there is no sort field, the first sort field is not ascending, or
 *     the first sort field is not a time attribute
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // The primary sort field has to exist and be ascending.
    final boolean hasSortField = sortSpec.getFieldSize() != 0;
    if (!hasSortField || !sortSpec.getFieldSpec(0).getIsAscendingOrder()) {
        throw new TableException(
                "Sort: Primary sort order of a streaming table must be ascending on time.\n"
                        + "please re-check sort statement according to the description above");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamType = (RowType) edge.getOutputType();
    final LogicalType primarySortType =
            upstreamType.getTypeAt(sortSpec.getFieldSpec(0).getFieldIndex());

    if (isRowtimeAttribute(primarySortType)) {
        return createSortRowTime(upstreamType, upstream, config);
    }
    if (isProctimeAttribute(primarySortType)) {
        return createSortProcTime(upstreamType, upstream, config);
    }
    // Neither rowtime nor proctime: the primary sort field is not a time attribute.
    throw new TableException(
            String.format(
                    "Sort: Internal Error\n"
                            + "First field in temporal sort is not a time attribute, %s is given.",
                    primarySortType));
}
Also used : TableException(org.apache.flink.table.api.TableException) RowData(org.apache.flink.table.data.RowData) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType)

Example 18 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The method translateToPlanInternal of the class StreamExecWindowAggregate.

/**
 * Translates this window aggregate node into a keyed one-input transformation backed by an
 * operator built with {@code SlicingWindowAggOperatorBuilder}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration supplying the local time zone and transformation metadata
 * @return the transformation, with state key selector and key type derived from {@code grouping}
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final ZoneId shiftZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner assigner = createSliceAssigner(windowing, shiftZone);

    // Hopping window requires additional COUNT(*) to determine whether to register next timer
    // through whether the current fired window is empty, see SliceSharedWindowAggProcessor.
    final AggregateInfoList aggInfos =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    // isStateBackendDataViews
                    true);

    final GeneratedNamespaceAggsHandleFunction<Long> aggsHandler =
            createAggsHandler(
                    assigner,
                    aggInfos,
                    config,
                    planner.getRelBuilder(),
                    upstreamRowType.getChildren(),
                    shiftZone);

    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    final LogicalType[] accumulatorTypes = convertToLogicalTypes(aggInfos.getAccTypes());

    final OneInputStreamOperator<RowData, RowData> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(upstreamRowType))
                    .shiftTimeZone(shiftZone)
                    .keySerializer((PagedTypeSerializer<RowData>) keySelector.getProducedType().toSerializer())
                    .assigner(assigner)
                    .countStarIndex(aggInfos.getIndexOfCountStar())
                    .aggregate(aggsHandler, new RowDataSerializer(accumulatorTypes))
                    .build();

    // The new transformation reuses the parallelism of its input.
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(operator),
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);

    // set KeyType and Selector for state
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)

Example 19 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The method translateToPlanInternal of the class StreamExecWatermarkAssigner.

/**
 * Translates this watermark assigner node into a one-input transformation driven by a
 * {@code WatermarkAssignerOperatorFactory}.
 *
 * <p>The watermark generator is code-generated from {@code watermarkExpr} against the input row
 * type, and the idle timeout is read from
 * {@code ExecutionConfigOptions.TABLE_EXEC_SOURCE_IDLE_TIMEOUT} in milliseconds.
 *
 * @param planner planner used to translate the input edge
 * @param config node configuration supplying the table config, idle timeout and metadata
 * @return the transformation, using the same parallelism as its input
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);

    final GeneratedWatermarkGenerator generatedGenerator =
            WatermarkGeneratorCodeGenerator.generateWatermarkGenerator(
                    config.getTableConfig(),
                    (RowType) edge.getOutputType(),
                    watermarkExpr,
                    JavaScalaConversionUtil.toScala(Optional.empty()));

    final long idleTimeoutMillis =
            config.get(ExecutionConfigOptions.TABLE_EXEC_SOURCE_IDLE_TIMEOUT).toMillis();

    final WatermarkAssignerOperatorFactory factory =
            new WatermarkAssignerOperatorFactory(rowtimeFieldIndex, idleTimeoutMillis, generatedGenerator);

    return ExecNodeUtil.createOneInputTransformation(
            upstream,
            createTransformationMeta(WATERMARK_ASSIGNER_TRANSFORMATION, config),
            factory,
            InternalTypeInfo.of(getOutputType()),
            upstream.getParallelism());
}
Also used : GeneratedWatermarkGenerator(org.apache.flink.table.runtime.generated.GeneratedWatermarkGenerator) RowData(org.apache.flink.table.data.RowData) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) WatermarkAssignerOperatorFactory(org.apache.flink.table.runtime.operators.wmassigners.WatermarkAssignerOperatorFactory)

Example 20 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The method translateToPlanInternal of the class StreamExecGlobalWindowAggregate.

/**
 * Translates this global window aggregate node into a keyed one-input transformation backed by
 * an operator built with {@code SlicingWindowAggOperatorBuilder#globalAggregate}.
 *
 * <p>Two aggregate info lists are derived from {@code localAggInputRowType} (one with
 * {@code isStateBackendDataViews} set to {@code false}, one with {@code true}), and three
 * handlers are generated from them: local, global and state.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration supplying the local time zone and transformation metadata
 * @return the transformation, with state key selector and key type derived from {@code grouping}
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final ZoneId shiftZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner assigner = createSliceAssigner(windowing, shiftZone);

    final AggregateInfoList localInfos =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    // should use original input here
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    // isStateBackendDataViews
                    false);
    final AggregateInfoList globalInfos =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    // should use original input here
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    // isStateBackendDataViews
                    true);

    // handler used to merge multiple local accumulators into one accumulator,
    // where the accumulators are all on memory
    final GeneratedNamespaceAggsHandleFunction<Long> localHandler =
            createAggsHandler(
                    "LocalWindowAggsHandler",
                    assigner,
                    localInfos,
                    grouping.length,
                    true,
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftZone);

    // handler used to merge the single local accumulator (on memory) into state accumulator
    final GeneratedNamespaceAggsHandleFunction<Long> globalHandler =
            createAggsHandler(
                    "GlobalWindowAggsHandler",
                    assigner,
                    globalInfos,
                    0,
                    true,
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftZone);

    // handler used to merge state accumulators for merging slices into window,
    // e.g. Hop and Cumulate
    final GeneratedNamespaceAggsHandleFunction<Long> stateHandler =
            createAggsHandler(
                    "StateWindowAggsHandler",
                    assigner,
                    globalInfos,
                    0,
                    false,
                    globalInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    shiftZone);

    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    final LogicalType[] accumulatorTypes = convertToLogicalTypes(globalInfos.getAccTypes());

    final OneInputStreamOperator<RowData, RowData> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(upstreamRowType))
                    .shiftTimeZone(shiftZone)
                    .keySerializer((PagedTypeSerializer<RowData>) keySelector.getProducedType().toSerializer())
                    .assigner(assigner)
                    .countStarIndex(globalInfos.getIndexOfCountStar())
                    .globalAggregate(
                            localHandler,
                            globalHandler,
                            stateHandler,
                            new RowDataSerializer(accumulatorTypes))
                    .build();

    // The new transformation reuses the parallelism of its input.
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GLOBAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(operator),
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism(),
                    WINDOW_AGG_MEMORY_RATIO);

    // set KeyType and Selector for state
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)

Aggregations

Transformation (org.apache.flink.api.dag.Transformation)98 RowData (org.apache.flink.table.data.RowData)69 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)53 RowType (org.apache.flink.table.types.logical.RowType)50 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)45 TableException (org.apache.flink.table.api.TableException)28 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)28 ArrayList (java.util.ArrayList)25 CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext)21 Configuration (org.apache.flink.configuration.Configuration)19 TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation)18 List (java.util.List)17 PartitionTransformation (org.apache.flink.streaming.api.transformations.PartitionTransformation)17 AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList)17 LogicalType (org.apache.flink.table.types.logical.LogicalType)16 Test (org.junit.Test)16 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)13 SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation)13 Arrays (java.util.Arrays)11 Collections (java.util.Collections)10