Search in sources:

Example 86 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The following example shows the translateToPlanInternal method of the class StreamExecRank.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Streaming currently only supports ROW_NUMBER; fail fast for all other rank types.
    switch(rankType) {
        case ROW_NUMBER:
            break;
        case RANK:
            throw new TableException("RANK() on streaming table is not supported currently");
        case DENSE_RANK:
            throw new TableException("DENSE_RANK() on streaming table is not supported currently");
        default:
            throw new TableException(String.format("Streaming tables do not support %s rank function.", rankType));
    }
    // Translate the single input edge and capture its row type for key-selector and
    // comparator generation below.
    ExecEdge inputEdge = getInputEdges().get(0);
    Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    RowType inputType = (RowType) inputEdge.getOutputType();
    InternalTypeInfo<RowData> inputRowTypeInfo = InternalTypeInfo.of(inputType);
    int[] sortFields = sortSpec.getFieldIndices();
    RowDataKeySelector sortKeySelector = KeySelectorUtil.getRowDataSelector(sortFields, inputRowTypeInfo);
    // create a sort spec on sort keys.
    // The key rows produced by sortKeySelector contain only the sort fields, in order,
    // so their positions within the key row are simply 0..sortFields.length-1.
    int[] sortKeyPositions = IntStream.range(0, sortFields.length).toArray();
    SortSpec.SortSpecBuilder builder = SortSpec.builder();
    // Re-index each field spec onto the key row, preserving order/null direction.
    IntStream.range(0, sortFields.length).forEach(idx -> builder.addField(idx, sortSpec.getFieldSpec(idx).getIsAscendingOrder(), sortSpec.getFieldSpec(idx).getNullIsLast()));
    SortSpec sortSpecInSortKey = builder.build();
    GeneratedRecordComparator sortKeyComparator = ComparatorCodeGenerator.gen(config.getTableConfig(), "StreamExecSortComparator", RowType.of(sortSpec.getFieldTypes(inputType)), sortSpecInSortKey);
    long cacheSize = config.get(TABLE_EXEC_RANK_TOPN_CACHE_SIZE);
    StateTtlConfig ttlConfig = StateConfigUtil.createTtlConfig(config.getStateRetentionTime());
    // Pick the TopN runtime function matching the chosen rank process strategy.
    AbstractTopNFunction processFunction;
    if (rankStrategy instanceof RankProcessStrategy.AppendFastStrategy) {
        // Single ascending proc-time sort key: rows arrive in sort order, so the
        // cheap first-N function suffices.
        if (sortFields.length == 1 && TypeCheckUtils.isProcTime(inputType.getChildren().get(sortFields[0])) && sortSpec.getFieldSpec(0).getIsAscendingOrder()) {
            processFunction = new AppendOnlyFirstNFunction(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber);
        } else if (RankUtil.isTop1(rankRange)) {
            processFunction = new FastTop1Function(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        } else {
            processFunction = new AppendOnlyTopNFunction(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        }
    } else if (rankStrategy instanceof RankProcessStrategy.UpdateFastStrategy) {
        if (RankUtil.isTop1(rankRange)) {
            processFunction = new FastTop1Function(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        } else {
            // Updates are keyed by the upstream primary key, so an additional row-key
            // selector is required.
            RankProcessStrategy.UpdateFastStrategy updateFastStrategy = (RankProcessStrategy.UpdateFastStrategy) rankStrategy;
            int[] primaryKeys = updateFastStrategy.getPrimaryKeys();
            RowDataKeySelector rowKeySelector = KeySelectorUtil.getRowDataSelector(primaryKeys, inputRowTypeInfo);
            processFunction = new UpdatableTopNFunction(ttlConfig, inputRowTypeInfo, rowKeySelector, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        }
    // TODO Use UnaryUpdateTopNFunction after SortedMapState is merged
    } else if (rankStrategy instanceof RankProcessStrategy.RetractStrategy) {
        // Retraction needs a record equaliser to detect value changes plus a
        // comparable comparator over the sort-key columns.
        EqualiserCodeGenerator equaliserCodeGen = new EqualiserCodeGenerator(inputType.getFields().stream().map(RowType.RowField::getType).toArray(LogicalType[]::new));
        GeneratedRecordEqualiser generatedEqualiser = equaliserCodeGen.generateRecordEqualiser("RankValueEqualiser");
        ComparableRecordComparator comparator = new ComparableRecordComparator(sortKeyComparator, sortKeyPositions, sortSpec.getFieldTypes(inputType), sortSpec.getAscendingOrders(), sortSpec.getNullsIsLast());
        processFunction = new RetractableTopNFunction(ttlConfig, inputRowTypeInfo, comparator, sortKeySelector, rankType, rankRange, generatedEqualiser, generateUpdateBefore, outputRankNumber);
    } else {
        throw new TableException(String.format("rank strategy:%s is not supported.", rankStrategy));
    }
    KeyedProcessOperator<RowData, RowData, RowData> operator = new KeyedProcessOperator<>(processFunction);
    processFunction.setKeyContext(operator);
    OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(RANK_TRANSFORMATION, config), operator, InternalTypeInfo.of((RowType) getOutputType()), inputTransform.getParallelism());
    // set KeyType and Selector for state
    RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(partitionSpec.getFieldIndices(), inputRowTypeInfo);
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) UpdatableTopNFunction(org.apache.flink.table.runtime.operators.rank.UpdatableTopNFunction) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) FastTop1Function(org.apache.flink.table.runtime.operators.rank.FastTop1Function) GeneratedRecordEqualiser(org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser) RankProcessStrategy(org.apache.flink.table.planner.plan.utils.RankProcessStrategy) RowData(org.apache.flink.table.data.RowData) AbstractTopNFunction(org.apache.flink.table.runtime.operators.rank.AbstractTopNFunction) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) TableException(org.apache.flink.table.api.TableException) AppendOnlyTopNFunction(org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction) AppendOnlyFirstNFunction(org.apache.flink.table.runtime.operators.rank.AppendOnlyFirstNFunction) EqualiserCodeGenerator(org.apache.flink.table.planner.codegen.EqualiserCodeGenerator) StateTtlConfig(org.apache.flink.api.common.state.StateTtlConfig) RetractableTopNFunction(org.apache.flink.table.runtime.operators.rank.RetractableTopNFunction) ComparableRecordComparator(org.apache.flink.table.runtime.operators.rank.ComparableRecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)

Example 87 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The following example shows the translateToPlanInternal method of the class StreamExecWindowJoin.

@Override
@SuppressWarnings("unchecked")
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Both sides are expected to carry an attached window-end attribute.
    final int leftWindowEndIndex = ((WindowAttachedWindowingStrategy) leftWindowing).getWindowEnd();
    final int rightWindowEndIndex = ((WindowAttachedWindowingStrategy) rightWindowing).getWindowEnd();

    // Translate the two input edges into transformations.
    final ExecEdge leftEdge = getInputEdges().get(0);
    final ExecEdge rightEdge = getInputEdges().get(1);
    final Transformation<RowData> leftInput = (Transformation<RowData>) leftEdge.translateToPlan(planner);
    final Transformation<RowData> rightInput = (Transformation<RowData>) rightEdge.translateToPlan(planner);

    final RowType leftRowType = (RowType) leftEdge.getOutputType();
    final RowType rightRowType = (RowType) rightEdge.getOutputType();
    JoinUtil.validateJoinSpec(joinSpec, leftRowType, rightRowType, true);

    final InternalTypeInfo<RowData> leftTypeInfo = InternalTypeInfo.of(leftRowType);
    final InternalTypeInfo<RowData> rightTypeInfo = InternalTypeInfo.of(rightRowType);

    // Code-generate the join condition and resolve the window time zone shift.
    final GeneratedJoinCondition joinCondition =
            JoinUtil.generateConditionFunction(config.getTableConfig(), joinSpec, leftRowType, rightRowType);
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(leftWindowing.getTimeAttributeType(), config.getLocalTimeZone());

    final WindowJoinOperator joinOperator =
            WindowJoinOperatorBuilder.builder()
                    .leftSerializer(leftTypeInfo.toRowSerializer())
                    .rightSerializer(rightTypeInfo.toRowSerializer())
                    .generatedJoinCondition(joinCondition)
                    .leftWindowEndIndex(leftWindowEndIndex)
                    .rightWindowEndIndex(rightWindowEndIndex)
                    .filterNullKeys(joinSpec.getFilterNulls())
                    .joinType(joinSpec.getJoinType())
                    .withShiftTimezone(shiftTimeZone)
                    .build();

    final RowType returnType = (RowType) getOutputType();
    final TwoInputTransformation<RowData, RowData, RowData> transform =
            ExecNodeUtil.createTwoInputTransformation(
                    leftInput,
                    rightInput,
                    createTransformationMeta(WINDOW_JOIN_TRANSFORMATION, config),
                    joinOperator,
                    InternalTypeInfo.of(returnType),
                    leftInput.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector leftKeySelector =
            KeySelectorUtil.getRowDataSelector(joinSpec.getLeftKeys(), leftTypeInfo);
    final RowDataKeySelector rightKeySelector =
            KeySelectorUtil.getRowDataSelector(joinSpec.getRightKeys(), rightTypeInfo);
    transform.setStateKeySelectors(leftKeySelector, rightKeySelector);
    transform.setStateKeyType(leftKeySelector.getProducedType());
    return transform;
}
Also used : TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) WindowAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) GeneratedJoinCondition(org.apache.flink.table.runtime.generated.GeneratedJoinCondition) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) WindowJoinOperator(org.apache.flink.table.runtime.operators.join.window.WindowJoinOperator)

Example 88 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The following example shows the translateToPlanInternal method of the class StreamExecLocalWindowAggregate.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Translate the single upstream edge.
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final SliceAssigner sliceAssigner = createSliceAssigner(windowing, shiftTimeZone);

    // Derive the aggregate info list; local aggregation keeps data views on the heap
    // (isStateBackendDataViews = false).
    final AggregateInfoList aggInfoList =
            AggregateUtil.deriveStreamWindowAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    windowing.getWindow(),
                    false);
    final GeneratedNamespaceAggsHandleFunction<Long> generatedAggsHandler =
            createAggsHandler(
                    sliceAssigner,
                    aggInfoList,
                    config,
                    planner.getRelBuilder(),
                    inputRowType.getChildren(),
                    shiftTimeZone);

    // Serializers for the grouping key and the buffered input rows.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    final PagedTypeSerializer<RowData> keySerializer =
            (PagedTypeSerializer<RowData>) keySelector.getProducedType().toSerializer();
    final AbstractRowDataSerializer<RowData> valueSerializer = new RowDataSerializer(inputRowType);
    final WindowBuffer.LocalFactory windowBufferFactory =
            new RecordsWindowBuffer.LocalFactory(
                    keySerializer, valueSerializer, new LocalAggCombiner.Factory(generatedAggsHandler));

    final OneInputStreamOperator<RowData, RowData> localAggOperator =
            new LocalSlicingWindowAggOperator(keySelector, sliceAssigner, windowBufferFactory, shiftTimeZone);

    // Use less memory here to let the chained head operator have more memory.
    return ExecNodeUtil.createOneInputTransformation(
            inputTransform,
            createTransformationMeta(LOCAL_WINDOW_AGGREGATE_TRANSFORMATION, config),
            SimpleOperatorFactory.of(localAggOperator),
            InternalTypeInfo.of(getOutputType()),
            inputTransform.getParallelism(),
            WINDOW_AGG_MEMORY_RATIO / 2);
}
Also used : SliceAssigner(org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) LocalAggCombiner(org.apache.flink.table.runtime.operators.aggregate.window.combines.LocalAggCombiner) RowType(org.apache.flink.table.types.logical.RowType) WindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.WindowBuffer) RecordsWindowBuffer(org.apache.flink.table.runtime.operators.aggregate.window.buffers.RecordsWindowBuffer) RowData(org.apache.flink.table.data.RowData) PagedTypeSerializer(org.apache.flink.table.runtime.typeutils.PagedTypeSerializer) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) LocalSlicingWindowAggOperator(org.apache.flink.table.runtime.operators.aggregate.window.LocalSlicingWindowAggOperator) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer) AbstractRowDataSerializer(org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer)

Example 89 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The following example shows the translateToPlanInternal method of the class StreamExecPythonGroupTableAggregate.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Warn when a grouped (stateful) query has no state retention configured.
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
    }

    // Translate the single upstream edge.
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();

    // Build the aggregate info list with state-backend data views and distinct info enabled.
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true,
                    true);
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();

    // Extract the Python function infos and their data-view specs.
    final Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> functionInfosAndSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    final PythonAggregateFunctionInfo[] functionInfos = functionInfosAndSpecs.f0;
    final DataViewSpec[][] dataViewSpecs = functionInfosAndSpecs.f1;

    final Configuration mergedPythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputStreamOperator<RowData, RowData> pythonOperator =
            getPythonTableAggregateFunctionOperator(
                    mergedPythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    functionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    generateUpdateBefore,
                    inputCountIndex);

    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    pythonOperator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(mergedPythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(keySelector);
    transform.setStateKeyType(keySelector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) DataViewSpec(org.apache.flink.table.runtime.dataview.DataViewSpec) Configuration(org.apache.flink.configuration.Configuration) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PythonAggregateFunctionInfo(org.apache.flink.table.functions.python.PythonAggregateFunctionInfo) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector)

Example 90 with Transformation

use of org.apache.flink.api.dag.Transformation in project flink by apache.

The following example shows the translateToPlanInternal method of the class StreamExecWindowDeduplicate.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // validate window strategy
    if (!windowing.isRowtime()) {
        throw new TableException("Processing time Window Deduplication is not supported yet.");
    }
    final int windowEndIndex;
    if (windowing instanceof WindowAttachedWindowingStrategy) {
        windowEndIndex = ((WindowAttachedWindowingStrategy) windowing).getWindowEnd();
    } else {
        throw new UnsupportedOperationException(windowing.getClass().getName() + " is not supported yet.");
    }

    // Translate the single upstream edge.
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final RowDataKeySelector partitionKeySelector =
            KeySelectorUtil.getRowDataSelector(partitionKeys, InternalTypeInfo.of(inputRowType));

    // Assemble the row-time window deduplicate operator.
    final OneInputStreamOperator<RowData, RowData> dedupOperator =
            RowTimeWindowDeduplicateOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(inputRowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer((PagedTypeSerializer<RowData>) partitionKeySelector.getProducedType().toSerializer())
                    .keepLastRow(keepLastRow)
                    .rowtimeIndex(orderKey)
                    .windowEndIndex(windowEndIndex)
                    .build();

    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(WINDOW_DEDUPLICATE_TRANSFORMATION, config),
                    SimpleOperatorFactory.of(dedupOperator),
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism(),
                    WINDOW_RANK_MEMORY_RATIO);

    // set KeyType and Selector for state
    transform.setStateKeySelector(partitionKeySelector);
    transform.setStateKeyType(partitionKeySelector.getProducedType());
    return transform;
}
Also used : TableException(org.apache.flink.table.api.TableException) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) WindowAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)

Aggregations

Transformation (org.apache.flink.api.dag.Transformation)98 RowData (org.apache.flink.table.data.RowData)69 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)53 RowType (org.apache.flink.table.types.logical.RowType)50 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)45 TableException (org.apache.flink.table.api.TableException)28 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)28 ArrayList (java.util.ArrayList)25 CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext)21 Configuration (org.apache.flink.configuration.Configuration)19 TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation)18 List (java.util.List)17 PartitionTransformation (org.apache.flink.streaming.api.transformations.PartitionTransformation)17 AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList)17 LogicalType (org.apache.flink.table.types.logical.LogicalType)16 Test (org.junit.Test)16 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)13 SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation)13 Arrays (java.util.Arrays)11 Collections (java.util.Collections)10