Search in sources:

Example 6 with SortSpec

use of org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec in project flink by apache.

From the class BatchExecOverAggregate, method translateToPlanInternal.

// Translates this batch over-aggregate node into a Flink Transformation.
// Builds a partition comparator, then selects between a non-buffering operator
// (no data caching needed) and a buffering operator backed by managed memory.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputType = (RowType) inputEdge.getOutputType();
    // The generated sort is used for generating the comparator among partitions.
    // So here not care the ASC or DESC for the grouping fields.
    // TODO just replace comparator to equaliser
    final int[] partitionFields = overSpec.getPartition().getFieldIndices();
    final GeneratedRecordComparator genComparator = ComparatorCodeGenerator.gen(config.getTableConfig(), "SortComparator", inputType, SortUtil.getAscendingSortSpec(partitionFields));
    // use aggInputType which considers constants as input instead of inputType
    final RowType inputTypeWithConstants = getInputTypeWithConstants();
    // Over operator could support different order-by keys with collation satisfied.
    // Currently, this operator requires all order keys (combined with partition keys) are
    // the same, but order-by keys may be different. Consider the following sql:
    // select *, sum(b) over partition by a order by a, count(c) over partition by a from T
    // So we can use any one from the groups. To keep the behavior with the rule, we use the
    // last one.
    final SortSpec sortSpec = overSpec.getGroups().get(overSpec.getGroups().size() - 1).getSort();
    final TableStreamOperator<RowData> operator;
    final long managedMemory;
    if (!needBufferData()) {
        // operator needn't cache data
        // Generate one aggs handler per over-window group; each group may reset
        // its accumulators per row depending on its window mode and bounds.
        final int numOfGroup = overSpec.getGroups().size();
        final GeneratedAggsHandleFunction[] aggsHandlers = new GeneratedAggsHandleFunction[numOfGroup];
        final boolean[] resetAccumulators = new boolean[numOfGroup];
        for (int i = 0; i < numOfGroup; ++i) {
            GroupSpec group = overSpec.getGroups().get(i);
            AggregateInfoList aggInfoList = AggregateUtil.transformToBatchAggregateInfoList(inputTypeWithConstants, JavaScalaConversionUtil.toScala(group.getAggCalls()), // aggCallNeedRetractions
            null, sortSpec.getFieldIndices());
            AggsHandlerCodeGenerator generator = new AggsHandlerCodeGenerator(new CodeGeneratorContext(config.getTableConfig()), planner.getRelBuilder(), JavaScalaConversionUtil.toScala(inputType.getChildren()), // copyInputField
            false);
            // over agg code gen must pass the constants
            aggsHandlers[i] = generator.needAccumulate().withConstants(JavaScalaConversionUtil.toScala(getConstants())).generateAggsHandler("BoundedOverAggregateHelper", aggInfoList);
            OverWindowMode mode = inferGroupMode(group);
            // ROW mode with a CURRENT ROW .. CURRENT ROW frame restarts accumulation each row.
            resetAccumulators[i] = mode == OverWindowMode.ROW && group.getLowerBound().isCurrentRow() && group.getUpperBound().isCurrentRow();
        }
        operator = new NonBufferOverWindowOperator(aggsHandlers, genComparator, resetAccumulators);
        // Non-buffering variant needs no managed memory.
        managedMemory = 0L;
    } else {
        // Buffering variant: materialize window frames and reserve external buffer memory.
        List<OverWindowFrame> windowFrames = createOverWindowFrames(planner.getRelBuilder(), config, inputType, sortSpec, inputTypeWithConstants);
        operator = new BufferDataOverWindowOperator(windowFrames.toArray(new OverWindowFrame[0]), genComparator, inputType.getChildren().stream().allMatch(BinaryRowData::isInFixedLengthPart));
        managedMemory = config.get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_EXTERNAL_BUFFER_MEMORY).getBytes();
    }
    return ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationName(config), createTransformationDescription(config), SimpleOperatorFactory.of(operator), InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism(), managedMemory);
}
Also used : Transformation(org.apache.flink.api.dag.Transformation) AggregateInfoList(org.apache.flink.table.planner.plan.utils.AggregateInfoList) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) RowType(org.apache.flink.table.types.logical.RowType) AggsHandlerCodeGenerator(org.apache.flink.table.planner.codegen.agg.AggsHandlerCodeGenerator) GeneratedAggsHandleFunction(org.apache.flink.table.runtime.generated.GeneratedAggsHandleFunction) BufferDataOverWindowOperator(org.apache.flink.table.runtime.operators.over.BufferDataOverWindowOperator) NonBufferOverWindowOperator(org.apache.flink.table.runtime.operators.over.NonBufferOverWindowOperator) UnboundedOverWindowFrame(org.apache.flink.table.runtime.operators.over.frame.UnboundedOverWindowFrame) OverWindowFrame(org.apache.flink.table.runtime.operators.over.frame.OverWindowFrame) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GroupSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.OverSpec.GroupSpec) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)

Example 7 with SortSpec

use of org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec in project flink by apache.

From the class SortSpecSerdeTest, method testSortSpec.

@Test
public void testSortSpec() throws JsonProcessingException {
    // Cover every (ascending, nullIsLast) combination across four fields.
    final SortSpec original =
            SortSpec.builder()
                    .addField(1, true, true)
                    .addField(2, true, false)
                    .addField(3, false, true)
                    .addField(4, false, false)
                    .build();
    final ObjectMapper objectMapper = new ObjectMapper();
    // Serialize to JSON and back; the round trip must yield an equal spec.
    final String json = objectMapper.writeValueAsString(original);
    final SortSpec restored = objectMapper.readValue(json, SortSpec.class);
    assertEquals(original, restored);
}
Also used : SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec) ObjectMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 8 with SortSpec

use of org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec in project flink by apache.

From the class StreamExecRank, method translateToPlanInternal.

// Translates this streaming rank node into a keyed Top-N Transformation.
// Only ROW_NUMBER is supported; the concrete Top-N process function is chosen
// from the rank strategy (append-only / update-fast / retract).
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Reject unsupported rank functions up front.
    switch(rankType) {
        case ROW_NUMBER:
            break;
        case RANK:
            throw new TableException("RANK() on streaming table is not supported currently");
        case DENSE_RANK:
            throw new TableException("DENSE_RANK() on streaming table is not supported currently");
        default:
            throw new TableException(String.format("Streaming tables do not support %s rank function.", rankType));
    }
    ExecEdge inputEdge = getInputEdges().get(0);
    Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
    RowType inputType = (RowType) inputEdge.getOutputType();
    InternalTypeInfo<RowData> inputRowTypeInfo = InternalTypeInfo.of(inputType);
    int[] sortFields = sortSpec.getFieldIndices();
    RowDataKeySelector sortKeySelector = KeySelectorUtil.getRowDataSelector(sortFields, inputRowTypeInfo);
    // create a sort spec on sort keys.
    // The extracted sort-key row has the keys at positions 0..n-1, so re-index
    // the original field specs onto those positions, keeping order/null flags.
    int[] sortKeyPositions = IntStream.range(0, sortFields.length).toArray();
    SortSpec.SortSpecBuilder builder = SortSpec.builder();
    IntStream.range(0, sortFields.length).forEach(idx -> builder.addField(idx, sortSpec.getFieldSpec(idx).getIsAscendingOrder(), sortSpec.getFieldSpec(idx).getNullIsLast()));
    SortSpec sortSpecInSortKey = builder.build();
    GeneratedRecordComparator sortKeyComparator = ComparatorCodeGenerator.gen(config.getTableConfig(), "StreamExecSortComparator", RowType.of(sortSpec.getFieldTypes(inputType)), sortSpecInSortKey);
    long cacheSize = config.get(TABLE_EXEC_RANK_TOPN_CACHE_SIZE);
    StateTtlConfig ttlConfig = StateConfigUtil.createTtlConfig(config.getStateRetentionTime());
    AbstractTopNFunction processFunction;
    if (rankStrategy instanceof RankProcessStrategy.AppendFastStrategy) {
        // Special-case a single ascending proc-time sort key: first-N semantics.
        if (sortFields.length == 1 && TypeCheckUtils.isProcTime(inputType.getChildren().get(sortFields[0])) && sortSpec.getFieldSpec(0).getIsAscendingOrder()) {
            processFunction = new AppendOnlyFirstNFunction(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber);
        } else if (RankUtil.isTop1(rankRange)) {
            processFunction = new FastTop1Function(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        } else {
            processFunction = new AppendOnlyTopNFunction(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        }
    } else if (rankStrategy instanceof RankProcessStrategy.UpdateFastStrategy) {
        if (RankUtil.isTop1(rankRange)) {
            processFunction = new FastTop1Function(ttlConfig, inputRowTypeInfo, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        } else {
            // Updates are keyed by the upstream primary key for in-place replacement.
            RankProcessStrategy.UpdateFastStrategy updateFastStrategy = (RankProcessStrategy.UpdateFastStrategy) rankStrategy;
            int[] primaryKeys = updateFastStrategy.getPrimaryKeys();
            RowDataKeySelector rowKeySelector = KeySelectorUtil.getRowDataSelector(primaryKeys, inputRowTypeInfo);
            processFunction = new UpdatableTopNFunction(ttlConfig, inputRowTypeInfo, rowKeySelector, sortKeyComparator, sortKeySelector, rankType, rankRange, generateUpdateBefore, outputRankNumber, cacheSize);
        }
    // TODO Use UnaryUpdateTopNFunction after SortedMapState is merged
    } else if (rankStrategy instanceof RankProcessStrategy.RetractStrategy) {
        // Retraction path needs a record equaliser to match retracted rows.
        EqualiserCodeGenerator equaliserCodeGen = new EqualiserCodeGenerator(inputType.getFields().stream().map(RowType.RowField::getType).toArray(LogicalType[]::new));
        GeneratedRecordEqualiser generatedEqualiser = equaliserCodeGen.generateRecordEqualiser("RankValueEqualiser");
        ComparableRecordComparator comparator = new ComparableRecordComparator(sortKeyComparator, sortKeyPositions, sortSpec.getFieldTypes(inputType), sortSpec.getAscendingOrders(), sortSpec.getNullsIsLast());
        processFunction = new RetractableTopNFunction(ttlConfig, inputRowTypeInfo, comparator, sortKeySelector, rankType, rankRange, generatedEqualiser, generateUpdateBefore, outputRankNumber);
    } else {
        throw new TableException(String.format("rank strategy:%s is not supported.", rankStrategy));
    }
    KeyedProcessOperator<RowData, RowData, RowData> operator = new KeyedProcessOperator<>(processFunction);
    processFunction.setKeyContext(operator);
    OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(RANK_TRANSFORMATION, config), operator, InternalTypeInfo.of((RowType) getOutputType()), inputTransform.getParallelism());
    // set KeyType and Selector for state
    RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(partitionSpec.getFieldIndices(), inputRowTypeInfo);
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Also used : OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) UpdatableTopNFunction(org.apache.flink.table.runtime.operators.rank.UpdatableTopNFunction) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) FastTop1Function(org.apache.flink.table.runtime.operators.rank.FastTop1Function) GeneratedRecordEqualiser(org.apache.flink.table.runtime.generated.GeneratedRecordEqualiser) RankProcessStrategy(org.apache.flink.table.planner.plan.utils.RankProcessStrategy) RowData(org.apache.flink.table.data.RowData) AbstractTopNFunction(org.apache.flink.table.runtime.operators.rank.AbstractTopNFunction) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) TableException(org.apache.flink.table.api.TableException) AppendOnlyTopNFunction(org.apache.flink.table.runtime.operators.rank.AppendOnlyTopNFunction) AppendOnlyFirstNFunction(org.apache.flink.table.runtime.operators.rank.AppendOnlyFirstNFunction) EqualiserCodeGenerator(org.apache.flink.table.planner.codegen.EqualiserCodeGenerator) StateTtlConfig(org.apache.flink.api.common.state.StateTtlConfig) RetractableTopNFunction(org.apache.flink.table.runtime.operators.rank.RetractableTopNFunction) ComparableRecordComparator(org.apache.flink.table.runtime.operators.rank.ComparableRecordComparator) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)

Example 9 with SortSpec

use of org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec in project flink by apache.

From the class StreamExecWindowRank, method translateToPlanInternal.

// Translates this window Top-N node into a Transformation built around a
// WindowRankOperator keyed by the partition fields.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only ROW_NUMBER is supported for window Top-N; reject everything else.
    switch(rankType) {
        case ROW_NUMBER:
            break;
        case RANK:
            throw new TableException("RANK() function is not supported on Window TopN currently, only ROW_NUMBER() is supported.");
        case DENSE_RANK:
            throw new TableException("DENSE_RANK() function is not supported on Window TopN currently, only ROW_NUMBER() is supported.");
        default:
            throw new TableException(String.format("%s() function is not supported on Window TopN currently, only ROW_NUMBER() is supported.", rankType));
    }
    // Window Top-N currently requires an event-time (rowtime) window.
    if (!windowing.isRowtime()) {
        throw new TableException("Processing time Window TopN is not supported yet.");
    }
    // Only window-attached strategies carry the window-end field position.
    final int windowEndFieldIndex;
    if (windowing instanceof WindowAttachedWindowingStrategy) {
        windowEndFieldIndex = ((WindowAttachedWindowingStrategy) windowing).getWindowEnd();
    } else {
        throw new UnsupportedOperationException(windowing.getClass().getName() + " is not supported yet.");
    }
    final ExecEdge edge = getInputEdges().get(0);
    final RowType rowType = (RowType) edge.getOutputType();
    // Only constant rank ranges (e.g. rn <= N) are supported here.
    if (!(rankRange instanceof ConstantRankRange)) {
        throw new TableException(String.format("Rank strategy %s is not supported on window rank currently.", rankRange.toString(rowType.getFieldNames())));
    }
    final ConstantRankRange constantRange = (ConstantRankRange) rankRange;
    final Transformation<RowData> inputTransform = (Transformation<RowData>) edge.translateToPlan(planner);
    final InternalTypeInfo<RowData> rowTypeInfo = InternalTypeInfo.of(rowType);
    final int[] sortFieldIndices = sortSpec.getFieldIndices();
    final RowDataKeySelector sortKeySelector = KeySelectorUtil.getRowDataSelector(sortFieldIndices, rowTypeInfo);
    // The extracted sort-key row holds the keys at positions 0..n-1, so
    // re-index the field specs onto those positions, keeping order/null flags.
    final SortSpec.SortSpecBuilder keySortBuilder = SortSpec.builder();
    for (int i = 0; i < sortFieldIndices.length; i++) {
        keySortBuilder.addField(i, sortSpec.getFieldSpec(i).getIsAscendingOrder(), sortSpec.getFieldSpec(i).getNullIsLast());
    }
    final SortSpec sortSpecOnKeys = keySortBuilder.build();
    final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(windowing.getTimeAttributeType(), config.getLocalTimeZone());
    final GeneratedRecordComparator sortKeyComparator = ComparatorCodeGenerator.gen(config.getTableConfig(), "StreamExecSortComparator", RowType.of(sortSpec.getFieldTypes(rowType)), sortSpecOnKeys);
    final RowDataKeySelector partitionKeySelector = KeySelectorUtil.getRowDataSelector(partitionSpec.getFieldIndices(), rowTypeInfo);
    final OneInputStreamOperator<RowData, RowData> operator =
            WindowRankOperatorBuilder.builder()
                    .inputSerializer(new RowDataSerializer(rowType))
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer((PagedTypeSerializer<RowData>) partitionKeySelector.getProducedType().toSerializer())
                    .sortKeySelector(sortKeySelector)
                    .sortKeyComparator(sortKeyComparator)
                    .outputRankNumber(outputRankNumber)
                    .rankStart(constantRange.getRankStart())
                    .rankEnd(constantRange.getRankEnd())
                    .windowEndIndex(windowEndFieldIndex)
                    .build();
    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(WINDOW_RANK_TRANSFORMATION, config), SimpleOperatorFactory.of(operator), InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism(), WINDOW_RANK_MEMORY_RATIO);
    // The partition keys drive state partitioning for the keyed operator.
    transform.setStateKeySelector(partitionKeySelector);
    transform.setStateKeyType(partitionKeySelector.getProducedType());
    return transform;
}
Also used : TableException(org.apache.flink.table.api.TableException) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ExecEdge(org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) ZoneId(java.time.ZoneId) ConstantRankRange(org.apache.flink.table.runtime.operators.rank.ConstantRankRange) WindowAttachedWindowingStrategy(org.apache.flink.table.planner.plan.logical.WindowAttachedWindowingStrategy) RowType(org.apache.flink.table.types.logical.RowType) RowData(org.apache.flink.table.data.RowData) PagedTypeSerializer(org.apache.flink.table.runtime.typeutils.PagedTypeSerializer) RowDataKeySelector(org.apache.flink.table.runtime.keyselector.RowDataKeySelector) GeneratedRecordComparator(org.apache.flink.table.runtime.generated.GeneratedRecordComparator) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec) RowDataSerializer(org.apache.flink.table.runtime.typeutils.RowDataSerializer)

Example 10 with SortSpec

use of org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec in project flink by apache.

From the class SortCodeGeneratorTest, method randomKeysAndOrders.

// Randomizes the schema (inputType) and sort specification (sortSpec) used by
// the sort code-generation tests: picks 1-9 random field types, then a random
// number of sort keys drawn in shuffled order from the leading field indices,
// each with a random ASC/DESC direction and the matching default null order.
private void randomKeysAndOrders() {
    // ThreadLocalRandom.current() is the preferred source over new Random():
    // no shared seeding/contention, and it is assignable to Random.
    Random rnd = ThreadLocalRandom.current();
    LogicalType[] fields = new LogicalType[rnd.nextInt(9) + 1];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = types[rnd.nextInt(types.length)];
    }
    inputType = RowType.of(fields);
    // At least one sort key, at most one per field.
    int keyCount = rnd.nextInt(fields.length) + 1;
    LinkedList<Integer> indexQueue = new LinkedList<>();
    for (int i = 0; i < keyCount; i++) {
        indexQueue.add(i);
    }
    // Shuffle so the key fields appear in arbitrary order in the sort spec.
    Collections.shuffle(indexQueue);
    SortSpec.SortSpecBuilder builder = SortSpec.builder();
    for (int i = 0; i < keyCount; i++) {
        boolean order = rnd.nextBoolean();
        // Null ordering follows Flink's default for the chosen direction.
        builder.addField(indexQueue.poll(), order, SortUtil.getNullDefaultOrder(order));
    }
    sortSpec = builder.build();
}
Also used : Random(java.util.Random) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) LogicalType(org.apache.flink.table.types.logical.LogicalType) LinkedList(java.util.LinkedList) SortSpec(org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)

Aggregations

SortSpec (org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec)11 RowData (org.apache.flink.table.data.RowData)6 GeneratedRecordComparator (org.apache.flink.table.runtime.generated.GeneratedRecordComparator)5 LogicalType (org.apache.flink.table.types.logical.LogicalType)5 RowType (org.apache.flink.table.types.logical.RowType)5 Transformation (org.apache.flink.api.dag.Transformation)4 TableException (org.apache.flink.table.api.TableException)4 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)4 Random (java.util.Random)3 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)2 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)2 CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext)2 EmptyRowDataKeySelector (org.apache.flink.table.runtime.keyselector.EmptyRowDataKeySelector)2 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)2 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1