Search in sources :

Example 1 with FlinkStatistic

use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.

Method onMatch of the class PushPartitionIntoTableSourceScanRule.

/**
 * Rewrites a {@link Filter} on top of a {@link LogicalTableScan} so that the partition-related
 * part of the filter condition is used to prune partitions, and the remaining partitions are
 * pushed into a copy of the table source.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Split the filter condition into partition predicates and non-partition predicates.
 *   <li>Return without transforming if the conjunction of partition predicates is always true.
 *   <li>Compute the remaining partitions via {@code readPartitionsAndPrune}.
 *   <li>Apply a {@link PartitionPushDownSpec} with the remaining partitions to a copy of the
 *       table source.
 *   <li>Rebuild the table statistic from per-partition statistics (permanent tables only).
 *   <li>Replace the matched plan with the new scan, keeping a filter on top only if
 *       non-partition predicates remain.
 * </ol>
 *
 * @param call the rule call; {@code call.rel(0)} is read as the filter and {@code call.rel(1)}
 *     as the table scan
 */
@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames = tableSourceTable.contextResolvedTable().<ResolvedCatalogTable>getResolvedTable().getPartitionKeys();
    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    // _1 is used below as the partition predicates, _2 as the non-partition predicates
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates = RexNodeExtractor.extractPartitionPredicateList(filter.getCondition(), FlinkRelOptUtil.getMaxCnfNodeCount(scan), inputFieldNames.toArray(new String[0]), rexBuilder, partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
    // nothing to prune with: leave the plan untouched
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }
    // build pruner
    // resolve the logical type of every partition key by looking it up among the input columns
    LogicalType[] partitionFieldTypes = partitionFieldNames.stream().map(name -> {
        int index = inputFieldNames.indexOf(name);
        if (index < 0) {
            throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", name));
        }
        return inputFieldTypes.getFieldList().get(index).getType();
    }).map(FlinkTypeFactory::toLogicalType).toArray(LogicalType[]::new);
    // NOTE(review): adjustPartitionPredicate is defined outside this excerpt; presumably it
    // re-indexes the predicate's field references to the partition key order - confirm
    RexNode finalPartitionPredicate = adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    // pruner that evaluates the partition predicate against a given list of partition specs
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner = partitions -> PartitionPruner.prunePartitions(context.getTableConfig(), partitionFieldNames.toArray(new String[0]), partitionFieldTypes, partitions, finalPartitionPredicate);
    // prune partitions
    // NOTE(review): readPartitionsAndPrune is defined outside this excerpt; how it obtains the
    // candidate partitions and when it falls back to defaultPruner is not visible here
    List<Map<String, String>> remainingPartitions = readPartitionsAndPrune(rexBuilder, context, tableSourceTable, defaultPruner, allPredicates._1(), inputFieldNames);
    // apply push down
    // the spec is applied to a copy, so the source held by the matched scan is not the one modified
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
    // build new statistic
    // stays null (no table stats) for non-permanent tables, for an empty partition list, or
    // when any remaining partition has no statistics
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        // NOTE(review): Optional.get() without a presence check - assumes a permanent table
        // always carries its catalog; confirm against ContextResolvedTable
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // clear all information before
                // a single partition without stats discards everything merged so far
                newTableStat = null;
                break;
            } else {
                // the first partition seeds the result, later ones are merged into it
                newTableStat = newTableStat == null ? partitionStats.get() : newTableStat.merge(partitionStats.get());
            }
        }
    }
    // start from the existing statistic and replace only its table stats
    FlinkStatistic newStatistic = FlinkStatistic.builder().statistic(tableSourceTable.getStatistic()).tableStats(newTableStat).build();
    TableSourceTable newTableSourceTable = tableSourceTable.copy(dynamicTableSource, newStatistic, new SourceAbilitySpec[] { partitionPushDownSpec });
    LogicalTableScan newScan = LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
    // transform to new node
    RexNode nonPartitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        // the whole condition was consumed by partition pruning: the filter is dropped
        call.transformTo(newScan);
    } else {
        // keep a filter with only the non-partition predicates on top of the new scan
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
Also used : CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) Arrays(java.util.Arrays) SourceAbilityContext(org.apache.flink.table.planner.plan.abilities.source.SourceAbilityContext) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) CatalogTable(org.apache.flink.table.catalog.CatalogTable) ShortcutUtils(org.apache.flink.table.planner.utils.ShortcutUtils) SupportsPartitionPushDown(org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown) FlinkTypeFactory(org.apache.flink.table.planner.calcite.FlinkTypeFactory) RexUtil(org.apache.calcite.rex.RexUtil) RexNode(org.apache.calcite.rex.RexNode) RelBuilder(org.apache.calcite.tools.RelBuilder) ResolvedExpression(org.apache.flink.table.expressions.ResolvedExpression) Map(java.util.Map) TableStats(org.apache.flink.table.plan.stats.TableStats) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) PartitionPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec) TimeZone(java.util.TimeZone) Seq(scala.collection.Seq) FlinkContext(org.apache.flink.table.planner.calcite.FlinkContext) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) SourceAbilitySpec(org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) TableNotPartitionedException(org.apache.flink.table.catalog.exceptions.TableNotPartitionedException) LogicalType(org.apache.flink.table.types.logical.LogicalType) Optional(java.util.Optional) RexNodeToExpressionConverter(org.apache.flink.table.planner.plan.utils.RexNodeToExpressionConverter) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) CatalogTableStatisticsConverter(org.apache.flink.table.planner.utils.CatalogTableStatisticsConverter) 
RexNodeExtractor(org.apache.flink.table.planner.plan.utils.RexNodeExtractor) Expression(org.apache.flink.table.expressions.Expression) Filter(org.apache.calcite.rel.core.Filter) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Function(java.util.function.Function) ArrayList(java.util.ArrayList) CatalogTableStatistics(org.apache.flink.table.catalog.stats.CatalogTableStatistics) Catalog(org.apache.flink.table.catalog.Catalog) PartitionPruner(org.apache.flink.table.planner.plan.utils.PartitionPruner) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) RelDataType(org.apache.calcite.rel.type.RelDataType) JavaConversions(scala.collection.JavaConversions) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) RexBuilder(org.apache.calcite.rex.RexBuilder) TableException(org.apache.flink.table.api.TableException) Option(scala.Option) FlinkRelOptUtil(org.apache.flink.table.planner.plan.utils.FlinkRelOptUtil) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) RelOptRule(org.apache.calcite.plan.RelOptRule) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) RexShuttle(org.apache.calcite.rex.RexShuttle) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) ObjectPath(org.apache.flink.table.catalog.ObjectPath) LogicalType(org.apache.flink.table.types.logical.LogicalType) RelDataType(org.apache.calcite.rel.type.RelDataType) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) RexBuilder(org.apache.calcite.rex.RexBuilder) List(java.util.List) ArrayList(java.util.ArrayList) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) TableException(org.apache.flink.table.api.TableException) 
RelBuilder(org.apache.calcite.tools.RelBuilder) FlinkContext(org.apache.flink.table.planner.calcite.FlinkContext) TableStats(org.apache.flink.table.plan.stats.TableStats) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) Catalog(org.apache.flink.table.catalog.Catalog) PartitionPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec) Filter(org.apache.calcite.rel.core.Filter) Map(java.util.Map) Seq(scala.collection.Seq) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) RexNode(org.apache.calcite.rex.RexNode)

Example 2 with FlinkStatistic

use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.

Method applyLimit of the class PushLimitIntoTableSourceScanRule.

/**
 * Creates a copy of the scanned {@link TableSourceTable} whose source has the given limit
 * applied and whose table stats are replaced by a row-count-only {@link TableStats}.
 *
 * <p>The new row count is {@code limit} when the current statistic has no row count, otherwise
 * the smaller of {@code limit} and the current row count truncated to a {@code long}.
 *
 * @param limit the limit to push into the source
 * @param scan the scan whose table is copied
 * @return the copied table carrying the limited source, the new statistic and the limit spec
 */
private TableSourceTable applyLimit(long limit, FlinkLogicalTableSourceScan scan) {
    TableSourceTable relOptTable = scan.getTable().unwrap(TableSourceTable.class);
    TableSourceTable sourceTable = relOptTable.unwrap(TableSourceTable.class);

    // Apply the limit to a copy of the source rather than to the source held by the scan.
    DynamicTableSource limitedSource = sourceTable.tableSource().copy();
    LimitPushDownSpec limitSpec = new LimitPushDownSpec(limit);
    limitSpec.apply(limitedSource, SourceAbilityContext.from(scan));

    // update TableStats after limit push down
    FlinkStatistic currentStatistic = relOptTable.getStatistic();
    final long cappedRowCount =
            currentStatistic.getRowCount() == null
                    ? limit
                    : Math.min(limit, currentStatistic.getRowCount().longValue());
    TableStats limitedStats = new TableStats(cappedRowCount);
    FlinkStatistic limitedStatistic =
            FlinkStatistic.builder()
                    .statistic(currentStatistic)
                    .tableStats(limitedStats)
                    .build();

    return sourceTable.copy(
            limitedSource, limitedStatistic, new SourceAbilitySpec[] { limitSpec });
}
Also used : LimitPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.LimitPushDownSpec) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) TableStats(org.apache.flink.table.plan.stats.TableStats) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource)

Example 3 with FlinkStatistic

use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.

Method createAnonymous of the class CatalogSourceTable.

/**
 * Builds a {@link CatalogSourceTable} for an anonymous {@link ContextResolvedTable}, which
 * allows a preparing table to be created by hand without going through the Calcite catalog
 * resolution.
 *
 * @param relBuilder supplies the RelOptSchema and the type factory for the new table
 * @param contextResolvedTable the table to wrap; must be anonymous
 * @param isBatchMode its negation is passed on to the {@link CatalogSchemaTable}
 * @return the newly created source table
 */
public static CatalogSourceTable createAnonymous(FlinkRelBuilder relBuilder, ContextResolvedTable contextResolvedTable, boolean isBatchMode) {
    Preconditions.checkArgument(contextResolvedTable.isAnonymous(), "ContextResolvedTable must be anonymous");

    // Statistics are unknown for anonymous tables
    // Look at DatabaseCalciteSchema#getStatistic for more details
    final FlinkStatistic unknownStatistic =
            FlinkStatistic.unknown(contextResolvedTable.getResolvedSchema()).build();

    final boolean notBatchMode = !isBatchMode;
    final CatalogSchemaTable schemaTable =
            new CatalogSchemaTable(contextResolvedTable, unknownStatistic, notBatchMode);

    return new CatalogSourceTable(
            relBuilder.getRelOptSchema(),
            contextResolvedTable.getIdentifier().toList(),
            schemaTable.getRowType(relBuilder.getTypeFactory()),
            schemaTable);
}
Also used : FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) CatalogSchemaTable(org.apache.flink.table.planner.catalog.CatalogSchemaTable)

Example 4 with FlinkStatistic

use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.

Method getNewFlinkStatistic of the class PushFilterIntoSourceScanRuleBase.

/**
 * Chooses the statistic a table should carry after a filter push-down attempt.
 *
 * <p>The current statistic is returned unchanged when the two predicate counts are equal (no
 * predicate could be pushed down) or when it is the {@code FlinkStatistic.UNKNOWN()} instance.
 * Otherwise a copy of the current statistic with its table stats removed is returned.
 *
 * @param tableSourceTable the table whose current statistic is inspected
 * @param originPredicatesSize number of predicates before the push-down
 * @param updatedPredicatesSize number of predicates after the push-down
 * @return the statistic to use for the table
 */
protected FlinkStatistic getNewFlinkStatistic(TableSourceTable tableSourceTable, int originPredicatesSize, int updatedPredicatesSize) {
    final FlinkStatistic current = tableSourceTable.getStatistic();

    // Keep all Statistics if no predicates can be pushed down, or if there is nothing known
    // to begin with (identity comparison against the UNKNOWN instance).
    final boolean nothingPushedDown = originPredicatesSize == updatedPredicatesSize;
    if (nothingPushedDown || current == FlinkStatistic.UNKNOWN()) {
        return current;
    }

    // Remove tableStats after predicates pushed down
    return FlinkStatistic.builder().statistic(current).tableStats(null).build();
}
Also used : FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic)

Example 5 with FlinkStatistic

use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.

Method convertDataStreamToRel of the class DynamicSourceUtils.

/**
 * Turns the given {@link DataStream} into a {@link RelNode}, adding helper projections when
 * they are needed.
 *
 * <p>The stream is wrapped in an {@code ExternalDynamicSource} and handed to
 * {@code convertSourceToRel} together with an unknown statistic derived from the table's
 * resolved schema and an empty list.
 */
public static RelNode convertDataStreamToRel(boolean isBatchMode, ReadableConfig config, FlinkRelBuilder relBuilder, ContextResolvedTable contextResolvedTable, DataStream<?> dataStream, DataType physicalDataType, boolean isTopLevelRecord, ChangelogMode changelogMode) {
    final DynamicTableSource externalSource =
            new ExternalDynamicSource<>(
                    contextResolvedTable.getIdentifier(),
                    dataStream,
                    physicalDataType,
                    isTopLevelRecord,
                    changelogMode);

    final FlinkStatistic unknownStatistic =
            FlinkStatistic.unknown(contextResolvedTable.getResolvedSchema()).build();

    return convertSourceToRel(
            isBatchMode,
            config,
            relBuilder,
            contextResolvedTable,
            unknownStatistic,
            Collections.emptyList(),
            externalSource);
}
Also used : FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource)

Aggregations

FlinkStatistic (org.apache.flink.table.planner.plan.stats.FlinkStatistic)5 DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)3 TableStats (org.apache.flink.table.plan.stats.TableStats)2 TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable)2 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 TimeZone (java.util.TimeZone)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 RelOptRule (org.apache.calcite.plan.RelOptRule)1 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)1 Filter (org.apache.calcite.rel.core.Filter)1 LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan)1 RelDataType (org.apache.calcite.rel.type.RelDataType)1 RexBuilder (org.apache.calcite.rex.RexBuilder)1 RexInputRef (org.apache.calcite.rex.RexInputRef)1 RexNode (org.apache.calcite.rex.RexNode)1