Example 1 with PartitionPushDownSpec

Use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.

The class PushPartitionIntoTableSourceScanRule, method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames =
            tableSourceTable
                    .contextResolvedTable()
                    .<ResolvedCatalogTable>getResolvedTable()
                    .getPartitionKeys();
    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
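    // nothing to push down if no predicate touches a partition key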
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }
    // build pruner
    LogicalType[] partitionFieldTypes = partitionFieldNames.stream().map(name -> {
        int index = inputFieldNames.indexOf(name);
        if (index < 0) {
            throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", name));
        }
        return inputFieldTypes.getFieldList().get(index).getType();
    }).map(FlinkTypeFactory::toLogicalType).toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate = adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner =
            partitions ->
                    PartitionPruner.prunePartitions(
                            context.getTableConfig(),
                            partitionFieldNames.toArray(new String[0]),
                            partitionFieldTypes,
                            partitions,
                            finalPartitionPredicate);
    // prune partitions
    List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder,
                    context,
                    tableSourceTable,
                    defaultPruner,
                    allPredicates._1(),
                    inputFieldNames);
    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // one partition without statistics invalidates everything merged so far
                newTableStat = null;
                break;
            } else {
                newTableStat = newTableStat == null ? partitionStats.get() : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic =
            FlinkStatistic.builder()
                    .statistic(tableSourceTable.getStatistic())
                    .tableStats(newTableStat)
                    .build();
    TableSourceTable newTableSourceTable =
            tableSourceTable.copy(
                    dynamicTableSource,
                    newStatistic,
                    new SourceAbilitySpec[] {partitionPushDownSpec});
    LogicalTableScan newScan = LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
    // transform to new node
    RexNode nonPartitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
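
For context, partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan)) in the method above ultimately calls SupportsPartitionPushDown#applyPartitions on the copied source. The sketch below shows that connector-side contract under stated assumptions: MyFileSource is a hypothetical class written only for illustration, while the SupportsPartitionPushDown, ScanTableSource, and DynamicTableSource methods it overrides are Flink's actual interfaces.

import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown;

// Hypothetical source used only to illustrate the push-down contract.
public class MyFileSource implements ScanTableSource, SupportsPartitionPushDown {

    // Partitions remaining after pruning; null means "read all partitions".
    private List<Map<String, String>> remainingPartitions;

    @Override
    public Optional<List<Map<String, String>>> listPartitions() {
        // Returning empty makes the planner list partitions via the catalog,
        // which is the path readPartitionsAndPrune takes in the rule above.
        return Optional.empty();
    }

    @Override
    public void applyPartitions(List<Map<String, String>> remainingPartitions) {
        // Invoked by PartitionPushDownSpec.apply(...); the source must now
        // restrict its runtime scan to exactly these partitions.
        this.remainingPartitions = remainingPartitions;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        // The rule copies the source before applying the spec, so push-down
        // must not mutate the original instance.
        MyFileSource copy = new MyFileSource();
        copy.remainingPartitions = remainingPartitions;
        return copy;
    }

    @Override
    public String asSummaryString() {
        return "MyFileSource";
    }
}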

Example 2 with PartitionPushDownSpec

Use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.

The class PushPartitionIntoTableSourceScanRule, method matches.

@Override
public boolean matches(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    if (filter.getCondition() == null) {
        return false;
    }
    TableSourceTable tableSourceTable = call.rel(1).getTable().unwrap(TableSourceTable.class);
    if (tableSourceTable == null) {
        return false;
    }
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource();
    if (!(dynamicTableSource instanceof SupportsPartitionPushDown)) {
        return false;
    }
    CatalogTable catalogTable = tableSourceTable.contextResolvedTable().getTable();
    if (!catalogTable.isPartitioned() || catalogTable.getPartitionKeys().isEmpty()) {
        return false;
    }
    return Arrays.stream(tableSourceTable.abilitySpecs()).noneMatch(spec -> spec instanceof PartitionPushDownSpec);
}
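
The final noneMatch guard is what keeps the rule idempotent: once onMatch has rewritten the scan, the new TableSourceTable carries a PartitionPushDownSpec in abilitySpecs(), so matches returns false on the rewritten plan and the rule cannot fire twice. A self-contained sketch of that check follows (the class name is hypothetical; PartitionPushDownSpec and SourceAbilitySpec are the real planner types):

import java.util.Arrays;
import java.util.Collections;

import org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec;
import org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec;

public class PartitionPushDownGuardSketch {
    public static void main(String[] args) {
        // Ability specs as they would look after onMatch has already run once.
        SourceAbilitySpec[] specsAfterPushDown = {
            new PartitionPushDownSpec(Collections.emptyList())
        };
        boolean ruleStillApplies =
                Arrays.stream(specsAfterPushDown)
                        .noneMatch(spec -> spec instanceof PartitionPushDownSpec);
        System.out.println(ruleStillApplies); // prints false
    }
}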

Example 3 with PartitionPushDownSpec

Use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.

The class DynamicTableSourceSpecSerdeTest, method testDynamicTableSinkSpecSerde.

public static Stream<DynamicTableSourceSpec> testDynamicTableSinkSpecSerde() {
    Map<String, String> options1 = new HashMap<>();
    options1.put("connector", FileSystemTableFactory.IDENTIFIER);
    options1.put("format", TestCsvFormatFactory.IDENTIFIER);
    options1.put("path", "/tmp");
    final ResolvedSchema resolvedSchema1 =
            new ResolvedSchema(
                    Collections.singletonList(Column.physical("a", DataTypes.BIGINT())),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable1 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema1).build(),
                    null,
                    Collections.emptyList(),
                    options1);
    DynamicTableSourceSpec spec1 =
            new DynamicTableSourceSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable1, resolvedSchema1)),
                    null);
    Map<String, String> options2 = new HashMap<>();
    options2.put("connector", TestValuesTableFactory.IDENTIFIER);
    options2.put("disable-lookup", "true");
    options2.put("enable-watermark-push-down", "true");
    options2.put("filterable-fields", "b");
    options2.put("bounded", "false");
    options2.put("readable-metadata", "m1:INT, m2:STRING");
    final ResolvedSchema resolvedSchema2 =
            new ResolvedSchema(
                    Arrays.asList(
                            Column.physical("a", DataTypes.BIGINT()),
                            Column.physical("b", DataTypes.INT()),
                            Column.physical("c", DataTypes.STRING()),
                            Column.physical("p", DataTypes.STRING()),
                            Column.metadata("m1", DataTypes.INT(), null, false),
                            Column.metadata("m2", DataTypes.STRING(), null, false),
                            Column.physical("ts", DataTypes.TIMESTAMP(3))),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable2 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema2).build(),
                    null,
                    Collections.emptyList(),
                    options2);
    FlinkTypeFactory factory = FlinkTypeFactory.INSTANCE();
    RexBuilder rexBuilder = new RexBuilder(factory);
    DynamicTableSourceSpec spec2 =
            new DynamicTableSourceSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable2, resolvedSchema2)),
                    Arrays.asList(
                            new ProjectPushDownSpec(
                                    new int[][] {{0}, {1}, {4}, {6}},
                                    RowType.of(
                                            new LogicalType[] {
                                                new BigIntType(),
                                                new IntType(),
                                                new IntType(),
                                                new TimestampType(3)
                                            },
                                            new String[] {"a", "b", "m1", "ts"})),
                            new ReadingMetadataSpec(
                                    Arrays.asList("m1", "m2"),
                                    RowType.of(
                                            new LogicalType[] {
                                                new BigIntType(),
                                                new IntType(),
                                                new IntType(),
                                                new TimestampType(3)
                                            },
                                            new String[] {"a", "b", "m1", "ts"})),
                            new FilterPushDownSpec(
                                    Collections.singletonList(
                                            // b >= 10
                                            rexBuilder.makeCall(
                                                    SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
                                                    rexBuilder.makeInputRef(
                                                            factory.createSqlType(SqlTypeName.INTEGER),
                                                            1),
                                                    rexBuilder.makeExactLiteral(new BigDecimal(10))))),
                            new WatermarkPushDownSpec(
                                    rexBuilder.makeCall(
                                            SqlStdOperatorTable.MINUS,
                                            rexBuilder.makeInputRef(
                                                    factory.createSqlType(SqlTypeName.TIMESTAMP, 3),
                                                    3),
                                            rexBuilder.makeIntervalLiteral(
                                                    BigDecimal.valueOf(1000),
                                                    new SqlIntervalQualifier(
                                                            TimeUnit.SECOND,
                                                            2,
                                                            TimeUnit.SECOND,
                                                            6,
                                                            SqlParserPos.ZERO))),
                                    5000,
                                    RowType.of(
                                            new BigIntType(),
                                            new IntType(),
                                            new IntType(),
                                            new TimestampType(false, TimestampKind.ROWTIME, 3))),
                            new SourceWatermarkSpec(
                                    true,
                                    RowType.of(
                                            new BigIntType(),
                                            new IntType(),
                                            new IntType(),
                                            new TimestampType(false, TimestampKind.ROWTIME, 3))),
                            new LimitPushDownSpec(100),
                            new PartitionPushDownSpec(
                                    Arrays.asList(
                                            new HashMap<String, String>() {
                                                {
                                                    put("p", "A");
                                                }
                                            },
                                            new HashMap<String, String>() {
                                                {
                                                    put("p", "B");
                                                }
                                            }))));
    return Stream.of(spec1, spec2);
}
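
Outside of this serde test, a PartitionPushDownSpec is just a value object recording the partitions that survived pruning; the planner later replays them onto a copied source via apply(...). A minimal sketch (the class name is hypothetical; the constructor and getPartitions() are the spec's real API):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec;

public class PartitionPushDownSpecSketch {
    public static void main(String[] args) {
        // Two remaining partitions, mirroring the {p=A}, {p=B} pair above.
        List<Map<String, String>> remaining =
                Arrays.asList(
                        Collections.singletonMap("p", "A"),
                        Collections.singletonMap("p", "B"));
        PartitionPushDownSpec spec = new PartitionPushDownSpec(remaining);
        System.out.println(spec.getPartitions()); // [{p=A}, {p=B}]
    }
}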
