Search in sources :

Example 31 with ObjectPath

use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

the class PushPartitionIntoTableSourceScanRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames = tableSourceTable.contextResolvedTable().<ResolvedCatalogTable>getResolvedTable().getPartitionKeys();
    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates = RexNodeExtractor.extractPartitionPredicateList(filter.getCondition(), FlinkRelOptUtil.getMaxCnfNodeCount(scan), inputFieldNames.toArray(new String[0]), rexBuilder, partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }
    // build pruner
    LogicalType[] partitionFieldTypes = partitionFieldNames.stream().map(name -> {
        int index = inputFieldNames.indexOf(name);
        if (index < 0) {
            throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", name));
        }
        return inputFieldTypes.getFieldList().get(index).getType();
    }).map(FlinkTypeFactory::toLogicalType).toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate = adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner = partitions -> PartitionPruner.prunePartitions(context.getTableConfig(), partitionFieldNames.toArray(new String[0]), partitionFieldTypes, partitions, finalPartitionPredicate);
    // prune partitions
    List<Map<String, String>> remainingPartitions = readPartitionsAndPrune(rexBuilder, context, tableSourceTable, defaultPruner, allPredicates._1(), inputFieldNames);
    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // clear all information before
                newTableStat = null;
                break;
            } else {
                newTableStat = newTableStat == null ? partitionStats.get() : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic = FlinkStatistic.builder().statistic(tableSourceTable.getStatistic()).tableStats(newTableStat).build();
    TableSourceTable newTableSourceTable = tableSourceTable.copy(dynamicTableSource, newStatistic, new SourceAbilitySpec[] { partitionPushDownSpec });
    LogicalTableScan newScan = LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
    // transform to new node
    RexNode nonPartitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
Also used : CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) Arrays(java.util.Arrays) SourceAbilityContext(org.apache.flink.table.planner.plan.abilities.source.SourceAbilityContext) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) CatalogTable(org.apache.flink.table.catalog.CatalogTable) ShortcutUtils(org.apache.flink.table.planner.utils.ShortcutUtils) SupportsPartitionPushDown(org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown) FlinkTypeFactory(org.apache.flink.table.planner.calcite.FlinkTypeFactory) RexUtil(org.apache.calcite.rex.RexUtil) RexNode(org.apache.calcite.rex.RexNode) RelBuilder(org.apache.calcite.tools.RelBuilder) ResolvedExpression(org.apache.flink.table.expressions.ResolvedExpression) Map(java.util.Map) TableStats(org.apache.flink.table.plan.stats.TableStats) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) PartitionPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec) TimeZone(java.util.TimeZone) Seq(scala.collection.Seq) FlinkContext(org.apache.flink.table.planner.calcite.FlinkContext) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) SourceAbilitySpec(org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) TableNotPartitionedException(org.apache.flink.table.catalog.exceptions.TableNotPartitionedException) LogicalType(org.apache.flink.table.types.logical.LogicalType) Optional(java.util.Optional) RexNodeToExpressionConverter(org.apache.flink.table.planner.plan.utils.RexNodeToExpressionConverter) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) CatalogTableStatisticsConverter(org.apache.flink.table.planner.utils.CatalogTableStatisticsConverter) RexNodeExtractor(org.apache.flink.table.planner.plan.utils.RexNodeExtractor) Expression(org.apache.flink.table.expressions.Expression) Filter(org.apache.calcite.rel.core.Filter) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Function(java.util.function.Function) ArrayList(java.util.ArrayList) CatalogTableStatistics(org.apache.flink.table.catalog.stats.CatalogTableStatistics) Catalog(org.apache.flink.table.catalog.Catalog) PartitionPruner(org.apache.flink.table.planner.plan.utils.PartitionPruner) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) RelDataType(org.apache.calcite.rel.type.RelDataType) JavaConversions(scala.collection.JavaConversions) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) RexBuilder(org.apache.calcite.rex.RexBuilder) TableException(org.apache.flink.table.api.TableException) Option(scala.Option) FlinkRelOptUtil(org.apache.flink.table.planner.plan.utils.FlinkRelOptUtil) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) CatalogPartitionSpec(org.apache.flink.table.catalog.CatalogPartitionSpec) RelOptRule(org.apache.calcite.plan.RelOptRule) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) RexShuttle(org.apache.calcite.rex.RexShuttle) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) ObjectPath(org.apache.flink.table.catalog.ObjectPath) LogicalType(org.apache.flink.table.types.logical.LogicalType) RelDataType(org.apache.calcite.rel.type.RelDataType) FlinkStatistic(org.apache.flink.table.planner.plan.stats.FlinkStatistic) RexBuilder(org.apache.calcite.rex.RexBuilder) List(java.util.List) ArrayList(java.util.ArrayList) TableSourceTable(org.apache.flink.table.planner.plan.schema.TableSourceTable) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) TableException(org.apache.flink.table.api.TableException) RelBuilder(org.apache.calcite.tools.RelBuilder) FlinkContext(org.apache.flink.table.planner.calcite.FlinkContext) TableStats(org.apache.flink.table.plan.stats.TableStats) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) Catalog(org.apache.flink.table.catalog.Catalog) PartitionPushDownSpec(org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec) Filter(org.apache.calcite.rel.core.Filter) Map(java.util.Map) Seq(scala.collection.Seq) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) RexNode(org.apache.calcite.rex.RexNode)

Example 32 with ObjectPath

use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

the class PushPartitionIntoTableSourceScanRule method readPartitionFromCatalogAndPrune.

private List<Map<String, String>> readPartitionFromCatalogAndPrune(RexBuilder rexBuilder, FlinkContext context, Catalog catalog, ObjectIdentifier tableIdentifier, List<String> allFieldNames, Seq<RexNode> partitionPredicate, Function<List<Map<String, String>>, List<Map<String, String>>> pruner) throws TableNotExistException, TableNotPartitionedException {
    ObjectPath tablePath = tableIdentifier.toObjectPath();
    // build filters
    RexNodeToExpressionConverter converter = new RexNodeToExpressionConverter(rexBuilder, allFieldNames.toArray(new String[0]), context.getFunctionCatalog(), context.getCatalogManager(), TimeZone.getTimeZone(context.getTableConfig().getLocalTimeZone()));
    ArrayList<Expression> partitionFilters = new ArrayList<>();
    Option<ResolvedExpression> subExpr;
    for (RexNode node : JavaConversions.seqAsJavaList(partitionPredicate)) {
        subExpr = node.accept(converter);
        if (!subExpr.isEmpty()) {
            partitionFilters.add(subExpr.get());
        } else {
            // if part of expr is unresolved, we read all partitions and prune.
            return readPartitionFromCatalogWithoutFilterAndPrune(catalog, tablePath, pruner);
        }
    }
    try {
        return catalog.listPartitionsByFilter(tablePath, partitionFilters).stream().map(CatalogPartitionSpec::getPartitionSpec).collect(Collectors.toList());
    } catch (UnsupportedOperationException e) {
        return readPartitionFromCatalogWithoutFilterAndPrune(catalog, tablePath, pruner);
    }
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) RexNodeToExpressionConverter(org.apache.flink.table.planner.plan.utils.RexNodeToExpressionConverter) ResolvedExpression(org.apache.flink.table.expressions.ResolvedExpression) Expression(org.apache.flink.table.expressions.Expression) ArrayList(java.util.ArrayList) ResolvedExpression(org.apache.flink.table.expressions.ResolvedExpression) RexNode(org.apache.calcite.rex.RexNode)

Example 33 with ObjectPath

use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

the class SqlToOperationConverterTest method before.

@Before
public void before() throws TableAlreadyExistException, DatabaseNotExistException {
    catalogManager.initSchemaResolver(isStreamingMode, ExpressionResolverMocks.basicResolver(catalogManager, functionCatalog, parser));
    final ObjectPath path1 = new ObjectPath(catalogManager.getCurrentDatabase(), "t1");
    final ObjectPath path2 = new ObjectPath(catalogManager.getCurrentDatabase(), "t2");
    final TableSchema tableSchema = TableSchema.builder().field("a", DataTypes.BIGINT()).field("b", DataTypes.VARCHAR(Integer.MAX_VALUE)).field("c", DataTypes.INT()).field("d", DataTypes.VARCHAR(Integer.MAX_VALUE)).build();
    Map<String, String> options = new HashMap<>();
    options.put("connector", "COLLECTION");
    final CatalogTable catalogTable = new CatalogTableImpl(tableSchema, options, "");
    catalog.createTable(path1, catalogTable, true);
    catalog.createTable(path2, catalogTable, true);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Before(org.junit.Before)

Example 34 with ObjectPath

use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

the class TableEnvironmentTest method innerTestManagedTableFromDescriptor.

private void innerTestManagedTableFromDescriptor(boolean ignoreIfExists, boolean isTemporary) {
    final TableEnvironmentMock tEnv = TableEnvironmentMock.getStreamingInstance();
    final String catalog = tEnv.getCurrentCatalog();
    final String database = tEnv.getCurrentDatabase();
    final Schema schema = Schema.newBuilder().column("f0", DataTypes.INT()).build();
    final String tableName = UUID.randomUUID().toString();
    ObjectIdentifier identifier = ObjectIdentifier.of(catalog, database, tableName);
    // create table
    MANAGED_TABLES.put(identifier, new AtomicReference<>());
    CreateTableOperation createOperation = new CreateTableOperation(identifier, TableDescriptor.forManaged().schema(schema).option("a", "Test").build().toCatalogTable(), ignoreIfExists, isTemporary);
    tEnv.executeInternal(createOperation);
    // test ignore: create again
    if (ignoreIfExists) {
        tEnv.executeInternal(createOperation);
    } else {
        assertThatThrownBy(() -> tEnv.executeInternal(createOperation), isTemporary ? "already exists" : "Could not execute CreateTable");
    }
    // lookup table
    boolean isInCatalog = tEnv.getCatalog(catalog).orElseThrow(AssertionError::new).tableExists(new ObjectPath(database, tableName));
    if (isTemporary) {
        assertThat(isInCatalog).isFalse();
    } else {
        assertThat(isInCatalog).isTrue();
    }
    final Optional<ContextResolvedTable> lookupResult = tEnv.getCatalogManager().getTable(identifier);
    assertThat(lookupResult.isPresent()).isTrue();
    final CatalogBaseTable catalogTable = lookupResult.get().getTable();
    assertThat(catalogTable instanceof CatalogTable).isTrue();
    assertThat(catalogTable.getUnresolvedSchema()).isEqualTo(schema);
    assertThat(catalogTable.getOptions().get("a")).isEqualTo("Test");
    assertThat(catalogTable.getOptions().get(ENRICHED_KEY)).isEqualTo(ENRICHED_VALUE);
    AtomicReference<Map<String, String>> reference = MANAGED_TABLES.get(identifier);
    assertThat(reference.get()).isNotNull();
    assertThat(reference.get().get("a")).isEqualTo("Test");
    assertThat(reference.get().get(ENRICHED_KEY)).isEqualTo(ENRICHED_VALUE);
    DropTableOperation dropOperation = new DropTableOperation(identifier, ignoreIfExists, isTemporary);
    tEnv.executeInternal(dropOperation);
    assertThat(MANAGED_TABLES.get(identifier).get()).isNull();
    // test ignore: drop again
    if (ignoreIfExists) {
        tEnv.executeInternal(dropOperation);
    } else {
        assertThatThrownBy(() -> tEnv.executeInternal(dropOperation), "does not exist");
    }
    MANAGED_TABLES.remove(identifier);
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) CreateTableOperation(org.apache.flink.table.operations.ddl.CreateTableOperation) CatalogTable(org.apache.flink.table.catalog.CatalogTable) DropTableOperation(org.apache.flink.table.operations.ddl.DropTableOperation) ContextResolvedTable(org.apache.flink.table.catalog.ContextResolvedTable) TableEnvironmentMock(org.apache.flink.table.utils.TableEnvironmentMock) Map(java.util.Map) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier)

Example 35 with ObjectPath

use of org.apache.flink.table.catalog.ObjectPath in project flink by apache.

the class TableEnvironmentTest method testCreateTemporaryTableFromDescriptor.

@Test
public void testCreateTemporaryTableFromDescriptor() {
    final TableEnvironmentMock tEnv = TableEnvironmentMock.getStreamingInstance();
    final String catalog = tEnv.getCurrentCatalog();
    final String database = tEnv.getCurrentDatabase();
    final Schema schema = Schema.newBuilder().column("f0", DataTypes.INT()).build();
    tEnv.createTemporaryTable("T", TableDescriptor.forConnector("fake").schema(schema).option("a", "Test").build());
    assertThat(tEnv.getCatalog(catalog).orElseThrow(AssertionError::new).tableExists(new ObjectPath(database, "T"))).isFalse();
    final Optional<ContextResolvedTable> lookupResult = tEnv.getCatalogManager().getTable(ObjectIdentifier.of(catalog, database, "T"));
    assertThat(lookupResult.isPresent()).isTrue();
    final CatalogBaseTable catalogTable = lookupResult.get().getTable();
    assertThat(catalogTable instanceof CatalogTable).isTrue();
    assertThat(catalogTable.getUnresolvedSchema()).isEqualTo(schema);
    assertThat(catalogTable.getOptions().get("connector")).isEqualTo("fake");
    assertThat(catalogTable.getOptions().get("a")).isEqualTo("Test");
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) ContextResolvedTable(org.apache.flink.table.catalog.ContextResolvedTable) CatalogTable(org.apache.flink.table.catalog.CatalogTable) TableEnvironmentMock(org.apache.flink.table.utils.TableEnvironmentMock) Test(org.junit.jupiter.api.Test)

Aggregations

ObjectPath (org.apache.flink.table.catalog.ObjectPath)81 Test (org.junit.Test)52 CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable)32 CatalogTable (org.apache.flink.table.catalog.CatalogTable)29 HashMap (java.util.HashMap)21 CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl)20 TableSchema (org.apache.flink.table.api.TableSchema)19 TableEnvironment (org.apache.flink.table.api.TableEnvironment)17 CatalogPartitionSpec (org.apache.flink.table.catalog.CatalogPartitionSpec)12 Table (org.apache.hadoop.hive.metastore.api.Table)12 Configuration (org.apache.flink.configuration.Configuration)11 SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable)11 TableNotExistException (org.apache.flink.table.catalog.exceptions.TableNotExistException)9 ArrayList (java.util.ArrayList)8 Map (java.util.Map)8 GenericInMemoryCatalog (org.apache.flink.table.catalog.GenericInMemoryCatalog)8 LinkedHashMap (java.util.LinkedHashMap)7 Catalog (org.apache.flink.table.catalog.Catalog)7 ContextResolvedTable (org.apache.flink.table.catalog.ContextResolvedTable)6 ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier)6