Example 26 with DynamicTableSource

use of org.apache.flink.table.connector.source.DynamicTableSource in project flink by apache.

the class PushPartitionIntoTableSourceScanRule method readPartitionsAndPrune.

private List<Map<String, String>> readPartitionsAndPrune(
        RexBuilder rexBuilder,
        FlinkContext context,
        TableSourceTable tableSourceTable,
        Function<List<Map<String, String>>, List<Map<String, String>>> pruner,
        Seq<RexNode> partitionPredicate,
        List<String> inputFieldNames) {
    // get partitions from table/catalog and prune
    Optional<Catalog> catalogOptional = tableSourceTable.contextResolvedTable().getCatalog();
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource();
    Optional<List<Map<String, String>>> optionalPartitions = ((SupportsPartitionPushDown) dynamicTableSource).listPartitions();
    if (optionalPartitions.isPresent()) {
        return pruner.apply(optionalPartitions.get());
    } else {
        // fall back to reading partitions from the catalog if the source doesn't provide them via listPartitions.
        if (!catalogOptional.isPresent()) {
            throw new TableException(String.format("Table '%s' connector doesn't provide partitions, and it cannot be loaded from the catalog", tableSourceTable.contextResolvedTable().getIdentifier().asSummaryString()));
        }
        try {
            return readPartitionFromCatalogAndPrune(rexBuilder, context, catalogOptional.get(), tableSourceTable.contextResolvedTable().getIdentifier(), inputFieldNames, partitionPredicate, pruner);
        } catch (TableNotExistException tableNotExistException) {
            throw new TableException(String.format("Table %s is not found in catalog.", tableSourceTable.contextResolvedTable().getIdentifier().asSummaryString()));
        } catch (TableNotPartitionedException tableNotPartitionedException) {
            throw new TableException(String.format("Table %s is not a partitionable source. Validator should have checked it.", tableSourceTable.contextResolvedTable().getIdentifier().asSummaryString()), tableNotPartitionedException);
        }
    }
}
Also used :
- TableException (org.apache.flink.table.api.TableException)
- TableNotPartitionedException (org.apache.flink.table.catalog.exceptions.TableNotPartitionedException)
- TableNotExistException (org.apache.flink.table.catalog.exceptions.TableNotExistException)
- List (java.util.List)
- ArrayList (java.util.ArrayList)
- Catalog (org.apache.flink.table.catalog.Catalog)
- DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
- SupportsPartitionPushDown (org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown)
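
For context, here is a minimal sketch of the connector side this rule consumes. The class name and the "dt" partition values are hypothetical; only the SupportsPartitionPushDown contract (listPartitions/applyPartitions) and the DynamicTableSource/ScanTableSource methods come from the Flink API. A source returning Optional.empty() from listPartitions() would send the rule down the catalog fallback branch above.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown;

// Hypothetical class name; sketch of a partitioned scan source.
public class PartitionedSourceSketch implements ScanTableSource, SupportsPartitionPushDown {

    private List<Map<String, String>> remainingPartitions;

    @Override
    public Optional<List<Map<String, String>>> listPartitions() {
        // Returning Optional.empty() here would make the planner read the
        // partitions from the catalog instead (the else-branch above).
        return Optional.of(Arrays.asList(
                Collections.singletonMap("dt", "2024-01-01"),
                Collections.singletonMap("dt", "2024-01-02")));
    }

    @Override
    public void applyPartitions(List<Map<String, String>> remainingPartitions) {
        // The rule hands back the pruned partition list; the runtime source
        // should restrict its reads to exactly these partitions.
        this.remainingPartitions = remainingPartitions;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext context) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        PartitionedSourceSketch copy = new PartitionedSourceSketch();
        copy.remainingPartitions = remainingPartitions;
        return copy;
    }

    @Override
    public String asSummaryString() {
        return "PartitionedSourceSketch";
    }
}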

Example 27 with DynamicTableSource

use of org.apache.flink.table.connector.source.DynamicTableSource in project flink by apache.

the class PushWatermarkIntoTableSourceScanRuleBase method getNewScan.

/**
 * Uses the input watermark expression to generate the {@link WatermarkGeneratorSupplier}
 * and builds a new scan after the {@link WatermarkStrategy} has been pushed into it.
 * When {@link FlinkLogicalWatermarkAssigner} is the parent of the {@link
 * FlinkLogicalTableSourceScan}, the rowtime type must be modified so that the types in the
 * plan stay consistent. In all other cases the scan keeps its previous data type, and the
 * adjustment is left to the projection rewrite.
 *
 * <p>NOTE: the row type of the scan is not always the same as that of the watermark
 * assigner, because the scan does not add the rowtime column to the row when the watermark
 * assigner is pushed into it. A query may also define computed columns on the rowtime
 * column; changing the rowtime type (to a time attribute) would then also change the type
 * of those computed columns. Therefore, if the watermark assigner is not the parent of the
 * scan, the scan keeps its previous type and the work is left to the projection.
 */
protected FlinkLogicalTableSourceScan getNewScan(
        FlinkLogicalWatermarkAssigner watermarkAssigner,
        RexNode watermarkExpr,
        FlinkLogicalTableSourceScan scan,
        TableConfig tableConfig,
        boolean useWatermarkAssignerRowType) {
    final TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    final DynamicTableSource newDynamicTableSource = tableSourceTable.tableSource().copy();
    final boolean isSourceWatermark = newDynamicTableSource instanceof SupportsSourceWatermark && hasSourceWatermarkDeclaration(watermarkExpr);
    final RelDataType newType;
    if (useWatermarkAssignerRowType) {
        // the projection is trivial, so set the rowtime type directly on the scan
        newType = watermarkAssigner.getRowType();
    } else {
        // the projection adds/removes columns, so the rowtime column type is set in the projection
        newType = scan.getRowType();
    }
    final RowType producedType = (RowType) FlinkTypeFactory.toLogicalType(newType);
    final SourceAbilityContext abilityContext = SourceAbilityContext.from(scan);
    final SourceAbilitySpec abilitySpec;
    if (isSourceWatermark) {
        final SourceWatermarkSpec sourceWatermarkSpec = new SourceWatermarkSpec(true, producedType);
        sourceWatermarkSpec.apply(newDynamicTableSource, abilityContext);
        abilitySpec = sourceWatermarkSpec;
    } else {
        final Duration idleTimeout = tableConfig.getConfiguration().get(ExecutionConfigOptions.TABLE_EXEC_SOURCE_IDLE_TIMEOUT);
        final long idleTimeoutMillis;
        if (!idleTimeout.isZero() && !idleTimeout.isNegative()) {
            idleTimeoutMillis = idleTimeout.toMillis();
        } else {
            idleTimeoutMillis = -1L;
        }
        final WatermarkPushDownSpec watermarkPushDownSpec = new WatermarkPushDownSpec(watermarkExpr, idleTimeoutMillis, producedType);
        watermarkPushDownSpec.apply(newDynamicTableSource, abilityContext);
        abilitySpec = watermarkPushDownSpec;
    }
    TableSourceTable newTableSourceTable = tableSourceTable.copy(newDynamicTableSource, newType, new SourceAbilitySpec[] { abilitySpec });
    return FlinkLogicalTableSourceScan.create(scan.getCluster(), scan.getHints(), newTableSourceTable);
}
Also used :
- WatermarkPushDownSpec (org.apache.flink.table.planner.plan.abilities.source.WatermarkPushDownSpec)
- SourceAbilityContext (org.apache.flink.table.planner.plan.abilities.source.SourceAbilityContext)
- SourceAbilitySpec (org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec)
- SupportsSourceWatermark (org.apache.flink.table.connector.source.abilities.SupportsSourceWatermark)
- RowType (org.apache.flink.table.types.logical.RowType)
- RelDataType (org.apache.calcite.rel.type.RelDataType)
- Duration (java.time.Duration)
- TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable)
- DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
- SourceWatermarkSpec (org.apache.flink.table.planner.plan.abilities.source.SourceWatermarkSpec)
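
A minimal, hypothetical implementation of the push-down side this rule targets. WatermarkPushDownSpec.apply(...) ends up invoking applyWatermark on the copied source, while the isSourceWatermark branch instead calls applySourceWatermark() on sources implementing SupportsSourceWatermark. Only the interface contracts come from the Flink API; the class name is illustrative.

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsWatermarkPushDown;
import org.apache.flink.table.data.RowData;

// Hypothetical class name; sketch of a watermark-aware scan source.
public class WatermarkAwareSourceSketch implements ScanTableSource, SupportsWatermarkPushDown {

    private WatermarkStrategy<RowData> watermarkStrategy;

    @Override
    public void applyWatermark(WatermarkStrategy<RowData> watermarkStrategy) {
        // Invoked by WatermarkPushDownSpec.apply(...) above; the runtime source
        // should emit watermarks according to this strategy.
        this.watermarkStrategy = watermarkStrategy;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext context) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        // The planner copies the source before applying ability specs, as seen
        // in getNewScan above; carry the pushed-down state along.
        WatermarkAwareSourceSketch copy = new WatermarkAwareSourceSketch();
        copy.watermarkStrategy = watermarkStrategy;
        return copy;
    }

    @Override
    public String asSummaryString() {
        return "WatermarkAwareSourceSketch";
    }
}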

Example 28 with DynamicTableSource

use of org.apache.flink.table.connector.source.DynamicTableSource in project flink by apache.

the class PushLocalAggIntoScanRuleBase method pushLocalAggregateIntoScan.

protected void pushLocalAggregateIntoScan(
        RelOptRuleCall call,
        BatchPhysicalGroupAggregateBase localAgg,
        BatchPhysicalTableSourceScan oldScan,
        int[] calcRefFields) {
    RowType inputType = FlinkTypeFactory.toLogicalRowType(oldScan.getRowType());
    List<int[]> groupingSets = Collections.singletonList(ArrayUtils.addAll(localAgg.grouping(), localAgg.auxGrouping()));
    List<AggregateCall> aggCallList = JavaScalaConversionUtil.toJava(localAgg.getAggCallList());
    // map aggregate argument indices to scan field indices via the fields referenced by the calc.
    if (calcRefFields != null) {
        groupingSets = translateGroupingArgIndex(groupingSets, calcRefFields);
        aggCallList = translateAggCallArgIndex(aggCallList, calcRefFields);
    }
    RowType producedType = FlinkTypeFactory.toLogicalRowType(localAgg.getRowType());
    TableSourceTable oldTableSourceTable = oldScan.tableSourceTable();
    DynamicTableSource newTableSource = oldScan.tableSource().copy();
    boolean isPushDownSuccess = AggregatePushDownSpec.apply(inputType, groupingSets, aggCallList, producedType, newTableSource, SourceAbilityContext.from(oldScan));
    if (!isPushDownSuccess) {
        // aggregate push down failed, just return without changing any nodes.
        return;
    }
    // create new source table with new spec and statistic.
    AggregatePushDownSpec aggregatePushDownSpec = new AggregatePushDownSpec(inputType, groupingSets, aggCallList, producedType);
    TableSourceTable newTableSourceTable = oldTableSourceTable.copy(newTableSource, localAgg.getRowType(), new SourceAbilitySpec[] { aggregatePushDownSpec }).copy(FlinkStatistic.UNKNOWN());
    // transform to new nodes.
    BatchPhysicalTableSourceScan newScan = oldScan.copy(oldScan.getTraitSet(), newTableSourceTable);
    BatchPhysicalExchange oldExchange = call.rel(0);
    BatchPhysicalExchange newExchange = oldExchange.copy(oldExchange.getTraitSet(), newScan, oldExchange.getDistribution());
    call.transformTo(newExchange);
}
Also used :
- AggregateCall (org.apache.calcite.rel.core.AggregateCall)
- SourceAbilitySpec (org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec)
- AggregatePushDownSpec (org.apache.flink.table.planner.plan.abilities.source.AggregatePushDownSpec)
- RowType (org.apache.flink.table.types.logical.RowType)
- TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable)
- BatchPhysicalExchange (org.apache.flink.table.planner.plan.nodes.physical.batch.BatchPhysicalExchange)
- DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
- BatchPhysicalTableSourceScan (org.apache.flink.table.planner.plan.nodes.physical.batch.BatchPhysicalTableSourceScan)
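
AggregatePushDownSpec.apply(...) forwards to SupportsAggregatePushDown.applyAggregates(...) on the copied source and reports success or failure back to the rule. A hypothetical source that declines every push-down (forcing the early return above) could be sketched as follows; only the interface names and signatures are taken from the Flink API.

import java.util.List;

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsAggregatePushDown;
import org.apache.flink.table.expressions.AggregateExpression;
import org.apache.flink.table.types.DataType;

// Hypothetical class name; sketch of a source that rejects aggregate push-down.
public class AggregatingSourceSketch implements ScanTableSource, SupportsAggregatePushDown {

    @Override
    public boolean applyAggregates(
            List<int[]> groupingSets,
            List<AggregateExpression> aggregateExpressions,
            DataType producedDataType) {
        // A real source would record the grouping sets and aggregate calls and
        // return true; returning false signals that the push-down failed, which
        // triggers the early return in pushLocalAggregateIntoScan above.
        return false;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext context) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        return new AggregatingSourceSketch();
    }

    @Override
    public String asSummaryString() {
        return "AggregatingSourceSketch";
    }
}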

Example 29 with DynamicTableSource

use of org.apache.flink.table.connector.source.DynamicTableSource in project flink by apache.

the class PushProjectIntoTableSourceScanRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    final LogicalProject project = call.rel(0);
    final LogicalTableScan scan = call.rel(1);
    final TableSourceTable sourceTable = scan.getTable().unwrap(TableSourceTable.class);
    final boolean supportsNestedProjection = supportsNestedProjection(sourceTable.tableSource());
    final int[] refFields = RexNodeExtractor.extractRefInputFields(project.getProjects());
    if (!supportsNestedProjection && refFields.length == scan.getRowType().getFieldCount()) {
        // There is no top-level projection and nested projections aren't supported.
        return;
    }
    final FlinkTypeFactory typeFactory = unwrapTypeFactory(scan);
    final ResolvedSchema schema = sourceTable.contextResolvedTable().getResolvedSchema();
    final RowType producedType = createProducedType(schema, sourceTable.tableSource());
    final NestedSchema projectedSchema = NestedProjectionUtil.build(getProjections(project, scan), typeFactory.buildRelNodeRowType(producedType));
    if (!supportsNestedProjection) {
        for (NestedColumn column : projectedSchema.columns().values()) {
            column.markLeaf();
        }
    }
    final List<SourceAbilitySpec> abilitySpecs = new ArrayList<>();
    final RowType newProducedType = performPushDown(sourceTable, projectedSchema, producedType, abilitySpecs);
    final DynamicTableSource newTableSource = sourceTable.tableSource().copy();
    final SourceAbilityContext context = SourceAbilityContext.from(scan);
    abilitySpecs.forEach(spec -> spec.apply(newTableSource, context));
    final RelDataType newRowType = typeFactory.buildRelNodeRowType(newProducedType);
    final TableSourceTable newSource = sourceTable.copy(newTableSource, newRowType, abilitySpecs.toArray(new SourceAbilitySpec[0]));
    final LogicalTableScan newScan = new LogicalTableScan(scan.getCluster(), scan.getTraitSet(), scan.getHints(), newSource);
    final LogicalProject newProject = project.copy(project.getTraitSet(), newScan, rewriteProjections(call, newSource, projectedSchema), project.getRowType());
    if (ProjectRemoveRule.isTrivial(newProject)) {
        call.transformTo(newScan);
    } else {
        call.transformTo(newProject);
    }
}
Also used :
- SourceAbilitySpec (org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec)
- ArrayList (java.util.ArrayList)
- RowType (org.apache.flink.table.types.logical.RowType)
- NestedColumn (org.apache.flink.table.planner.plan.utils.NestedColumn)
- RelDataType (org.apache.calcite.rel.type.RelDataType)
- LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan)
- FlinkTypeFactory (org.apache.flink.table.planner.calcite.FlinkTypeFactory)
- SourceAbilityContext (org.apache.flink.table.planner.plan.abilities.source.SourceAbilityContext)
- LogicalProject (org.apache.calcite.rel.logical.LogicalProject)
- TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable)
- ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema)
- DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
- NestedSchema (org.apache.flink.table.planner.plan.utils.NestedSchema)
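
The ability specs collected here ultimately call SupportsProjectionPushDown.applyProjection(...) on the copied source. A minimal, hypothetical implementation is sketched below, using the two-argument applyProjection variant of recent Flink releases (older releases used a single-argument variant). A source returning false from supportsNestedProjection() is exactly the case where the rule marks every projected column as a leaf, as shown above.

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsProjectionPushDown;
import org.apache.flink.table.types.DataType;

// Hypothetical class name; sketch of a source accepting projection push-down.
public class ProjectingSourceSketch implements ScanTableSource, SupportsProjectionPushDown {

    private int[][] projectedFields;

    @Override
    public boolean supportsNestedProjection() {
        // Returning false makes the rule above prune only top-level fields.
        return false;
    }

    @Override
    public void applyProjection(int[][] projectedFields, DataType producedDataType) {
        // Each inner array is a path of field indices into the produced row type;
        // with nested projection disabled every path has length 1.
        this.projectedFields = projectedFields;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext context) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        ProjectingSourceSketch copy = new ProjectingSourceSketch();
        copy.projectedFields = projectedFields;
        return copy;
    }

    @Override
    public String asSummaryString() {
        return "ProjectingSourceSketch";
    }
}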

Example 30 with DynamicTableSource

use of org.apache.flink.table.connector.source.DynamicTableSource in project flink by apache.

the class PushProjectIntoTableSourceScanRule method matches.

@Override
public boolean matches(RelOptRuleCall call) {
    final LogicalTableScan scan = call.rel(1);
    final TableSourceTable sourceTable = scan.getTable().unwrap(TableSourceTable.class);
    if (sourceTable == null) {
        return false;
    }
    final DynamicTableSource source = sourceTable.tableSource();
    // The source supports projection push-down.
    if (supportsProjectionPushDown(source)) {
        return Arrays.stream(sourceTable.abilitySpecs()).noneMatch(spec -> spec instanceof ProjectPushDownSpec);
    }
    // Projection push-down (for physical columns) is not supported; check whether
    // the source at least allows projecting metadata columns.
    if (supportsMetadata(source)) {
        if (Arrays.stream(sourceTable.abilitySpecs()).anyMatch(spec -> spec instanceof ReadingMetadataSpec)) {
            return false;
        }
        return ((SupportsReadingMetadata) source).supportsMetadataProjection();
    }
    return false;
}
Also used :
- ProjectPushDownSpec (org.apache.flink.table.planner.plan.abilities.source.ProjectPushDownSpec)
- SupportsReadingMetadata (org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata)
- TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable)
- ReadingMetadataSpec (org.apache.flink.table.planner.plan.abilities.source.ReadingMetadataSpec)
- LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan)
- DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
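
The supportsMetadataProjection() check comes from SupportsReadingMetadata, whose default implementation returns true. A hypothetical metadata-capable source that opts out (so matches() above returns false unless projection push-down is also supported) might look like this sketch; the metadata key and type are illustrative only.

import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata;
import org.apache.flink.table.types.DataType;

// Hypothetical class name; sketch of a metadata-reading scan source.
public class MetadataSourceSketch implements ScanTableSource, SupportsReadingMetadata {

    @Override
    public Map<String, DataType> listReadableMetadata() {
        // One illustrative metadata column this source can expose.
        return Collections.singletonMap("ingestion-timestamp", DataTypes.TIMESTAMP_LTZ(3));
    }

    @Override
    public void applyReadableMetadata(List<String> metadataKeys, DataType producedDataType) {
        // The planner passes the metadata keys that survive projection; a real
        // source would append these columns to each produced row.
    }

    @Override
    public boolean supportsMetadataProjection() {
        // Overriding the default (true) disables metadata projection for this
        // source, which is the last check in matches() above.
        return false;
    }

    @Override
    public ChangelogMode getChangelogMode() {
        return ChangelogMode.insertOnly();
    }

    @Override
    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext context) {
        throw new UnsupportedOperationException("omitted in this sketch");
    }

    @Override
    public DynamicTableSource copy() {
        return new MetadataSourceSketch();
    }

    @Override
    public String asSummaryString() {
        return "MetadataSourceSketch";
    }
}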

Aggregations

DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource): 55
Test (org.junit.Test): 24
DynamicTableSink (org.apache.flink.table.connector.sink.DynamicTableSink): 12
TestDynamicTableFactory (org.apache.flink.table.factories.TestDynamicTableFactory): 12
Test (org.junit.jupiter.api.Test): 10
RowData (org.apache.flink.table.data.RowData): 9
DecodingFormatMock (org.apache.flink.table.factories.TestFormatFactory.DecodingFormatMock): 8
TableSourceTable (org.apache.flink.table.planner.plan.schema.TableSourceTable): 8
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 7
HashMap (java.util.HashMap): 5
Configuration (org.apache.flink.configuration.Configuration): 5
ScanTableSource (org.apache.flink.table.connector.source.ScanTableSource): 5
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 5
ArrayList (java.util.ArrayList): 4
JdbcConnectorOptions (org.apache.flink.connector.jdbc.internal.options.JdbcConnectorOptions): 4
JdbcLookupOptions (org.apache.flink.connector.jdbc.internal.options.JdbcLookupOptions): 4
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 4
SourceAbilitySpec (org.apache.flink.table.planner.plan.abilities.source.SourceAbilitySpec): 4
List (java.util.List): 3
LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan): 3