use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.
the class PushPartitionIntoTableSourceScanRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
Filter filter = call.rel(0);
LogicalTableScan scan = call.rel(1);
TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
RelDataType inputFieldTypes = filter.getInput().getRowType();
List<String> inputFieldNames = inputFieldTypes.getFieldNames();
List<String> partitionFieldNames = tableSourceTable.contextResolvedTable().<ResolvedCatalogTable>getResolvedTable().getPartitionKeys();
// extract partition predicates
RelBuilder relBuilder = call.builder();
RexBuilder rexBuilder = relBuilder.getRexBuilder();
Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates = RexNodeExtractor.extractPartitionPredicateList(filter.getCondition(), FlinkRelOptUtil.getMaxCnfNodeCount(scan), inputFieldNames.toArray(new String[0]), rexBuilder, partitionFieldNames.toArray(new String[0]));
RexNode partitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
if (partitionPredicate.isAlwaysTrue()) {
return;
}
// build pruner
LogicalType[] partitionFieldTypes = partitionFieldNames.stream().map(name -> {
int index = inputFieldNames.indexOf(name);
if (index < 0) {
throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", name));
}
return inputFieldTypes.getFieldList().get(index).getType();
}).map(FlinkTypeFactory::toLogicalType).toArray(LogicalType[]::new);
RexNode finalPartitionPredicate = adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
FlinkContext context = ShortcutUtils.unwrapContext(scan);
Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner = partitions -> PartitionPruner.prunePartitions(context.getTableConfig(), partitionFieldNames.toArray(new String[0]), partitionFieldTypes, partitions, finalPartitionPredicate);
// prune partitions
List<Map<String, String>> remainingPartitions = readPartitionsAndPrune(rexBuilder, context, tableSourceTable, defaultPruner, allPredicates._1(), inputFieldNames);
// apply push down
DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
// build new statistic
TableStats newTableStat = null;
if (tableSourceTable.contextResolvedTable().isPermanent()) {
ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
ObjectPath tablePath = identifier.toObjectPath();
Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
for (Map<String, String> partition : remainingPartitions) {
Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
if (!partitionStats.isPresent()) {
// clear all information before
newTableStat = null;
break;
} else {
newTableStat = newTableStat == null ? partitionStats.get() : newTableStat.merge(partitionStats.get());
}
}
}
FlinkStatistic newStatistic = FlinkStatistic.builder().statistic(tableSourceTable.getStatistic()).tableStats(newTableStat).build();
TableSourceTable newTableSourceTable = tableSourceTable.copy(dynamicTableSource, newStatistic, new SourceAbilitySpec[] { partitionPushDownSpec });
LogicalTableScan newScan = LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
// transform to new node
RexNode nonPartitionPredicate = RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
if (nonPartitionPredicate.isAlwaysTrue()) {
call.transformTo(newScan);
} else {
Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
call.transformTo(newFilter);
}
}
use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.
the class PushPartitionIntoTableSourceScanRule method matches.
@Override
public boolean matches(RelOptRuleCall call) {
Filter filter = call.rel(0);
if (filter.getCondition() == null) {
return false;
}
TableSourceTable tableSourceTable = call.rel(1).getTable().unwrap(TableSourceTable.class);
if (tableSourceTable == null) {
return false;
}
DynamicTableSource dynamicTableSource = tableSourceTable.tableSource();
if (!(dynamicTableSource instanceof SupportsPartitionPushDown)) {
return false;
}
CatalogTable catalogTable = tableSourceTable.contextResolvedTable().getTable();
if (!catalogTable.isPartitioned() || catalogTable.getPartitionKeys().isEmpty()) {
return false;
}
return Arrays.stream(tableSourceTable.abilitySpecs()).noneMatch(spec -> spec instanceof PartitionPushDownSpec);
}
use of org.apache.flink.table.planner.plan.abilities.source.PartitionPushDownSpec in project flink by apache.
the class DynamicTableSourceSpecSerdeTest method testDynamicTableSinkSpecSerde.
public static Stream<DynamicTableSourceSpec> testDynamicTableSinkSpecSerde() {
Map<String, String> options1 = new HashMap<>();
options1.put("connector", FileSystemTableFactory.IDENTIFIER);
options1.put("format", TestCsvFormatFactory.IDENTIFIER);
options1.put("path", "/tmp");
final ResolvedSchema resolvedSchema1 = new ResolvedSchema(Collections.singletonList(Column.physical("a", DataTypes.BIGINT())), Collections.emptyList(), null);
final CatalogTable catalogTable1 = CatalogTable.of(Schema.newBuilder().fromResolvedSchema(resolvedSchema1).build(), null, Collections.emptyList(), options1);
DynamicTableSourceSpec spec1 = new DynamicTableSourceSpec(ContextResolvedTable.temporary(ObjectIdentifier.of(DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"), new ResolvedCatalogTable(catalogTable1, resolvedSchema1)), null);
Map<String, String> options2 = new HashMap<>();
options2.put("connector", TestValuesTableFactory.IDENTIFIER);
options2.put("disable-lookup", "true");
options2.put("enable-watermark-push-down", "true");
options2.put("filterable-fields", "b");
options2.put("bounded", "false");
options2.put("readable-metadata", "m1:INT, m2:STRING");
final ResolvedSchema resolvedSchema2 = new ResolvedSchema(Arrays.asList(Column.physical("a", DataTypes.BIGINT()), Column.physical("b", DataTypes.INT()), Column.physical("c", DataTypes.STRING()), Column.physical("p", DataTypes.STRING()), Column.metadata("m1", DataTypes.INT(), null, false), Column.metadata("m2", DataTypes.STRING(), null, false), Column.physical("ts", DataTypes.TIMESTAMP(3))), Collections.emptyList(), null);
final CatalogTable catalogTable2 = CatalogTable.of(Schema.newBuilder().fromResolvedSchema(resolvedSchema2).build(), null, Collections.emptyList(), options2);
FlinkTypeFactory factory = FlinkTypeFactory.INSTANCE();
RexBuilder rexBuilder = new RexBuilder(factory);
DynamicTableSourceSpec spec2 = new DynamicTableSourceSpec(ContextResolvedTable.temporary(ObjectIdentifier.of(DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"), new ResolvedCatalogTable(catalogTable2, resolvedSchema2)), Arrays.asList(new ProjectPushDownSpec(new int[][] { { 0 }, { 1 }, { 4 }, { 6 } }, RowType.of(new LogicalType[] { new BigIntType(), new IntType(), new IntType(), new TimestampType(3) }, new String[] { "a", "b", "m1", "ts" })), new ReadingMetadataSpec(Arrays.asList("m1", "m2"), RowType.of(new LogicalType[] { new BigIntType(), new IntType(), new IntType(), new TimestampType(3) }, new String[] { "a", "b", "m1", "ts" })), new FilterPushDownSpec(Collections.singletonList(// b >= 10
rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, rexBuilder.makeInputRef(factory.createSqlType(SqlTypeName.INTEGER), 1), rexBuilder.makeExactLiteral(new BigDecimal(10))))), new WatermarkPushDownSpec(rexBuilder.makeCall(SqlStdOperatorTable.MINUS, rexBuilder.makeInputRef(factory.createSqlType(SqlTypeName.TIMESTAMP, 3), 3), rexBuilder.makeIntervalLiteral(BigDecimal.valueOf(1000), new SqlIntervalQualifier(TimeUnit.SECOND, 2, TimeUnit.SECOND, 6, SqlParserPos.ZERO))), 5000, RowType.of(new BigIntType(), new IntType(), new IntType(), new TimestampType(false, TimestampKind.ROWTIME, 3))), new SourceWatermarkSpec(true, RowType.of(new BigIntType(), new IntType(), new IntType(), new TimestampType(false, TimestampKind.ROWTIME, 3))), new LimitPushDownSpec(100), new PartitionPushDownSpec(Arrays.asList(new HashMap<String, String>() {
{
put("p", "A");
}
}, new HashMap<String, String>() {
{
put("p", "B");
}
}))));
return Stream.of(spec1, spec2);
}
Aggregations