Use of org.apache.flink.table.plan.stats.TableStats in the Apache Flink project.
Class PushPartitionIntoTableSourceScanRule, method onMatch.
/**
 * Pushes the partition-related part of a filter condition into the underlying table source scan.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Split the filter condition into partition predicates and remaining predicates via
 *       {@code RexNodeExtractor.extractPartitionPredicateList}. If the conjunction of the
 *       partition predicates is always true, the rule returns without transforming anything.
 *   <li>Resolve the logical type of every partition key from the filter input row type.
 *   <li>Determine the remaining partitions through {@code readPartitionsAndPrune}, handing it a
 *       default pruner built on {@code PartitionPruner.prunePartitions}.
 *   <li>Apply a {@link PartitionPushDownSpec} with the remaining partitions to a copy of the
 *       table source.
 *   <li>For permanent tables, merge the statistics of all remaining partitions; as soon as one
 *       partition has no statistics the merged statistics are dropped ({@code null}).
 *   <li>Replace the matched nodes by the new scan, wrapped in a filter that carries the
 *       remaining predicates unless those are always true.
 * </ol>
 *
 * @param call the rule call; rel 0 is the {@link Filter}, rel 1 the {@link LogicalTableScan}
 * @throws TableException if a partition key is not part of the filter input columns
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Filter filter = call.rel(0);
    final LogicalTableScan scan = call.rel(1);
    final TableSourceTable sourceTable = scan.getTable().unwrap(TableSourceTable.class);

    final RelDataType inputRowType = filter.getInput().getRowType();
    final List<String> inputFieldNames = inputRowType.getFieldNames();
    final ResolvedCatalogTable resolvedCatalogTable =
            sourceTable.contextResolvedTable().getResolvedTable();
    final List<String> partitionKeys = resolvedCatalogTable.getPartitionKeys();

    // Separate predicates that only touch partition keys from everything else.
    final RelBuilder relBuilder = call.builder();
    final RexBuilder rexBuilder = relBuilder.getRexBuilder();
    final Tuple2<Seq<RexNode>, Seq<RexNode>> splitPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionKeys.toArray(new String[0]));
    final Seq<RexNode> partitionPredicates = splitPredicates._1();
    final RexNode partitionCondition =
            RexUtil.composeConjunction(
                    rexBuilder, JavaConversions.seqAsJavaList(partitionPredicates));
    if (partitionCondition.isAlwaysTrue()) {
        // Nothing restricts the partitions, so there is nothing to push down.
        return;
    }

    // Look up the logical type of each partition key in the input row type.
    final LogicalType[] partitionKeyTypes = new LogicalType[partitionKeys.size()];
    for (int i = 0; i < partitionKeyTypes.length; i++) {
        final String partitionKey = partitionKeys.get(i);
        final int position = inputFieldNames.indexOf(partitionKey);
        if (position < 0) {
            throw new TableException(String.format("Partitioned key '%s' isn't found in input columns. " + "Validator should have checked that.", partitionKey));
        }
        partitionKeyTypes[i] =
                FlinkTypeFactory.toLogicalType(
                        inputRowType.getFieldList().get(position).getType());
    }

    // Build the fallback pruner; its arguments are evaluated only when it is invoked.
    final RexNode adjustedPartitionCondition =
            adjustPartitionPredicate(inputFieldNames, partitionKeys, partitionCondition);
    final FlinkContext flinkContext = ShortcutUtils.unwrapContext(scan);
    final Function<List<Map<String, String>>, List<Map<String, String>>> fallbackPruner =
            candidates ->
                    PartitionPruner.prunePartitions(
                            flinkContext.getTableConfig(),
                            partitionKeys.toArray(new String[0]),
                            partitionKeyTypes,
                            candidates,
                            adjustedPartitionCondition);

    // Determine which partitions survive the partition predicates.
    final List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder,
                    flinkContext,
                    sourceTable,
                    fallbackPruner,
                    partitionPredicates,
                    inputFieldNames);

    // Hand the surviving partitions to a copy of the source.
    final DynamicTableSource prunedSource = sourceTable.tableSource().copy();
    final PartitionPushDownSpec pushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    pushDownSpec.apply(prunedSource, SourceAbilityContext.from(scan));

    // Recompute table statistics from the surviving partitions (permanent tables only).
    TableStats mergedStats = null;
    if (sourceTable.contextResolvedTable().isPermanent()) {
        final ObjectIdentifier tableIdentifier =
                sourceTable.contextResolvedTable().getIdentifier();
        final ObjectPath tablePath = tableIdentifier.toObjectPath();
        final Catalog catalog = sourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partitionSpec : remainingPartitions) {
            final Optional<TableStats> statsOfPartition =
                    getPartitionStats(catalog, tablePath, partitionSpec);
            if (!statsOfPartition.isPresent()) {
                // One partition without statistics invalidates everything merged so far.
                mergedStats = null;
                break;
            }
            final TableStats current = statsOfPartition.get();
            mergedStats = (mergedStats != null) ? mergedStats.merge(current) : current;
        }
    }
    final FlinkStatistic prunedStatistic =
            FlinkStatistic.builder()
                    .statistic(sourceTable.getStatistic())
                    .tableStats(mergedStats)
                    .build();
    final TableSourceTable prunedTable =
            sourceTable.copy(
                    prunedSource, prunedStatistic, new SourceAbilitySpec[] {pushDownSpec});
    final LogicalTableScan prunedScan =
            LogicalTableScan.create(scan.getCluster(), prunedTable, scan.getHints());

    // Keep a filter on top only if non-partition predicates are left over.
    final RexNode remainingCondition =
            RexUtil.composeConjunction(
                    rexBuilder, JavaConversions.seqAsJavaList(splitPredicates._2()));
    if (!remainingCondition.isAlwaysTrue()) {
        final Filter remainingFilter =
                filter.copy(filter.getTraitSet(), prunedScan, remainingCondition);
        call.transformTo(remainingFilter);
        return;
    }
    call.transformTo(prunedScan);
}
Use of org.apache.flink.table.plan.stats.TableStats in the Apache Flink project.
Class PushPartitionIntoTableSourceScanRule, method getPartitionStats.
/**
 * Fetches the statistics of one partition from the catalog and converts them to
 * {@link TableStats}.
 *
 * @param catalog catalog queried for the partition's table and column statistics
 * @param tablePath path of the partitioned table inside the catalog
 * @param partition partition spec used to build the {@link CatalogPartitionSpec}
 * @return the converted statistics, or {@link Optional#empty()} when a
 *     {@link PartitionNotExistException} is thrown during the lookup
 */
private Optional<TableStats> getPartitionStats(Catalog catalog, ObjectPath tablePath, Map<String, String> partition) {
    try {
        final CatalogPartitionSpec partitionSpec = new CatalogPartitionSpec(partition);
        final CatalogTableStatistics tableStatistics =
                catalog.getPartitionStatistics(tablePath, partitionSpec);
        final CatalogColumnStatistics columnStatistics =
                catalog.getPartitionColumnStatistics(tablePath, partitionSpec);
        return Optional.of(
                CatalogTableStatisticsConverter.convertToTableStats(
                        tableStatistics, columnStatistics));
    } catch (PartitionNotExistException ignored) {
        // A missing partition is reported to the caller as "no statistics available".
        return Optional.empty();
    }
}
Use of org.apache.flink.table.plan.stats.TableStats in the Apache Flink project.
Class PushLimitIntoTableSourceScanRule, method applyLimit.
/**
 * Creates a copy of the scanned table whose source has the given limit applied and whose
 * statistics reflect that limit.
 *
 * <p>The new row count is the limit itself when the current statistic has no row count,
 * otherwise the smaller of the limit and the current row count (truncated to {@code long}).
 *
 * @param limit the limit handed to the {@link LimitPushDownSpec}
 * @param scan the scan whose table is copied
 * @return a copy of the table carrying the limited source, the updated statistic and the
 *     limit push-down spec
 */
private TableSourceTable applyLimit(long limit, FlinkLogicalTableSourceScan scan) {
    final TableSourceTable scanTable = scan.getTable().unwrap(TableSourceTable.class);
    // Second unwrap retained as-is so the call sequence matches the existing behaviour.
    final TableSourceTable sourceTable = scanTable.unwrap(TableSourceTable.class);

    final DynamicTableSource limitedSource = sourceTable.tableSource().copy();
    final LimitPushDownSpec limitSpec = new LimitPushDownSpec(limit);
    limitSpec.apply(limitedSource, SourceAbilityContext.from(scan));

    // Update TableStats after limit push down: the row count can never exceed the limit.
    final FlinkStatistic currentStatistic = scanTable.getStatistic();
    final long cappedRowCount =
            currentStatistic.getRowCount() == null
                    ? limit
                    : Math.min(limit, currentStatistic.getRowCount().longValue());
    final FlinkStatistic limitedStatistic =
            FlinkStatistic.builder()
                    .statistic(currentStatistic)
                    .tableStats(new TableStats(cappedRowCount))
                    .build();

    return sourceTable.copy(
            limitedSource, limitedStatistic, new SourceAbilitySpec[] {limitSpec});
}
Aggregations