Use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.
In the class PushPartitionIntoTableSourceScanRule, the method onMatch:
@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames =
            tableSourceTable
                    .contextResolvedTable()
                    .<ResolvedCatalogTable>getResolvedTable()
                    .getPartitionKeys();

    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate =
            RexUtil.composeConjunction(
                    rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }

    // build pruner
    LogicalType[] partitionFieldTypes =
            partitionFieldNames.stream()
                    .map(name -> {
                        int index = inputFieldNames.indexOf(name);
                        if (index < 0) {
                            throw new TableException(
                                    String.format(
                                            "Partitioned key '%s' isn't found in input columns. "
                                                    + "Validator should have checked that.",
                                            name));
                        }
                        return inputFieldTypes.getFieldList().get(index).getType();
                    })
                    .map(FlinkTypeFactory::toLogicalType)
                    .toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate =
            adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner =
            partitions ->
                    PartitionPruner.prunePartitions(
                            context.getTableConfig(),
                            partitionFieldNames.toArray(new String[0]),
                            partitionFieldTypes,
                            partitions,
                            finalPartitionPredicate);

    // prune partitions
    List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder,
                    context,
                    tableSourceTable,
                    defaultPruner,
                    allPredicates._1(),
                    inputFieldNames);

    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));

    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // clear all information before
                newTableStat = null;
                break;
            } else {
                newTableStat =
                        newTableStat == null
                                ? partitionStats.get()
                                : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic =
            FlinkStatistic.builder()
                    .statistic(tableSourceTable.getStatistic())
                    .tableStats(newTableStat)
                    .build();
    TableSourceTable newTableSourceTable =
            tableSourceTable.copy(
                    dynamicTableSource,
                    newStatistic,
                    new SourceAbilitySpec[] {partitionPushDownSpec});
    LogicalTableScan newScan =
            LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());

    // transform to new node
    RexNode nonPartitionPredicate =
            RexUtil.composeConjunction(
                    rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
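A pattern worth calling out in the statistics-building loop: the rule seeds the builder with the existing FlinkStatistic and overrides only the TableStats. A minimal sketch of that merge-and-rebuild step, with invented row counts and a hypothetical helper class that is not part of Flink:

import org.apache.flink.table.plan.stats.TableStats;
import org.apache.flink.table.planner.plan.stats.FlinkStatistic;

// Hypothetical sketch of the merge-and-rebuild pattern from the loop above.
class PartitionStatsSketch {
    static FlinkStatistic rebuild(FlinkStatistic oldStatistic) {
        TableStats statsA = new TableStats(1_000L); // stats of one partition (invented)
        TableStats statsB = new TableStats(2_500L); // stats of another partition (invented)
        // Merge per-partition stats, as the loop above does for each remaining partition.
        TableStats merged = statsA.merge(statsB);
        // Seed the builder with the old statistic and override only the table stats.
        return FlinkStatistic.builder().statistic(oldStatistic).tableStats(merged).build();
    }
}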
Use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.
In the class PushLimitIntoTableSourceScanRule, the method applyLimit:
private TableSourceTable applyLimit(long limit, FlinkLogicalTableSourceScan scan) {
    TableSourceTable relOptTable = scan.getTable().unwrap(TableSourceTable.class);
    TableSourceTable oldTableSourceTable = relOptTable.unwrap(TableSourceTable.class);
    DynamicTableSource newTableSource = oldTableSourceTable.tableSource().copy();
    LimitPushDownSpec limitPushDownSpec = new LimitPushDownSpec(limit);
    limitPushDownSpec.apply(newTableSource, SourceAbilityContext.from(scan));

    FlinkStatistic statistic = relOptTable.getStatistic();
    final long newRowCount;
    if (statistic.getRowCount() != null) {
        newRowCount = Math.min(limit, statistic.getRowCount().longValue());
    } else {
        newRowCount = limit;
    }
    // update TableStats after limit push down
    TableStats newTableStats = new TableStats(newRowCount);
    FlinkStatistic newStatistic =
            FlinkStatistic.builder().statistic(statistic).tableStats(newTableStats).build();
    return oldTableSourceTable.copy(
            newTableSource, newStatistic, new SourceAbilitySpec[] {limitPushDownSpec});
}
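The statistics update is the part that touches FlinkStatistic: once the LIMIT is pushed into the source, any previously known row count becomes an upper bound, so it is capped at the limit. A standalone sketch of just that capping step (class and method names are invented):

import org.apache.flink.table.plan.stats.TableStats;

// Illustrative helper mirroring the row-count logic above: cap the known row
// count at the pushed-down limit, or fall back to the limit itself when no
// row count is known.
class LimitStatsSketch {
    static TableStats afterLimit(Double oldRowCount, long limit) {
        long newRowCount =
                (oldRowCount != null) ? Math.min(limit, oldRowCount.longValue()) : limit;
        return new TableStats(newRowCount);
    }
}

For example, afterLimit(10_000.0, 100L) yields a TableStats with a row count of 100, and afterLimit(null, 100L) falls back to 100 as well.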
Use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.
In the class CatalogSourceTable, the method createAnonymous:
/**
 * Create a {@link CatalogSourceTable} from an anonymous {@link ContextResolvedTable}. This is
 * required to manually create a preparing table, skipping the Calcite catalog resolution.
 */
public static CatalogSourceTable createAnonymous(
        FlinkRelBuilder relBuilder,
        ContextResolvedTable contextResolvedTable,
        boolean isBatchMode) {
    Preconditions.checkArgument(
            contextResolvedTable.isAnonymous(), "ContextResolvedTable must be anonymous");
    // Statistics are unknown for anonymous tables
    // Look at DatabaseCalciteSchema#getStatistic for more details
    FlinkStatistic flinkStatistic =
            FlinkStatistic.unknown(contextResolvedTable.getResolvedSchema()).build();
    CatalogSchemaTable catalogSchemaTable =
            new CatalogSchemaTable(contextResolvedTable, flinkStatistic, !isBatchMode);
    return new CatalogSourceTable(
            relBuilder.getRelOptSchema(),
            contextResolvedTable.getIdentifier().toList(),
            catalogSchemaTable.getRowType(relBuilder.getTypeFactory()),
            catalogSchemaTable);
}
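For anonymous tables the planner deliberately starts from FlinkStatistic.unknown, which carries the schema but no table stats. A small sketch of building such a statistic for an ad-hoc schema (the column names and types are invented):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.catalog.Column;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.planner.plan.stats.FlinkStatistic;

// Sketch: an "unknown" statistic for an ad-hoc schema, as the factory
// method above does for anonymous tables.
class UnknownStatisticSketch {
    static FlinkStatistic forAdHocSchema() {
        ResolvedSchema schema =
                ResolvedSchema.of(
                        Column.physical("id", DataTypes.BIGINT()),
                        Column.physical("name", DataTypes.STRING()));
        return FlinkStatistic.unknown(schema).build();
    }
}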
Use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.
In the class PushFilterIntoSourceScanRuleBase, the method getNewFlinkStatistic:
protected FlinkStatistic getNewFlinkStatistic(
        TableSourceTable tableSourceTable, int originPredicatesSize, int updatedPredicatesSize) {
    FlinkStatistic oldStatistic = tableSourceTable.getStatistic();
    FlinkStatistic newStatistic;
    if (originPredicatesSize == updatedPredicatesSize) {
        // Keep all statistics if no predicates can be pushed down
        newStatistic = oldStatistic;
    } else if (oldStatistic == FlinkStatistic.UNKNOWN()) {
        newStatistic = oldStatistic;
    } else {
        // Remove tableStats after predicates are pushed down
        newStatistic = FlinkStatistic.builder().statistic(oldStatistic).tableStats(null).build();
    }
    return newStatistic;
}
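Read the three branches as a decision table: nothing pushed down keeps the statistic as-is, an already-UNKNOWN statistic stays UNKNOWN, and a real pushdown drops the TableStats because the source now returns fewer rows than the stats describe. A hypothetical usage from inside a subclass of PushFilterIntoSourceScanRuleBase (the predicate counts are invented):

// Hypothetical calls from inside a subclass; `table` stands for a
// TableSourceTable already in scope.
void illustrateBranches(TableSourceTable table) {
    // 3 predicates before, 3 after: nothing was pushed down, statistic unchanged.
    FlinkStatistic unchanged = getNewFlinkStatistic(table, 3, 3);
    // 3 predicates before, 1 after: two were pushed down, so tableStats are dropped.
    FlinkStatistic withoutStats = getNewFlinkStatistic(table, 3, 1);
}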
Use of org.apache.flink.table.planner.plan.stats.FlinkStatistic in project flink by apache.
In the class DynamicSourceUtils, the method convertDataStreamToRel:
/**
 * Converts a given {@link DataStream} to a {@link RelNode}. It adds helper projections if
 * necessary.
 */
public static RelNode convertDataStreamToRel(
        boolean isBatchMode,
        ReadableConfig config,
        FlinkRelBuilder relBuilder,
        ContextResolvedTable contextResolvedTable,
        DataStream<?> dataStream,
        DataType physicalDataType,
        boolean isTopLevelRecord,
        ChangelogMode changelogMode) {
    final DynamicTableSource tableSource =
            new ExternalDynamicSource<>(
                    contextResolvedTable.getIdentifier(),
                    dataStream,
                    physicalDataType,
                    isTopLevelRecord,
                    changelogMode);
    final FlinkStatistic statistic =
            FlinkStatistic.unknown(contextResolvedTable.getResolvedSchema()).build();
    return convertSourceToRel(
            isBatchMode,
            config,
            relBuilder,
            contextResolvedTable,
            statistic,
            Collections.emptyList(),
            tableSource);
}
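As in createAnonymous above, a DataStream-backed table always starts from FlinkStatistic.unknown(...): the planner has no way to estimate the cardinality of an arbitrary external stream at plan time, so only the schema-derived information in the unknown statistic is made available to the optimizer.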