Use of org.apache.flink.table.catalog.ResolvedCatalogTable in project flink by apache.
The class PushPartitionIntoTableSourceScanRule, method onMatch: the rule reads the partition keys from the scan's ResolvedCatalogTable, splits the filter condition into partition and non-partition predicates, prunes the table's partitions, and pushes the remaining partitions into the DynamicTableSource.
@Override
public void onMatch(RelOptRuleCall call) {
    Filter filter = call.rel(0);
    LogicalTableScan scan = call.rel(1);
    TableSourceTable tableSourceTable = scan.getTable().unwrap(TableSourceTable.class);
    RelDataType inputFieldTypes = filter.getInput().getRowType();
    List<String> inputFieldNames = inputFieldTypes.getFieldNames();
    List<String> partitionFieldNames =
            tableSourceTable
                    .contextResolvedTable()
                    .<ResolvedCatalogTable>getResolvedTable()
                    .getPartitionKeys();
    // extract partition predicates
    RelBuilder relBuilder = call.builder();
    RexBuilder rexBuilder = relBuilder.getRexBuilder();
    // _1 holds the predicates on partition columns, _2 the remaining predicates
    Tuple2<Seq<RexNode>, Seq<RexNode>> allPredicates =
            RexNodeExtractor.extractPartitionPredicateList(
                    filter.getCondition(),
                    FlinkRelOptUtil.getMaxCnfNodeCount(scan),
                    inputFieldNames.toArray(new String[0]),
                    rexBuilder,
                    partitionFieldNames.toArray(new String[0]));
    RexNode partitionPredicate =
            RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._1));
    if (partitionPredicate.isAlwaysTrue()) {
        return;
    }
    // build pruner
    LogicalType[] partitionFieldTypes =
            partitionFieldNames.stream()
                    .map(
                            name -> {
                                int index = inputFieldNames.indexOf(name);
                                if (index < 0) {
                                    throw new TableException(
                                            String.format(
                                                    "Partitioned key '%s' isn't found in input columns. "
                                                            + "Validator should have checked that.",
                                                    name));
                                }
                                return inputFieldTypes.getFieldList().get(index).getType();
                            })
                    .map(FlinkTypeFactory::toLogicalType)
                    .toArray(LogicalType[]::new);
    RexNode finalPartitionPredicate =
            adjustPartitionPredicate(inputFieldNames, partitionFieldNames, partitionPredicate);
    FlinkContext context = ShortcutUtils.unwrapContext(scan);
    Function<List<Map<String, String>>, List<Map<String, String>>> defaultPruner =
            partitions ->
                    PartitionPruner.prunePartitions(
                            context.getTableConfig(),
                            partitionFieldNames.toArray(new String[0]),
                            partitionFieldTypes,
                            partitions,
                            finalPartitionPredicate);
    // prune partitions
    List<Map<String, String>> remainingPartitions =
            readPartitionsAndPrune(
                    rexBuilder,
                    context,
                    tableSourceTable,
                    defaultPruner,
                    allPredicates._1(),
                    inputFieldNames);
    // apply push down
    DynamicTableSource dynamicTableSource = tableSourceTable.tableSource().copy();
    PartitionPushDownSpec partitionPushDownSpec = new PartitionPushDownSpec(remainingPartitions);
    partitionPushDownSpec.apply(dynamicTableSource, SourceAbilityContext.from(scan));
    // build new statistic
    TableStats newTableStat = null;
    if (tableSourceTable.contextResolvedTable().isPermanent()) {
        ObjectIdentifier identifier = tableSourceTable.contextResolvedTable().getIdentifier();
        ObjectPath tablePath = identifier.toObjectPath();
        Catalog catalog = tableSourceTable.contextResolvedTable().getCatalog().get();
        for (Map<String, String> partition : remainingPartitions) {
            Optional<TableStats> partitionStats = getPartitionStats(catalog, tablePath, partition);
            if (!partitionStats.isPresent()) {
                // clear all information before
                newTableStat = null;
                break;
            } else {
                newTableStat =
                        newTableStat == null
                                ? partitionStats.get()
                                : newTableStat.merge(partitionStats.get());
            }
        }
    }
    FlinkStatistic newStatistic =
            FlinkStatistic.builder()
                    .statistic(tableSourceTable.getStatistic())
                    .tableStats(newTableStat)
                    .build();
    TableSourceTable newTableSourceTable =
            tableSourceTable.copy(
                    dynamicTableSource,
                    newStatistic,
                    new SourceAbilitySpec[] {partitionPushDownSpec});
    LogicalTableScan newScan =
            LogicalTableScan.create(scan.getCluster(), newTableSourceTable, scan.getHints());
    // transform to new node
    RexNode nonPartitionPredicate =
            RexUtil.composeConjunction(rexBuilder, JavaConversions.seqAsJavaList(allPredicates._2()));
    if (nonPartitionPredicate.isAlwaysTrue()) {
        call.transformTo(newScan);
    } else {
        Filter newFilter = filter.copy(filter.getTraitSet(), newScan, nonPartitionPredicate);
        call.transformTo(newFilter);
    }
}
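The partition keys read at the top of onMatch come from the ResolvedCatalogTable behind the scan. A minimal sketch of such a partitioned table, assuming an illustrative schema and a filesystem connector (the table layout and options are placeholders, not taken from the Flink sources; imports from org.apache.flink.table.api and org.apache.flink.table.catalog are omitted to match the snippets on this page):

// Sketch: a ResolvedCatalogTable with partition keys, as consumed by
// PushPartitionIntoTableSourceScanRule via getPartitionKeys().
// Columns and options are illustrative assumptions.
ResolvedSchema schema =
        new ResolvedSchema(
                Arrays.asList(
                        Column.physical("a", DataTypes.BIGINT()),
                        Column.physical("p", DataTypes.STRING())),
                Collections.emptyList(),
                null);
CatalogTable rawTable =
        CatalogTable.of(
                Schema.newBuilder().fromResolvedSchema(schema).build(),
                null,
                Collections.singletonList("p"), // partition keys
                Collections.singletonMap("connector", "filesystem"));
ResolvedCatalogTable resolvedTable = new ResolvedCatalogTable(rawTable, schema);
// resolvedTable.getPartitionKeys() returns ["p"]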
Use of org.apache.flink.table.catalog.ResolvedCatalogTable in project flink by apache.
The class DynamicSinkUtils, method convertCollectToRel: derives a physical ResolvedSchema from the child operation, wraps it in a ResolvedCatalogTable backed by an ExternalCatalogTable, registers it as an anonymous ContextResolvedTable, and plans a CollectDynamicSink for TableResult#collect().
/**
 * Converts a {@link TableResult#collect()} sink to a {@link RelNode}.
 */
public static RelNode convertCollectToRel(
        FlinkRelBuilder relBuilder,
        RelNode input,
        CollectModifyOperation collectModifyOperation,
        ReadableConfig configuration,
        ClassLoader classLoader) {
    final DataTypeFactory dataTypeFactory =
            unwrapContext(relBuilder).getCatalogManager().getDataTypeFactory();
    final ResolvedSchema childSchema = collectModifyOperation.getChild().getResolvedSchema();
    final ResolvedSchema schema =
            ResolvedSchema.physical(childSchema.getColumnNames(), childSchema.getColumnDataTypes());
    final ResolvedCatalogTable catalogTable =
            new ResolvedCatalogTable(
                    new ExternalCatalogTable(Schema.newBuilder().fromResolvedSchema(schema).build()),
                    schema);
    final ContextResolvedTable contextResolvedTable =
            ContextResolvedTable.anonymous("collect", catalogTable);
    final DataType consumedDataType = fixCollectDataType(dataTypeFactory, schema);
    final String zone = configuration.get(TableConfigOptions.LOCAL_TIME_ZONE);
    final ZoneId zoneId =
            TableConfigOptions.LOCAL_TIME_ZONE.defaultValue().equals(zone)
                    ? ZoneId.systemDefault()
                    : ZoneId.of(zone);
    final CollectDynamicSink tableSink =
            new CollectDynamicSink(
                    contextResolvedTable.getIdentifier(),
                    consumedDataType,
                    configuration.get(CollectSinkOperatorFactory.MAX_BATCH_SIZE),
                    configuration.get(CollectSinkOperatorFactory.SOCKET_TIMEOUT),
                    classLoader,
                    zoneId,
                    configuration
                            .get(ExecutionConfigOptions.TABLE_EXEC_LEGACY_CAST_BEHAVIOUR)
                            .isEnabled());
    collectModifyOperation.setSelectResultProvider(tableSink.getSelectResultProvider());
    collectModifyOperation.setConsumedDataType(consumedDataType);
    return convertSinkToRel(
            relBuilder,
            input,
            // dynamicOptions
            Collections.emptyMap(),
            contextResolvedTable,
            // staticPartitions
            Collections.emptyMap(),
            false,
            tableSink);
}
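convertCollectToRel registers its sink table anonymously, while the serde tests further down pin their tables to an explicit identifier. A rough sketch of the two ContextResolvedTable entry points used on this page, assuming a placeholder schema and identifier:

// Sketch: the two ContextResolvedTable factory methods seen in these snippets.
// The schema, the "collect" hint, and the identifier values are placeholders.
ResolvedSchema schema =
        ResolvedSchema.physical(
                Collections.singletonList("a"), Collections.singletonList(DataTypes.INT()));
ResolvedCatalogTable table =
        new ResolvedCatalogTable(
                CatalogTable.of(
                        Schema.newBuilder().fromResolvedSchema(schema).build(),
                        null,
                        Collections.emptyList(),
                        Collections.emptyMap()),
                schema);
// anonymous: not registered in a catalog; used for the collect sink above
ContextResolvedTable anonymous = ContextResolvedTable.anonymous("collect", table);
// temporary: resolved against an explicit ObjectIdentifier; used by the serde tests below
ContextResolvedTable temporary =
        ContextResolvedTable.temporary(
                ObjectIdentifier.of("default_catalog", "default_database", "MyTable"), table);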
Use of org.apache.flink.table.catalog.ResolvedCatalogTable in project flink by apache.
The class DynamicTableSourceSpecSerdeTest, method testDynamicTableSourceSpecSerde: builds DynamicTableSourceSpec fixtures around temporary ResolvedCatalogTable instances, the second of which carries a full set of push-down ability specs (projection, metadata, filter, watermark, source watermark, limit, partition).
public static Stream<DynamicTableSourceSpec> testDynamicTableSourceSpecSerde() {
    Map<String, String> options1 = new HashMap<>();
    options1.put("connector", FileSystemTableFactory.IDENTIFIER);
    options1.put("format", TestCsvFormatFactory.IDENTIFIER);
    options1.put("path", "/tmp");
    final ResolvedSchema resolvedSchema1 =
            new ResolvedSchema(
                    Collections.singletonList(Column.physical("a", DataTypes.BIGINT())),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable1 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema1).build(),
                    null,
                    Collections.emptyList(),
                    options1);
    DynamicTableSourceSpec spec1 =
            new DynamicTableSourceSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable1, resolvedSchema1)),
                    null);
    Map<String, String> options2 = new HashMap<>();
    options2.put("connector", TestValuesTableFactory.IDENTIFIER);
    options2.put("disable-lookup", "true");
    options2.put("enable-watermark-push-down", "true");
    options2.put("filterable-fields", "b");
    options2.put("bounded", "false");
    options2.put("readable-metadata", "m1:INT, m2:STRING");
    final ResolvedSchema resolvedSchema2 =
            new ResolvedSchema(
                    Arrays.asList(
                            Column.physical("a", DataTypes.BIGINT()),
                            Column.physical("b", DataTypes.INT()),
                            Column.physical("c", DataTypes.STRING()),
                            Column.physical("p", DataTypes.STRING()),
                            Column.metadata("m1", DataTypes.INT(), null, false),
                            Column.metadata("m2", DataTypes.STRING(), null, false),
                            Column.physical("ts", DataTypes.TIMESTAMP(3))),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable2 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema2).build(),
                    null,
                    Collections.emptyList(),
                    options2);
    FlinkTypeFactory factory = FlinkTypeFactory.INSTANCE();
    RexBuilder rexBuilder = new RexBuilder(factory);
    DynamicTableSourceSpec spec2 =
            new DynamicTableSourceSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable2, resolvedSchema2)),
                    Arrays.asList(
                            new ProjectPushDownSpec(
                                    new int[][] {{0}, {1}, {4}, {6}},
                                    RowType.of(
                                            new LogicalType[] {
                                                new BigIntType(),
                                                new IntType(),
                                                new IntType(),
                                                new TimestampType(3)
                                            },
                                            new String[] {"a", "b", "m1", "ts"})),
                            new ReadingMetadataSpec(
                                    Arrays.asList("m1", "m2"),
                                    RowType.of(
                                            new LogicalType[] {
                                                new BigIntType(),
                                                new IntType(),
                                                new IntType(),
                                                new TimestampType(3)
                                            },
                                            new String[] {"a", "b", "m1", "ts"})),
                            new FilterPushDownSpec(
                                    Collections.singletonList(
                                            // b >= 10
                                            rexBuilder.makeCall(
                                                    SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
                                                    rexBuilder.makeInputRef(
                                                            factory.createSqlType(SqlTypeName.INTEGER),
                                                            1),
                                                    rexBuilder.makeExactLiteral(new BigDecimal(10))))),
                            new WatermarkPushDownSpec(
                                    rexBuilder.makeCall(
                                            SqlStdOperatorTable.MINUS,
                                            rexBuilder.makeInputRef(
                                                    factory.createSqlType(SqlTypeName.TIMESTAMP, 3), 3),
                                            rexBuilder.makeIntervalLiteral(
                                                    BigDecimal.valueOf(1000),
                                                    new SqlIntervalQualifier(
                                                            TimeUnit.SECOND,
                                                            2,
                                                            TimeUnit.SECOND,
                                                            6,
                                                            SqlParserPos.ZERO))),
                                    5000,
                                    RowType.of(
                                            new BigIntType(),
                                            new IntType(),
                                            new IntType(),
                                            new TimestampType(false, TimestampKind.ROWTIME, 3))),
                            new SourceWatermarkSpec(
                                    true,
                                    RowType.of(
                                            new BigIntType(),
                                            new IntType(),
                                            new IntType(),
                                            new TimestampType(false, TimestampKind.ROWTIME, 3))),
                            new LimitPushDownSpec(100),
                            new PartitionPushDownSpec(
                                    Arrays.asList(
                                            new HashMap<String, String>() {
                                                {
                                                    put("p", "A");
                                                }
                                            },
                                            new HashMap<String, String>() {
                                                {
                                                    put("p", "B");
                                                }
                                            }))));
    return Stream.of(spec1, spec2);
}
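The returned Stream is the fixture source for a parameterized serde test. A minimal sketch of how such a fixture method is typically consumed with JUnit 5 (the annotations are from org.junit.jupiter.params; the assertion body is a placeholder, not the actual JSON round-trip check performed by the Flink test):

// Sketch: consuming the fixture stream with a JUnit 5 parameterized test.
@ParameterizedTest
@MethodSource("testDynamicTableSourceSpecSerde")
void serde(DynamicTableSourceSpec spec) {
    // e.g. serialize 'spec' to JSON, read it back, and compare with the original
    assertThat(spec).isNotNull();
}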
Use of org.apache.flink.table.catalog.ResolvedCatalogTable in project flink by apache.
The class DynamicTableSinkSpecSerdeTest, method testDynamicTableSinkSpecSerde: builds three DynamicTableSinkSpec fixtures around temporary ResolvedCatalogTable instances, covering a plain sink, overwrite plus static partitioning, and writable metadata.
static Stream<DynamicTableSinkSpec> testDynamicTableSinkSpecSerde() {
    Map<String, String> options1 = new HashMap<>();
    options1.put("connector", FileSystemTableFactory.IDENTIFIER);
    options1.put("format", TestCsvFormatFactory.IDENTIFIER);
    options1.put("path", "/tmp");
    final ResolvedSchema resolvedSchema1 =
            new ResolvedSchema(
                    Collections.singletonList(Column.physical("a", DataTypes.BIGINT())),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable1 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema1).build(),
                    null,
                    Collections.emptyList(),
                    options1);
    DynamicTableSinkSpec spec1 =
            new DynamicTableSinkSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable1, resolvedSchema1)),
                    null);
    Map<String, String> options2 = new HashMap<>();
    options2.put("connector", FileSystemTableFactory.IDENTIFIER);
    options2.put("format", TestCsvFormatFactory.IDENTIFIER);
    options2.put("path", "/tmp");
    final ResolvedSchema resolvedSchema2 =
            new ResolvedSchema(
                    Arrays.asList(
                            Column.physical("a", DataTypes.BIGINT()),
                            Column.physical("b", DataTypes.INT()),
                            Column.physical("p", DataTypes.STRING())),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable2 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema2).build(),
                    null,
                    Collections.emptyList(),
                    options2);
    DynamicTableSinkSpec spec2 =
            new DynamicTableSinkSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable2, resolvedSchema2)),
                    Arrays.asList(
                            new OverwriteSpec(true),
                            new PartitioningSpec(
                                    new HashMap<String, String>() {
                                        {
                                            put("p", "A");
                                        }
                                    })));
    Map<String, String> options3 = new HashMap<>();
    options3.put("connector", TestValuesTableFactory.IDENTIFIER);
    options3.put("writable-metadata", "m:STRING");
    final ResolvedSchema resolvedSchema3 =
            new ResolvedSchema(
                    Arrays.asList(
                            Column.physical("a", DataTypes.BIGINT()),
                            Column.physical("b", DataTypes.INT()),
                            Column.metadata("m", DataTypes.STRING(), null, false)),
                    Collections.emptyList(),
                    null);
    final CatalogTable catalogTable3 =
            CatalogTable.of(
                    Schema.newBuilder().fromResolvedSchema(resolvedSchema3).build(),
                    null,
                    Collections.emptyList(),
                    options3);
    DynamicTableSinkSpec spec3 =
            new DynamicTableSinkSpec(
                    ContextResolvedTable.temporary(
                            ObjectIdentifier.of(
                                    DEFAULT_BUILTIN_CATALOG, DEFAULT_BUILTIN_DATABASE, "MyTable"),
                            new ResolvedCatalogTable(catalogTable3, resolvedSchema3)),
                    Collections.singletonList(
                            new WritingMetadataSpec(
                                    Collections.singletonList("m"),
                                    RowType.of(new BigIntType(), new IntType()))));
    return Stream.of(spec1, spec2, spec3);
}
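Both serde fixtures above mix physical and metadata columns. As a brief aside, a sketch of the two Column factory methods as they appear in these schemas (the column names here are only illustrative):

// Sketch: physical vs. metadata columns as used in the ResolvedSchema fixtures.
Column a = Column.physical("a", DataTypes.BIGINT()); // regular data column
// metadata key 'null' means the column name doubles as the key; 'false' = not virtual
Column m = Column.metadata("m", DataTypes.STRING(), null, false);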
Use of org.apache.flink.table.catalog.ResolvedCatalogTable in project flink by apache.
The class QueryOperationTest, method testWindowAggregationSummaryString: verifies the summary string of a Distinct over a session-window aggregation whose source is a temporary ResolvedCatalogTable.
@Test
public void testWindowAggregationSummaryString() {
    ResolvedSchema schema =
            ResolvedSchema.physical(
                    Collections.singletonList("a"), Collections.singletonList(DataTypes.INT()));
    FieldReferenceExpression field = new FieldReferenceExpression("a", DataTypes.INT(), 0, 0);
    WindowAggregateQueryOperation tableOperation =
            new WindowAggregateQueryOperation(
                    Collections.singletonList(field),
                    Collections.singletonList(
                            CallExpression.permanent(
                                    BuiltInFunctionDefinitions.SUM,
                                    Collections.singletonList(field),
                                    DataTypes.INT())),
                    Collections.emptyList(),
                    WindowAggregateQueryOperation.ResolvedGroupWindow.sessionWindow(
                            "w", field, intervalOfMillis(10)),
                    new SourceQueryOperation(
                            ContextResolvedTable.temporary(
                                    ObjectIdentifier.of("cat1", "db1", "tab1"),
                                    new ResolvedCatalogTable(
                                            CatalogTable.of(
                                                    Schema.newBuilder().build(),
                                                    null,
                                                    Collections.emptyList(),
                                                    Collections.emptyMap()),
                                            schema))),
                    schema);
    DistinctQueryOperation distinctQueryOperation = new DistinctQueryOperation(tableOperation);
    assertEquals(
            "Distinct:\n"
                    + " WindowAggregate: (group: [a], agg: [sum(a)], windowProperties: [],"
                    + " window: [SessionWindow(field: [a], gap: [10])])\n"
                    + " CatalogTable: (identifier: [cat1.db1.tab1], fields: [a])",
            distinctQueryOperation.asSummaryString());
}