use of org.apache.flink.connector.file.table.format.BulkDecodingFormat in project flink by apache.
the class FileSystemTableSource method getScanRuntimeProvider.
/**
 * Builds the runtime provider that scans this table.
 *
 * <p>The method works in three steps: it short-circuits to an empty source when a
 * partitioned table has no partitions, it derives which metadata columns and partition
 * columns of {@code producedDataType} have to be extracted, and it finally creates a
 * {@link BulkFormat} from either the bulk reader format or the deserialization format.
 *
 * @param scanContext context handed to the decoding formats and used to create type
 *     information
 * @return a provider over an empty collection when there is nothing to read, otherwise the
 *     provider returned by {@code createSourceProvider} for the wrapped bulk format
 * @throws TableException if neither a bulk reader format nor a deserialization format is set
 */
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
    // Partition keys are declared but the partition list is empty: nothing to read, so
    // return a source over an empty collection.
    if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
        return InputFormatProvider.of(new CollectionInputFormat<>(new ArrayList<>(), null));
    }

    final List<String> producedFieldNames = DataType.getFieldNames(producedDataType);

    // Resolve only the metadata keys that actually appear in producedDataType, keeping the
    // order of the produced fields.
    final List<String> declaredMetadataKeys =
            this.metadataKeys != null ? this.metadataKeys : Collections.emptyList();
    final List<ReadableFileInfo> metadataToExtract = new ArrayList<>();
    for (String fieldName : producedFieldNames) {
        if (declaredMetadataKeys.contains(fieldName)) {
            metadataToExtract.add(ReadableFileInfo.resolve(fieldName));
        }
    }

    // Likewise keep only the partition columns that appear in producedDataType.
    final List<String> partitionKeysToExtract = new ArrayList<>();
    for (String fieldName : producedFieldNames) {
        if (this.partitionKeys.contains(fieldName)) {
            partitionKeysToExtract.add(fieldName);
        }
    }

    // Both projections below are expressed against the full physical row type.
    final Projection partitionKeysProjections =
            Projection.fromFieldNames(physicalRowDataType, partitionKeysToExtract);
    final Projection requestedProjection =
            projectFields == null
                    ? Projection.all(physicalRowDataType)
                    : Projection.of(projectFields);
    final Projection physicalProjections =
            requestedProjection.difference(partitionKeysProjections);

    // The type handed to the decoders: the physical row type with the extracted partition
    // columns removed, in schema order.
    final DataType physicalDataType =
            partitionKeysProjections.complement(physicalRowDataType).project(physicalRowDataType);

    final BulkFormat<RowData, FileSourceSplit> decodingFormat;
    if (bulkReaderFormat != null) {
        // Hand the filters to the format first, when it is a BulkDecodingFormat.
        final boolean hasFilters = filters != null && !filters.isEmpty();
        if (bulkReaderFormat instanceof BulkDecodingFormat && hasFilters) {
            ((BulkDecodingFormat<RowData>) bulkReaderFormat).applyFilters(filters);
        }

        if (bulkReaderFormat instanceof ProjectableDecodingFormat) {
            // The format projects by itself, nested projections included.
            final ProjectableDecodingFormat<BulkFormat<RowData, FileSourceSplit>> projectable =
                    (ProjectableDecodingFormat<BulkFormat<RowData, FileSourceSplit>>)
                            bulkReaderFormat;
            decodingFormat =
                    projectable.createRuntimeDecoder(
                            scanContext, physicalDataType, physicalProjections.toNestedIndexes());
        } else {
            // Otherwise decode the whole physical type and project top-level fields after.
            decodingFormat =
                    new ProjectingBulkFormat(
                            bulkReaderFormat.createRuntimeDecoder(scanContext, physicalDataType),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(
                                    physicalProjections.project(physicalDataType)));
        }
    } else if (deserializationFormat != null) {
        if (deserializationFormat instanceof ProjectableDecodingFormat) {
            final ProjectableDecodingFormat<DeserializationSchema<RowData>> projectable =
                    (ProjectableDecodingFormat<DeserializationSchema<RowData>>)
                            deserializationFormat;
            decodingFormat =
                    new DeserializationSchemaAdapter(
                            projectable.createRuntimeDecoder(
                                    scanContext,
                                    physicalDataType,
                                    physicalProjections.toNestedIndexes()));
        } else {
            decodingFormat =
                    new ProjectingBulkFormat(
                            new DeserializationSchemaAdapter(
                                    deserializationFormat.createRuntimeDecoder(
                                            scanContext, physicalDataType)),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(
                                    physicalProjections.project(physicalDataType)));
        }
    } else {
        throw new TableException("Can not find format factory.");
    }

    // Both format kinds share the same tail: wrap the format together with the metadata
    // and partition columns to extract, then build the source provider from it.
    return createSourceProvider(
            wrapBulkFormat(
                    scanContext,
                    decodingFormat,
                    producedDataType,
                    metadataToExtract,
                    partitionKeysToExtract));
}
Aggregations