Use of org.apache.flink.connector.file.src.reader.BulkFormat in project flink by apache.
The class FileSystemTableSource, method getScanRuntimeProvider:
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
    // When this partitioned table has no partitions, just return an empty source.
    if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
        return InputFormatProvider.of(new CollectionInputFormat<>(new ArrayList<>(), null));
    }

    // Resolve metadata and make sure to filter out metadata not in the producedDataType
    final List<String> metadataKeys =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(((this.metadataKeys == null) ? Collections.emptyList() : this.metadataKeys)::contains)
                    .collect(Collectors.toList());
    final List<ReadableFileInfo> metadataToExtract =
            metadataKeys.stream().map(ReadableFileInfo::resolve).collect(Collectors.toList());

    // Filter out partition columns not in producedDataType
    final List<String> partitionKeysToExtract =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(this.partitionKeys::contains)
                    .collect(Collectors.toList());

    // Compute the physical projection and the physical data type, that is,
    // the type without partition columns and metadata, in the same order as the schema
    DataType physicalDataType = physicalRowDataType;
    final Projection partitionKeysProjections =
            Projection.fromFieldNames(physicalDataType, partitionKeysToExtract);
    final Projection physicalProjections =
            (projectFields != null ? Projection.of(projectFields) : Projection.all(physicalDataType))
                    .difference(partitionKeysProjections);
    physicalDataType =
            partitionKeysProjections.complement(physicalDataType).project(physicalDataType);
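
    // Prefer the batch-oriented BulkFormat when one is configured; otherwise fall back to
    // adapting the row-wise DeserializationSchema below.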
    if (bulkReaderFormat != null) {
        if (bulkReaderFormat instanceof BulkDecodingFormat && filters != null && filters.size() > 0) {
            ((BulkDecodingFormat<RowData>) bulkReaderFormat).applyFilters(filters);
        }
        BulkFormat<RowData, FileSourceSplit> format;
        if (bulkReaderFormat instanceof ProjectableDecodingFormat) {
            format =
                    ((ProjectableDecodingFormat<BulkFormat<RowData, FileSourceSplit>>) bulkReaderFormat)
                            .createRuntimeDecoder(
                                    scanContext, physicalDataType, physicalProjections.toNestedIndexes());
        } else {
            format =
                    new ProjectingBulkFormat(
                            bulkReaderFormat.createRuntimeDecoder(scanContext, physicalDataType),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(
                scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else if (deserializationFormat != null) {
        BulkFormat<RowData, FileSourceSplit> format;
        if (deserializationFormat instanceof ProjectableDecodingFormat) {
            format =
                    new DeserializationSchemaAdapter(
                            ((ProjectableDecodingFormat<DeserializationSchema<RowData>>) deserializationFormat)
                                    .createRuntimeDecoder(
                                            scanContext, physicalDataType, physicalProjections.toNestedIndexes()));
        } else {
            format =
                    new ProjectingBulkFormat(
                            new DeserializationSchemaAdapter(
                                    deserializationFormat.createRuntimeDecoder(scanContext, physicalDataType)),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(
                scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else {
        throw new TableException("Can not find format factory.");
    }
}
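
For context, the BulkFormat assembled above is handed to createSourceProvider, which exposes it to the planner through Flink's FileSource. Below is a minimal sketch of that wiring, not the actual helper (which configures additional connector options); the class name, method name, and tableLocation parameter are hypothetical placeholders.

// Minimal sketch: expose a BulkFormat<RowData, FileSourceSplit> as a table SourceProvider.
// The class name, method name, and tableLocation parameter are hypothetical placeholders.
import org.apache.flink.connector.file.src.FileSource;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.connector.source.SourceProvider;
import org.apache.flink.table.data.RowData;

final class BulkFormatSourceSketch {

    // Builds a FLIP-27 FileSource over the given table location and wraps it for the planner.
    static SourceProvider toSourceProvider(
            BulkFormat<RowData, FileSourceSplit> format, Path tableLocation) {
        FileSource<RowData> source = FileSource.forBulkFileFormat(format, tableLocation).build();
        return SourceProvider.of(source);
    }
}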
Use of org.apache.flink.connector.file.src.reader.BulkFormat in project flink by apache.
The class FileSystemTableSink, method createCompactReaderFactory:
private Optional<CompactReader.Factory<RowData>> createCompactReaderFactory(Context context) {
    // Compute producedDataType (including partition fields) and physicalDataType (excluding
    // partition fields)
    final DataType producedDataType = physicalRowDataType;
    final DataType physicalDataType =
            DataType.getFields(producedDataType).stream()
                    .filter(field -> !partitionKeys.contains(field.getName()))
                    .collect(Collectors.collectingAndThen(Collectors.toList(), DataTypes::ROW));
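
    // As in the source path above, prefer the bulk reading format when available and
    // otherwise adapt the row-wise DeserializationSchema.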
    if (bulkReaderFormat != null) {
        final BulkFormat<RowData, FileSourceSplit> format =
                new FileInfoExtractorBulkFormat(
                        bulkReaderFormat.createRuntimeDecoder(createSourceContext(context), physicalDataType),
                        producedDataType, context.createTypeInformation(producedDataType),
                        Collections.emptyMap(), partitionKeys, defaultPartName);
        return Optional.of(CompactBulkReader.factory(format));
    } else if (deserializationFormat != null) {
        final DeserializationSchema<RowData> decoder =
                deserializationFormat.createRuntimeDecoder(createSourceContext(context), physicalDataType);
        final BulkFormat<RowData, FileSourceSplit> format =
                new FileInfoExtractorBulkFormat(
                        new DeserializationSchemaAdapter(decoder), producedDataType,
                        context.createTypeInformation(producedDataType),
                        Collections.emptyMap(), partitionKeys, defaultPartName);
        return Optional.of(CompactBulkReader.factory(format));
    }
    return Optional.empty();
}
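
The factory returned here ultimately drives a plain BulkFormat read loop during compaction. As a rough illustration of that pattern (not the actual CompactBulkReader implementation), reading one split with any BulkFormat looks roughly like this; the class name and the Consumer-based emission are placeholders.

// Illustrative read loop over a single FileSourceSplit using a BulkFormat.
// Error handling, batch pooling, and checkpointed positions are intentionally ignored.
import java.io.IOException;
import java.util.function.Consumer;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.connector.file.src.util.RecordAndPosition;
import org.apache.flink.table.data.RowData;

final class BulkFormatReadLoopSketch {

    static void readSplit(
            BulkFormat<RowData, FileSourceSplit> format,
            FileSourceSplit split,
            Consumer<RowData> out)
            throws IOException {
        try (BulkFormat.Reader<RowData> reader = format.createReader(new Configuration(), split)) {
            BulkFormat.RecordIterator<RowData> batch;
            // Each readBatch() returns one batch of records, or null once the split is exhausted.
            while ((batch = reader.readBatch()) != null) {
                RecordAndPosition<RowData> record;
                while ((record = batch.next()) != null) {
                    out.accept(record.getRecord());
                }
                // Hand the batch (and any pooled memory) back to the format.
                batch.releaseBatch();
            }
        }
    }
}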