
Example 1 with BulkDecodingFormat

Use of org.apache.flink.connector.file.table.format.BulkDecodingFormat in project flink by apache.

The method getScanRuntimeProvider of the class FileSystemTableSource:

@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
    // When this table is partitioned but currently has no partitions, return an empty source.
    if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
        return InputFormatProvider.of(new CollectionInputFormat<>(new ArrayList<>(), null));
    }
    // Resolve metadata and make sure to filter out metadata not in the producedDataType
    final List<String> metadataKeys =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(
                            ((this.metadataKeys == null)
                                            ? Collections.emptyList()
                                            : this.metadataKeys)
                                    ::contains)
                    .collect(Collectors.toList());
    final List<ReadableFileInfo> metadataToExtract =
            metadataKeys.stream().map(ReadableFileInfo::resolve).collect(Collectors.toList());
    // Filter out partition columns not in producedDataType
    final List<String> partitionKeysToExtract =
            DataType.getFieldNames(producedDataType).stream()
                    .filter(this.partitionKeys::contains)
                    .collect(Collectors.toList());
    // Compute the physical projection and the physical data type, that is,
    // the type without partition columns and metadata, in the same order as the schema
    DataType physicalDataType = physicalRowDataType;
    final Projection partitionKeysProjections =
            Projection.fromFieldNames(physicalDataType, partitionKeysToExtract);
    final Projection physicalProjections =
            (projectFields != null
                            ? Projection.of(projectFields)
                            : Projection.all(physicalDataType))
                    .difference(partitionKeysProjections);
    physicalDataType =
            partitionKeysProjections.complement(physicalDataType).project(physicalDataType);
    if (bulkReaderFormat != null) {
        if (bulkReaderFormat instanceof BulkDecodingFormat && filters != null && filters.size() > 0) {
            ((BulkDecodingFormat<RowData>) bulkReaderFormat).applyFilters(filters);
        }
        BulkFormat<RowData, FileSourceSplit> format;
        if (bulkReaderFormat instanceof ProjectableDecodingFormat) {
            format =
                    ((ProjectableDecodingFormat<BulkFormat<RowData, FileSourceSplit>>) bulkReaderFormat)
                            .createRuntimeDecoder(
                                    scanContext,
                                    physicalDataType,
                                    physicalProjections.toNestedIndexes());
        } else {
            format =
                    new ProjectingBulkFormat(
                            bulkReaderFormat.createRuntimeDecoder(scanContext, physicalDataType),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(
                                    physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else if (deserializationFormat != null) {
        BulkFormat<RowData, FileSourceSplit> format;
        if (deserializationFormat instanceof ProjectableDecodingFormat) {
            format =
                    new DeserializationSchemaAdapter(
                            ((ProjectableDecodingFormat<DeserializationSchema<RowData>>)
                                            deserializationFormat)
                                    .createRuntimeDecoder(
                                            scanContext,
                                            physicalDataType,
                                            physicalProjections.toNestedIndexes()));
        } else {
            format =
                    new ProjectingBulkFormat(
                            new DeserializationSchemaAdapter(
                                    deserializationFormat.createRuntimeDecoder(
                                            scanContext, physicalDataType)),
                            physicalProjections.toTopLevelIndexes(),
                            scanContext.createTypeInformation(
                                    physicalProjections.project(physicalDataType)));
        }
        format = wrapBulkFormat(scanContext, format, producedDataType, metadataToExtract, partitionKeysToExtract);
        return createSourceProvider(format);
    } else {
        throw new TableException("Can not find format factory.");
    }
}
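
The projection arithmetic in the middle of the method keeps partition columns out of what the file readers actually decode: the partition keys are located in the physical row type, subtracted from the requested projection, and the physical data type is narrowed to the remaining columns. The following is a minimal, self-contained sketch of those same Projection calls; the table schema with columns id, name and partition key dt is made up for illustration.

import java.util.Arrays;
import java.util.Collections;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.Projection;
import org.apache.flink.table.types.DataType;

public class ProjectionSketch {
    public static void main(String[] args) {
        // Hypothetical physical row type: two data columns plus a partition column "dt".
        DataType physicalDataType =
                DataTypes.ROW(
                        DataTypes.FIELD("id", DataTypes.BIGINT()),
                        DataTypes.FIELD("name", DataTypes.STRING()),
                        DataTypes.FIELD("dt", DataTypes.STRING()));

        // Same calls as in getScanRuntimeProvider: locate the partition column ...
        Projection partitionKeysProjections =
                Projection.fromFieldNames(physicalDataType, Collections.singletonList("dt"));

        // ... subtract it from the full projection ...
        Projection physicalProjections =
                Projection.all(physicalDataType).difference(partitionKeysProjections);

        // ... and narrow the data type to the non-partition columns.
        DataType withoutPartitions =
                partitionKeysProjections.complement(physicalDataType).project(physicalDataType);

        // Remaining top-level field indexes, e.g. [0, 1] for id and name.
        System.out.println(Arrays.toString(physicalProjections.toTopLevelIndexes()));
        // A row type containing only id and name.
        System.out.println(withoutPartitions);
    }
}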
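
The instanceof check before applyFilters is the filter push-down hand-off: only a bulk format that implements BulkDecodingFormat receives the pushed-down predicates, and only when there are any. Below is a hedged sketch of the format side of that contract, assuming a hypothetical class FilterAwareBulkDecodingFormat that merely records the filters; building the actual BulkFormat reader is intentionally left out.

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.src.reader.BulkFormat;
import org.apache.flink.connector.file.table.format.BulkDecodingFormat;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.expressions.ResolvedExpression;
import org.apache.flink.table.types.DataType;

/** Hypothetical format that records the filters pushed down by FileSystemTableSource. */
public class FilterAwareBulkDecodingFormat implements BulkDecodingFormat<RowData> {

    private List<ResolvedExpression> filters = new ArrayList<>();

    @Override
    public void applyFilters(List<ResolvedExpression> filters) {
        // Called from getScanRuntimeProvider only when filters != null and non-empty.
        this.filters = filters;
    }

    @Override
    public BulkFormat<RowData, FileSourceSplit> createRuntimeDecoder(
            DynamicTableSource.Context context, DataType physicalDataType) {
        // A real format would build a BulkFormat reader here that evaluates the
        // recorded filters while scanning files; omitted in this sketch.
        throw new UnsupportedOperationException("Reader construction is out of scope for this sketch");
    }

    @Override
    public ChangelogMode getChangelogMode() {
        // File sources typically emit insert-only changelogs.
        return ChangelogMode.insertOnly();
    }
}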
Also used:
TableException (org.apache.flink.table.api.TableException)
ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat)
FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit)
ArrayList (java.util.ArrayList)
Projection (org.apache.flink.table.connector.Projection)
BulkDecodingFormat (org.apache.flink.connector.file.table.format.BulkDecodingFormat)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
RowData (org.apache.flink.table.data.RowData)
DataType (org.apache.flink.table.types.DataType)
BulkFormat (org.apache.flink.connector.file.src.reader.BulkFormat)

Aggregations

ArrayList (java.util.ArrayList): 1
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema): 1
FileSourceSplit (org.apache.flink.connector.file.src.FileSourceSplit): 1
BulkFormat (org.apache.flink.connector.file.src.reader.BulkFormat): 1
BulkDecodingFormat (org.apache.flink.connector.file.table.format.BulkDecodingFormat): 1
TableException (org.apache.flink.table.api.TableException): 1
Projection (org.apache.flink.table.connector.Projection): 1
ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat): 1
RowData (org.apache.flink.table.data.RowData): 1
DataType (org.apache.flink.table.types.DataType): 1