Example 1 with IncrementalInputSplits

Use of org.apache.hudi.source.IncrementalInputSplits in the Apache Hudi project.

From the class HoodieTableSource, method getBatchInputFormat:

private InputFormat<RowData, ?> getBatchInputFormat() {
    final Schema tableAvroSchema = getTableAvroSchema();
    final DataType rowDataType = AvroSchemaConverter.convertToDataType(tableAvroSchema);
    final RowType rowType = (RowType) rowDataType.getLogicalType();
    final RowType requiredRowType = (RowType) getProducedDataType().notNull().getLogicalType();
    // The configured query type decides which input format serves the batch read.
    final String queryType = this.conf.getString(FlinkOptions.QUERY_TYPE);
    switch (queryType) {
        case FlinkOptions.QUERY_TYPE_SNAPSHOT:
            final HoodieTableType tableType = HoodieTableType.valueOf(this.conf.getString(FlinkOptions.TABLE_TYPE));
            switch (tableType) {
                case MERGE_ON_READ:
                    // Snapshot reads of MERGE_ON_READ tables merge base and log files.
                    final List<MergeOnReadInputSplit> inputSplits = buildFileIndex();
                    if (inputSplits.isEmpty()) {
                        // When there are no input splits, just return an empty source.
                        LOG.warn("No input splits generated for MERGE_ON_READ input format, returning empty collection instead");
                        return InputFormats.EMPTY_INPUT_FORMAT;
                    }
                    return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema, rowDataType, inputSplits, false);
                case COPY_ON_WRITE:
                    return baseFileOnlyInputFormat();
                default:
                    throw new HoodieException("Unexpected table type: " + this.conf.getString(FlinkOptions.TABLE_TYPE));
            }
        case FlinkOptions.QUERY_TYPE_READ_OPTIMIZED:
            // Read-optimized queries scan only the base files.
            return baseFileOnlyInputFormat();
        case FlinkOptions.QUERY_TYPE_INCREMENTAL:
            // Incremental reads compute their splits from the commit timeline.
            IncrementalInputSplits incrementalInputSplits = IncrementalInputSplits.builder()
                .conf(conf)
                .path(FilePathUtils.toFlinkPath(path))
                .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
                .requiredPartitions(getRequiredPartitionPaths())
                .build();
            final IncrementalInputSplits.Result result = incrementalInputSplits.inputSplits(metaClient, hadoopConf);
            if (result.isEmpty()) {
                // When there are no input splits, just return an empty source.
                LOG.warn("No input splits generated for incremental read, returning empty collection instead");
                return InputFormats.EMPTY_INPUT_FORMAT;
            }
            return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema, rowDataType, result.getInputSplits(), false);
        default:
            String errMsg = String.format(
                "Invalid query type : '%s', options ['%s', '%s', '%s'] are supported now",
                queryType,
                FlinkOptions.QUERY_TYPE_SNAPSHOT,
                FlinkOptions.QUERY_TYPE_READ_OPTIMIZED,
                FlinkOptions.QUERY_TYPE_INCREMENTAL);
            throw new HoodieException(errMsg);
    }
}
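
As a rough usage sketch (not taken from the Hudi sources), the incremental branch above can be exercised on its own: build IncrementalInputSplits against the table's base path, bound the read with the incremental commit-range options, and ask it for splits. The helper class and method names below are hypothetical, the 'read.start-commit' / 'read.end-commit' option keys should be checked against the Hudi release in use, and the 100 MB compaction memory budget mirrors the default HoodieTableSource derives from FlinkOptions.COMPACTION_MAX_MEMORY only by assumption.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.source.IncrementalInputSplits;

public class IncrementalSplitsSketch {

    // Hypothetical helper; metaClient and hadoopConf are assumed to be
    // obtained the same way HoodieTableSource obtains them.
    static IncrementalInputSplits.Result previewSplits(
            HoodieTableMetaClient metaClient,
            org.apache.hadoop.conf.Configuration hadoopConf,
            String basePath) {
        Configuration conf = new Configuration();
        // Bound the incremental query to a commit range; values are example
        // Hudi instant times (option keys assumed, verify for your release).
        conf.setString("read.start-commit", "20220101000000");
        conf.setString("read.end-commit", "20220102000000");

        IncrementalInputSplits incrementalInputSplits = IncrementalInputSplits.builder()
                .conf(conf)
                .path(new Path(basePath))
                // Assumed 100 MB budget for merging log files.
                .maxCompactionMemoryInBytes(100L * 1024 * 1024)
                // null means no partition pruning, mirroring the case where
                // getRequiredPartitionPaths() yields nothing to prune.
                .requiredPartitions(null)
                .build();

        return incrementalInputSplits.inputSplits(metaClient, hadoopConf);
    }
}

An empty Result plays the same role as InputFormats.EMPTY_INPUT_FORMAT above: callers should check result.isEmpty() before wiring the splits into an input format.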
Also used in this example:

org.apache.avro.Schema
org.apache.flink.table.catalog.ResolvedSchema
org.apache.flink.table.types.DataType
org.apache.flink.table.types.logical.RowType
org.apache.hudi.common.model.HoodieTableType
org.apache.hudi.exception.HoodieException
org.apache.hudi.source.IncrementalInputSplits
org.apache.hudi.table.format.mor.MergeOnReadInputSplit
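
Which branch getBatchInputFormat takes is decided entirely by the two options read at the top of the method. A minimal sketch, using only the constants referenced in the snippet (the literal option keys behind them may differ across Hudi releases):

import org.apache.flink.configuration.Configuration;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.configuration.FlinkOptions;

public class QueryTypeSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // snapshot + MERGE_ON_READ -> mergeOnReadInputFormat(...)
        conf.setString(FlinkOptions.QUERY_TYPE.key(), FlinkOptions.QUERY_TYPE_SNAPSHOT);
        conf.setString(FlinkOptions.TABLE_TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        // FlinkOptions.QUERY_TYPE_READ_OPTIMIZED -> baseFileOnlyInputFormat(), any table type
        // FlinkOptions.QUERY_TYPE_INCREMENTAL    -> the IncrementalInputSplits path shown above
    }
}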
