use of org.apache.hudi.source.IncrementalInputSplits in project hudi by apache.
the class HoodieTableSource method getBatchInputFormat.
private InputFormat<RowData, ?> getBatchInputFormat() {
final Schema tableAvroSchema = getTableAvroSchema();
final DataType rowDataType = AvroSchemaConverter.convertToDataType(tableAvroSchema);
final RowType rowType = (RowType) rowDataType.getLogicalType();
final RowType requiredRowType = (RowType) getProducedDataType().notNull().getLogicalType();
final String queryType = this.conf.getString(FlinkOptions.QUERY_TYPE);
switch(queryType) {
case FlinkOptions.QUERY_TYPE_SNAPSHOT:
final HoodieTableType tableType = HoodieTableType.valueOf(this.conf.getString(FlinkOptions.TABLE_TYPE));
switch(tableType) {
case MERGE_ON_READ:
final List<MergeOnReadInputSplit> inputSplits = buildFileIndex();
if (inputSplits.size() == 0) {
// When there is no input splits, just return an empty source.
LOG.warn("No input splits generate for MERGE_ON_READ input format, returns empty collection instead");
return InputFormats.EMPTY_INPUT_FORMAT;
}
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema, rowDataType, inputSplits, false);
case COPY_ON_WRITE:
return baseFileOnlyInputFormat();
default:
throw new HoodieException("Unexpected table type: " + this.conf.getString(FlinkOptions.TABLE_TYPE));
}
case FlinkOptions.QUERY_TYPE_READ_OPTIMIZED:
return baseFileOnlyInputFormat();
case FlinkOptions.QUERY_TYPE_INCREMENTAL:
IncrementalInputSplits incrementalInputSplits = IncrementalInputSplits.builder().conf(conf).path(FilePathUtils.toFlinkPath(path)).maxCompactionMemoryInBytes(maxCompactionMemoryInBytes).requiredPartitions(getRequiredPartitionPaths()).build();
final IncrementalInputSplits.Result result = incrementalInputSplits.inputSplits(metaClient, hadoopConf);
if (result.isEmpty()) {
// When there is no input splits, just return an empty source.
LOG.warn("No input splits generate for incremental read, returns empty collection instead");
return InputFormats.EMPTY_INPUT_FORMAT;
}
return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema, rowDataType, result.getInputSplits(), false);
default:
String errMsg = String.format("Invalid query type : '%s', options ['%s', '%s', '%s'] are supported now", queryType, FlinkOptions.QUERY_TYPE_SNAPSHOT, FlinkOptions.QUERY_TYPE_READ_OPTIMIZED, FlinkOptions.QUERY_TYPE_INCREMENTAL);
throw new HoodieException(errMsg);
}
}
Aggregations