use of org.apache.hudi.avro.model.HoodieMetadataColumnStats in project hudi by apache.
the class HoodieBloomIndex method loadColumnRangesFromMetaIndex.
/**
 * Builds {@link BloomIndexFileInfo} entries from the metadata table's column stats for the
 * latest base files of every given partition.
 *
 * <p>The stats are looked up for the table's record key field; each returned entry carries the
 * file id together with the min and max value reported by the column stats record.
 *
 * @param partitions  - List of partitions for which column stats need to be loaded
 * @param context     - Engine context used to run the per-partition lookups
 * @param hoodieTable - Hoodie table whose metadata table is queried
 * @return List of (partition, file column range info) pairs; partitions without any latest base
 *         file contribute nothing
 * @throws HoodieMetadataException when the metadata lookup raises a MetadataNotFoundException
 */
protected List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromMetaIndex(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
  context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices");
  // Column stats are fetched for the record key column only.
  final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp();

  return context.flatMap(partitions, partitionName -> {
    // (partition, base file name) pairs for the latest base files, in sorted order.
    List<Pair<String, String>> partitionAndFileNames = HoodieIndexUtils.getLatestBaseFilesForPartition(partitionName, hoodieTable)
        .stream()
        .map(baseFile -> Pair.of(partitionName, baseFile.getFileName()))
        .sorted()
        .collect(toList());

    // Nothing to look up for a partition that has no base files.
    if (partitionAndFileNames.isEmpty()) {
      return Stream.empty();
    }

    try {
      Map<Pair<String, String>, HoodieMetadataColumnStats> statsByFile =
          hoodieTable.getMetadataTable().getColumnStats(partitionAndFileNames, keyField);

      // Materialize eagerly so that every stats accessor runs inside this try block.
      List<Pair<String, BloomIndexFileInfo>> rangeInfos = new ArrayList<>(statsByFile.size());
      statsByFile.forEach((partitionAndFile, stats) -> {
        String fileId = FSUtils.getFileId(partitionAndFile.getRight());
        BloomIndexFileInfo fileInfo = new BloomIndexFileInfo(fileId, stats.getMinValue(), stats.getMaxValue());
        rangeInfos.add(Pair.of(partitionAndFile.getLeft(), fileInfo));
      });
      return rangeInfos.stream();
    } catch (MetadataNotFoundException me) {
      throw new HoodieMetadataException("Unable to find column range metadata for partition:" + partitionName, me);
    }
  }, Math.max(partitions.size(), 1)); // never pass 0, even for an empty partition list
}
use of org.apache.hudi.avro.model.HoodieMetadataColumnStats in project hudi by apache.
the class BaseTableMetadata method getColumnStats.
/**
 * Fetches the column stats records of one column for a set of (partition name, file name) pairs.
 *
 * <p>When the column stats index is disabled an error is logged and an empty map is returned.
 * Files for which no record is found, whose record carries no column stats payload, or whose
 * stats are flagged as deleted are absent from the returned map.
 *
 * @param partitionNameFileNameList (partition name, file name) pairs to look up
 * @param columnName                name of the column whose stats are requested
 * @return map from (partition name, file name) pair to its column stats
 * @throws HoodieMetadataException declared by the overridden method
 */
@Override
public Map<Pair<String, String>, HoodieMetadataColumnStats> getColumnStats(final List<Pair<String, String>> partitionNameFileNameList, final String columnName) throws HoodieMetadataException {
  if (!isColumnStatsIndexEnabled) {
    LOG.error("Metadata column stats index is disabled!");
    return Collections.emptyMap();
  }

  // Build one index key per requested file and remember which file each key stands for.
  final Map<String, Pair<String, String>> fileByIndexKey = new HashMap<>();
  final TreeSet<String> orderedIndexKeys = new TreeSet<>();
  final ColumnIndexID columnIndexID = new ColumnIndexID(columnName);
  for (Pair<String, String> partitionAndFile : partitionNameFileNameList) {
    final PartitionIndexID partitionIndexID = new PartitionIndexID(partitionAndFile.getLeft());
    final FileIndexID fileIndexID = new FileIndexID(partitionAndFile.getRight());
    final String indexKey = HoodieMetadataPayload.getColumnStatsIndexKey(partitionIndexID, fileIndexID, columnIndexID);
    orderedIndexKeys.add(indexKey);
    fileByIndexKey.put(indexKey, partitionAndFile);
  }

  // The lookup receives the keys sorted and de-duplicated (TreeSet iteration order).
  final List<String> lookupKeys = new ArrayList<>(orderedIndexKeys);
  final HoodieTimer timer = new HoodieTimer().startTimer();
  final List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> lookupResults =
      getRecordsByKeys(lookupKeys, MetadataPartitionType.COLUMN_STATS.getPartitionPath());
  metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, timer.endTimer()));

  final Map<Pair<String, String>, HoodieMetadataColumnStats> statsByFile = new HashMap<>();
  for (final Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> lookup : lookupResults) {
    // No record for this key: skip silently.
    if (!lookup.getRight().isPresent()) {
      continue;
    }

    final Option<HoodieMetadataColumnStats> stats = lookup.getRight().get().getData().getColumnStatMetadata();
    if (!stats.isPresent()) {
      LOG.error("Meta index column stats missing for: " + lookup.getLeft());
      continue;
    }

    // Stats flagged as deleted are not reported.
    if (stats.get().getIsDeleted()) {
      continue;
    }

    // Every returned key must be one that was requested, and must map to a not-yet-seen file.
    final String indexKey = lookup.getLeft();
    ValidationUtils.checkState(fileByIndexKey.containsKey(indexKey));
    final Pair<String, String> partitionAndFile = fileByIndexKey.get(indexKey);
    ValidationUtils.checkState(!statsByFile.containsKey(partitionAndFile));
    statsByFile.put(partitionAndFile, stats.get());
  }
  return statsByFile;
}
Aggregations