use of org.apache.hudi.common.util.hash.PartitionIndexID in project hudi by apache.
the class HoodieMetadataPayload method getColumnStatsIndexKey.
/**
 * Builds the column stats index key for a partition and a column range metadata entry.
 *
 * <p>The key is composed from three index IDs: one for the partition name, one for the
 * file name (the last element of {@code columnRangeMetadata.getFilePath()}), and one for
 * {@code columnRangeMetadata.getColumnName()}.
 *
 * @param partitionName       - Partition name
 * @param columnRangeMetadata - Column range metadata supplying the file path and column name
 * @return Column stats index key
 */
public static String getColumnStatsIndexKey(String partitionName, HoodieColumnRangeMetadata<Comparable> columnRangeMetadata) {
  // Only the file name takes part in the key, never the full path.
  return getColumnStatsIndexKey(
      new PartitionIndexID(partitionName),
      new FileIndexID(new Path(columnRangeMetadata.getFilePath()).getName()),
      new ColumnIndexID(columnRangeMetadata.getColumnName()));
}
use of org.apache.hudi.common.util.hash.PartitionIndexID in project hudi by apache.
the class HoodieMetadataPayload method createBloomFilterMetadataRecord.
/**
 * Builds the bloom filter metadata record for a base file.
 *
 * <p>The record key is the base64 encoded partition index ID concatenated with the base64
 * encoded file index ID, and the record is keyed into the bloom filters metadata partition.
 *
 * @param partitionName   - Partition name
 * @param baseFileName    - Base file name (without any path separator) for which the bloom
 *                          filter needs to be persisted
 * @param timestamp       - Instant timestamp responsible for this record
 * @param bloomFilterType - Type of the bloom filter
 * @param bloomFilter     - Bloom filter for the file
 * @param isDeleted       - Whether the bloom filter is no longer valid
 * @return Metadata record whose payload carries the record key and the bloom filter
 */
public static HoodieRecord<HoodieMetadataPayload> createBloomFilterMetadataRecord(final String partitionName, final String baseFileName, final String timestamp, final String bloomFilterType, final ByteBuffer bloomFilter, final boolean isDeleted) {
  // A bare base file name is required: no directory component, and it must be a base file.
  final boolean isBareBaseFileName = !baseFileName.contains(Path.SEPARATOR) && FSUtils.isBaseFile(new Path(baseFileName));
  checkArgument(isBareBaseFileName, "Invalid base file '" + baseFileName + "' for MetaIndexBloomFilter!");

  final String encodedPartitionId = new PartitionIndexID(partitionName).asBase64EncodedString();
  final String encodedFileId = new FileIndexID(baseFileName).asBase64EncodedString();
  final HoodieKey recordKey = new HoodieKey(
      encodedPartitionId.concat(encodedFileId),
      MetadataPartitionType.BLOOM_FILTERS.getPartitionPath());

  final HoodieMetadataBloomFilter bloomFilterEntry = new HoodieMetadataBloomFilter(bloomFilterType, timestamp, bloomFilter, isDeleted);
  final HoodieMetadataPayload payload = new HoodieMetadataPayload(recordKey.getRecordKey(), bloomFilterEntry);
  return new HoodieAvroRecord<>(recordKey, payload);
}
use of org.apache.hudi.common.util.hash.PartitionIndexID in project hudi by apache.
the class BaseTableMetadata method getColumnStats.
/**
 * Looks up the stats of a single column for the given files in the column stats
 * metadata partition.
 *
 * @param partitionNameFileNameList - (partition name, file name) pairs to look up
 * @param columnName                - Column whose stats are requested
 * @return Map from (partition name, file name) pair to its column stats. Pairs for which no
 *         record was found, or whose stats are marked deleted, are absent. An empty map is
 *         returned when the column stats index is disabled or when no pairs are given.
 * @throws HoodieMetadataException declared by the signature; presumably raised by the
 *         underlying key lookup, which is not visible here
 */
@Override
public Map<Pair<String, String>, HoodieMetadataColumnStats> getColumnStats(final List<Pair<String, String>> partitionNameFileNameList, final String columnName) throws HoodieMetadataException {
  if (!isColumnStatsIndexEnabled) {
    LOG.error("Metadata column stats index is disabled!");
    return Collections.emptyMap();
  }
  // Nothing to look up: skip the metadata read (and its metric update) entirely.
  if (partitionNameFileNameList.isEmpty()) {
    return Collections.emptyMap();
  }

  // Keys are collected in a TreeSet so the lookup receives them sorted and de-duplicated;
  // the side map lets each returned key be translated back to its (partition, file) pair.
  Map<String, Pair<String, String>> columnStatKeyToFileNameMap = new HashMap<>();
  TreeSet<String> sortedKeys = new TreeSet<>();
  final ColumnIndexID columnIndexID = new ColumnIndexID(columnName);
  for (Pair<String, String> partitionNameFileNamePair : partitionNameFileNameList) {
    final String columnStatsIndexKey = HoodieMetadataPayload.getColumnStatsIndexKey(
        new PartitionIndexID(partitionNameFileNamePair.getLeft()),
        new FileIndexID(partitionNameFileNamePair.getRight()),
        columnIndexID);
    sortedKeys.add(columnStatsIndexKey);
    columnStatKeyToFileNameMap.put(columnStatsIndexKey, partitionNameFileNamePair);
  }

  List<String> columnStatKeys = new ArrayList<>(sortedKeys);
  HoodieTimer timer = new HoodieTimer().startTimer();
  List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> hoodieRecordList =
      getRecordsByKeys(columnStatKeys, MetadataPartitionType.COLUMN_STATS.getPartitionPath());
  metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, timer.endTimer()));

  Map<Pair<String, String>, HoodieMetadataColumnStats> fileToColumnStatMap = new HashMap<>();
  for (final Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : hoodieRecordList) {
    if (!entry.getRight().isPresent()) {
      // No record for this key.
      continue;
    }
    final Option<HoodieMetadataColumnStats> columnStatMetadata = entry.getRight().get().getData().getColumnStatMetadata();
    if (!columnStatMetadata.isPresent()) {
      LOG.error("Meta index column stats missing for: " + entry.getLeft());
      continue;
    }
    if (columnStatMetadata.get().getIsDeleted()) {
      // Deleted stats are not reported to the caller.
      continue;
    }
    // Every returned key must be one that was requested, and must map to a distinct file pair.
    ValidationUtils.checkState(columnStatKeyToFileNameMap.containsKey(entry.getLeft()));
    final Pair<String, String> partitionFileNamePair = columnStatKeyToFileNameMap.get(entry.getLeft());
    ValidationUtils.checkState(!fileToColumnStatMap.containsKey(partitionFileNamePair));
    fileToColumnStatMap.put(partitionFileNamePair, columnStatMetadata.get());
  }
  return fileToColumnStatMap;
}
use of org.apache.hudi.common.util.hash.PartitionIndexID in project hudi by apache.
the class BaseTableMetadata method getBloomFilters.
/**
 * Looks up the bloom filters of the given files in the bloom filters metadata partition.
 *
 * @param partitionNameFileNameList - (partition name, file name) pairs to look up
 * @return Map from (partition name, file name) pair to its bloom filter. Pairs for which no
 *         record was found, or whose bloom filter is marked deleted, are absent. An empty map
 *         is returned when the bloom filter index is disabled or when no pairs are given.
 * @throws HoodieMetadataException declared by the signature; presumably raised by the
 *         underlying key lookup, which is not visible here
 */
@Override
public Map<Pair<String, String>, BloomFilter> getBloomFilters(final List<Pair<String, String>> partitionNameFileNameList) throws HoodieMetadataException {
  if (!isBloomFilterIndexEnabled) {
    LOG.error("Metadata bloom filter index is disabled!");
    return Collections.emptyMap();
  }
  // Also guarantees the per-key metric division below never divides by zero.
  if (partitionNameFileNameList.isEmpty()) {
    return Collections.emptyMap();
  }

  HoodieTimer timer = new HoodieTimer().startTimer();
  // Keys are collected in a TreeSet so the lookup receives them sorted and de-duplicated;
  // the side map lets each returned key be translated back to its (partition, file) pair.
  Set<String> partitionIDFileIDSortedStrings = new TreeSet<>();
  Map<String, Pair<String, String>> keyToFileMap = new HashMap<>();
  for (Pair<String, String> partitionNameFileNamePair : partitionNameFileNameList) {
    final String bloomFilterIndexKey = HoodieMetadataPayload.getBloomFilterIndexKey(
        new PartitionIndexID(partitionNameFileNamePair.getLeft()),
        new FileIndexID(partitionNameFileNamePair.getRight()));
    partitionIDFileIDSortedStrings.add(bloomFilterIndexKey);
    keyToFileMap.put(bloomFilterIndexKey, partitionNameFileNamePair);
  }

  List<String> partitionIDFileIDStrings = new ArrayList<>(partitionIDFileIDSortedStrings);
  List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> hoodieRecordList =
      getRecordsByKeys(partitionIDFileIDStrings, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath());
  // The metric is reported as elapsed time per looked-up key.
  metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_BLOOM_FILTERS_METADATA_STR, (timer.endTimer() / partitionIDFileIDStrings.size())));

  Map<Pair<String, String>, BloomFilter> partitionFileToBloomFilterMap = new HashMap<>();
  for (final Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : hoodieRecordList) {
    if (!entry.getRight().isPresent()) {
      // No record for this key.
      continue;
    }
    final Option<HoodieMetadataBloomFilter> bloomFilterMetadata = entry.getRight().get().getData().getBloomFilterMetadata();
    if (!bloomFilterMetadata.isPresent()) {
      LOG.error("Meta index bloom filter missing for: " + keyToFileMap.get(entry.getLeft()));
      continue;
    }
    if (bloomFilterMetadata.get().getIsDeleted()) {
      // Deleted bloom filters are not reported to the caller.
      continue;
    }
    ValidationUtils.checkState(keyToFileMap.containsKey(entry.getLeft()));
    // Charset.decode consumes the buffer it is given. Decode from a duplicate so the
    // position of the buffer held by the metadata record stays untouched and the record
    // can be read again later.
    final ByteBuffer bloomFilterByteBuffer = bloomFilterMetadata.get().getBloomFilter().duplicate();
    final String bloomFilterType = bloomFilterMetadata.get().getType();
    final BloomFilter bloomFilter = BloomFilterFactory.fromString(StandardCharsets.UTF_8.decode(bloomFilterByteBuffer).toString(), bloomFilterType);
    partitionFileToBloomFilterMap.put(keyToFileMap.get(entry.getLeft()), bloomFilter);
  }
  return partitionFileToBloomFilterMap;
}
Aggregations