Search in sources :

Example 1 with HoodieRangeInfoHandle

Use of org.apache.hudi.io.HoodieRangeInfoHandle in the Apache Hudi project.

From the class HoodieBloomIndex, method loadColumnRangesFromFiles.

/**
 * Builds a {@link BloomIndexFileInfo} for each latest base file in the given partitions and
 * pairs it with the partition path it belongs to.
 *
 * <p>For every (partition path, file id) pair a {@link HoodieRangeInfoHandle} is asked for the
 * file's min/max keys, which are passed to the {@link BloomIndexFileInfo} together with the
 * file id. When the handle throws {@link MetadataNotFoundException}, a warning is logged and a
 * {@link BloomIndexFileInfo} carrying only the file id is produced instead.
 *
 * @param partitions  partitions whose latest base files should be inspected
 * @param context     engine context used to look up the files and to run the per-file mapping
 * @param hoodieTable table the files belong to
 * @return one (partition path, file info) pair per latest base file
 */
List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromFiles(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
    // Reduce each latest base file to a (partition path, file id) pair.
    List<Pair<String, String>> partitionAndFileIds = getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable)
        .stream()
        .map(entry -> Pair.of(entry.getKey(), entry.getValue().getFileId()))
        .collect(toList());

    // One unit of parallelism per file, but never less than 1 (covers the empty-list case).
    final int parallelism = Math.max(partitionAndFileIds.size(), 1);

    context.setJobStatus(this.getClass().getName(), "Obtain key ranges for file slices (range pruning=on)");
    return context.map(partitionAndFileIds, partitionAndFileId -> {
        BloomIndexFileInfo fileInfo;
        try {
            String[] keyRange = new HoodieRangeInfoHandle(config, hoodieTable, partitionAndFileId).getMinMaxKeys();
            // keyRange[0] / keyRange[1] are presumably the min and max record keys, in that order.
            fileInfo = new BloomIndexFileInfo(partitionAndFileId.getValue(), keyRange[0], keyRange[1]);
        } catch (MetadataNotFoundException me) {
            // Best effort: fall back to a file info without a key range.
            LOG.warn("Unable to find range metadata in file :" + partitionAndFileId);
            fileInfo = new BloomIndexFileInfo(partitionAndFileId.getValue());
        }
        return Pair.of(partitionAndFileId.getKey(), fileInfo);
    }, parallelism);
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieIndexUtils.getLatestBaseFilesForAllPartitions(org.apache.hudi.index.HoodieIndexUtils.getLatestBaseFilesForAllPartitions) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieConfig(org.apache.hudi.common.config.HoodieConfig) HoodieRangeInfoHandle(org.apache.hudi.io.HoodieRangeInfoHandle) Map(java.util.Map) Collectors.mapping(java.util.stream.Collectors.mapping) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) MetadataNotFoundException(org.apache.hudi.exception.MetadataNotFoundException) HoodiePairData(org.apache.hudi.common.data.HoodiePairData) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieMetadataException(org.apache.hudi.exception.HoodieMetadataException) WriteStatus(org.apache.hudi.client.WriteStatus) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) Stream(java.util.stream.Stream) HoodieMetadataColumnStats(org.apache.hudi.avro.model.HoodieMetadataColumnStats) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIndexUtils(org.apache.hudi.index.HoodieIndexUtils) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) MetadataNotFoundException(org.apache.hudi.exception.MetadataNotFoundException) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) 
HoodieRangeInfoHandle(org.apache.hudi.io.HoodieRangeInfoHandle)

Aggregations

ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1 Collectors.groupingBy (java.util.stream.Collectors.groupingBy)1 Collectors.mapping (java.util.stream.Collectors.mapping)1 Collectors.toList (java.util.stream.Collectors.toList)1 Stream (java.util.stream.Stream)1 HoodieMetadataColumnStats (org.apache.hudi.avro.model.HoodieMetadataColumnStats)1 WriteStatus (org.apache.hudi.client.WriteStatus)1 HoodieConfig (org.apache.hudi.common.config.HoodieConfig)1 HoodieData (org.apache.hudi.common.data.HoodieData)1 HoodiePairData (org.apache.hudi.common.data.HoodiePairData)1 HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext)1 FSUtils (org.apache.hudi.common.fs.FSUtils)1 HoodieKey (org.apache.hudi.common.model.HoodieKey)1 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)1 HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation)1 Option (org.apache.hudi.common.util.Option)1 ImmutablePair (org.apache.hudi.common.util.collection.ImmutablePair)1