Search in sources:

Example 1 with BootstrapBaseFileMapping

use of org.apache.hudi.common.model.BootstrapBaseFileMapping in project hudi by apache.

the class SpillableMapBasedFileSystemView method createFileIdToBootstrapBaseFileMap.

/**
 * Creates the file-group-id to bootstrap-base-file map backed by an {@link ExternalSpillableMap}.
 *
 * <p>The base store directory is created first (the result of {@code mkdirs()} is not checked), then a new
 * spillable map is built from {@code maxMemoryForBootstrapBaseFile}, {@code baseStoreDir}, {@code diskMapType}
 * and {@code isBitCaskDiskMapCompressionEnabled}, and every entry of the supplied map is copied into it.
 *
 * @param fileGroupIdBootstrapBaseFileMap entries to copy into the newly created map
 * @return the newly created map containing all supplied entries
 * @throws RuntimeException wrapping the {@link IOException} raised while setting up the map
 */
@Override
protected Map<HoodieFileGroupId, BootstrapBaseFileMapping> createFileIdToBootstrapBaseFileMap(Map<HoodieFileGroupId, BootstrapBaseFileMapping> fileGroupIdBootstrapBaseFileMap) {
    try {
        LOG.info("Creating bootstrap base File Map using external spillable Map. Max Mem=" + maxMemoryForBootstrapBaseFile + ", BaseDir=" + baseStoreDir);
        new File(baseStoreDir).mkdirs();
        // Both size estimators use the diamond operator so key and value estimators are type-checked
        // (the key estimator was previously a raw type, which triggered an unchecked conversion).
        Map<HoodieFileGroupId, BootstrapBaseFileMapping> bootstrapBaseFileMap = new ExternalSpillableMap<>(maxMemoryForBootstrapBaseFile, baseStoreDir, new DefaultSizeEstimator<>(), new DefaultSizeEstimator<>(), diskMapType, isBitCaskDiskMapCompressionEnabled);
        bootstrapBaseFileMap.putAll(fileGroupIdBootstrapBaseFileMap);
        return bootstrapBaseFileMap;
    } catch (IOException e) {
        // Kept as a plain RuntimeException so existing callers observe the same exception type.
        throw new RuntimeException(e);
    }
}
Also used : HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) IOException(java.io.IOException) BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) File(java.io.File)

Example 2 with BootstrapBaseFileMapping

use of org.apache.hudi.common.model.BootstrapBaseFileMapping in project hudi by apache.

the class AbstractTableFileSystemView method addBootstrapBaseFileIfPresent.

/**
 * Attaches the bootstrap base file to a base file written at the metadata bootstrap instant.
 *
 * <p>When the base file's commit time is not {@code METADATA_BOOTSTRAP_INSTANT_TS}, the given instance is
 * returned as-is. Otherwise a copy is made, and if a bootstrap mapping is found for the file group, its
 * bootstrap base file is set on that copy.
 *
 * @param fileGroupId file group used to look up the bootstrap mapping
 * @param baseFile    base file to inspect
 * @return {@code baseFile} itself, or a copy of it (with the bootstrap base file set when a mapping exists)
 */
protected HoodieBaseFile addBootstrapBaseFileIfPresent(HoodieFileGroupId fileGroupId, HoodieBaseFile baseFile) {
    boolean writtenAtBootstrapInstant = baseFile.getCommitTime().equals(METADATA_BOOTSTRAP_INSTANT_TS);
    if (!writtenAtBootstrapInstant) {
        return baseFile;
    }

    // Work on a copy so the caller's instance is never mutated.
    HoodieBaseFile enriched = new HoodieBaseFile(baseFile);
    getBootstrapBaseFile(fileGroupId)
        .ifPresent(mapping -> enriched.setBootstrapBaseFile(mapping.getBootstrapBaseFile()));
    return enriched;
}
Also used : HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping)

Example 3 with BootstrapBaseFileMapping

use of org.apache.hudi.common.model.BootstrapBaseFileMapping in project hudi by apache.

the class AbstractTableFileSystemView method addFilesToView.

/**
 * Builds file groups from the given file statuses, stores them in this view per partition, and returns them.
 *
 * <p>For each partition that is not yet available in the store, the bootstrap base file mappings of that
 * partition are registered first (only when the bootstrap index is in use) and then the partition's file groups
 * are stored. Partitions already available in the store are skipped. The time taken to build the file groups and
 * to store the partitions is logged.
 *
 * @param statuses file statuses to add to the view
 * @return every file group built from {@code statuses}, including those belonging to skipped partitions
 */
public List<HoodieFileGroup> addFilesToView(FileStatus[] statuses) {
    HoodieTimer stopwatch = new HoodieTimer().startTimer();
    List<HoodieFileGroup> builtGroups = buildFileGroups(statuses, visibleCommitsAndCompactionTimeline, true);
    long buildMs = stopwatch.endTimer();

    stopwatch.startTimer();
    // Group by partition for efficient updates for both InMemory and DiskBased structures.
    builtGroups.stream()
        .collect(Collectors.groupingBy(HoodieFileGroup::getPartitionPath))
        .forEach((partitionPath, groupsInPartition) -> {
            if (isPartitionAvailableInStore(partitionPath)) {
                // Partition is already in the store; nothing to add for it.
                return;
            }
            if (bootstrapIndex.useIndex()) {
                try (BootstrapIndex.IndexReader indexReader = bootstrapIndex.createReader()) {
                    LOG.info("Bootstrap Index available for partition " + partitionPath);
                    List<BootstrapFileMapping> mappings = indexReader.getSourceFileMappingForPartition(partitionPath);
                    addBootstrapBaseFileMapping(
                        mappings.stream().map(mapping -> new BootstrapBaseFileMapping(
                            new HoodieFileGroupId(mapping.getPartitionPath(), mapping.getFileId()),
                            mapping.getBootstrapFileStatus())));
                }
            }
            storePartitionView(partitionPath, groupsInPartition);
        });
    long storeMs = stopwatch.endTimer();

    LOG.info("addFilesToView: NumFiles=" + statuses.length + ", NumFileGroups=" + builtGroups.size() + ", FileGroupsCreationTime=" + buildMs + ", StoreTimeTaken=" + storeMs);
    return builtGroups;
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) 
ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping)

Example 4 with BootstrapBaseFileMapping

use of org.apache.hudi.common.model.BootstrapBaseFileMapping in project hudi by apache.

the class RocksDbBasedFileSystemView method getBootstrapBaseFile.

/**
 * Looks up the bootstrap base file mapping stored in RocksDB for the given file group.
 *
 * <p>The key and the column family are both obtained from {@code schemaHelper}.
 *
 * @param fileGroupId file group whose bootstrap mapping is requested
 * @return the stored mapping wrapped in an {@link Option}; built with {@code Option.ofNullable}, so a
 *         {@code null} lookup result is handled by that factory
 */
@Override
protected Option<BootstrapBaseFileMapping> getBootstrapBaseFile(HoodieFileGroupId fileGroupId) {
    // Key is computed before the column family, matching the established call order.
    final String key = schemaHelper.getKeyForBootstrapBaseFile(fileGroupId);
    final BootstrapBaseFileMapping storedMapping =
        rocksDB.get(schemaHelper.getColFamilyForBootstrapBaseFile(), key);
    return Option.ofNullable(storedMapping);
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping)

Example 5 with BootstrapBaseFileMapping

use of org.apache.hudi.common.model.BootstrapBaseFileMapping in project hudi by apache.

the class AbstractTableFileSystemView method addBootstrapBaseFileIfPresent.

/**
 * Attaches the bootstrap base file to the base file of a slice created at the metadata bootstrap instant.
 *
 * <p>When the slice's base instant time is not {@code METADATA_BOOTSTRAP_INSTANT_TS}, the given slice is
 * returned as-is. Otherwise the slice is copied; if the copy has a base file and a bootstrap mapping exists for
 * the copy's file group, the mapping's bootstrap base file is set on that base file.
 *
 * @param fileSlice file slice to inspect
 * @return {@code fileSlice} itself, or a copy of it (with the bootstrap base file set when applicable)
 */
protected FileSlice addBootstrapBaseFileIfPresent(FileSlice fileSlice) {
    boolean createdAtBootstrapInstant = fileSlice.getBaseInstantTime().equals(METADATA_BOOTSTRAP_INSTANT_TS);
    if (!createdAtBootstrapInstant) {
        return fileSlice;
    }

    FileSlice enrichedSlice = new FileSlice(fileSlice);
    // The mapping lookup only happens when the copied slice actually has a base file.
    enrichedSlice.getBaseFile().ifPresent(baseFile ->
        getBootstrapBaseFile(enrichedSlice.getFileGroupId())
            .ifPresent(mapping -> baseFile.setBootstrapBaseFile(mapping.getBootstrapBaseFile())));
    return enrichedSlice;
}
Also used : FileSlice(org.apache.hudi.common.model.FileSlice) BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping)

Aggregations

BootstrapBaseFileMapping (org.apache.hudi.common.model.BootstrapBaseFileMapping)5 IOException (java.io.IOException)2 FileSlice (org.apache.hudi.common.model.FileSlice)2 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)2 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)2 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 Serializable (java.io.Serializable)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Comparator (java.util.Comparator)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)1 ReadLock (java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock)1 WriteLock (java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock)1