
Example 16 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class IncrementalTimelineSyncFileSystemView, method addReplaceInstant.

/**
 * Add newly found REPLACE instant.
 *
 * @param timeline Hoodie Timeline
 * @param instant REPLACE Instant
 */
private void addReplaceInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
    LOG.info("Syncing replace instant (" + instant + ")");
    HoodieReplaceCommitMetadata replaceMetadata = HoodieReplaceCommitMetadata.fromBytes(timeline.getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
    updatePartitionWriteFileGroups(replaceMetadata.getPartitionToWriteStats(), timeline, instant);
    replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream().forEach(entry -> {
        String partition = entry.getKey();
        Map<HoodieFileGroupId, HoodieInstant> replacedFileIds = entry.getValue().stream().collect(Collectors.toMap(replaceStat -> new HoodieFileGroupId(partition, replaceStat), replaceStat -> instant));
        LOG.info("For partition (" + partition + ") of instant (" + instant + "), excluding " + replacedFileIds.size() + " file groups");
        addReplacedFileGroups(replacedFileIds);
    });
    LOG.info("Done Syncing REPLACE instant (" + instant + ")");
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), FileSlice (org.apache.hudi.common.model.FileSlice), TimelineDiffHelper (org.apache.hudi.common.table.timeline.TimelineDiffHelper), HoodieException (org.apache.hudi.exception.HoodieException), Option (org.apache.hudi.common.util.Option), FileStatus (org.apache.hadoop.fs.FileStatus), Logger (org.apache.log4j.Logger), HoodieFileGroup (org.apache.hudi.common.model.HoodieFileGroup), CleanerUtils (org.apache.hudi.common.util.CleanerUtils), Map (java.util.Map), HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata), Path (org.apache.hadoop.fs.Path), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Set (java.util.Set), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), IOException (java.io.IOException), Collectors (java.util.stream.Collectors), CompactionOperation (org.apache.hudi.common.model.CompactionOperation), HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata), HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), List (java.util.List), HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata), TimelineDiffResult (org.apache.hudi.common.table.timeline.TimelineDiffHelper.TimelineDiffResult), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan), HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata), LogManager (org.apache.log4j.LogManager), FSUtils (org.apache.hudi.common.fs.FSUtils), CompactionUtils (org.apache.hudi.common.util.CompactionUtils), Pair (org.apache.hudi.common.util.collection.Pair)
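The snippet above parses HoodieReplaceCommitMetadata straight from the instant details on the timeline. As a hedged, standalone illustration of the same metadata APIs (only calls that appear in the example are used; the helper name printReplacedFileGroups and the LOG field are assumptions, and the imports listed above are presumed to be in place), one could list the file groups replaced by each completed REPLACE instant roughly like this:

private static void printReplacedFileGroups(HoodieTableMetaClient metaClient) {
    // only completed instants carry fully written replace metadata
    HoodieTimeline completedTimeline = metaClient.getActiveTimeline().filterCompletedInstants();
    completedTimeline.getInstants()
        .filter(instant -> HoodieTimeline.REPLACE_COMMIT_ACTION.equals(instant.getAction()))
        .forEach(instant -> {
            try {
                HoodieReplaceCommitMetadata metadata = HoodieReplaceCommitMetadata.fromBytes(
                    completedTimeline.getInstantDetails(instant).get(), HoodieReplaceCommitMetadata.class);
                // partition -> list of replaced file ids, same map used by addReplaceInstant
                metadata.getPartitionToReplaceFileIds().forEach((partition, fileIds) ->
                    LOG.info("Instant " + instant + " replaced " + fileIds.size() + " file groups in partition " + partition));
            } catch (IOException e) {
                throw new HoodieException("Failed to read replace metadata for " + instant, e);
            }
        });
}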

Example 17 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class CompactionUtils, method getDeltaCommitsSinceLatestCompaction.

/**
 * Returns a pair of (timeline containing the delta commits after the latest completed
 * compaction commit, the completed compaction commit instant), if the latest completed
 * compaction commit is present; a pair of (timeline containing all the delta commits,
 * the first delta commit instant), if there is no completed compaction commit.
 *
 * @param activeTimeline Active timeline of a table.
 * @return Pair of timeline containing delta commits and an instant.
 */
public static Option<Pair<HoodieTimeline, HoodieInstant>> getDeltaCommitsSinceLatestCompaction(HoodieActiveTimeline activeTimeline) {
    Option<HoodieInstant> lastCompaction = activeTimeline.getCommitTimeline().filterCompletedInstants().lastInstant();
    HoodieTimeline deltaCommits = activeTimeline.getDeltaCommitTimeline();
    HoodieInstant latestInstant;
    if (lastCompaction.isPresent()) {
        latestInstant = lastCompaction.get();
        // timeline containing the delta commits after the latest completed compaction commit,
        // and the completed compaction commit instant
        return Option.of(Pair.of(deltaCommits.findInstantsAfter(latestInstant.getTimestamp(), Integer.MAX_VALUE), lastCompaction.get()));
    } else {
        if (deltaCommits.countInstants() > 0) {
            latestInstant = deltaCommits.firstInstant().get();
            // timeline containing all the delta commits, and the first delta commit instant
            return Option.of(Pair.of(deltaCommits.findInstantsAfterOrEquals(latestInstant.getTimestamp(), Integer.MAX_VALUE), latestInstant));
        } else {
            return Option.empty();
        }
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)
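Since getDeltaCommitsSinceLatestCompaction returns an Option over a (delta-commit timeline, reference instant) pair, a caller normally unwraps the pair before using it. A minimal usage sketch, assuming a HoodieTableMetaClient built elsewhere; the helper name countDeltaCommitsSinceLastCompaction is hypothetical:

public static int countDeltaCommitsSinceLastCompaction(HoodieTableMetaClient metaClient) {
    Option<Pair<HoodieTimeline, HoodieInstant>> deltaCommitsInfo =
        CompactionUtils.getDeltaCommitsSinceLatestCompaction(metaClient.getActiveTimeline());
    if (!deltaCommitsInfo.isPresent()) {
        // no delta commits on the timeline at all
        return 0;
    }
    // left: the timeline of delta commits; right: the latest completed compaction instant,
    // or the first delta commit if no compaction has completed yet
    return deltaCommitsInfo.get().getLeft().countInstants();
}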

Example 18 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class CompactionUtils, method getOldestInstantToRetainForCompaction.

/**
 * Gets the oldest instant to retain for MOR compaction, i.e., the earliest instant that must be
 * kept so that the next compaction can still be triggered:
 * if there is no completed compaction, the number of retained delta commits must be
 * >= "hoodie.compact.inline.max.delta.commits";
 * if there is a completed compaction, the number of retained delta commits after the latest
 * completed compaction must be >= "hoodie.compact.inline.max.delta.commits".
 *
 * @param activeTimeline  Active timeline of a table.
 * @param maxDeltaCommits Maximum number of delta commits that trigger the compaction plan,
 *                        i.e., "hoodie.compact.inline.max.delta.commits".
 * @return the oldest instant to keep for MOR compaction.
 */
public static Option<HoodieInstant> getOldestInstantToRetainForCompaction(HoodieActiveTimeline activeTimeline, int maxDeltaCommits) {
    Option<Pair<HoodieTimeline, HoodieInstant>> deltaCommitsInfoOption = CompactionUtils.getDeltaCommitsSinceLatestCompaction(activeTimeline);
    if (deltaCommitsInfoOption.isPresent()) {
        Pair<HoodieTimeline, HoodieInstant> deltaCommitsInfo = deltaCommitsInfoOption.get();
        HoodieTimeline deltaCommitTimeline = deltaCommitsInfo.getLeft();
        int numDeltaCommits = deltaCommitTimeline.countInstants();
        if (numDeltaCommits < maxDeltaCommits) {
            return Option.of(deltaCommitsInfo.getRight());
        } else {
            // take the oldest (numDeltaCommits - maxDeltaCommits + 1) delta commits; the last one in
            // this list is the oldest instant to retain so that exactly maxDeltaCommits delta commits remain
            List<HoodieInstant> instants = deltaCommitTimeline.getInstants().limit(numDeltaCommits - maxDeltaCommits + 1).collect(Collectors.toList());
            return Option.of(instants.get(instants.size() - 1));
        }
    }
    return Option.empty();
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Pair (org.apache.hudi.common.util.collection.Pair)
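One natural caller of getOldestInstantToRetainForCompaction is archival logic that must not remove instants the next compaction still needs. The sketch below is illustrative only: isSafeToArchive is a hypothetical helper, and it assumes HoodieTimeline.compareTimestamps with a LESSER_THAN predicate (an assumption; only the LESSER_THAN_OR_EQUALS and GREATER_THAN constants appear in the examples on this page).

public static boolean isSafeToArchive(HoodieInstant candidate, HoodieActiveTimeline activeTimeline, int maxDeltaCommits) {
    Option<HoodieInstant> oldestToRetain =
        CompactionUtils.getOldestInstantToRetainForCompaction(activeTimeline, maxDeltaCommits);
    // an instant may be archived only if it is strictly older than the oldest instant that
    // compaction scheduling still needs; with no retention bound, archiving is unconstrained here
    return !oldestToRetain.isPresent()
        || HoodieTimeline.compareTimestamps(candidate.getTimestamp(), HoodieTimeline.LESSER_THAN,
            oldestToRetain.get().getTimestamp());
}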

Example 19 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class IncrementalInputSplits, method inputSplits.

/**
 * Returns the incremental input splits.
 *
 * @param metaClient    The meta client
 * @param hadoopConf    The hadoop configuration
 * @param issuedInstant The last issued instant, only valid in streaming read
 * @return The list of incremental input splits or empty if there are no new instants
 */
public Result inputSplits(HoodieTableMetaClient metaClient, org.apache.hadoop.conf.Configuration hadoopConf, String issuedInstant) {
    metaClient.reloadActiveTimeline();
    HoodieTimeline commitTimeline = metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants();
    if (commitTimeline.empty()) {
        LOG.warn("No splits found for the table under path " + path);
        return Result.EMPTY;
    }
    List<HoodieInstant> instants = filterInstantsWithRange(commitTimeline, issuedInstant);
    // get the latest instant that satisfies condition
    final HoodieInstant instantToIssue = instants.size() == 0 ? null : instants.get(instants.size() - 1);
    final InstantRange instantRange;
    if (instantToIssue != null) {
        if (issuedInstant != null) {
            // the streaming reader may record the last issued instant; if the issued instant is present,
            // the instant range should be: (issued instant, the latest instant].
            instantRange = InstantRange.getInstance(issuedInstant, instantToIssue.getTimestamp(), InstantRange.RangeType.OPEN_CLOSE);
        } else if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()) {
            // first time consume and has a start commit
            final String startCommit = this.conf.getString(FlinkOptions.READ_START_COMMIT);
            instantRange = startCommit.equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST) ? null : InstantRange.getInstance(startCommit, instantToIssue.getTimestamp(), InstantRange.RangeType.CLOSE_CLOSE);
        } else {
            // first time consume and no start commit, consumes the latest incremental data set.
            instantRange = InstantRange.getInstance(instantToIssue.getTimestamp(), instantToIssue.getTimestamp(), InstantRange.RangeType.CLOSE_CLOSE);
        }
    } else {
        LOG.info("No new instant found for the table under path " + path + ", skip reading");
        return Result.EMPTY;
    }
    String tableName = conf.getString(FlinkOptions.TABLE_NAME);
    Set<String> writePartitions;
    final FileStatus[] fileStatuses;
    if (instantRange == null) {
        // reading from the earliest, scans the partitions and files directly.
        FileIndex fileIndex = FileIndex.instance(new org.apache.hadoop.fs.Path(path.toUri()), conf);
        if (this.requiredPartitions != null) {
            // apply partition push down
            fileIndex.setPartitionPaths(this.requiredPartitions);
        }
        writePartitions = new HashSet<>(fileIndex.getOrBuildPartitionPaths());
        if (writePartitions.size() == 0) {
            LOG.warn("No partitions found for reading in user provided path.");
            return Result.EMPTY;
        }
        fileStatuses = fileIndex.getFilesInPartitions();
    } else {
        List<HoodieCommitMetadata> activeMetadataList = instants.stream().map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, commitTimeline)).collect(Collectors.toList());
        List<HoodieCommitMetadata> archivedMetadataList = getArchivedMetadata(metaClient, instantRange, commitTimeline, tableName);
        if (archivedMetadataList.size() > 0) {
            LOG.warn("\n" + "--------------------------------------------------------------------------------\n" + "---------- caution: the reader has fall behind too much from the writer,\n" + "---------- tweak 'read.tasks' option to add parallelism of read tasks.\n" + "--------------------------------------------------------------------------------");
        }
        // IMPORTANT: the merged metadata list must be in ascending order by instant time
        List<HoodieCommitMetadata> metadataList = archivedMetadataList.size() > 0
            ? mergeList(archivedMetadataList, activeMetadataList)
            : activeMetadataList;
        writePartitions = HoodieInputFormatUtils.getWritePartitionPaths(metadataList);
        // apply partition push down
        if (this.requiredPartitions != null) {
            writePartitions = writePartitions.stream().filter(this.requiredPartitions::contains).collect(Collectors.toSet());
        }
        if (writePartitions.size() == 0) {
            LOG.warn("No partitions found for reading in user provided path.");
            return Result.EMPTY;
        }
        fileStatuses = WriteProfiles.getWritePathsOfInstants(path, hadoopConf, metadataList, metaClient.getTableType());
    }
    if (fileStatuses.length == 0) {
        LOG.warn("No files found for reading in user provided path.");
        return Result.EMPTY;
    }
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, commitTimeline, fileStatuses);
    final String endInstant = instantToIssue.getTimestamp();
    final AtomicInteger cnt = new AtomicInteger(0);
    final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
    List<MergeOnReadInputSplit> inputSplits = writePartitions.stream().map(relPartitionPath -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, endInstant).map(fileSlice -> {
        Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()));
        String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
        return new MergeOnReadInputSplit(cnt.getAndAdd(1), basePath, logPaths, endInstant, metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, instantRange);
    }).collect(Collectors.toList())).flatMap(Collection::stream).collect(Collectors.toList());
    return Result.instance(inputSplits, endInstant);
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieInputFormatUtils (org.apache.hudi.hadoop.utils.HoodieInputFormatUtils), HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline), Serializable (scala.Serializable), LoggerFactory (org.slf4j.LoggerFactory), Option (org.apache.hudi.common.util.Option), FileStatus (org.apache.hadoop.fs.FileStatus), ArrayList (java.util.ArrayList), HashSet (java.util.HashSet), BaseFile (org.apache.hudi.common.model.BaseFile), Path (org.apache.flink.core.fs.Path), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Nullable (javax.annotation.Nullable), Logger (org.slf4j.Logger), Collection (java.util.Collection), Configuration (org.apache.flink.configuration.Configuration), Set (java.util.Set), HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata), LESSER_THAN_OR_EQUALS (org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS), Collectors (java.util.stream.Collectors), HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView), Objects (java.util.Objects), WriteProfiles (org.apache.hudi.sink.partitioner.profile.WriteProfiles), List (java.util.List), GREATER_THAN_OR_EQUALS (org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS), Stream (java.util.stream.Stream), InstantRange (org.apache.hudi.common.table.log.InstantRange), MergeOnReadInputSplit (org.apache.hudi.table.format.mor.MergeOnReadInputSplit), GREATER_THAN (org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN), Collections (java.util.Collections), FlinkOptions (org.apache.hudi.configuration.FlinkOptions)
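The heart of the method above is the three-way choice of InstantRange. For readability, the same decision can be factored into a small helper; this is an illustrative refactoring, not part of IncrementalInputSplits, and resolveInstantRange together with its nullable startCommit parameter are names introduced here. Only APIs visible in the example (InstantRange.getInstance, FlinkOptions.START_COMMIT_EARLIEST) are used.

private static InstantRange resolveInstantRange(
        @Nullable String issuedInstant, HoodieInstant instantToIssue, @Nullable String startCommit) {
    if (issuedInstant != null) {
        // streaming read that has already issued an instant: (issuedInstant, latestInstant]
        return InstantRange.getInstance(issuedInstant, instantToIssue.getTimestamp(),
            InstantRange.RangeType.OPEN_CLOSE);
    }
    if (startCommit != null) {
        // first read with an explicit start commit: [startCommit, latestInstant],
        // or no range at all (full scan) when reading from the earliest commit
        return FlinkOptions.START_COMMIT_EARLIEST.equalsIgnoreCase(startCommit)
            ? null
            : InstantRange.getInstance(startCommit, instantToIssue.getTimestamp(),
                InstantRange.RangeType.CLOSE_CLOSE);
    }
    // first read without a start commit: consume only the latest instant, [latest, latest]
    return InstantRange.getInstance(instantToIssue.getTimestamp(), instantToIssue.getTimestamp(),
        InstantRange.RangeType.CLOSE_CLOSE);
}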

Example 20 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class BootstrapOperator, method loadRecords.

/**
 * Loads all the indexes of the given partition path into the backup state.
 *
 * @param partitionPath The partition path
 */
@SuppressWarnings("unchecked")
protected void loadRecords(String partitionPath) throws Exception {
    long start = System.currentTimeMillis();
    final int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
    final int maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks();
    final int taskID = getRuntimeContext().getIndexOfThisSubtask();
    HoodieTimeline commitsTimeline = this.hoodieTable.getMetaClient().getCommitsTimeline();
    if (!StringUtils.isNullOrEmpty(lastInstantTime)) {
        commitsTimeline = commitsTimeline.findInstantsAfter(lastInstantTime);
    }
    Option<HoodieInstant> latestCommitTime = commitsTimeline.filterCompletedInstants().lastInstant();
    if (latestCommitTime.isPresent()) {
        BaseFileUtils fileUtils = BaseFileUtils.getInstance(this.hoodieTable.getBaseFileFormat());
        Schema schema = new TableSchemaResolver(this.hoodieTable.getMetaClient()).getTableAvroSchema();
        List<FileSlice> fileSlices = this.hoodieTable.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp(), true).collect(toList());
        for (FileSlice fileSlice : fileSlices) {
            if (!shouldLoadFile(fileSlice.getFileId(), maxParallelism, parallelism, taskID)) {
                continue;
            }
            LOG.info("Load records from {}.", fileSlice);
            // load parquet records
            fileSlice.getBaseFile().ifPresent(baseFile -> {
                // filter out corrupted files
                if (!isValidFile(baseFile.getFileStatus())) {
                    return;
                }
                try (ClosableIterator<HoodieKey> iterator = fileUtils.getHoodieKeyIterator(this.hadoopConf, new Path(baseFile.getPath()))) {
                    iterator.forEachRemaining(hoodieKey -> {
                        output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice))));
                    });
                }
            });
            // load avro log records
            List<String> logPaths = fileSlice.getLogFiles().filter(logFile -> isValidFile(logFile.getFileStatus())).map(logFile -> logFile.getPath().toString()).collect(toList());
            HoodieMergedLogRecordScanner scanner = FormatUtils.logScanner(logPaths, schema, latestCommitTime.get().getTimestamp(), writeConfig, hadoopConf);
            try {
                for (String recordKey : scanner.getRecords().keySet()) {
                    output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(new HoodieKey(recordKey, partitionPath), fileSlice))));
                }
            } catch (Exception e) {
                throw new HoodieException(String.format("Error when loading record keys from files: %s", logPaths), e);
            } finally {
                scanner.close();
            }
        }
    }
    long cost = System.currentTimeMillis() - start;
    LOG.info("Task [{}}:{}}] finish loading the index under partition {} and sending them to downstream, time cost: {} milliseconds.", this.getClass().getSimpleName(), taskID, partitionPath, cost);
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), Path (org.apache.hadoop.fs.Path), HoodieTable (org.apache.hudi.table.HoodieTable), BaseFileUtils (org.apache.hudi.common.util.BaseFileUtils), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieException (org.apache.hudi.exception.HoodieException), LoggerFactory (org.slf4j.LoggerFactory), Option (org.apache.hudi.common.util.Option), ClosableIterator (org.apache.hudi.common.util.ClosableIterator), BootstrapAggFunction (org.apache.hudi.sink.bootstrap.aggregate.BootstrapAggFunction), CkpMetadata (org.apache.hudi.sink.meta.CkpMetadata), ListState (org.apache.flink.api.common.state.ListState), StringUtils (org.apache.hudi.common.util.StringUtils), StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), FlinkTables (org.apache.hudi.util.FlinkTables), ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor), StreamerUtil (org.apache.hudi.util.StreamerUtil), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), Types (org.apache.flink.api.common.typeinfo.Types), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), StateSnapshotContext (org.apache.flink.runtime.state.StateSnapshotContext), HoodieRecordGlobalLocation (org.apache.hudi.common.model.HoodieRecordGlobalLocation), TableSchemaResolver (org.apache.hudi.common.table.TableSchemaResolver), HoodieMergedLogRecordScanner (org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner), Schema (org.apache.avro.Schema), Logger (org.slf4j.Logger), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Iterator (java.util.Iterator), Configuration (org.apache.flink.configuration.Configuration), StreamerUtil.isValidFile (org.apache.hudi.util.StreamerUtil.isValidFile), HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord), AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator), VisibleForTesting (org.apache.flink.annotation.VisibleForTesting), TimeUnit (java.util.concurrent.TimeUnit), List (java.util.List), Collectors.toList (java.util.stream.Collectors.toList), GlobalAggregateManager (org.apache.flink.runtime.taskexecutor.GlobalAggregateManager), HoodieKey (org.apache.hudi.common.model.HoodieKey), Pattern (java.util.regex.Pattern), OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator), Collections (java.util.Collections), FSUtils (org.apache.hudi.common.fs.FSUtils), FlinkOptions (org.apache.hudi.configuration.FlinkOptions), FormatUtils (org.apache.hudi.table.format.FormatUtils), KeyGroupRangeAssignment (org.apache.flink.runtime.state.KeyGroupRangeAssignment), StateInitializationContext (org.apache.flink.runtime.state.StateInitializationContext)
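The base-file branch of loadRecords can be isolated into a small sketch that collects the HoodieKeys of a single parquet base file. Only calls visible in the example are used; the helper name collectKeysFromBaseFile is hypothetical, and the HoodieFileFormat parameter stands in for the value returned by hoodieTable.getBaseFileFormat() (an assumption about its type).

private static List<HoodieKey> collectKeysFromBaseFile(
        org.apache.hadoop.conf.Configuration hadoopConf, HoodieBaseFile baseFile, HoodieFileFormat format) {
    List<HoodieKey> keys = new ArrayList<>();
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(format);
    // ClosableIterator is AutoCloseable, so try-with-resources releases the underlying file reader
    try (ClosableIterator<HoodieKey> iterator =
             fileUtils.getHoodieKeyIterator(hadoopConf, new Path(baseFile.getPath()))) {
        iterator.forEachRemaining(keys::add);
    }
    return keys;
}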

Aggregations

Class (package): usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 118
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 74
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 39
List (java.util.List): 36
IOException (java.io.IOException): 34
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 34
ArrayList (java.util.ArrayList): 32
Option (org.apache.hudi.common.util.Option): 30
Collectors (java.util.stream.Collectors): 29
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 29
HoodieException (org.apache.hudi.exception.HoodieException): 26
Map (java.util.Map): 25
FileStatus (org.apache.hadoop.fs.FileStatus): 24
Path (org.apache.hadoop.fs.Path): 24
Set (java.util.Set): 22
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 22
FileSlice (org.apache.hudi.common.model.FileSlice): 21
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 21
Pair (org.apache.hudi.common.util.collection.Pair): 21
FSUtils (org.apache.hudi.common.fs.FSUtils): 20