Search in sources :

Example 1 with RealtimeFileStatus

use of org.apache.hudi.hadoop.RealtimeFileStatus in project hudi by apache.

the class TestHoodieRealtimeRecordReader method testLogOnlyReader.

/**
 * Writes a single data block into a log file for a file slice that has no base file of its own,
 * wraps that log file in a {@link RealtimeFileStatus} / {@link HoodieRealtimeFileSplit}, and reads it
 * back through {@link HoodieRealtimeRecordReader}.
 *
 * <p>The test asserts that the written block is non-empty and that the first column of every record
 * returned by the reader equals the instant time of the delta commit that wrote the log block.
 *
 * @throws Exception any checked failure is re-thrown wrapped in a {@link HoodieException}
 */
@Test
public void testLogOnlyReader() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
    HoodieTestUtils.init(hadoopConf, basePath.toString(), HoodieTableType.MERGE_ON_READ);
    String baseInstant = "100";
    File partitionDir = InputFormatTestUtil.prepareNonPartitionedParquetTable(basePath, schema, 1, 100, baseInstant, HoodieTableType.MERGE_ON_READ);
    FileCreateUtils.createDeltaCommit(basePath.toString(), baseInstant);
    // Add the paths
    FileInputFormat.setInputPaths(baseJobConf, partitionDir.getPath());
    FileSlice fileSlice = new FileSlice("default", baseInstant, "fileid1");
    try {
        // update files or generate new log file
        int logVersion = 1;
        int baseInstantTs = Integer.parseInt(baseInstant);
        String instantTime = String.valueOf(baseInstantTs + logVersion);
        HoodieLogFormat.Writer writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1", baseInstant, instantTime, 100, 0, logVersion);
        long size;
        try {
            size = writer.getCurrentSize();
        } finally {
            // always release the writer, even if querying its size fails
            writer.close();
        }
        assertTrue(size > 0, "block - size should be > 0");
        HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(Collections.emptyList(), Collections.emptyMap(), Option.empty(), WriteOperationType.UPSERT, schema.toString(), HoodieTimeline.COMMIT_ACTION);
        FileCreateUtils.createDeltaCommit(basePath.toString(), instantTime, commitMetadata);
        // create a split with new log file(s)
        fileSlice.addLogFile(new HoodieLogFile(writer.getLogFile().getPath(), size));
        RealtimeFileStatus realtimeFileStatus = new RealtimeFileStatus(new FileStatus(writer.getLogFile().getFileSize(), false, 1, 1, 0, writer.getLogFile().getPath()), basePath.toString(), fileSlice.getLogFiles().collect(Collectors.toList()), false, Option.empty());
        realtimeFileStatus.setMaxCommitTime(instantTime);
        HoodieRealtimePath realtimePath = (HoodieRealtimePath) realtimeFileStatus.getPath();
        HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(new FileSplit(realtimePath, 0, 0, new String[] { "" }), realtimePath);
        JobConf newJobConf = new JobConf(baseJobConf);
        List<Schema.Field> fields = schema.getFields();
        setHiveColumnNameProps(fields, newJobConf, false);
        // create a dummy RecordReader to be used by HoodieRealtimeRecordReader
        RecordReader<NullWritable, ArrayWritable> reader = new HoodieRealtimeRecordReader(split, newJobConf, new HoodieEmptyRecordReader(split, newJobConf));
        try {
            // use reader to read log file.
            NullWritable key = reader.createKey();
            ArrayWritable value = reader.createValue();
            while (reader.next(key, value)) {
                Writable[] values = value.get();
                assertEquals(instantTime, values[0].toString());
                // fresh key/value holders for the next record
                key = reader.createKey();
                value = reader.createValue();
            }
        } finally {
            // close the reader even when an assertion or read fails, so the test does not leak it
            reader.close();
        }
    } catch (Exception e) {
        throw new HoodieException(e.getMessage(), e);
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) FileSlice(org.apache.hudi.common.model.FileSlice) Schema(org.apache.avro.Schema) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) HoodieException(org.apache.hudi.exception.HoodieException) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Field(org.apache.avro.Schema.Field) Writer(org.apache.hudi.common.table.log.HoodieLogFormat.Writer) ArrayWritable(org.apache.hadoop.io.ArrayWritable) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) JobConf(org.apache.hadoop.mapred.JobConf) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) NullWritable(org.apache.hadoop.io.NullWritable) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 2 with RealtimeFileStatus

use of org.apache.hudi.hadoop.RealtimeFileStatus in project hudi by apache.

the class HoodieMergeOnReadTableInputFormat method listStatusForIncrementalMode.

/**
 * Lists the file statuses to read for an incremental query.
 * Keeps the logic of mor_incr_view the same as the spark datasource.
 * Step1: Get list of commits to be fetched based on start commit and max commits(for snapshot max commits is -1).
 * Step2: Get the list of affected file statuses for these commits.
 * Step3: Construct HoodieTableFileSystemView based on those affected file status.
 *        a. Filter affected partitions based on inputPaths.
 *        b. Get list of fileGroups based on affected partitions by fsView.getAllFileGroups.
 * Step4: Set input paths based on the filtered affected partition paths. This narrows the original input paths
 *        passed to this method: some partitions did not have commits as part of the trimmed-down list of commits,
 *        hence we need this step.
 * Step5: Find candidate fileStatus, since when we get baseFileStatus from HoodieTableFileSystemView,
 *        the BaseFileStatus will be missing file size information.
 *        We should use candidate fileStatus to update the size information for BaseFileStatus.
 * Step6: For every file group from step3(b)
 *        Get 1st available base file from all file slices. then we use candidate file status to update the baseFileStatus,
 *        and construct RealTimeFileStatus and add it to result along with log files.
 *        If file group just has log files, construct RealTimeFileStatus and add it to result.
 * TODO: unify the incremental view code between hive/spark-sql and spark datasource
 *
 * @param job                  job configuration; its input paths are overwritten in step4
 * @param tableMetaClient      meta client of the table being queried
 * @param inputPaths           input paths used to filter the affected partitions
 * @param incrementalTableName table name used to resolve the incremental-query timeline
 * @return the collected file statuses; empty when there is no filtered commits timeline or no affected partition
 * @throws IOException if creating the job context or listing files fails
 * @throws HoodieException if the commit metadata of an instant cannot be read
 */
@Override
protected List<FileStatus> listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths, String incrementalTableName) throws IOException {
    List<FileStatus> result = new ArrayList<>();
    Job jobContext = Job.getInstance(job);
    // step1
    Option<HoodieTimeline> timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient);
    if (!timeline.isPresent()) {
        return result;
    }
    HoodieTimeline commitsTimelineToReturn = HoodieInputFormatUtils.getHoodieTimelineForIncrementalQuery(jobContext, incrementalTableName, timeline.get());
    // the collected list is never null, so no presence check is needed
    List<HoodieInstant> commitsToCheck = commitsTimelineToReturn.getInstants().collect(Collectors.toList());
    // step2
    commitsToCheck.sort(HoodieInstant::compareTo);
    List<HoodieCommitMetadata> metadataList = commitsToCheck.stream().map(instant -> {
        try {
            return HoodieInputFormatUtils.getCommitMetadata(instant, commitsTimelineToReturn);
        } catch (IOException e) {
            // keep the original IOException as the cause so the root failure is not lost
            throw new HoodieException(String.format("cannot get metadata for instant: %s", instant), e);
        }
    }).collect(Collectors.toList());
    Path basePath = new Path(tableMetaClient.getBasePath());
    FileStatus[] affectedFileStatus = HoodieInputFormatUtils.listAffectedFilesForCommits(job, basePath, metadataList);
    // step3
    // build fileGroup from fsView
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(tableMetaClient, commitsTimelineToReturn, affectedFileStatus);
    // filter affectedPartition by inputPaths; an empty partition path stands for the table base path itself
    List<String> affectedPartition = HoodieInputFormatUtils.getWritePartitionPaths(metadataList).stream().filter(k -> k.isEmpty() ? inputPaths.contains(basePath) : inputPaths.contains(new Path(basePath, k))).collect(Collectors.toList());
    if (affectedPartition.isEmpty()) {
        return result;
    }
    List<HoodieFileGroup> fileGroups = affectedPartition.stream().flatMap(partitionPath -> fsView.getAllFileGroups(partitionPath)).collect(Collectors.toList());
    // step4
    setInputPaths(job, affectedPartition.stream().map(p -> p.isEmpty() ? basePath.toString() : new Path(basePath, p).toString()).collect(Collectors.joining(",")));
    // step5
    // find all file status in partitionPaths, keyed by their full path string.
    FileStatus[] fileStatuses = doListStatus(job);
    Map<String, FileStatus> candidateFileStatus = new HashMap<>();
    for (FileStatus fileStatus : fileStatuses) {
        candidateFileStatus.put(fileStatus.getPath().toString(), fileStatus);
    }
    Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt = getHoodieVirtualKeyInfo(tableMetaClient);
    String maxCommitTime = fsView.getLastInstant().get().getTimestamp();
    // step6
    result.addAll(collectAllIncrementalFiles(fileGroups, maxCommitTime, basePath.toString(), candidateFileStatus, virtualKeyInfoOpt));
    return result;
}
Also used : HoodieInputFormatUtils(org.apache.hudi.hadoop.utils.HoodieInputFormatUtils) Arrays(java.util.Arrays) FileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile) FileSystem(org.apache.hadoop.fs.FileSystem) HiveHoodieTableFileIndex(org.apache.hudi.hadoop.HiveHoodieTableFileIndex) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) ValidationUtils.checkState(org.apache.hudi.common.util.ValidationUtils.checkState) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo) HoodieCopyOnWriteTableInputFormat(org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Configurable(org.apache.hadoop.conf.Configurable) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) JobConf(org.apache.hadoop.mapred.JobConf) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) 
HoodieRealtimeInputFormatUtils(org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapred.InputSplit) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FileStatus(org.apache.hadoop.fs.FileStatus) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) HashMap(java.util.HashMap) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) ArrayList(java.util.ArrayList) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup)

Example 3 with RealtimeFileStatus

use of org.apache.hudi.hadoop.RealtimeFileStatus in project hudi by apache.

the class HoodieMergeOnReadTableInputFormat method createRealtimeFileStatusUnchecked.

/**
 * Builds a {@link RealtimeFileStatus} for a file-slice whose base file is present.
 *
 * <p>The log files are sorted with {@link HoodieLogFile#getLogFileComparator()} before being attached.
 * When a latest completed instant is supplied, it must be completed and its timestamp becomes the
 * max commit time of the result. When the base file status is one of the bootstrap-aware status
 * types, it is also registered as the bootstrap file status.
 *
 * @throws HoodieIOException if an {@link IOException} is raised while building the status
 */
private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieBaseFile baseFile, Stream<HoodieLogFile> logFiles, String basePath, Option<HoodieInstant> latestCompletedInstantOpt, Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt) {
    FileStatus status = getFileStatusUnchecked(baseFile);
    List<HoodieLogFile> orderedLogs = logFiles
        .sorted(HoodieLogFile.getLogFileComparator())
        .collect(Collectors.toList());
    try {
        RealtimeFileStatus realtimeStatus = new RealtimeFileStatus(status, basePath, orderedLogs, false, virtualKeyInfoOpt);

        if (latestCompletedInstantOpt.isPresent()) {
            HoodieInstant completedInstant = latestCompletedInstantOpt.get();
            checkState(completedInstant.isCompleted());
            realtimeStatus.setMaxCommitTime(completedInstant.getTimestamp());
        }

        boolean bootstrapBacked = status instanceof LocatedFileStatusWithBootstrapBaseFile
            || status instanceof FileStatusWithBootstrapBaseFile;
        if (bootstrapBacked) {
            realtimeStatus.setBootStrapFileStatus(status);
        }

        return realtimeStatus;
    } catch (IOException ioe) {
        throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), ioe);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) FileStatus(org.apache.hadoop.fs.FileStatus) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 4 with RealtimeFileStatus

use of org.apache.hudi.hadoop.RealtimeFileStatus in project hudi by apache.

the class HoodieMergeOnReadTableInputFormat method collectAllIncrementalFiles.

/**
 * Collects one {@link RealtimeFileStatus} per file group for an incremental read.
 *
 * <p>For a file group with at least one file slice carrying a base file, the first such slice's base
 * file is looked up in {@code candidateFileStatus} by its path URI string, and the status found there
 * is combined with the log files of the latest file slice. For a file group with no base file at all,
 * the log files of the latest file slice (if any) are used, with the first log file's status as the
 * carrier status. Every produced status gets {@code maxCommitTime} as its max commit time.
 *
 * @throws HoodieException if a base file has no entry in {@code candidateFileStatus}, or if an
 *                         {@link IOException} is raised while building a status
 */
private static List<FileStatus> collectAllIncrementalFiles(List<HoodieFileGroup> fileGroups, String maxCommitTime, String basePath, Map<String, FileStatus> candidateFileStatus, Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt) {
    List<FileStatus> collected = new ArrayList<>();
    for (HoodieFileGroup group : fileGroups) {
        try {
            List<FileSlice> slicesWithBase = group.getAllFileSlices()
                .filter(s -> s.getBaseFile().isPresent())
                .collect(Collectors.toList());

            if (!slicesWithBase.isEmpty()) {
                FileStatus viewStatus = HoodieInputFormatUtils.getFileStatus(slicesWithBase.get(0).getBaseFile().get());
                String lookupKey = viewStatus.getPath().toUri().toString();
                if (!candidateFileStatus.containsKey(lookupKey)) {
                    throw new HoodieException("Error obtaining fileStatus for file: " + lookupKey);
                }
                List<HoodieLogFile> latestLogs = group.getLatestFileSlice().get().getLogFiles().collect(Collectors.toList());
                // The status derived from the base file is not used directly: per the original note it
                // lacks file size information, so the listed candidate status is passed instead.
                RealtimeFileStatus rtStatus = new RealtimeFileStatus(candidateFileStatus.get(lookupKey), basePath, latestLogs, true, virtualKeyInfoOpt);
                rtStatus.setMaxCommitTime(maxCommitTime);
                boolean bootstrapBacked = viewStatus instanceof LocatedFileStatusWithBootstrapBaseFile
                    || viewStatus instanceof FileStatusWithBootstrapBaseFile;
                if (bootstrapBacked) {
                    rtStatus.setBootStrapFileStatus(viewStatus);
                }
                collected.add(rtStatus);
            } else if (group.getLatestFileSlice().isPresent()) {
                // file group which has only logs.
                List<FileStatus> logStatuses = group.getLatestFileSlice().get().getLogFiles()
                    .map(HoodieLogFile::getFileStatus)
                    .collect(Collectors.toList());
                if (!logStatuses.isEmpty()) {
                    List<HoodieLogFile> rebuiltLogs = logStatuses.stream()
                        .map(s -> new HoodieLogFile(s.getPath(), s.getLen()))
                        .collect(Collectors.toList());
                    RealtimeFileStatus rtStatus = new RealtimeFileStatus(logStatuses.get(0), basePath, rebuiltLogs, true, virtualKeyInfoOpt);
                    rtStatus.setMaxCommitTime(maxCommitTime);
                    collected.add(rtStatus);
                }
            }
        } catch (IOException ioe) {
            throw new HoodieException("Error obtaining data file/log file grouping ", ioe);
        }
    }
    return collected;
}
Also used : HoodieInputFormatUtils(org.apache.hudi.hadoop.utils.HoodieInputFormatUtils) Arrays(java.util.Arrays) FileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile) FileSystem(org.apache.hadoop.fs.FileSystem) HiveHoodieTableFileIndex(org.apache.hudi.hadoop.HiveHoodieTableFileIndex) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) ValidationUtils.checkState(org.apache.hudi.common.util.ValidationUtils.checkState) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) SplitLocationInfo(org.apache.hadoop.mapred.SplitLocationInfo) HoodieCopyOnWriteTableInputFormat(org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Configurable(org.apache.hadoop.conf.Configurable) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) BootstrapBaseFileSplit(org.apache.hudi.hadoop.BootstrapBaseFileSplit) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) JobConf(org.apache.hadoop.mapred.JobConf) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) 
HoodieRealtimeInputFormatUtils(org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapred.InputSplit) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FileStatus(org.apache.hadoop.fs.FileStatus) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile) LocatedFileStatusWithBootstrapBaseFile(org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)

Example 5 with RealtimeFileStatus

use of org.apache.hudi.hadoop.RealtimeFileStatus in project hudi by apache.

the class HoodieMergeOnReadTableInputFormat method createRealtimeFileStatusUnchecked.

/**
 * Builds a {@link RealtimeFileStatus} for a file-slice whose base file is NOT present.
 *
 * <p>The status of {@code latestLogFile} serves as the underlying file status, and the log files are
 * attached after sorting with {@link HoodieLogFile#getLogFileComparator()}. When a latest completed
 * instant is supplied, it must be completed and its timestamp becomes the max commit time of the result.
 *
 * @throws HoodieIOException if an {@link IOException} is raised while building the status
 */
private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieLogFile latestLogFile, Stream<HoodieLogFile> logFiles, String basePath, Option<HoodieInstant> latestCompletedInstantOpt, Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt) {
    List<HoodieLogFile> orderedLogs = logFiles
        .sorted(HoodieLogFile.getLogFileComparator())
        .collect(Collectors.toList());
    try {
        RealtimeFileStatus realtimeStatus = new RealtimeFileStatus(latestLogFile.getFileStatus(), basePath, orderedLogs, false, virtualKeyInfoOpt);

        if (latestCompletedInstantOpt.isPresent()) {
            HoodieInstant completedInstant = latestCompletedInstantOpt.get();
            checkState(completedInstant.isCompleted());
            realtimeStatus.setMaxCommitTime(completedInstant.getTimestamp());
        }

        return realtimeStatus;
    } catch (IOException ioe) {
        throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), ioe);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RealtimeFileStatus(org.apache.hudi.hadoop.RealtimeFileStatus) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Aggregations

IOException (java.io.IOException)5 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)5 RealtimeFileStatus (org.apache.hudi.hadoop.RealtimeFileStatus)5 FileStatus (org.apache.hadoop.fs.FileStatus)4 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)4 HoodieIOException (org.apache.hudi.exception.HoodieIOException)4 FileSplit (org.apache.hadoop.mapred.FileSplit)3 JobConf (org.apache.hadoop.mapred.JobConf)3 FileSlice (org.apache.hudi.common.model.FileSlice)3 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)3 HoodieException (org.apache.hudi.exception.HoodieException)3 FileStatusWithBootstrapBaseFile (org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile)3 LocatedFileStatusWithBootstrapBaseFile (org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile)3 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Collectors (java.util.stream.Collectors)2 Stream (java.util.stream.Stream)2