
Example 11 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class LogFileDTO, the method toHoodieLogFile:

public static HoodieLogFile toHoodieLogFile(LogFileDTO dto) {
    FileStatus status = FileStatusDTO.toFileStatus(dto.fileStatus);
    // Prefer the full FileStatus when the DTO carries one; otherwise rebuild from the raw path string.
    HoodieLogFile logFile = (status == null) ? new HoodieLogFile(dto.pathStr) : new HoodieLogFile(status);
    logFile.setFileLen(dto.fileLen);
    return logFile;
}
Also used: FileStatus (org.apache.hadoop.fs.FileStatus), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)
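
A minimal round-trip sketch of this conversion, assuming LogFileDTO.fromHoodieLogFile is the serializing counterpart of toHoodieLogFile (the sample path and the DTO package location are assumptions, not taken from the snippet above):

import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.timeline.dto.LogFileDTO;

public class LogFileDtoRoundTrip {
    public static void main(String[] args) {
        // Hypothetical log file path; the name layout here is illustrative only.
        HoodieLogFile original = new HoodieLogFile("/tmp/hudi/2016/03/15/.f1_100.log.1_1-0-1");
        original.setFileLen(1024L);
        // fromHoodieLogFile is assumed to be the inverse of the toHoodieLogFile shown above.
        LogFileDTO dto = LogFileDTO.fromHoodieLogFile(original);
        HoodieLogFile restored = LogFileDTO.toHoodieLogFile(dto);
        System.out.println(restored.getPath() + " len=" + restored.getFileLen());
    }
}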

Example 12 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class HoodieAppendHandle, the method init:

private void init(HoodieRecord record) {
    if (doInit) {
        // extract some information from the first record
        SliceView rtView = hoodieTable.getSliceView();
        Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
        // Set the base commit time as the current instantTime for new inserts into log files
        String baseInstantTime;
        String baseFile = "";
        List<String> logFiles = new ArrayList<>();
        if (fileSlice.isPresent()) {
            baseInstantTime = fileSlice.get().getBaseInstantTime();
            baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse("");
            logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
        } else {
            baseInstantTime = instantTime;
            // This means there is no base data file, start appending to a new log file
            fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
            LOG.info("New AppendHandle for partition :" + partitionPath);
        }
        // Prepare the first write status
        writeStatus.setStat(new HoodieDeltaWriteStat());
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        averageRecordSize = sizeEstimator.sizeEstimate(record);
        HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
        deltaWriteStat.setPrevCommit(baseInstantTime);
        deltaWriteStat.setPartitionPath(partitionPath);
        deltaWriteStat.setFileId(fileId);
        deltaWriteStat.setBaseFile(baseFile);
        deltaWriteStat.setLogFiles(logFiles);
        try {
            // Save hoodie partition meta in the partition path
            HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
            partitionMetadata.trySave(getPartitionId());
            // Since the actual log file written to can be different based on when rollover happens, we use the
            // base file to denote some log appends happened on a slice. writeToken will still fence concurrent
            // writers.
            // https://issues.apache.org/jira/browse/HUDI-1517
            createMarkerFile(partitionPath, FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()));
            this.writer = createLogWriter(fileSlice, baseInstantTime);
        } catch (Exception e) {
            LOG.error("Error in update task at commit " + instantTime, e);
            writeStatus.setGlobalError(e);
            throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + "/" + partitionPath, e);
        }
        doInit = false;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileSlice (org.apache.hudi.common.model.FileSlice), ArrayList (java.util.ArrayList), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata), HoodieException (org.apache.hudi.exception.HoodieException), HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException), HoodieAppendException (org.apache.hudi.exception.HoodieAppendException), IOException (java.io.IOException), SliceView (org.apache.hudi.common.table.view.TableFileSystemView.SliceView), BaseFile (org.apache.hudi.common.model.BaseFile), HoodieDeltaWriteStat (org.apache.hudi.common.model.HoodieDeltaWriteStat), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)
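
The branch at the top of init is the key decision: reuse the latest file slice's base instant when a slice exists, otherwise start appending to a brand-new log file at the current instant. A standalone sketch of that fallback using Hudi's Option type (the helper name is illustrative, not Hudi API):

import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.util.Option;

public class BaseInstantFallback {
    // Illustrative helper: pick the slice's base instant if a slice is present,
    // else fall back to the current instant, mirroring the if/else in init above.
    static String resolveBaseInstant(Option<FileSlice> latestSlice, String currentInstant) {
        return latestSlice.map(FileSlice::getBaseInstantTime).orElse(currentInstant);
    }
}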

Example 13 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class HoodieFlinkWriteableTestTable, the method appendRecordsToLogFile:

private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
    String partitionPath = groupedRecords.get(0).getPartitionPath();
    HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
    try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder().onParentPath(new Path(basePath, partitionPath)).withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(location.getFileId()).overBaseCommit(location.getInstantTime()).withFs(fs).build()) {
        Map<HeaderMetadataType, String> header = new HashMap<>();
        header.put(HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
        header.put(HeaderMetadataType.SCHEMA, schema.toString());
        logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
            try {
                GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
                HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
                return (IndexedRecord) val;
            } catch (IOException e) {
                LOG.warn("Failed to convert record " + r.toString(), e);
                return null;
            }
        }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
        return Pair.of(partitionPath, logWriter.getLogFile());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieTable (org.apache.hudi.table.HoodieTable), Arrays (java.util.Arrays), HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils), HeaderMetadataType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Logger (org.apache.log4j.Logger), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Map (java.util.Map), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), IndexedRecord (org.apache.avro.generic.IndexedRecord), HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat), BloomFilter (org.apache.hudi.common.bloom.BloomFilter), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), GenericRecord (org.apache.avro.generic.GenericRecord), Schema (org.apache.avro.Schema), BloomFilterFactory (org.apache.hudi.common.bloom.BloomFilterFactory), BloomFilterTypeCode (org.apache.hudi.common.bloom.BloomFilterTypeCode), IOException (java.io.IOException), Collectors (java.util.stream.Collectors), HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload), List (java.util.List), HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation), HoodieAvroDataBlock (org.apache.hudi.common.table.log.block.HoodieAvroDataBlock), LogManager (org.apache.log4j.LogManager), Pair (org.apache.hudi.common.util.collection.Pair)
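
To verify what was appended, the log file returned by the writer can be scanned back block by block. A hedged sketch, assuming the HoodieLogFormat.newReader factory is available in this Hudi version:

import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;

public class LogFileScan {
    // Iterate the blocks of a delta log file, e.g. the one returned by
    // appendRecordsToLogFile above, and print each block's type.
    static void printBlockTypes(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws Exception {
        try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, logFile, readerSchema)) {
            while (reader.hasNext()) {
                HoodieLogBlock block = reader.next();
                System.out.println(block.getBlockType());
            }
        }
    }
}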

Example 14 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class TestHoodieTableFileSystemView, the method testViewForFileSlicesWithNoBaseFile:

protected void testViewForFileSlicesWithNoBaseFile(int expNumTotalFileSlices, int expNumTotalDataFiles, String partitionPath) throws Exception {
    Paths.get(basePath, partitionPath).toFile().mkdirs();
    String fileId = UUID.randomUUID().toString();
    String instantTime1 = "1";
    String deltaInstantTime1 = "2";
    String deltaInstantTime2 = "3";
    String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
    String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
    Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
    Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
    HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
    HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
    refreshFsView();
    List<HoodieBaseFile> dataFiles = roView.getLatestBaseFiles().collect(Collectors.toList());
    assertTrue(dataFiles.isEmpty(), "No data file expected");
    List<FileSlice> fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
    assertEquals(1, fileSliceList.size());
    FileSlice fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
    assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must be present");
    assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
    List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
    assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");
    // Check Merged File Slices API
    fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime2).collect(Collectors.toList());
    assertEquals(1, fileSliceList.size());
    fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
    assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must be present");
    assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
    logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
    assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");
    // Check UnCompacted File Slices API
    fileSliceList = rtView.getLatestUnCompactedFileSlices(partitionPath).collect(Collectors.toList());
    assertEquals(1, fileSliceList.size());
    fileSlice = fileSliceList.get(0);
    assertEquals(fileId, fileSlice.getFileId(), "File-Id must be set correctly");
    assertFalse(fileSlice.getBaseFile().isPresent(), "Data file for base instant must be present");
    assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant for file-group set correctly");
    logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
    assertEquals(2, logFiles.size(), "Correct number of log-files shows up in file-slice");
    assertEquals(fileName2, logFiles.get(0).getFileName(), "Log File Order check");
    assertEquals(fileName1, logFiles.get(1).getFileName(), "Log File Order check");
    assertEquals(expNumTotalFileSlices, rtView.getAllFileSlices(partitionPath).count(), "Total number of file-slices in view matches expected");
    assertEquals(expNumTotalDataFiles, roView.getAllBaseFiles(partitionPath).count(), "Total number of data-files in view matches expected");
    assertEquals(1, fsView.getAllFileGroups(partitionPath).count(), "Total number of file-groups in view matches expected");
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)
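
The log-file ordering assertions above (fileName2 before fileName1) rely on the version number encoded in the file name. A small sketch of how those names are produced; the exact printed layout is version-dependent, so treat the expected shape in the comment as an assumption:

import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieLogFile;

public class LogFileNaming {
    public static void main(String[] args) {
        // Expected to yield hidden names of roughly the form
        // .<fileId>_<baseInstant>.log.<version>_<writeToken>; the view lists the
        // version-1 file (fileName2 above) ahead of the version-0 file (fileName1).
        System.out.println(FSUtils.makeLogFileName("some-file-id", HoodieLogFile.DELTA_EXTENSION, "1", 0, "1-0-1"));
        System.out.println(FSUtils.makeLogFileName("some-file-id", HoodieLogFile.DELTA_EXTENSION, "1", 1, "1-0-1"));
    }
}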

Example 15 with HoodieLogFile

Use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

From the class TestHoodieRealtimeFileSplit, the method setUp:

@BeforeEach
public void setUp(@TempDir java.nio.file.Path tempDir) throws Exception {
    basePath = tempDir.toAbsolutePath().toString();
    deltaLogFiles = Collections.singletonList(new HoodieLogFile(new Path(basePath + "/1.log"), 0L));
    deltaLogPaths = Collections.singletonList(basePath + "/1.log");
    fileSplitName = basePath + "/test.file";
    baseFileSplit = new FileSplit(new Path(fileSplitName), 0, 100, new String[] {});
    maxCommitTime = "10001";
    split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogFiles, maxCommitTime, false, Option.empty());
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), FileSplit (org.apache.hadoop.mapred.FileSplit), BeforeEach (org.junit.jupiter.api.BeforeEach)
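
A hedged follow-on in the same test class: with the fields from setUp above, a test method might check that the split retains what it was built with. The getter names follow the RealtimeSplit interface and should be verified against the Hudi version in use:

@Test
public void testSplitRetainsConstructorArguments() {
    // Assumed RealtimeSplit getters; deltaLogPaths, maxCommitTime, basePath,
    // and split are the fields initialized in setUp above.
    assertEquals(deltaLogPaths, split.getDeltaLogPaths());
    assertEquals(maxCommitTime, split.getMaxCommitTime());
    assertEquals(basePath, split.getBasePath());
}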

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 77
IOException (java.io.IOException): 48
List (java.util.List): 46
Path (org.apache.hadoop.fs.Path): 45
Map (java.util.Map): 42
Collectors (java.util.stream.Collectors): 42
ArrayList (java.util.ArrayList): 38
Option (org.apache.hudi.common.util.Option): 37
FileSlice (org.apache.hudi.common.model.FileSlice): 34
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29
FileStatus (org.apache.hadoop.fs.FileStatus): 28
HashMap (java.util.HashMap): 26
FSUtils (org.apache.hudi.common.fs.FSUtils): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23
Set (java.util.Set): 22
LogManager (org.apache.log4j.LogManager): 22
Logger (org.apache.log4j.Logger): 22
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 21