
Example 31 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

the class MarkerBasedRollbackStrategy method getRollbackRequestForAppend.

protected HoodieRollbackRequest getRollbackRequestForAppend(String markerFilePath) throws IOException {
    Path baseFilePathForAppend = new Path(basePath, markerFilePath);
    String fileId = FSUtils.getFileIdFromFilePath(baseFilePathForAppend);
    String baseCommitTime = FSUtils.getCommitTime(baseFilePathForAppend.getName());
    String relativePartitionPath = FSUtils.getRelativePartitionPath(new Path(basePath), baseFilePathForAppend.getParent());
    Path partitionPath = FSUtils.getPartitionPath(config.getBasePath(), relativePartitionPath);
    // NOTE: Since we're rolling back an incomplete delta commit, it could only have appended its
    // block to the latest log-file
    // TODO(HUDI-1517) use provided marker-file's path instead
    Option<HoodieLogFile> latestLogFileOption = FSUtils.getLatestLogFile(table.getMetaClient().getFs(), partitionPath, fileId, HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime);
    Map<String, Long> logFilesWithBlocksToRollback = new HashMap<>();
    if (latestLogFileOption.isPresent()) {
        HoodieLogFile latestLogFile = latestLogFileOption.get();
        // NOTE: Markers don't carry information about the cumulative size of the appended blocks,
        // therefore we simply stub this value.
        logFilesWithBlocksToRollback = Collections.singletonMap(latestLogFile.getFileStatus().getPath().toString(), -1L);
    }
    return new HoodieRollbackRequest(relativePartitionPath, fileId, baseCommitTime, Collections.emptyList(), logFilesWithBlocksToRollback);
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieRollbackRequest(org.apache.hudi.avro.model.HoodieRollbackRequest)
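
The rollback logic above keys everything off the log-file naming convention, which HoodieLogFile knows how to parse. As a minimal, self-contained sketch (the path below is a hypothetical name following the usual ".<fileId>_<baseCommitTime>.log.<version>_<writeToken>" pattern, not a file taken from the example), the same accessors used elsewhere on this page can be exercised directly:

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieLogFile;

public class LogFileNameProbe {
    public static void main(String[] args) {
        // Hypothetical log-file path; only the file name matters for parsing.
        Path logPath = new Path("/tmp/hudi/2020/01/01/.abc-123_20220101010101.log.1_1-0-1");
        HoodieLogFile logFile = new HoodieLogFile(logPath);
        System.out.println(logFile.getFileId());          // abc-123
        System.out.println(logFile.getBaseCommitTime());  // 20220101010101
        System.out.println(logFile.getLogVersion());      // 1
    }
}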

Example 32 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

the class TestHoodieBackedMetadata method verifyMetadataRawRecords.

/**
 * Verify the metadata table's on-disk raw records. When meta-field population is enabled,
 * these records should carry the additional meta fields in the payload. When key deduplication
 * is enabled, the key stored in the payload of these on-disk records should be an empty string.
 *
 * @param table            - Hoodie table instance
 * @param logFiles         - Metadata table log files to be verified
 * @param enableMetaFields - Enable meta fields for records
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles, boolean enableMetaFields) throws IOException {
    for (HoodieLogFile logFile : logFiles) {
        FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
        MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
        if (writerSchemaMsg == null) {
            // no data block (and hence no schema) in this log file; skip it
            continue;
        }
        Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
        HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
        while (logFileReader.hasNext()) {
            HoodieLogBlock logBlock = logFileReader.next();
            if (logBlock instanceof HoodieDataBlock) {
                try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
                    recordItr.forEachRemaining(indexRecord -> {
                        final GenericRecord record = (GenericRecord) indexRecord;
                        if (enableMetaFields) {
                            // Metadata table records should have meta fields!
                            assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
                            assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
                        } else {
                            // Metadata table records should not have meta fields!
                            assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
                            assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
                        }
                        final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
                        assertFalse(key.isEmpty());
                        if (enableMetaFields) {
                            assertEquals(key, String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)));
                        }
                    });
                }
            }
        }
    }
}
Also used : AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) GenericRecord(org.apache.avro.generic.GenericRecord) MessageType(org.apache.parquet.schema.MessageType)
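
For context, a hedged sketch of how a helper like this might be driven from the same test class (names such as hadoopConf, metadataTableBasePath and the "files" partition path are assumptions; the file-system-view calls mirror the ones in Example 34 below):

// Sketch only: assumes the test class exposes hadoopConf, metadataTableBasePath and table.
HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf)
        .setBasePath(metadataTableBasePath)
        .build();
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(
        metadataMetaClient, metadataMetaClient.getCommitsAndCompactionTimeline());
// "files" is assumed here to be the metadata-table partition holding file listings.
List<HoodieLogFile> logFiles = fsView.getLatestFileSlices("files")
        .flatMap(FileSlice::getLogFiles)
        .collect(Collectors.toList());
verifyMetadataRawRecords(table, logFiles, true);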

Example 33 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

the class TestHoodieBackedTableMetadata method verifyMetadataRawRecords.

/**
 * Verify the metadata table's on-disk raw records. When meta-field population is enabled,
 * these records should carry the additional meta fields in the payload. When key deduplication
 * is enabled, the key stored in the payload of these on-disk records should be an empty string.
 *
 * @param table - Hoodie table instance
 * @param logFiles - Metadata table log files to be verified
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles) throws IOException {
    for (HoodieLogFile logFile : logFiles) {
        FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
        MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
        if (writerSchemaMsg == null) {
            // no data block (and hence no schema) in this log file; skip it
            continue;
        }
        Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
        HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
        while (logFileReader.hasNext()) {
            HoodieLogBlock logBlock = logFileReader.next();
            if (logBlock instanceof HoodieDataBlock) {
                try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
                    recordItr.forEachRemaining(indexRecord -> {
                        final GenericRecord record = (GenericRecord) indexRecord;
                        assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
                        assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
                        final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
                        assertFalse(key.isEmpty());
                    });
                }
            }
        }
    }
}
Also used : AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieDataBlock(org.apache.hudi.common.table.log.block.HoodieDataBlock) IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) GenericRecord(org.apache.avro.generic.GenericRecord) MessageType(org.apache.parquet.schema.MessageType)
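
One usage note on the two verification loops above: HoodieLogFormat.Reader extends Closeable, so the scan can be wrapped in try-with-resources to guarantee the reader is released even when an assertion fails. A minimal sketch, reusing the fs, fsStatus and writerSchema variables from the surrounding method:

try (HoodieLogFormat.Reader logFileReader =
         HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema)) {
    while (logFileReader.hasNext()) {
        HoodieLogBlock logBlock = logFileReader.next();
        if (logBlock instanceof HoodieDataBlock) {
            try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
                recordItr.forEachRemaining(indexRecord -> {
                    // same per-record assertions as in the snippets above
                });
            }
        }
    }
}
// the reader is closed here, whether or not the assertions passed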

Example 34 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

the class TestCompactionAdminClient method validateUnSchedulePlan.

/**
 * Validate Unschedule operations.
 */
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client, String ingestionInstant, String compactionInstant, int numEntriesPerInstant, int expNumRenames, boolean skipUnSchedule) throws Exception {
    ensureValidCompactionPlan(compactionInstant);
    // Check suggested rename operations
    List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    // Log files belonging to file-slices created because of compaction request must be renamed
    Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
    final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(FileSlice::getLogFiles).collect(Collectors.toSet());
    assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed, "Log files belonging to file-slices created because of compaction request must be renamed");
    if (skipUnSchedule) {
        // Do the renaming only but do not touch the compaction plan - Needed for repair tests
        renameFiles.forEach(lfPair -> {
            try {
                renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
            } catch (IOException e) {
                throw new HoodieIOException(e.getMessage(), e);
            }
        });
    } else {
        validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
    }
    Map<String, Long> fileIdToCountsBeforeRenaming = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Call the main unschedule API
    client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    final HoodieTableFileSystemView newFsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    // Expect all file-slices whose base-commit equals the compaction commit to contain no log files
    newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
    });
    // Ensure same number of log-files before and after renaming per fileId
    Map<String, Long> fileIdToCountsAfterRenaming = newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).flatMap(HoodieFileGroup::getAllFileSlices).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
    assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
    assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
    return renameFiles;
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Pair(org.apache.hudi.common.util.collection.Pair)

Example 35 with HoodieLogFile

use of org.apache.hudi.common.model.HoodieLogFile in project hudi by apache.

the class TestCompactionAdminClient method validateRenameFiles.

private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant, String compactionInstant, HoodieTableFileSystemView fsView) {
    // Ensure the new log-file names follow the expected pattern
    Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
    Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
    renameFiles.forEach(lfPair -> {
        assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
        assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
        uniqOldLogFiles.add(lfPair.getKey());
        uniqNewLogFiles.add(lfPair.getValue());
    });
    renameFiles.forEach(lfPair -> {
        HoodieLogFile oldLogFile = lfPair.getLeft();
        HoodieLogFile newLogFile = lfPair.getValue();
        assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
        assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
        assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
        HoodieLogFile lastLogFileBeforeCompaction = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant).filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get()).findFirst().get();
        assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion(), "Log Version expected");
        assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(), "Log version does not collide");
    });
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HashSet(java.util.HashSet)
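
A small usage note on the first loop above: HashSet.add already reports whether the element was new, so the two contains-then-add checks can be collapsed into single assertions. An equivalent sketch using only the assertions already imported by this test:

Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
renameFiles.forEach(lfPair -> {
    // add() returns false when the element was already present, i.e. a name collision
    assertTrue(uniqOldLogFiles.add(lfPair.getKey()), "Old Log File Names do not collide");
    assertTrue(uniqNewLogFiles.add(lfPair.getValue()), "New Log File Names do not collide");
});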

Aggregations

HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 77 usages
IOException (java.io.IOException): 48 usages
List (java.util.List): 46 usages
Path (org.apache.hadoop.fs.Path): 45 usages
Map (java.util.Map): 42 usages
Collectors (java.util.stream.Collectors): 42 usages
ArrayList (java.util.ArrayList): 38 usages
Option (org.apache.hudi.common.util.Option): 37 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 34 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 28 usages
HashMap (java.util.HashMap): 26 usages
FSUtils (org.apache.hudi.common.fs.FSUtils): 26 usages
Pair (org.apache.hudi.common.util.collection.Pair): 25 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23 usages
Set (java.util.Set): 22 usages
LogManager (org.apache.log4j.LogManager): 22 usages
Logger (org.apache.log4j.Logger): 22 usages
HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat): 21 usages