
Example 21 with HoodieTableFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.

From class HoodieMetadataMetrics, method getStats.

public Map<String, String> getStats(boolean detailed, HoodieTableMetaClient metaClient, HoodieTableMetadata metadata) {
    try {
        metaClient.reloadActiveTimeline();
        HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline());
        return getStats(fsView, detailed, metadata);
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to get metadata stats.", ioe);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)
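
For context, a minimal usage sketch (not from the project) that rebuilds the view from a freshly reloaded timeline and lists the latest file slices of one partition. The partition path "2021/01/01" is a hypothetical placeholder; everything else uses the same APIs shown above.

// Sketch only: construct the view and enumerate latest file slices for one partition.
metaClient.reloadActiveTimeline();
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline());
// "2021/01/01" is an illustrative partition path
fsView.getLatestFileSlices("2021/01/01")
    .forEach(slice -> System.out.println(slice.getFileId() + " @ " + slice.getBaseInstantTime()));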

Example 22 with HoodieTableFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.

From class TestCompactionAdminClient, method validateUnSchedulePlan.

/**
 * Validate Unschedule operations.
 */
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client, String ingestionInstant, String compactionInstant, int numEntriesPerInstant, int expNumRenames, boolean skipUnSchedule) throws Exception {
    ensureValidCompactionPlan(compactionInstant);
    // Check suggested rename operations
    List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
        client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
    metaClient = HoodieTableMetaClient.builder()
        .setConf(metaClient.getHadoopConf())
        .setBasePath(basePath)
        .setLoadActiveTimelineOnLoad(true)
        .build();
    // Log files belonging to file-slices created because of compaction request must be renamed
    Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
    final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
        .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
        .flatMap(FileSlice::getLogFiles)
        .collect(Collectors.toSet());
    assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed, "Log files belonging to file-slices created because of compaction request must be renamed");
    if (skipUnSchedule) {
        // Do the renaming only but do not touch the compaction plan - Needed for repair tests
        renameFiles.forEach(lfPair -> {
            try {
                renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
            } catch (IOException e) {
                throw new HoodieIOException(e.getMessage(), e);
            }
        });
    } else {
        validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
    }
    Map<String, Long> fileIdToCountsBeforeRenaming = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant)
        .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
        .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Call the main unschedule API
    client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
    metaClient = HoodieTableMetaClient.builder()
        .setConf(metaClient.getHadoopConf())
        .setBasePath(basePath)
        .setLoadActiveTimelineOnLoad(true)
        .build();
    final HoodieTableFileSystemView newFsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    // Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
    newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
        .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
        .forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
    });
    // Ensure same number of log-files before and after renaming per fileId
    Map<String, Long> fileIdToCountsAfterRenaming = newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
        .flatMap(HoodieFileGroup::getAllFileSlices)
        .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
        .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
    assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
    assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
    return renameFiles;
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
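
One detail worth isolating from the test above: the forEach rename loop wraps the checked IOException in Hudi's unchecked HoodieIOException, because java.util.function consumers cannot throw checked exceptions. A distilled standalone sketch of that idiom, with UncheckedIOException standing in for HoodieIOException and a hypothetical renamePair helper:

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;

public class RenameSketch {

    // Apply an IO action to each pair inside forEach, rethrowing unchecked.
    static void renameAll(List<String[]> pairs) {
        pairs.forEach(pair -> {
            try {
                renamePair(pair[0], pair[1]); // hypothetical helper that may throw IOException
            } catch (IOException e) {
                throw new UncheckedIOException(e); // stand-in for HoodieIOException
            }
        });
    }

    static void renamePair(String from, String to) throws IOException {
        // rename logic would go here
    }
}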

Example 23 with HoodieTableFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.

From class TestCompactionAdminClient, method validateRenameFiles.

private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant, String compactionInstant, HoodieTableFileSystemView fsView) {
    // Ensure new names of log-files are on expected lines
    Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
    Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
    renameFiles.forEach(lfPair -> {
        assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
        assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
        uniqOldLogFiles.add(lfPair.getKey());
        uniqNewLogFiles.add(lfPair.getValue());
    });
    renameFiles.forEach(lfPair -> {
        HoodieLogFile oldLogFile = lfPair.getLeft();
        HoodieLogFile newLogFile = lfPair.getValue();
        assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
        assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
        assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
        HoodieLogFile lastLogFileBeforeCompaction = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
            .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
            .map(fs -> fs.getLogFiles().findFirst().get())
            .findFirst().get();
        assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion(), "Log Version expected");
        assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(), "Log version does not collide");
    });
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
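
An aside on the collision checks above: Set.add already returns false when the element was present, so the contains-then-add pair can be collapsed into a single call. A minimal standalone sketch of that variant (not the project's code):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class CollisionSketch {

    // Fail fast on the first duplicate, using the boolean result of Set.add.
    static void assertNoCollisions(List<String> names) {
        Set<String> seen = new HashSet<>();
        for (String name : names) {
            if (!seen.add(name)) {
                throw new AssertionError("Log file name collides: " + name);
            }
        }
    }
}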

Example 24 with HoodieTableFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.

From class HoodieBackedTableMetadataWriter, method prepRecords.

/**
 * Tag each record with the location in the given partition.
 * Each record is tagged with its respective file slice's location, determined by its record key.
 */
protected HoodieData<HoodieRecord> prepRecords(Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap) {
    // The result set
    HoodieData<HoodieRecord> allPartitionRecords = engineContext.emptyHoodieData();
    HoodieTableFileSystemView fsView = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);
    for (Map.Entry<MetadataPartitionType, HoodieData<HoodieRecord>> entry : partitionRecordsMap.entrySet()) {
        final String partitionName = entry.getKey().getPartitionPath();
        final int fileGroupCount = entry.getKey().getFileGroupCount();
        HoodieData<HoodieRecord> records = entry.getValue();
        List<FileSlice> fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName);
        ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount,
            String.format("Invalid number of file groups for partition:%s, found=%d, required=%d",
                partitionName, fileSlices.size(), fileGroupCount));
        HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> {
            FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), fileGroupCount));
            r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
            return r;
        });
        allPartitionRecords = allPartitionRecords.union(rddSinglePartitionRecords);
    }
    return allPartitionRecords;
}
Also used : HoodieData(org.apache.hudi.common.data.HoodieData) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Map(java.util.Map) HashMap(java.util.HashMap)
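
The routing step above delegates to HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex. As a plausible sketch of such a deterministic key-to-file-group mapping, purely for illustration (the actual Hudi implementation may use a different hash function):

// Illustration only: route a record key to one of N metadata file groups.
// Hudi's real mapRecordKeyToFileGroupIndex may differ in its hash choice.
static int mapRecordKeyToFileGroupIndex(String recordKey, int fileGroupCount) {
    return Math.floorMod(recordKey.hashCode(), fileGroupCount); // stable index in [0, fileGroupCount)
}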

Example 25 with HoodieTableFileSystemView

Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.

From class CompactionAdminClient, method validateCompactionPlan.

/**
 * Validate all compaction operations in a compaction plan. Verifies the file-slices are consistent with corresponding
 * compaction operations.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param parallelism Parallelism for running the validations
 */
public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism) throws IOException {
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    if (plan.getOperations() != null) {
        List<CompactionOperation> ops = plan.getOperations().stream()
            .map(CompactionOperation::convertFromAvroRecordInstance)
            .collect(Collectors.toList());
        context.setJobStatus(this.getClass().getSimpleName(), "Validate compaction operations");
        return context.map(ops, op -> {
            try {
                return validateCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView));
            } catch (IOException e) {
                throw new HoodieIOException(e.getMessage(), e);
            }
        }, parallelism);
    }
    return new ArrayList<>();
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)
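
Here context.map is Hudi's engine-abstracted parallel map over the list of operations. A loose plain-Java analogue using parallel streams, as an approximation only (a parallel stream sizes itself from the common ForkJoinPool rather than honoring an explicit parallelism argument):

import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;

public class ParallelMapSketch {

    // Rough analogue of context.map(ops, fn, parallelism).
    static <I, O> List<O> parallelMap(List<I> inputs, Function<I, O> fn) {
        return inputs.parallelStream().map(fn).collect(Collectors.toList());
    }
}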

Aggregations

HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView): 42 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 29 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 25 usages
Path (org.apache.hadoop.fs.Path): 24 usages
IOException (java.io.IOException): 22 usages
ArrayList (java.util.ArrayList): 22 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 22 usages
List (java.util.List): 21 usages
Collectors (java.util.stream.Collectors): 20 usages
Option (org.apache.hudi.common.util.Option): 20 usages
Map (java.util.Map): 19 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18 usages
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 17 usages
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 16 usages
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 16 usages
HoodieException (org.apache.hudi.exception.HoodieException): 15 usages
Stream (java.util.stream.Stream): 14 usages
Test (org.junit.jupiter.api.Test): 13 usages
HashMap (java.util.HashMap): 12 usages