Search in sources :

Example 11 with CompactionOperation

use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

the class CompactionAdminClient method getRenamingActionsForUnschedulingCompactionOperation.

/**
 * Generate renaming actions for unscheduling a compaction operation NOTE: Can only be used safely when no writer
 * (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param compactionInstant Compaction Instant
 * @param operation Compaction Operation
 * @param fsViewOpt Cached File System View
 * @param skipValidation Skip Validation
 * @return list of pairs of log-files (old, new) and for each pair, rename must be done to successfully unschedule
 *         compaction.
 */
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
    List<Pair<HoodieLogFile, HoodieLogFile>> result = new ArrayList<>();
    HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    if (!skipValidation) {
        validateCompactionOperation(metaClient, compactionInstant, operation, Option.of(fileSystemView));
    }
    HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get();
    FileSlice merged = fileSystemView.getLatestMergedFileSlicesBeforeOrOn(operation.getPartitionPath(), lastInstant.getTimestamp()).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
    List<HoodieLogFile> logFilesToRepair = merged.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(compactionInstant)).sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
    FileSlice fileSliceForCompaction = fileSystemView.getLatestFileSlicesBeforeOrOn(operation.getPartitionPath(), operation.getBaseInstantTime(), true).filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
    int maxUsedVersion = fileSliceForCompaction.getLogFiles().findFirst().map(HoodieLogFile::getLogVersion).orElse(HoodieLogFile.LOGFILE_BASE_VERSION - 1);
    String logExtn = fileSliceForCompaction.getLogFiles().findFirst().map(lf -> "." + lf.getFileExtension()).orElse(HoodieLogFile.DELTA_EXTENSION);
    String parentPath = fileSliceForCompaction.getBaseFile().map(df -> new Path(df.getPath()).getParent().toString()).orElse(fileSliceForCompaction.getLogFiles().findFirst().map(lf -> lf.getPath().getParent().toString()).get());
    for (HoodieLogFile toRepair : logFilesToRepair) {
        int version = maxUsedVersion + 1;
        HoodieLogFile newLf = new HoodieLogFile(new Path(parentPath, FSUtils.makeLogFileName(operation.getFileId(), logExtn, operation.getBaseInstantTime(), version, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
        result.add(Pair.of(toRepair, newLf));
        maxUsedVersion = version;
    }
    return result;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) OperationResult(org.apache.hudi.table.action.compact.OperationResult) FileStatus(org.apache.hadoop.fs.FileStatus) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) Path(org.apache.hadoop.fs.Path) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Pair(org.apache.hudi.common.util.collection.Pair)

Example 12 with CompactionOperation

use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

the class CompactionAdminClient method getRenamingActionsForUnschedulingCompactionPlan.

/**
 * Generate renaming actions for unscheduling a pending compaction plan. NOTE: Can only be used safely when no writer
 * (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param compactionInstant Compaction Instant to be unscheduled
 * @param fsViewOpt Cached File System View
 * @param skipValidation Skip Validation
 * @return list of pairs of log-files (old, new) and for each pair, rename must be done to successfully unschedule
 *         compaction.
 */
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
    HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    if (plan.getOperations() != null) {
        LOG.info("Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
        List<CompactionOperation> ops = plan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
        context.setJobStatus(this.getClass().getSimpleName(), "Generate compaction unscheduling operations");
        return context.flatMap(ops, op -> {
            try {
                return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView), skipValidation).stream();
            } catch (IOException ioe) {
                throw new HoodieIOException(ioe.getMessage(), ioe);
            } catch (CompactionValidationException ve) {
                throw new HoodieException(ve);
            }
        }, parallelism);
    }
    LOG.warn("No operations for compaction instant : " + compactionInstant);
    return new ArrayList<>();
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)

Example 13 with CompactionOperation

use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

the class TestCompactionAdminClient method validateUnScheduleFileId.

/**
 * Validate Unschedule operations.
 */
private void validateUnScheduleFileId(CompactionAdminClient client, String ingestionInstant, String compactionInstant, CompactionOperation op, int expNumRenames) throws Exception {
    ensureValidCompactionPlan(compactionInstant);
    // Check suggested rename operations
    List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client.getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op, Option.empty(), false);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    // Log files belonging to file-slices created because of compaction request must be renamed
    Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
    final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles).collect(Collectors.toSet());
    assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed, "Log files belonging to file-slices created because of compaction request must be renamed");
    validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
    Map<String, Long> fileIdToCountsBeforeRenaming = fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    // Call the main unschedule API
    client.unscheduleCompactionFileId(op.getFileGroupId(), false, false);
    metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build();
    final HoodieTableFileSystemView newFsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    // Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
    newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true).filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
    });
    // Ensure same number of log-files before and after renaming per fileId
    Map<String, Long> fileIdToCountsAfterRenaming = newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0]).flatMap(HoodieFileGroup::getAllFileSlices).filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant)).filter(fs -> fs.getFileId().equals(op.getFileId())).map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count())).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
    assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
    assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
    assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Pair(org.apache.hudi.common.util.collection.Pair)

Example 14 with CompactionOperation

use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

the class TestCompactionAdminClient method testUnscheduleCompactionFileId.

@Test
public void testUnscheduleCompactionFileId() throws Exception {
    int numEntriesPerInstant = 10;
    CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
    Map<String, CompactionOperation> instantsWithOp = Stream.of("001", "003", "005", "007").map(instant -> {
        try {
            return Pair.of(instant, CompactionUtils.getCompactionPlan(metaClient, instant));
        } catch (IOException ioe) {
            throw new HoodieException(ioe);
        }
    }).map(instantWithPlan -> instantWithPlan.getRight().getOperations().stream().map(op -> Pair.of(instantWithPlan.getLeft(), CompactionOperation.convertFromAvroRecordInstance(op))).findFirst().get()).collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
    // THere are delta-commits after compaction instant
    validateUnScheduleFileId(client, "000", "001", instantsWithOp.get("001"), 2);
    // THere are delta-commits after compaction instant
    validateUnScheduleFileId(client, "002", "003", instantsWithOp.get("003"), 2);
    // THere are no delta-commits after compaction instant
    validateUnScheduleFileId(client, "004", "005", instantsWithOp.get("005"), 0);
    // THere are no delta-commits after compaction instant
    validateUnScheduleFileId(client, "006", "007", instantsWithOp.get("007"), 0);
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) BeforeEach(org.junit.jupiter.api.BeforeEach) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) CompactionTestUtils(org.apache.hudi.common.testutils.CompactionTestUtils) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) Set(java.util.Set) MERGE_ON_READ(org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIOException(org.apache.hudi.exception.HoodieIOException) CompactionAdminClient.renameLogFile(org.apache.hudi.client.CompactionAdminClient.renameLogFile) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation(org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Test(org.junit.jupiter.api.Test)

Example 15 with CompactionOperation

use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

the class TestHoodieTableFileSystemView method testPendingCompactionWithDuplicateFileIdsAcrossPartitions.

@Test
public void testPendingCompactionWithDuplicateFileIdsAcrossPartitions() throws Exception {
    // Put some files in the partition
    String partitionPath1 = "2016/05/01";
    String partitionPath2 = "2016/05/02";
    String partitionPath3 = "2016/05/03";
    String fullPartitionPath1 = basePath + "/" + partitionPath1 + "/";
    new File(fullPartitionPath1).mkdirs();
    String fullPartitionPath2 = basePath + "/" + partitionPath2 + "/";
    new File(fullPartitionPath2).mkdirs();
    String fullPartitionPath3 = basePath + "/" + partitionPath3 + "/";
    new File(fullPartitionPath3).mkdirs();
    String instantTime1 = "1";
    String deltaInstantTime1 = "2";
    String deltaInstantTime2 = "3";
    String fileId = UUID.randomUUID().toString();
    String dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
    new File(fullPartitionPath1 + dataFileName).createNewFile();
    String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
    new File(fullPartitionPath1 + fileName1).createNewFile();
    new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
    new File(fullPartitionPath2 + fileName1).createNewFile();
    new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
    new File(fullPartitionPath3 + fileName1).createNewFile();
    HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
    HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
    HoodieInstant deltaInstant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime1);
    HoodieInstant deltaInstant3 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime2);
    saveAsComplete(commitTimeline, instant1, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant2, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant3, Option.empty());
    // Now we list all partitions
    FileStatus[] statuses = metaClient.getFs().listStatus(new Path[] { new Path(fullPartitionPath1), new Path(fullPartitionPath2), new Path(fullPartitionPath3) });
    assertEquals(6, statuses.length);
    refreshFsView();
    Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(p -> fsView.getAllFileGroups(p).count());
    List<HoodieFileGroup> groups = Stream.of(partitionPath1, partitionPath2, partitionPath3).flatMap(p -> fsView.getAllFileGroups(p)).collect(Collectors.toList());
    assertEquals(3, groups.size(), "Expected number of file-groups");
    assertEquals(3, groups.stream().map(HoodieFileGroup::getPartitionPath).collect(Collectors.toSet()).size(), "Partitions must be different for file-groups");
    Set<String> fileIds = groups.stream().map(HoodieFileGroup::getFileGroupId).map(HoodieFileGroupId::getFileId).collect(Collectors.toSet());
    assertEquals(1, fileIds.size(), "File Id must be same");
    assertTrue(fileIds.contains(fileId), "Expected FileId");
    // Setup Pending compaction for all of these fileIds.
    List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
    List<FileSlice> fileSlices = rtView.getLatestFileSlices(partitionPath1).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath1, fileSlices.get(0)));
    fileSlices = rtView.getLatestFileSlices(partitionPath2).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath2, fileSlices.get(0)));
    fileSlices = rtView.getLatestFileSlices(partitionPath3).collect(Collectors.toList());
    partitionFileSlicesPairs.add(Pair.of(partitionPath3, fileSlices.get(0)));
    String compactionRequestedTime = "2";
    String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
    HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
    // Create a Data-file for some of the partitions but this should be skipped by view
    new File(basePath + "/" + partitionPath1 + "/" + compactDataFileName).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + compactDataFileName).createNewFile();
    HoodieInstant compactionInstant = new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionRequestedTime);
    HoodieInstant requested = HoodieTimeline.getCompactionRequestedInstant(compactionInstant.getTimestamp());
    metaClient.getActiveTimeline().saveToCompactionRequested(requested, TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
    metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requested);
    // Fake delta-ingestion after compaction-requested
    String deltaInstantTime4 = "4";
    String deltaInstantTime5 = "6";
    String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
    String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
    new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath2 + "/" + fileName4).createNewFile();
    new File(basePath + "/" + partitionPath3 + "/" + fileName3).createNewFile();
    new File(basePath + "/" + partitionPath3 + "/" + fileName4).createNewFile();
    HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
    HoodieInstant deltaInstant5 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime5);
    saveAsComplete(commitTimeline, deltaInstant4, Option.empty());
    saveAsComplete(commitTimeline, deltaInstant5, Option.empty());
    refreshFsView();
    // Test Data Files
    List<HoodieBaseFile> dataFiles = roView.getAllBaseFiles(partitionPath1).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
    assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
    dataFiles = roView.getAllBaseFiles(partitionPath2).collect(Collectors.toList());
    assertEquals(1, dataFiles.size(), "One data-file is expected as there is only one file-group");
    assertEquals("1", dataFiles.get(0).getCommitTime(), "Expect only valid commit");
    // Merge API Tests
    Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(partitionPath -> {
        List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
        assertEquals(1, fileSliceList.size(), "Expect file-slice to be merged");
        FileSlice fileSlice = fileSliceList.get(0);
        assertEquals(fileId, fileSlice.getFileId());
        assertEquals(dataFileName, fileSlice.getBaseFile().get().getFileName(), "Data file must be present");
        assertEquals(instantTime1, fileSlice.getBaseInstantTime(), "Base Instant of penultimate file-slice must be base instant");
        List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
        assertEquals(3, logFiles.size(), "Log files must include those after compaction request");
        assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
        assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
        assertEquals(fileName1, logFiles.get(2).getFileName(), "Log File Order check");
        fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
        assertEquals(1, fileSliceList.size(), "Expect only one file-id");
        fileSlice = fileSliceList.get(0);
        assertEquals(fileId, fileSlice.getFileId());
        assertFalse(fileSlice.getBaseFile().isPresent(), "No data-file expected in latest file-slice");
        assertEquals(compactionRequestedTime, fileSlice.getBaseInstantTime(), "Compaction requested instant must be base instant");
        logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
        assertEquals(2, logFiles.size(), "Log files must include only those after compaction request");
        assertEquals(fileName4, logFiles.get(0).getFileName(), "Log File Order check");
        assertEquals(fileName3, logFiles.get(1).getFileName(), "Log File Order check");
        // Check getLatestFileSlicesBeforeOrOn excluding fileIds in pending compaction
        fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false).collect(Collectors.toList());
        assertEquals(0, fileSliceList.size(), "Expect empty list as file-id is in pending compaction");
    });
    assertEquals(3, fsView.getPendingCompactionOperations().count());
    Set<String> partitionsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getPartitionPath).collect(Collectors.toSet());
    assertEquals(3, partitionsInCompaction.size());
    assertTrue(partitionsInCompaction.contains(partitionPath1));
    assertTrue(partitionsInCompaction.contains(partitionPath2));
    assertTrue(partitionsInCompaction.contains(partitionPath3));
    Set<String> fileIdsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue).map(CompactionOperation::getFileId).collect(Collectors.toSet());
    assertEquals(1, fileIdsInCompaction.size());
    assertTrue(fileIdsInCompaction.contains(fileId));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) HoodiePath(org.apache.hudi.avro.model.HoodiePath) BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) Date(java.util.Date) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) BaseFile(org.apache.hudi.common.model.BaseFile) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodiePath(org.apache.hudi.avro.model.HoodiePath) HoodieFSPermission(org.apache.hudi.avro.model.HoodieFSPermission) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) FileSlice(org.apache.hudi.common.model.FileSlice) IndexWriter(org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) CommitUtils(org.apache.hudi.common.util.CommitUtils) FsAction(org.apache.hadoop.fs.permission.FsAction) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Paths(java.nio.file.Paths) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

CompactionOperation (org.apache.hudi.common.model.CompactionOperation)19 IOException (java.io.IOException)16 List (java.util.List)14 CompactionUtils (org.apache.hudi.common.util.CompactionUtils)14 Option (org.apache.hudi.common.util.Option)14 Set (java.util.Set)13 Pair (org.apache.hudi.common.util.collection.Pair)13 LogManager (org.apache.log4j.LogManager)13 Logger (org.apache.log4j.Logger)13 ArrayList (java.util.ArrayList)12 HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)12 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)12 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)12 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)12 Path (org.apache.hadoop.fs.Path)11 FSUtils (org.apache.hudi.common.fs.FSUtils)11 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)11 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)11 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)11 Map (java.util.Map)10