Example 6 with HoodieCompactionOperation

Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

The class CompactionAdminClient, method unscheduleCompactionFileId.

/**
 * Removes a fileId from pending compaction. Removes the associated compaction operation and renames delta files
 * that were generated for that fileId after the compaction operation was scheduled.
 *
 * This operation MUST be executed with compactions and writers turned OFF.
 *
 * @param fgId           FileGroupId to be unscheduled
 * @param skipValidation Whether to skip validation
 * @param dryRun         Whether to run in dry-run mode (report rename operations without executing them)
 */
public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId, boolean skipValidation, boolean dryRun) throws Exception {
    HoodieTableMetaClient metaClient = createMetaClient(false);
    List<Pair<HoodieLogFile, HoodieLogFile>> renameActions = getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId, Option.empty(), skipValidation);
    List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, 1, dryRun);
    if (!dryRun && !res.isEmpty() && res.get(0).isExecuted() && res.get(0).isSuccess()) {
        // Ready to remove this fileId from the compaction request
        Pair<String, HoodieCompactionOperation> compactionOperationWithInstant = CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fgId);
        HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
        // Keep every operation except the one scheduled for this file group: drop an op only when both its fileId and partition path match
        List<HoodieCompactionOperation> newOps = plan.getOperations().stream().filter(op -> !(op.getFileId().equals(fgId.getFileId()) && op.getPartitionPath().equals(fgId.getPartitionPath()))).collect(Collectors.toList());
        HoodieCompactionPlan newPlan = HoodieCompactionPlan.newBuilder().setOperations(newOps).setExtraMetadata(plan.getExtraMetadata()).build();
        HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionOperationWithInstant.getLeft());
        Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
        if (metaClient.getFs().exists(inflightPath)) {
            // revert if in inflight state
            metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
        }
        // Overwrite compaction plan with updated info
        metaClient.getActiveTimeline().saveToCompactionRequested(new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()), TimelineMetadataUtils.serializeCompactionPlan(newPlan), true);
    }
    return res;
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieException (org.apache.hudi.exception.HoodieException), Option (org.apache.hudi.common.util.Option), HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext), OperationResult (org.apache.hudi.table.action.compact.OperationResult), FileStatus (org.apache.hadoop.fs.FileStatus), COMPACTION_ACTION (org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION), State (org.apache.hudi.common.table.timeline.HoodieInstant.State), ArrayList (java.util.ArrayList), Logger (org.apache.log4j.Logger), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Map (java.util.Map), Path (org.apache.hadoop.fs.Path), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieLogFormat (org.apache.hudi.common.table.log.HoodieLogFormat), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), ValidationUtils (org.apache.hudi.common.util.ValidationUtils), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Set (java.util.Set), TimelineMetadataUtils (org.apache.hudi.common.table.timeline.TimelineMetadataUtils), IOException (java.io.IOException), Collectors (java.util.stream.Collectors), FileNotFoundException (java.io.FileNotFoundException), HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView), Serializable (java.io.Serializable), CompactionOperation (org.apache.hudi.common.model.CompactionOperation), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), List (java.util.List), HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan), HoodieIOException (org.apache.hudi.exception.HoodieIOException), LogManager (org.apache.log4j.LogManager), FSUtils (org.apache.hudi.common.fs.FSUtils), CompactionUtils (org.apache.hudi.common.util.CompactionUtils), Pair (org.apache.hudi.common.util.collection.Pair)
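
Not part of the Hudi sources: a minimal usage sketch of the method above. It assumes an existing HoodieEngineContext; the table base path, partition path, and fileId are hypothetical. The dry run returns the same RenameOpResult list but leaves the timeline untouched.

/**
 * Hedged usage sketch; the values below are illustrative, not taken from a real table.
 */
public void unscheduleFileIdExample(HoodieEngineContext engineContext) throws Exception {
    HoodieFileGroupId fgId = new HoodieFileGroupId("2021/01/01", "file-group-1");
    try (CompactionAdminClient admin = new CompactionAdminClient(engineContext, "/tmp/hoodie_table")) {
        // Dry run first: compute and report the rename operations without executing them
        List<RenameOpResult> preview = admin.unscheduleCompactionFileId(fgId, false, true);
        preview.forEach(System.out::println);
        // Then execute for real; compactions and writers must be stopped at this point
        admin.unscheduleCompactionFileId(fgId, false, false);
    }
}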

Example 7 with HoodieCompactionOperation

Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

The class TestCompactionUtils, method testFileSliceCompactionOpEquality.

/**
 * Validates if generated compaction operation matches with input file slice and partition path.
 *
 * @param slice            File Slice
 * @param op               HoodieCompactionOperation
 * @param expPartitionPath Partition path
 */
private void testFileSliceCompactionOpEquality(FileSlice slice, HoodieCompactionOperation op, String expPartitionPath, int version) {
    assertEquals(expPartitionPath, op.getPartitionPath(), "Partition path is correct");
    assertEquals(slice.getBaseInstantTime(), op.getBaseInstantTime(), "Same base-instant");
    assertEquals(slice.getFileId(), op.getFileId(), "Same file-id");
    if (slice.getBaseFile().isPresent()) {
        HoodieBaseFile df = slice.getBaseFile().get();
        assertEquals(version == COMPACTION_METADATA_VERSION_1 ? df.getPath() : df.getFileName(), op.getDataFilePath(), "Same data-file");
    }
    List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList());
    IntStream.range(0, paths.size()).boxed().forEach(idx -> assertEquals(version == COMPACTION_METADATA_VERSION_1 ? paths.get(idx) : new Path(paths.get(idx)).getName(), op.getDeltaFilePaths().get(idx), "Log File Index " + idx));
    assertEquals(METRICS, op.getMetrics(), "Metrics set");
}
Also used: IntStream (java.util.stream.IntStream), Assertions.assertThrows (org.junit.jupiter.api.Assertions.assertThrows), BeforeEach (org.junit.jupiter.api.BeforeEach), Arrays (java.util.Arrays), CompactionPlanMigrator (org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator), CompactionTestUtils.setupAndValidateCompactionOperations (org.apache.hudi.common.testutils.CompactionTestUtils.setupAndValidateCompactionOperations), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), FileSlice (org.apache.hudi.common.model.FileSlice), HashMap (java.util.HashMap), COMPACTION_METADATA_VERSION_1 (org.apache.hudi.common.util.CompactionUtils.COMPACTION_METADATA_VERSION_1), LATEST_COMPACTION_METADATA_VERSION (org.apache.hudi.common.util.CompactionUtils.LATEST_COMPACTION_METADATA_VERSION), Function (java.util.function.Function), ArrayList (java.util.ArrayList), HoodieTableType (org.apache.hudi.common.model.HoodieTableType), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Map (java.util.Map), Path (org.apache.hadoop.fs.Path), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline), CompactionTestUtils.createCompactionPlan (org.apache.hudi.common.testutils.CompactionTestUtils.createCompactionPlan), ValueSource (org.junit.jupiter.params.provider.ValueSource), DummyHoodieBaseFile (org.apache.hudi.common.testutils.CompactionTestUtils.DummyHoodieBaseFile), IOException (java.io.IOException), HoodieCommonTestHarness (org.apache.hudi.common.testutils.HoodieCommonTestHarness), Collectors (java.util.stream.Collectors), Test (org.junit.jupiter.api.Test), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), List (java.util.List), Stream (java.util.stream.Stream), Assertions.assertTrue (org.junit.jupiter.api.Assertions.assertTrue), HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan), CompactionTestUtils.scheduleCompaction (org.apache.hudi.common.testutils.CompactionTestUtils.scheduleCompaction), DEFAULT_PARTITION_PATHS (org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS), Comparator (java.util.Comparator), FSUtils (org.apache.hudi.common.fs.FSUtils), Pair (org.apache.hudi.common.util.collection.Pair)
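
The version switch in the helper above reflects that COMPACTION_METADATA_VERSION_1 plans store absolute file paths, while later metadata versions store only file names. A tiny illustrative sketch (the log-file path below is made up):

// Illustrative only; the path is hypothetical, not a real Hudi log-file name.
Path logPath = new Path("/tmp/2021/01/01/.file-group-1_000.log.1_1-0-1");
String expectedUnderV1 = logPath.toString(); // full path, matched for COMPACTION_METADATA_VERSION_1
String expectedUnderLater = logPath.getName(); // bare file name, matched for later versions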

Example 8 with HoodieCompactionOperation

Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

The class BoundedIOCompactionStrategy, method orderAndFilter.

@Override
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig, List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
    // Iterate through the operations in order and accept them as long as we are within the IO limit,
    // preserving the original ordering of compactions. Note that the operation that crosses the
    // remaining budget is still accepted before the loop stops.
    List<HoodieCompactionOperation> finalOperations = new ArrayList<>();
    long targetIORemaining = writeConfig.getTargetIOPerCompactionInMB();
    for (HoodieCompactionOperation op : operations) {
        long opIo = op.getMetrics().get(TOTAL_IO_MB).longValue();
        targetIORemaining -= opIo;
        finalOperations.add(op);
        if (targetIORemaining <= 0) {
            return finalOperations;
        }
    }
    return finalOperations;
}
Also used: HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), ArrayList (java.util.ArrayList)
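
A hedged configuration sketch showing how a write config can supply both the strategy and the IO budget consumed above; the base path and the 500 MB budget are illustrative values.

// Sketch: wire BoundedIOCompactionStrategy into a HoodieWriteConfig (values are illustrative).
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie_table")
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCompactionStrategy(new BoundedIOCompactionStrategy())
        // Read back in orderAndFilter via writeConfig.getTargetIOPerCompactionInMB()
        .withTargetIOPerCompactionInMB(500)
        .build())
    .build();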

Example 9 with HoodieCompactionOperation

Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

The class TestCompactionUtils, method testBuildFromFileSlice.

@Test
public void testBuildFromFileSlice() {
    String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
    // Empty file slice with neither base file nor log files
    FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
    HoodieCompactionOperation op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(emptyFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with data-file but no log files
    FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
    noLogFileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with no data-file but log files present
    FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
    noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
    // File Slice with data-file and log files present
    FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
    fileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
    fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
    fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
    op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], fileSlice, Option.of(metricsCaptureFn));
    testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0], LATEST_COMPACTION_METADATA_VERSION);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile), DummyHoodieBaseFile (org.apache.hudi.common.testutils.CompactionTestUtils.DummyHoodieBaseFile), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
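
The metricsCaptureFn passed above is defined elsewhere in the test class; a minimal sketch of what such a capture function can look like (the metric key below is hypothetical):

// Sketch of a metrics-capture function; it receives (partitionPath, fileSlice) pairs.
Option<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFn =
    Option.of(slicePair -> {
        Map<String, Double> metrics = new HashMap<>();
        // Hypothetical metric: number of log files in the slice
        metrics.put("TOTAL_LOG_FILES", (double) slicePair.getValue().getLogFiles().count());
        return metrics;
    });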

Example 10 with HoodieCompactionOperation

Use of org.apache.hudi.avro.model.HoodieCompactionOperation in project hudi by apache.

The class TestCompactionUtils, method testCompactionTransformation.

@Test
public void testCompactionTransformation() {
    // check HoodieCompactionOperation <=> CompactionOperation transformation function
    Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
    HoodieCompactionPlan plan = inputAndPlan.getRight();
    List<HoodieCompactionOperation> originalOps = plan.getOperations();
    // Convert to CompactionOperation
    // Convert back to HoodieCompactionOperation and check for equality
    List<HoodieCompactionOperation> regeneratedOps = originalOps.stream().map(CompactionUtils::buildCompactionOperation).map(CompactionUtils::buildHoodieCompactionOperation).collect(Collectors.toList());
    assertTrue(originalOps.size() > 0, "Transformation did get tested");
    assertEquals(originalOps, regeneratedOps, "All fields set correctly in transformations");
}
Also used: HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan), FileSlice (org.apache.hudi.common.model.FileSlice), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), ArrayList (java.util.ArrayList), List (java.util.List), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
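
For a single operation, the round trip exercised above reduces to two CompactionUtils calls; a brief sketch, assuming op is an existing HoodieCompactionOperation:

// Round-trip one operation between the Avro model and the in-memory representation.
CompactionOperation inMemory = CompactionUtils.buildCompactionOperation(op);
HoodieCompactionOperation roundTripped = CompactionUtils.buildHoodieCompactionOperation(inMemory);
assertEquals(op, roundTripped, "Round trip preserves all fields");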

Aggregations

HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation) 21
ArrayList (java.util.ArrayList) 16
List (java.util.List) 13
HashMap (java.util.HashMap) 11
Test (org.junit.jupiter.api.Test) 10
HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan) 9
FileSlice (org.apache.hudi.common.model.FileSlice) 9
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 9
Map (java.util.Map) 7
Collectors (java.util.stream.Collectors) 7
Path (org.apache.hadoop.fs.Path) 7
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId) 7
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile) 7
Pair (org.apache.hudi.common.util.collection.Pair) 7
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile) 6
IOException (java.io.IOException) 5
SimpleDateFormat (java.text.SimpleDateFormat) 5
Date (java.util.Date) 5
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 5
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 5