Example 16 with CompactionOperation

Use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

From the class TestIncrementalFSViewSync, the method areViewsConsistent:

/**
 * Checks that two file-system views are consistent with each other.
 *
 * @param view1 first view to compare
 * @param view2 second view to compare
 * @param expectedTotalFileSlices expected total number of file slices across all file groups
 */
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2, long expectedTotalFileSlices) {
    // Timeline check
    assertEquals(view1.getLastInstant(), view2.getLastInstant());
    // View Checks
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 = partitions.stream().flatMap(view1::getAllFileGroups).collect(Collectors.toMap(HoodieFileGroup::getFileGroupId, fg -> fg));
    Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 = partitions.stream().flatMap(view2::getAllFileGroups).collect(Collectors.toMap(HoodieFileGroup::getFileGroupId, fg -> fg));
    assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());
    long gotSlicesCount = fileGroupsMap1.keySet().stream().map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k))).mapToLong(e -> {
        HoodieFileGroup fg1 = e.getKey();
        HoodieFileGroup fg2 = e.getValue();
        assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
        List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
        List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
        assertEquals(slices1.size(), slices2.size());
        IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx))).forEach(e2 -> {
            FileSlice slice1 = e2.getKey();
            FileSlice slice2 = e2.getValue();
            assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
            assertEquals(slice1.getFileId(), slice2.getFileId());
            assertEquals(slice1.getBaseFile().isPresent(), slice2.getBaseFile().isPresent());
            if (slice1.getBaseFile().isPresent()) {
                HoodieBaseFile df1 = slice1.getBaseFile().get();
                HoodieBaseFile df2 = slice2.getBaseFile().get();
                assertEquals(df1.getCommitTime(), df2.getCommitTime());
                assertEquals(df1.getFileId(), df2.getFileId());
                assertEquals(df1.getFileName(), df2.getFileName());
                assertEquals(Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())), Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
            }
            List<Path> logPaths1 = slice1.getLogFiles().map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
            List<Path> logPaths2 = slice2.getLogFiles().map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
            assertEquals(logPaths1, logPaths2);
        });
        return slices1.size();
    }).sum();
    assertEquals(expectedTotalFileSlices, gotSlicesCount);
    // Pending Compaction Operations Check
    Set<Pair<String, CompactionOperation>> ops1 = view1.getPendingCompactionOperations().collect(Collectors.toSet());
    Set<Pair<String, CompactionOperation>> ops2 = view2.getPendingCompactionOperations().collect(Collectors.toSet());
    assertEquals(ops1, ops2);
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
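
For context, a minimal sketch (not from the source) of how a check like areViewsConsistent is typically driven: keep one view synced incrementally, rebuild a second view from the latest timeline, and compare the two. The verifyViewsAgree name and the incrementalView field are hypothetical; metaClient and partitions come from the test harness.

private void verifyViewsAgree(long expectedTotalFileSlices) {
    // View 1: kept up to date through incremental timeline syncs (hypothetical field).
    SyncableFileSystemView incrementalView = this.incrementalView;
    incrementalView.sync();
    // View 2: rebuilt from scratch against the latest completed timeline.
    SyncableFileSystemView freshView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline().filterCompletedAndCompactionInstants());
    // Both construction paths must describe the same file groups and file slices.
    areViewsConsistent(incrementalView, freshView, expectedTotalFileSlices);
}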

Example 17 with CompactionOperation

Use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

From the class ITTestCompactionCommand, the method testUnscheduleCompactFile:

/**
 * This test mainly exercises the workflow of the 'compaction unscheduleFileId' command.
 * The full test of {@link org.apache.hudi.client.CompactionAdminClient#unscheduleCompactionFileId}
 * is {@link TestCompactionAdminClient#testUnscheduleCompactionFileId}.
 */
@Test
public void testUnscheduleCompactFile() throws IOException {
    int numEntriesPerInstant = 10;
    CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
    CompactionOperation op = CompactionOperation.convertFromAvroRecordInstance(CompactionUtils.getCompactionPlan(metaClient, "001").getOperations().stream().findFirst().get());
    CommandResult cr = getShell().executeCommand(String.format("compaction unscheduleFileId --fileId %s --partitionPath %s --sparkMaster %s", op.getFileGroupId().getFileId(), op.getFileGroupId().getPartitionPath(), "local"));
    assertAll("Command run failed", () -> assertTrue(cr.isSuccess()), () -> assertTrue(removeNonWordAndStripSpace(cr.getResult().toString()).contains("true")), () -> assertFalse(removeNonWordAndStripSpace(cr.getResult().toString()).contains("false")));
}
Also used : CompactionOperation(org.apache.hudi.common.model.CompactionOperation) CommandResult(org.springframework.shell.core.CommandResult) AbstractShellIntegrationTest(org.apache.hudi.cli.testutils.AbstractShellIntegrationTest) Test(org.junit.jupiter.api.Test)
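
For comparison, a hedged sketch (not from the source) of invoking the admin client directly rather than through the shell. The engine-context constructor argument and the (fileGroupId, skipValidation, dryRun) parameter order are assumptions about the CompactionAdminClient API referenced in the javadoc above.

// Assumed API shape; TestCompactionAdminClient holds the authoritative usage.
// `context` stands in for an engine context built from the test's Spark context.
CompactionAdminClient adminClient = new CompactionAdminClient(context, metaClient.getBasePath());
// Unschedule the same file group that the CLI command targets above.
adminClient.unscheduleCompactionFileId(op.getFileGroupId(), /* skipValidation */ false, /* dryRun */ false);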

Example 18 with CompactionOperation

Use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

From the class CompactionPlanOperator, the method scheduleCompaction:

private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
    // the first instant takes the highest priority.
    Option<HoodieInstant> firstRequested = table.getActiveTimeline().filterPendingCompactionTimeline().filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED).firstInstant();
    if (!firstRequested.isPresent()) {
        // do nothing.
        LOG.info("No compaction plan for checkpoint " + checkpointId);
        return;
    }
    String compactionInstantTime = firstRequested.get().getTimestamp();
    // generate compaction plan
    // should support configurable commit metadata
    HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
    if (compactionPlan == null || compactionPlan.getOperations() == null || compactionPlan.getOperations().isEmpty()) {
        // do nothing.
        LOG.info("Empty compaction plan for instant " + compactionInstantTime);
    } else {
        HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
        // Mark instant as compaction inflight
        table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
        table.getMetaClient().reloadActiveTimeline();
        List<CompactionOperation> operations = compactionPlan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
        LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
        for (CompactionOperation operation : operations) {
            output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation)));
        }
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Configuration(org.apache.flink.configuration.Configuration) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieFlinkTable(org.apache.hudi.table.HoodieFlinkTable) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkTables(org.apache.hudi.util.FlinkTables) Output(org.apache.flink.streaming.api.operators.Output) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) CompactionUtil(org.apache.hudi.util.CompactionUtil)
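
As a side note, a small sketch (not part of the operator) of inspecting the converted operations before emitting them downstream: each CompactionOperation names its file group, an optional base file, and the log files to be compacted. The getDataFileName and getDeltaFileNames accessors are assumptions about the CompactionOperation model.

for (CompactionOperation operation : operations) {
    // Log what each file group will compact: the (optional) base file plus its log files.
    LOG.info("Compacting file group {}: base file {}, {} log files",
        operation.getFileGroupId(),
        operation.getDataFileName().orElse("<none>"),
        operation.getDeltaFileNames().size());
}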

Example 19 with CompactionOperation

Use of org.apache.hudi.common.model.CompactionOperation in project hudi by apache.

From the class CompactionPlanSourceFunction, the method run:

@Override
public void run(SourceContext sourceContext) throws Exception {
    List<CompactionOperation> operations = this.compactionPlan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
    LOG.info("CompactionPlanFunction compacting " + operations + " files");
    for (CompactionOperation operation : operations) {
        sourceContext.collect(new CompactionPlanEvent(compactionInstantTime, operation));
    }
}
Also used : CompactionOperation(org.apache.hudi.common.model.CompactionOperation)
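
To show how this source is typically fed, a hedged sketch: load the plan that was written when the compaction was scheduled and hand it to the function together with the instant time. The (plan, instantTime) constructor shape is an assumption inferred from the fields used in run().

// Load the plan persisted at scheduling time for the requested instant.
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(metaClient, compactionInstantTime);
// Constructor shape assumed from the fields referenced in run().
CompactionPlanSourceFunction source = new CompactionPlanSourceFunction(compactionPlan, compactionInstantTime);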

Aggregations

Classes co-occurring with CompactionOperation across these examples, with usage counts:

CompactionOperation (org.apache.hudi.common.model.CompactionOperation): 19
IOException (java.io.IOException): 16
List (java.util.List): 14
CompactionUtils (org.apache.hudi.common.util.CompactionUtils): 14
Option (org.apache.hudi.common.util.Option): 14
Set (java.util.Set): 13
Pair (org.apache.hudi.common.util.collection.Pair): 13
LogManager (org.apache.log4j.LogManager): 13
Logger (org.apache.log4j.Logger): 13
ArrayList (java.util.ArrayList): 12
HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan): 12
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 12
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 12
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 12
Path (org.apache.hadoop.fs.Path): 11
FSUtils (org.apache.hudi.common.fs.FSUtils): 11
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 11
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 11
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 11
Map (java.util.Map): 10