Search in sources :

Example 6 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class IncrementalTimelineSyncFileSystemView method addPendingCompactionInstant.

/**
 * Registers a newly discovered pending compaction instant with this view.
 *
 * <p>The compaction plan of the instant is loaded, its operations are recorded as pending
 * compaction operations, and for each operation a file group carrying a new file slice at the
 * compaction instant time is built. The file groups are grouped by partition path and applied
 * (in {@code DeltaApplyMode.ADD} mode) only to partitions for which
 * {@code isPartitionAvailableInStore} returns true.
 *
 * @param timeline Hoodie Timeline used to construct the file groups
 * @param instant Compaction Instant whose plan is synced
 * @throws IOException declared for callers; raised by the compaction plan lookup
 */
private void addPendingCompactionInstant(HoodieTimeline timeline, HoodieInstant instant) throws IOException {
    LOG.info("Syncing pending compaction instant (" + instant + ")");
    HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, instant.getTimestamp());

    // Pair every operation of the plan with the instant time it is keyed under.
    List<Pair<String, CompactionOperation>> operations = CompactionUtils.getPendingCompactionOperations(instant, plan)
        .map(entry -> entry.getValue())
        .map(timeAndOp -> Pair.of(timeAndOp.getKey(), CompactionOperation.convertFromAvroRecordInstance(timeAndOp.getValue())))
        .collect(Collectors.toList());

    // The pending compaction bookkeeping is updated before any partition view is touched.
    addPendingCompactionOperations(operations.stream());

    Map<String, List<Pair<String, HoodieFileGroup>>> groupsByPartition = operations.stream()
        .map(timeAndOp -> {
            String instantTime = timeAndOp.getKey();
            HoodieFileGroup group = new HoodieFileGroup(timeAndOp.getValue().getFileGroupId(), timeline);
            group.addNewFileSliceAtInstant(instantTime);
            return Pair.of(instantTime, group);
        })
        .collect(Collectors.groupingBy(timeAndGroup -> timeAndGroup.getValue().getPartitionPath()));

    for (Map.Entry<String, List<Pair<String, HoodieFileGroup>>> partitionEntry : groupsByPartition.entrySet()) {
        String partitionPath = partitionEntry.getKey();
        if (!isPartitionAvailableInStore(partitionPath)) {
            // Partitions that are not available in the store are skipped.
            continue;
        }
        List<HoodieFileGroup> fileGroups = partitionEntry.getValue().stream()
            .map(Pair::getValue)
            .collect(Collectors.toList());
        applyDeltaFileSlicesToPartitionView(partitionPath, fileGroups, DeltaApplyMode.ADD);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) TimelineDiffHelper(org.apache.hudi.common.table.timeline.TimelineDiffHelper) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Set(java.util.Set) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) TimelineDiffResult(org.apache.hudi.common.table.timeline.TimelineDiffHelper.TimelineDiffResult) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) LogManager(org.apache.log4j.LogManager) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) List(java.util.List) 
HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Pair(org.apache.hudi.common.util.collection.Pair)

Example 7 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class CompactionUtils method getCompactionPlan.

/**
 * Reads the compaction plan stored for the requested compaction instant and upgrades it to the
 * latest plan version.
 *
 * @param metaClient meta client whose active timeline holds the compaction plan
 * @param compactionInstant compaction instant time
 * @return the deserialized plan after {@code CompactionPlanMigrator#upgradeToLatest}
 * @throws IOException declared for callers; the visible calls that may raise it are the timeline
 *     read and the deserialization
 */
public static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant) throws IOException {
    CompactionPlanMigrator planMigrator = new CompactionPlanMigrator(metaClient);
    // NOTE(review): get() is called without a presence check, as in the original; behaviour when
    // the requested instant has no plan content depends on the Option implementation.
    byte[] serializedPlan = metaClient.getActiveTimeline()
        .readCompactionPlanAsBytes(HoodieTimeline.getCompactionRequestedInstant(compactionInstant))
        .get();
    HoodieCompactionPlan storedPlan = TimelineMetadataUtils.deserializeCompactionPlan(serializedPlan);
    return planMigrator.upgradeToLatest(storedPlan, storedPlan.getVersion());
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionPlanMigrator(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator)

Example 8 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class CompactionCommitSink method commitIfNecessary.

/**
 * Commits the compaction of the given instant once the commit buffer is complete.
 *
 * <p>The buffer counts as complete when the number of received {@link CompactionCommitEvent}s
 * equals the number of operations in the compaction plan; otherwise this method returns without
 * doing anything. The plan is taken from {@code compactionPlanCache} and loaded on a cache miss.
 * A failure inside the commit is logged rather than propagated, and the state kept for the
 * instant is reset whether or not the commit succeeded.
 *
 * @param instant Compaction commit instant time
 * @param events  Commit events ever received for the instant
 */
private void commitIfNecessary(String instant, Collection<CompactionCommitEvent> events) throws IOException {
    final HoodieCompactionPlan plan = compactionPlanCache.computeIfAbsent(instant, key -> {
        try {
            return CompactionUtils.getCompactionPlan(this.writeClient.getHoodieTable().getMetaClient(), instant);
        } catch (IOException ioe) {
            // The mapping function cannot throw checked exceptions, so wrap it.
            throw new HoodieException(ioe);
        }
    });

    // Still waiting for events of some plan operations.
    if (plan.getOperations().size() != events.size()) {
        return;
    }

    try {
        doCommit(instant, events);
    } catch (Throwable t) {
        // Fail-safe on purpose: a failed commit is logged and must not propagate.
        LOG.error("Error while committing compaction instant: " + instant, t);
    } finally {
        // Clear the buffered status for this instant in every case.
        reset(instant);
    }
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)

Example 9 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class CompactionTestUtils method setupAndValidateCompactionOperations.

/**
 * Creates four compaction plans, schedules them (requested or inflight), adds the matching delta
 * commits and verifies that the pending compaction operations read back from a freshly built
 * meta client equal the expected ones.
 *
 * <p>The plans are created for the instant pairs ("000", "001"), ("002", "003"), ("004", "005")
 * and ("006", "007"); the second instant of each pair is the one the plan is scheduled under.
 * A plan whose expected entry count is not positive is asserted to have {@code null} operations.
 *
 * @param metaClient meta client of the table under test
 * @param inflight whether the plans are scheduled via {@code scheduleInflightCompaction}
 *     instead of {@code scheduleCompaction}
 * @param numEntriesInPlan1 expected number of operations in the first plan
 * @param numEntriesInPlan2 expected number of operations in the second plan
 * @param numEntriesInPlan3 expected number of operations in the third plan
 * @param numEntriesInPlan4 expected number of operations in the fourth plan
 * @return the expected pending compaction operations, which were asserted equal to the actual ones
 * @throws IOException declared for callers; propagated from the plan/timeline helpers
 */
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> setupAndValidateCompactionOperations(HoodieTableMetaClient metaClient, boolean inflight, int numEntriesInPlan1, int numEntriesInPlan2, int numEntriesInPlan3, int numEntriesInPlan4) throws IOException {
    HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", numEntriesInPlan1, true, true);
    HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", numEntriesInPlan2, false, true);
    HoodieCompactionPlan plan3 = createCompactionPlan(metaClient, "004", "005", numEntriesInPlan3, true, false);
    HoodieCompactionPlan plan4 = createCompactionPlan(metaClient, "006", "007", numEntriesInPlan4, false, false);
    if (inflight) {
        scheduleInflightCompaction(metaClient, "001", plan1);
        scheduleInflightCompaction(metaClient, "003", plan2);
        scheduleInflightCompaction(metaClient, "005", plan3);
        scheduleInflightCompaction(metaClient, "007", plan4);
    } else {
        scheduleCompaction(metaClient, "001", plan1);
        scheduleCompaction(metaClient, "003", plan2);
        scheduleCompaction(metaClient, "005", plan3);
        scheduleCompaction(metaClient, "007", plan4);
    }
    createDeltaCommit(metaClient, "000");
    createDeltaCommit(metaClient, "002");
    createDeltaCommit(metaClient, "004");
    createDeltaCommit(metaClient, "006");

    // Plain HashMap instead of double-brace initialization, which would create an anonymous
    // HashMap subclass just to populate four entries.
    Map<String, String> baseInstantsToCompaction = new HashMap<>();
    baseInstantsToCompaction.put("000", "001");
    baseInstantsToCompaction.put("002", "003");
    baseInstantsToCompaction.put("004", "005");
    baseInstantsToCompaction.put("006", "007");

    List<Integer> expectedNumEntries = Arrays.asList(numEntriesInPlan1, numEntriesInPlan2, numEntriesInPlan3, numEntriesInPlan4);
    List<HoodieCompactionPlan> plans = CollectionUtils.createImmutableList(plan1, plan2, plan3, plan4);
    // Indexed loop: the index is needed for both lists and for the assertion messages.
    for (int idx = 0; idx < plans.size(); idx++) {
        if (expectedNumEntries.get(idx) > 0) {
            assertEquals(expectedNumEntries.get(idx).longValue(), plans.get(idx).getOperations().size(), "check if plan " + idx + " has exp entries");
        } else {
            assertNull(plans.get(idx).getOperations(), "Plan " + idx + " has null ops");
        }
    }

    // Build a fresh meta client (active timeline loaded on construction) so the operations are
    // read back through a new client rather than the one passed in.
    HoodieTableMetaClient reloadedMetaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(metaClient.getBasePath()).setLoadActiveTimelineOnLoad(true).build();
    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> pendingCompactionMap = CompactionUtils.getAllPendingCompactionOperations(reloadedMetaClient);
    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> expPendingCompactionMap = generateExpectedCompactionOperations(Arrays.asList(plan1, plan2, plan3, plan4), baseInstantsToCompaction);
    // Ensure Compaction operations are fine.
    assertEquals(expPendingCompactionMap, pendingCompactionMap);
    return expPendingCompactionMap;
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HashMap(java.util.HashMap) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Pair(org.apache.hudi.common.util.collection.Pair)

Example 10 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

the class TestHoodieHFileInputFormat method createCompactionFile.

/**
 * Creates the requested-compaction file for {@code commitTime} in the {@code .hoodie} directory
 * under {@code basePath} and writes a serialized compaction plan into it. The plan is built with
 * only its version (2) set.
 *
 * @param basePath table base path; the {@code .hoodie} directory beneath it must already exist
 *     for the file creation to succeed
 * @param commitTime instant time used to derive the requested compaction file name
 * @return the file that was created and written
 * @throws IOException if the file cannot be created, written or closed
 */
private File createCompactionFile(java.nio.file.Path basePath, String commitTime) throws IOException {
    File file = basePath.resolve(".hoodie").resolve(HoodieTimeline.makeRequestedCompactionFileName(commitTime)).toFile();
    assertTrue(file.createNewFile());
    // try-with-resources closes the stream on every path and, unlike close() in a finally
    // block, does not let a failure in close() mask a failure from write().
    try (FileOutputStream os = new FileOutputStream(file)) {
        HoodieCompactionPlan compactionPlan = HoodieCompactionPlan.newBuilder().setVersion(2).build();
        // Write an empty compaction plan
        os.write(TimelineMetadataUtils.serializeCompactionPlan(compactionPlan).get());
    }
    return file;
}
Also used : HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Aggregations

HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)41 IOException (java.io.IOException)20 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)18 List (java.util.List)17 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)16 Pair (org.apache.hudi.common.util.collection.Pair)16 ArrayList (java.util.ArrayList)15 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)15 Path (org.apache.hadoop.fs.Path)14 HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation)13 Option (org.apache.hudi.common.util.Option)13 LogManager (org.apache.log4j.LogManager)13 Logger (org.apache.log4j.Logger)13 Map (java.util.Map)12 Set (java.util.Set)12 Collectors (java.util.stream.Collectors)12 CompactionOperation (org.apache.hudi.common.model.CompactionOperation)12 CompactionUtils (org.apache.hudi.common.util.CompactionUtils)12 FileSlice (org.apache.hudi.common.model.FileSlice)11 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)11