Search in sources:

Example 26 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

Source: class CompactionAdminClient, method validateCompactionPlan.

/**
 * Validate all compaction operations in a compaction plan. Verifies the file-slices are consistent with corresponding
 * compaction operations.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 */
/**
 * Validates every compaction operation contained in the plan for the given instant,
 * checking that each operation's file-slices on storage agree with what the plan recorded.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction instant whose plan is validated
 * @param parallelism Degree of parallelism for the validation
 * @return one {@link ValidationOpResult} per compaction operation; empty list when the plan has no operations
 * @throws IOException if the compaction plan cannot be read
 */
public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism) throws IOException {
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    if (plan.getOperations() == null) {
        // Nothing scheduled in this plan, so there is nothing to validate.
        return new ArrayList<>();
    }
    List<CompactionOperation> operations = plan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance)
        .collect(Collectors.toList());
    context.setJobStatus(this.getClass().getSimpleName(), "Validate compaction operations");
    // Validate each operation in parallel via the engine context; wrap checked IO failures
    // as HoodieIOException so they can escape the lambda.
    return context.map(operations, operation -> {
        try {
            return validateCompactionOperation(metaClient, compactionInstant, operation, Option.of(fsView));
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        }
    }, parallelism);
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)

Example 27 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

Source: class CompactionAdminClient, method getRenamingActionsForUnschedulingCompactionPlan.

/**
 * Generate renaming actions for unscheduling a pending compaction plan. NOTE: Can only be used safely when no writer
 * (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param compactionInstant Compaction Instant to be unscheduled
 * @param fsViewOpt Cached File System View
 * @param skipValidation Skip Validation
 * @return list of pairs of log-files (old, new) and for each pair, rename must be done to successfully unschedule
 *         compaction.
 */
/**
 * Computes the log-file renames required to unschedule a pending compaction plan.
 * NOTE: Only safe to run when no writer (ingestion/compaction) is active.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param compactionInstant Compaction instant to be unscheduled
 * @param parallelism Degree of parallelism for generating the rename actions
 * @param fsViewOpt Optional cached file-system view; a fresh one is built when absent
 * @param skipValidation Whether to skip per-operation validation
 * @return pairs of (old, new) log files; each pair must be renamed to unschedule compaction
 * @throws IOException if the compaction plan cannot be read
 */
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant, int parallelism, Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
    // Reuse the caller-supplied view when present to avoid rebuilding it from the timeline.
    HoodieTableFileSystemView fsView = fsViewOpt.isPresent()
        ? fsViewOpt.get()
        : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
    if (plan.getOperations() == null) {
        LOG.warn("No operations for compaction instant : " + compactionInstant);
        return new ArrayList<>();
    }
    LOG.info("Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
    List<CompactionOperation> operations = plan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance)
        .collect(Collectors.toList());
    context.setJobStatus(this.getClass().getSimpleName(), "Generate compaction unscheduling operations");
    // Flatten the per-operation rename lists in parallel. Checked IO errors are wrapped as
    // HoodieIOException; validation failures are rethrown as HoodieException.
    return context.flatMap(operations, operation -> {
        try {
            return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, operation, Option.of(fsView), skipValidation).stream();
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        } catch (CompactionValidationException cve) {
            throw new HoodieException(cve);
        }
    }, parallelism);
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)

Example 28 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

Source: class MetadataConversionUtils, method createMetaWrapper.

/**
 * Builds a {@link HoodieArchivedMetaEntry} for the given timeline instant, embedding the
 * action-specific metadata (clean/commit/replace/rollback/savepoint/compaction) read from
 * the active timeline.
 *
 * @param hoodieInstant the instant being archived
 * @param metaClient meta client used to read the instant's details from the timeline
 * @return populated archived-metadata wrapper for the instant
 * @throws IOException if the instant's metadata cannot be read or deserialized
 * @throws UnsupportedOperationException for actions this method does not handle
 */
public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInstant, HoodieTableMetaClient metaClient) throws IOException {
    HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
    archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
    archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
    switch(hoodieInstant.getAction()) {
        case HoodieTimeline.CLEAN_ACTION:
            {
                // Completed cleans archive the cleaner result; pending ones archive the plan.
                if (hoodieInstant.isCompleted()) {
                    archivedMetaWrapper.setHoodieCleanMetadata(CleanerUtils.getCleanerMetadata(metaClient, hoodieInstant));
                } else {
                    archivedMetaWrapper.setHoodieCleanerPlan(CleanerUtils.getCleanerPlan(metaClient, hoodieInstant));
                }
                archivedMetaWrapper.setActionType(ActionType.clean.name());
                break;
            }
        case HoodieTimeline.COMMIT_ACTION:
            {
                HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
                archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(commitMetadata));
                archivedMetaWrapper.setActionType(ActionType.commit.name());
                break;
            }
        case HoodieTimeline.DELTA_COMMIT_ACTION:
            {
                // Delta commits reuse the commit-metadata shape; only the action type differs.
                HoodieCommitMetadata deltaCommitMetadata = HoodieCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class);
                archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(deltaCommitMetadata));
                archivedMetaWrapper.setActionType(ActionType.deltacommit.name());
                break;
            }
        case HoodieTimeline.REPLACE_COMMIT_ACTION:
            {
                // Three states, three payloads: completed -> replace-commit metadata,
                // inflight -> (possibly empty) commit metadata, requested -> (possibly empty)
                // requested-replace metadata. Empty payloads are simply skipped.
                if (hoodieInstant.isCompleted()) {
                    HoodieReplaceCommitMetadata replaceCommitMetadata = HoodieReplaceCommitMetadata.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieReplaceCommitMetadata.class);
                    archivedMetaWrapper.setHoodieReplaceCommitMetadata(ReplaceArchivalHelper.convertReplaceCommitMetadata(replaceCommitMetadata));
                } else if (hoodieInstant.isInflight()) {
                    // inflight replacecommit files have the same meta data body as HoodieCommitMetadata
                    // so we could re-use it without further creating an inflight extension.
                    // Or inflight replacecommit files are empty under clustering circumstance
                    Option<HoodieCommitMetadata> inflightCommitMetadata = getInflightReplaceMetadata(metaClient, hoodieInstant);
                    if (inflightCommitMetadata.isPresent()) {
                        archivedMetaWrapper.setHoodieInflightReplaceMetadata(convertCommitMetadata(inflightCommitMetadata.get()));
                    }
                } else {
                    // we may have cases with empty HoodieRequestedReplaceMetadata e.g. insert_overwrite_table or insert_overwrite
                    // without clustering. However, we should revisit the requested commit file standardization
                    Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, hoodieInstant);
                    if (requestedReplaceMetadata.isPresent()) {
                        archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata.get());
                    }
                }
                archivedMetaWrapper.setActionType(ActionType.replacecommit.name());
                break;
            }
        case HoodieTimeline.ROLLBACK_ACTION:
            {
                // Only completed rollbacks carry metadata; pending ones archive just the action type.
                if (hoodieInstant.isCompleted()) {
                    archivedMetaWrapper.setHoodieRollbackMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class));
                }
                archivedMetaWrapper.setActionType(ActionType.rollback.name());
                break;
            }
        case HoodieTimeline.SAVEPOINT_ACTION:
            {
                archivedMetaWrapper.setHoodieSavePointMetadata(TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class));
                archivedMetaWrapper.setActionType(ActionType.savepoint.name());
                break;
            }
        case HoodieTimeline.COMPACTION_ACTION:
            {
                HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, hoodieInstant.getTimestamp());
                archivedMetaWrapper.setHoodieCompactionPlan(plan);
                archivedMetaWrapper.setActionType(ActionType.compaction.name());
                break;
            }
        default:
            {
                throw new UnsupportedOperationException("Action not fully supported yet");
            }
    }
    return archivedMetaWrapper;
}
Also used : HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 29 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

Source: class CompactionV1MigrationHandler, method downgradeFrom.

/**
 * Downgrades a version-2 compaction plan to version 1 by rewriting each operation's data-file
 * and delta-file paths into the V1 (base-path-relative) form.
 *
 * @param input version-2 compaction plan; its version is asserted to be 2
 * @return an equivalent compaction plan at the managed (V1) version
 */
@Override
public HoodieCompactionPlan downgradeFrom(HoodieCompactionPlan input) {
    ValidationUtils.checkArgument(input.getVersion() == 2, "Input version is " + input.getVersion() + ". Must be 2");
    HoodieCompactionPlan compactionPlan = new HoodieCompactionPlan();
    final Path basePath = new Path(metaClient.getBasePath());
    List<HoodieCompactionOperation> v1CompactionOperationList = new ArrayList<>();
    if (input.getOperations() != null) {
        for (HoodieCompactionOperation sourceOp : input.getOperations()) {
            // Rewrite every delta log path into its V1 form under the table base path.
            List<String> v1DeltaPaths = sourceOp.getDeltaFilePaths().stream()
                .map(deltaPath -> convertToV1Path(basePath, sourceOp.getPartitionPath(), deltaPath))
                .collect(Collectors.toList());
            v1CompactionOperationList.add(HoodieCompactionOperation.newBuilder()
                .setBaseInstantTime(sourceOp.getBaseInstantTime())
                .setFileId(sourceOp.getFileId())
                .setPartitionPath(sourceOp.getPartitionPath())
                .setMetrics(sourceOp.getMetrics())
                .setDataFilePath(convertToV1Path(basePath, sourceOp.getPartitionPath(), sourceOp.getDataFilePath()))
                .setDeltaFilePaths(v1DeltaPaths)
                .build());
        }
    }
    compactionPlan.setOperations(v1CompactionOperationList);
    compactionPlan.setExtraMetadata(input.getExtraMetadata());
    compactionPlan.setVersion(getManagedVersion());
    return compactionPlan;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList)

Example 30 with HoodieCompactionPlan

use of org.apache.hudi.avro.model.HoodieCompactionPlan in project hudi by apache.

Source: class CompactionV2MigrationHandler, method upgradeFrom.

/**
 * Upgrades a version-1 compaction plan to version 2 by stripping each operation's data-file
 * and delta-file paths down to bare file names.
 *
 * @param input version-1 compaction plan; its version is asserted to be 1
 * @return an equivalent compaction plan at the managed (V2) version
 */
@Override
public HoodieCompactionPlan upgradeFrom(HoodieCompactionPlan input) {
    ValidationUtils.checkArgument(input.getVersion() == 1, "Input version is " + input.getVersion() + ". Must be 1");
    HoodieCompactionPlan compactionPlan = new HoodieCompactionPlan();
    List<HoodieCompactionOperation> v2CompactionOperationList = new ArrayList<>();
    if (input.getOperations() != null) {
        for (HoodieCompactionOperation sourceOp : input.getOperations()) {
            // V2 stores only the file name; a null data-file path stays null.
            String dataFileName = sourceOp.getDataFilePath() == null
                ? null
                : new Path(sourceOp.getDataFilePath()).getName();
            List<String> deltaFileNames = sourceOp.getDeltaFilePaths().stream()
                .map(deltaPath -> new Path(deltaPath).getName())
                .collect(Collectors.toList());
            v2CompactionOperationList.add(HoodieCompactionOperation.newBuilder()
                .setBaseInstantTime(sourceOp.getBaseInstantTime())
                .setFileId(sourceOp.getFileId())
                .setPartitionPath(sourceOp.getPartitionPath())
                .setMetrics(sourceOp.getMetrics())
                .setDataFilePath(dataFileName)
                .setDeltaFilePaths(deltaFileNames)
                .build());
        }
    }
    compactionPlan.setOperations(v2CompactionOperationList);
    compactionPlan.setExtraMetadata(input.getExtraMetadata());
    compactionPlan.setVersion(getManagedVersion());
    return compactionPlan;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) ArrayList(java.util.ArrayList)

Aggregations

HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan)41 IOException (java.io.IOException)20 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)18 List (java.util.List)17 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)16 Pair (org.apache.hudi.common.util.collection.Pair)16 ArrayList (java.util.ArrayList)15 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)15 Path (org.apache.hadoop.fs.Path)14 HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation)13 Option (org.apache.hudi.common.util.Option)13 LogManager (org.apache.log4j.LogManager)13 Logger (org.apache.log4j.Logger)13 Map (java.util.Map)12 Set (java.util.Set)12 Collectors (java.util.stream.Collectors)12 CompactionOperation (org.apache.hudi.common.model.CompactionOperation)12 CompactionUtils (org.apache.hudi.common.util.CompactionUtils)12 FileSlice (org.apache.hudi.common.model.FileSlice)11 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)11