Example 11 with Pair

Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

The class CompactionAdminClient, method repairCompaction.

/**
 * Renames delta files to make file-slices consistent with the timeline, as dictated by Hoodie metadata. Use when
 * compaction unscheduling fails partially.
 *
 * This operation MUST be executed with compactions and writer turned OFF.
 *
 * @param compactionInstant Compaction Instant to be repaired
 * @param parallelism Parallelism for validation and rename operations
 * @param dryRun Dry Run Mode
 * @return Result of each rename operation attempted during repair
 */
public List<RenameOpResult> repairCompaction(String compactionInstant, int parallelism, boolean dryRun) throws Exception {
    HoodieTableMetaClient metaClient = createMetaClient(false);
    // Validate the compaction plan and keep only the operations that failed validation.
    List<ValidationOpResult> validationResults = validateCompactionPlan(metaClient, compactionInstant, parallelism);
    List<ValidationOpResult> failed = validationResults.stream().filter(v -> !v.isSuccess()).collect(Collectors.toList());
    if (failed.isEmpty()) {
        return new ArrayList<>();
    }
    // For each failed operation, compute the log-file renames that realign its file-slice with the plan.
    final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    List<Pair<HoodieLogFile, HoodieLogFile>> renameActions = failed.stream().flatMap(v -> getRenamingActionsToAlignWithCompactionOperation(metaClient, compactionInstant, v.getOperation(), Option.of(fsView)).stream()).collect(Collectors.toList());
    return runRenamingOps(metaClient, renameActions, parallelism, dryRun);
}
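
A minimal sketch of how this repair might be driven, previewing in dry-run mode before executing; the engine context, base path, and parallelism of 3 below are illustrative assumptions, not values from the example.

// Hypothetical driver code; "context" is a HoodieEngineContext and the base path is a placeholder.
CompactionAdminClient admin = new CompactionAdminClient(context, "/tmp/hoodie/sample-table");
// Dry run: compute and report the rename operations without touching any files.
List<RenameOpResult> preview = admin.repairCompaction("001", 3, true);
if (!preview.isEmpty()) {
    // Real run: perform the renames that realign file-slices with the compaction plan.
    List<RenameOpResult> results = admin.repairCompaction("001", 3, false);
}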

Example 12 with Pair

Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

The class CompactionAdminClient, method unscheduleCompactionFileId.

/**
 * Remove a fileId from pending compaction. Removes the associated compaction operation and renames delta-files that
 * were generated for that file-id after the compaction operation was scheduled.
 *
 * This operation MUST be executed with compactions and writer turned OFF.
 *
 * @param fgId FileGroupId to be unscheduled
 * @param skipValidation Skip validation
 * @param dryRun Dry Run Mode
 * @return Result of the rename operations performed while unscheduling
 */
public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId, boolean skipValidation, boolean dryRun) throws Exception {
    HoodieTableMetaClient metaClient = createMetaClient(false);
    List<Pair<HoodieLogFile, HoodieLogFile>> renameActions = getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId, Option.empty(), skipValidation);
    List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, 1, dryRun);
    if (!dryRun && !res.isEmpty() && res.get(0).isExecuted() && res.get(0).isSuccess()) {
        // Ready to remove this file-Id from compaction request
        Pair<String, HoodieCompactionOperation> compactionOperationWithInstant = CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fgId);
        HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
        // Keep every operation except the one for this exact file group (both fileId and partitionPath must match for it to be dropped).
        List<HoodieCompactionOperation> newOps = plan.getOperations().stream().filter(op -> !(op.getFileId().equals(fgId.getFileId()) && op.getPartitionPath().equals(fgId.getPartitionPath()))).collect(Collectors.toList());
        HoodieCompactionPlan newPlan = HoodieCompactionPlan.newBuilder().setOperations(newOps).setExtraMetadata(plan.getExtraMetadata()).build();
        HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionOperationWithInstant.getLeft());
        Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
        if (metaClient.getFs().exists(inflightPath)) {
            // revert if in inflight state
            metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
        }
        // Overwrite compaction plan with updated info
        metaClient.getActiveTimeline().saveToCompactionRequested(new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()), TimelineMetadataUtils.serializeCompactionPlan(newPlan), true);
    }
    return res;
}
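
For context, a hedged sketch of unscheduling a single file group with this method; the admin client setup, partition path, and file id are illustrative placeholders.

// Hypothetical invocation; "context" and the base path are assumptions for illustration.
CompactionAdminClient admin = new CompactionAdminClient(context, "/tmp/hoodie/sample-table");
HoodieFileGroupId fgId = new HoodieFileGroupId("2021/01/01", "file-group-uuid-0001");
// Preview the renames first (dryRun = true), keeping validation enabled (skipValidation = false).
List<RenameOpResult> preview = admin.unscheduleCompactionFileId(fgId, false, true);
// If the preview looks right, execute the renames and rewrite the compaction plan.
List<RenameOpResult> results = admin.unscheduleCompactionFileId(fgId, false, false);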

Example 13 with Pair

Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

The class CompactionAdminClient, method getRenamingActionsToAlignWithCompactionOperation.

/**
 * Get renaming actions to ensure the log-files of merged file-slices are aligned with the compaction operation. This
 * method is used to recover from failures during compaction unscheduling.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param op Compaction Operation
 * @param fsViewOpt File System View
 * @return Pairs of (existing log-file, new log-file name) rename actions
 */
protected static List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsToAlignWithCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation op, Option<HoodieTableFileSystemView> fsViewOpt) {
    HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
    HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get();
    FileSlice merged = fileSystemView.getLatestMergedFileSlicesBeforeOrOn(op.getPartitionPath(), lastInstant.getTimestamp()).filter(fs -> fs.getFileId().equals(op.getFileId())).findFirst().get();
    // Highest log version that the compaction operation already accounts for.
    final int maxVersion = op.getDeltaFileNames().stream().map(lf -> FSUtils.getFileVersionFromLog(new Path(lf))).reduce((x, y) -> x > y ? x : y).orElse(0);
    // Any log file written after scheduling (version > maxVersion) must be renamed onto the compaction instant.
    List<HoodieLogFile> logFilesToBeMoved = merged.getLogFiles().filter(lf -> lf.getLogVersion() > maxVersion).collect(Collectors.toList());
    return logFilesToBeMoved.stream().map(lf -> {
        ValidationUtils.checkArgument(lf.getLogVersion() - maxVersion > 0, "Expect new log version to be sane");
        // New name: same file-id, base instant = compactionInstant, version shifted down by maxVersion.
        HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(), FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()), compactionInstant, lf.getLogVersion() - maxVersion, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
        return Pair.of(lf, newLogFile);
    }).collect(Collectors.toList());
}
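
To make the Pair contract concrete: each returned Pair maps an existing stray log-file (left) to the realigned name it should take (right). A minimal sketch of applying the actions, assuming "fs" is the table's FileSystem (e.g. metaClient.getFs()) and that checked IOExceptions are allowed to propagate:

// Hypothetical application of the rename actions computed above.
for (Pair<HoodieLogFile, HoodieLogFile> action : renameActions) {
    // Left: current mis-numbered log file; right: target name aligned with compactionInstant.
    boolean renamed = fs.rename(action.getLeft().getPath(), action.getRight().getPath());
    if (!renamed) {
        throw new HoodieIOException("Failed to rename " + action.getLeft().getPath());
    }
}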

Example 14 with Pair

Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

The class ITTestCompactionCommand, method testRepairCompaction.

/**
 * This function mainly tests the workflow of the 'compaction repair' command.
 * The real test of {@link org.apache.hudi.client.CompactionAdminClient#repairCompaction}
 * is {@link TestCompactionAdminClient#testRepairCompactionPlan}.
 */
@Test
public void testRepairCompaction() throws Exception {
    int numEntriesPerInstant = 10;
    String compactionInstant = "001";
    CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
    metaClient.reloadActiveTimeline();
    CompactionAdminClient client = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), metaClient.getBasePath());
    List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
    renameFiles.forEach(lfPair -> {
        try {
            metaClient.getFs().rename(lfPair.getLeft().getPath(), lfPair.getRight().getPath());
        } catch (IOException e) {
            throw new HoodieIOException(e.getMessage(), e);
        }
    });
    client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
    CommandResult cr = getShell().executeCommand(String.format("compaction repair --instant %s --sparkMaster %s", compactionInstant, "local"));
    // All renames executed successfully: the rendered result contains "true" and no "false".
    // Expected:
    // ║ File Id │ Source File Path │ Destination File Path │ Rename Executed? │ Rename Succeeded? │ Error ║
    // ║ *       │     *            │        *              │    true          │     true          │       ║
    assertAll("Command run failed", () -> assertTrue(cr.isSuccess()), () -> assertTrue(removeNonWordAndStripSpace(cr.getResult().toString()).contains("true")), () -> assertFalse(removeNonWordAndStripSpace(cr.getResult().toString()).contains("false")));
}
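
The same command could be exercised non-destructively first; this sketch assumes the CLI exposes a dryRun option on 'compaction repair' (an assumption — only --instant and --sparkMaster appear in the test above).

// Hypothetical dry-run invocation from the same integration-test shell.
// The --dryRun flag name is an assumption; verify it against the CLI's CompactionCommand.
CommandResult dry = getShell().executeCommand(
    String.format("compaction repair --instant %s --sparkMaster %s --dryRun %s", compactionInstant, "local", "true"));
assertTrue(dry.isSuccess());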

Example 15 with Pair

Use of org.apache.hudi.common.util.collection.Pair in project hudi by apache.

The class AsyncCompactService, method startService.

/**
 * Start Compaction Service.
 */
@Override
protected Pair<CompletableFuture, ExecutorService> startService() {
    ExecutorService executor = Executors.newFixedThreadPool(maxConcurrentCompaction, r -> {
        Thread t = new Thread(r, "async_compact_thread");
        t.setDaemon(isRunInDaemonMode());
        return t;
    });
    return Pair.of(CompletableFuture.allOf(IntStream.range(0, maxConcurrentCompaction).mapToObj(i -> CompletableFuture.supplyAsync(() -> {
        try {
            // Set Compactor Pool Name for allowing users to prioritize compaction
            LOG.info("Setting pool name for compaction to " + COMPACT_POOL_NAME);
            context.setProperty(EngineProperty.COMPACTION_POOL_NAME, COMPACT_POOL_NAME);
            while (!isShutdownRequested()) {
                final HoodieInstant instant = fetchNextAsyncServiceInstant();
                if (null != instant) {
                    LOG.info("Starting Compaction for instant " + instant);
                    compactor.compact(instant);
                    LOG.info("Finished Compaction for instant " + instant);
                }
            }
            LOG.info("Compactor shutting down properly!!");
        } catch (InterruptedException ie) {
            hasError = true;
            LOG.warn("Compactor executor thread got interrupted exception. Stopping", ie);
        } catch (IOException e) {
            hasError = true;
            LOG.error("Compactor executor failed due to IOException", e);
            throw new HoodieIOException(e.getMessage(), e);
        } catch (Exception e) {
            hasError = true;
            LOG.error("Compactor executor failed", e);
            throw e;
        }
        return true;
    }, executor)).toArray(CompletableFuture[]::new)), executor);
}
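
A hedged sketch of how a caller might consume the returned Pair: wait on the left future for all compactor threads to exit, then dispose of the right executor (the shutdown policy here is illustrative, not the service's own).

// Hypothetical consumer of the Pair returned by startService().
Pair<CompletableFuture, ExecutorService> handle = startService();
handle.getLeft().whenComplete((res, err) -> {
    // All compactor threads have finished (normally or exceptionally); release the pool.
    handle.getRight().shutdown();
});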
