Example 1 with RenameOpResult

Use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.

From the class HoodieCompactionAdminTool, method run:

/**
 * Executes one of compaction admin operations.
 */
public void run(JavaSparkContext jsc) throws Exception {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath).build();
    try (CompactionAdminClient admin = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), cfg.basePath)) {
        final FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration());
        if (cfg.outputPath != null && fs.exists(new Path(cfg.outputPath))) {
            throw new IllegalStateException("Output File Path already exists");
        }
        switch (cfg.operation) {
            case VALIDATE:
                // Check that the pending compaction plan is consistent with the current file-system state
                List<ValidationOpResult> res = admin.validateCompactionPlan(metaClient, cfg.compactionInstantTime, cfg.parallelism);
                if (cfg.printOutput) {
                    printOperationResult("Result of Validation Operation :", res);
                }
                serializeOperationResult(fs, res);
                break;
            case UNSCHEDULE_FILE:
                // Remove a single file group from the pending compaction plan
                List<RenameOpResult> r = admin.unscheduleCompactionFileId(new HoodieFileGroupId(cfg.partitionPath, cfg.fileId), cfg.skipValidation, cfg.dryRun);
                if (cfg.printOutput) {
                    // Note: this branch prints the raw result list rather than using printOperationResult
                    System.out.println(r);
                }
                serializeOperationResult(fs, r);
                break;
            case UNSCHEDULE_PLAN:
                // Unschedule the entire pending compaction plan for the given instant
                List<RenameOpResult> r2 = admin.unscheduleCompactionPlan(cfg.compactionInstantTime, cfg.skipValidation, cfg.parallelism, cfg.dryRun);
                if (cfg.printOutput) {
                    printOperationResult("Result of Unscheduling Compaction Plan :", r2);
                }
                serializeOperationResult(fs, r2);
                break;
            case REPAIR:
                // Rename files back into agreement with the timeline, e.g. after a partially failed unschedule
                List<RenameOpResult> r3 = admin.repairCompaction(cfg.compactionInstantTime, cfg.parallelism, cfg.dryRun);
                if (cfg.printOutput) {
                    printOperationResult("Result of Repair Operation :", r3);
                }
                serializeOperationResult(fs, r3);
                break;
            default:
                throw new IllegalStateException("Not yet implemented !!");
        }
    }
}
Also used: HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) CompactionAdminClient(org.apache.hudi.client.CompactionAdminClient) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) FileSystem(org.apache.hadoop.fs.FileSystem) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult)
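
For readers who want to drive one of these operations programmatically rather than through the tool, the sketch below is a minimal standalone driver that validates a pending compaction plan with CompactionAdminClient, using only calls that appear in the example above. The base path, instant time, app name, and local master are illustrative assumptions, not values from the original source.

import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.hudi.client.CompactionAdminClient;
import org.apache.hudi.client.CompactionAdminClient.ValidationOpResult;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class CompactionValidationSketch {

    public static void main(String[] args) throws Exception {
        // Illustrative values; point these at a real Hudi table and a pending compaction instant
        String basePath = "hdfs:///tmp/hudi/my_table";
        String compactionInstant = "20220101000000";
        SparkConf sparkConf = new SparkConf().setAppName("compaction-validate").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(sparkConf)) {
            HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
                    .setConf(jsc.hadoopConfiguration())
                    .setBasePath(basePath)
                    .build();
            // CompactionAdminClient is closeable, as in the example above
            try (CompactionAdminClient admin =
                    new CompactionAdminClient(new HoodieSparkEngineContext(jsc), basePath)) {
                List<ValidationOpResult> results =
                        admin.validateCompactionPlan(metaClient, compactionInstant, 2);
                results.forEach(System.out::println);
            }
        }
    }
}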

Example 2 with RenameOpResult

Use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.

From the class CompactionCommand, method unscheduleCompaction:

@CliCommand(value = "compaction unschedule", help = "Unschedule Compaction")
public String unscheduleCompaction(
        @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
        @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
        @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
        @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
        @CliOption(key = { "skipValidation" }, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
        @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String outputPathStr = getTmpSerializerFile();
    Path outputPath = new Path(outputPathStr);
    String output;
    try {
        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
        SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_PLAN.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism, Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
        Process process = sparkLauncher.launch();
        InputStreamConsumer.captureOutput(process);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "Failed to unschedule compaction for " + compactionInstant;
        }
        List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
        output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule pending compaction");
    } finally {
        // Delete tmp file used to serialize result
        if (HoodieCLI.fs.exists(outputPath)) {
            HoodieCLI.fs.delete(outputPath, false);
        }
    }
    return output;
}
Also used: HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)
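
From the hudi-cli shell, this command would be invoked along the following lines; the option keys come from the @CliOption annotations above, while the instant time and values are illustrative placeholders:

compaction unschedule --instant 20220101000000 --parallelism 3 --sparkMaster local --sparkMemory 2G --skipValidation false --dryRun true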

Example 3 with RenameOpResult

Use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.

From the class CompactionCommand, method unscheduleCompactFile:

@CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId")
public String unscheduleCompactFile(
        @CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId,
        @CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath,
        @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
        @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
        @CliOption(key = { "skipValidation" }, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
        @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
        @CliOption(key = { "headeronly" }, help = "Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String outputPathStr = getTmpSerializerFile();
    Path outputPath = new Path(outputPathStr);
    String output;
    try {
        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
        SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(), master, sparkMemory, client.getBasePath(), fileId, partitionPath, outputPathStr, "1", Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
        Process process = sparkLauncher.launch();
        InputStreamConsumer.captureOutput(process);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "Failed to unschedule compaction for file " + fileId;
        }
        List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
        output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule file from pending compaction");
    } finally {
        // Delete tmp file used to serialize result
        if (HoodieCLI.fs.exists(outputPath)) {
            HoodieCLI.fs.delete(outputPath, false);
        }
    }
    return output;
}
Also used: HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)
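
As a usage sketch, the command could be invoked from the hudi-cli shell as follows; the fileId and partitionPath values are hypothetical placeholders for a real file group in the table:

compaction unscheduleFileId --fileId <file-id> --partitionPath <partition-path> --dryRun true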

Example 4 with RenameOpResult

Use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.

From the class CompactionCommand, method repairCompaction:

@CliCommand(value = "compaction repair", help = "Renames the files to make them consistent with the timeline as " + "dictated by Hoodie metadata. Use when compaction unschedule fails partially.")
public String repairCompaction(
        @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
        @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
        @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
        @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
        @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
        @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
        @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
        @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
        @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String outputPathStr = getTmpSerializerFile();
    Path outputPath = new Path(outputPathStr);
    String output;
    try {
        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
        SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
        sparkLauncher.addAppArgs(SparkCommand.COMPACT_REPAIR.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism, Boolean.valueOf(dryRun).toString());
        Process process = sparkLauncher.launch();
        InputStreamConsumer.captureOutput(process);
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "Failed to unschedule compaction for " + compactionInstant;
        }
        List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
        output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "repair compaction");
    } finally {
        // Delete tmp file used to serialize result
        if (HoodieCLI.fs.exists(outputPath)) {
            HoodieCLI.fs.delete(outputPath, false);
        }
    }
    return output;
}
Also used: HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)
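
A matching hudi-cli invocation might look like the following; the instant time is an illustrative placeholder, and the option keys mirror the @CliOption annotations above:

compaction repair --instant 20220101000000 --parallelism 3 --dryRun true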

Aggregations

Path (org.apache.hadoop.fs.Path): 4 uses
RenameOpResult (org.apache.hudi.client.CompactionAdminClient.RenameOpResult): 4 uses
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 4 uses
SparkLauncher (org.apache.spark.launcher.SparkLauncher): 3 uses
CliCommand (org.springframework.shell.core.annotation.CliCommand): 3 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 1 use
CompactionAdminClient (org.apache.hudi.client.CompactionAdminClient): 1 use
ValidationOpResult (org.apache.hudi.client.CompactionAdminClient.ValidationOpResult): 1 use
HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext): 1 use
HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 1 use