Search in sources :

Example 1 with ValidationOpResult

Use of org.apache.hudi.client.CompactionAdminClient.ValidationOpResult in the Apache Hudi project.

From the class HoodieCompactionAdminTool, method run.

/**
 * Executes one of the compaction admin operations (VALIDATE, UNSCHEDULE_FILE,
 * UNSCHEDULE_PLAN, or REPAIR) selected via {@code cfg.operation}, optionally
 * printing the result and serializing it to {@code cfg.outputPath}.
 *
 * @param jsc Spark context used to build the engine context and resolve the filesystem
 * @throws Exception if the selected admin operation fails
 * @throws IllegalStateException if the configured output path already exists,
 *         or the requested operation is not implemented
 */
public void run(JavaSparkContext jsc) throws Exception {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath).build();
    try (CompactionAdminClient admin = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), cfg.basePath)) {
        final FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration());
        // Refuse to clobber a pre-existing serialized result file.
        if (cfg.outputPath != null && fs.exists(new Path(cfg.outputPath))) {
            throw new IllegalStateException("Output File Path already exists");
        }
        switch (cfg.operation) {
            case VALIDATE:
                List<ValidationOpResult> res = admin.validateCompactionPlan(metaClient, cfg.compactionInstantTime, cfg.parallelism);
                if (cfg.printOutput) {
                    printOperationResult("Result of Validation Operation :", res);
                }
                serializeOperationResult(fs, res);
                break;
            case UNSCHEDULE_FILE:
                List<RenameOpResult> r = admin.unscheduleCompactionFileId(new HoodieFileGroupId(cfg.partitionPath, cfg.fileId), cfg.skipValidation, cfg.dryRun);
                if (cfg.printOutput) {
                    // Fix: use the same formatted printer as every other case instead of
                    // a raw System.out.println of the list (inconsistent output style).
                    printOperationResult("Result of Unschedule File Operation :", r);
                }
                serializeOperationResult(fs, r);
                break;
            case UNSCHEDULE_PLAN:
                List<RenameOpResult> r2 = admin.unscheduleCompactionPlan(cfg.compactionInstantTime, cfg.skipValidation, cfg.parallelism, cfg.dryRun);
                if (cfg.printOutput) {
                    printOperationResult("Result of Unscheduling Compaction Plan :", r2);
                }
                serializeOperationResult(fs, r2);
                break;
            case REPAIR:
                List<RenameOpResult> r3 = admin.repairCompaction(cfg.compactionInstantTime, cfg.parallelism, cfg.dryRun);
                if (cfg.printOutput) {
                    printOperationResult("Result of Repair Operation :", r3);
                }
                serializeOperationResult(fs, r3);
                break;
            default:
                throw new IllegalStateException("Not yet implemented !!");
        }
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) CompactionAdminClient(org.apache.hudi.client.CompactionAdminClient) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) FileSystem(org.apache.hadoop.fs.FileSystem) RenameOpResult(org.apache.hudi.client.CompactionAdminClient.RenameOpResult)

Example 2 with ValidationOpResult

Use of org.apache.hudi.client.CompactionAdminClient.ValidationOpResult in the Apache Hudi project.

From the class CompactionCommand, method validateCompaction.

/**
 * CLI command that validates a compaction plan by launching a separate Spark job,
 * reading back the serialized {@link ValidationOpResult}s from a temp file, and
 * rendering them as a table.
 *
 * <p>The Spark job (COMPACT_VALIDATE) writes its results to {@code outputPathStr};
 * this method deserializes that file, reduces the per-operation success flags into
 * an overall VALID/INVALID verdict, and deletes the temp file in the finally block.
 *
 * @return a rendered result table on success, or an error string if the
 *         launched Spark job exits non-zero
 */
@CliCommand(value = "compaction validate", help = "Validate Compaction")
public String validateCompaction(@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant, @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
    HoodieTableMetaClient client = checkAndGetMetaClient();
    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    // Temp file used to pass serialized results from the launched Spark job back to the CLI.
    String outputPathStr = getTmpSerializerFile();
    Path outputPath = new Path(outputPathStr);
    String output;
    try {
        // NOTE(review): JavaConversions is deprecated in newer Scala versions — confirm
        // the Scala version pinned by the build before modernizing this call.
        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
        SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
        sparkLauncher.addAppArgs(SparkCommand.COMPACT_VALIDATE.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism);
        Process process = sparkLauncher.launch();
        InputStreamConsumer.captureOutput(process);
        // Block until the validation job finishes; non-zero exit means the job itself failed
        // (distinct from the plan being invalid, which is reported via the results below).
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            return "Failed to validate compaction for " + compactionInstant;
        }
        List<ValidationOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
        // Plan is VALID only if every operation succeeded; an empty result list counts as valid.
        boolean valid = res.stream().map(OperationResult::isSuccess).reduce(Boolean::logicalAnd).orElse(true);
        String message = "\n\n\t COMPACTION PLAN " + (valid ? "VALID" : "INVALID") + "\n\n";
        List<Comparable[]> rows = new ArrayList<>();
        // One table row per validated operation: file id, base instant, base file name
        // (blank if absent), delta-file count, success flag, and exception message if any.
        res.forEach(r -> {
            Comparable[] row = new Comparable[] { r.getOperation().getFileId(), r.getOperation().getBaseInstantTime(), r.getOperation().getDataFileName().isPresent() ? r.getOperation().getDataFileName().get() : "", r.getOperation().getDeltaFileNames().size(), r.isSuccess(), r.getException().isPresent() ? r.getException().get().getMessage() : "" };
            rows.add(row);
        });
        // No custom field formatting needed — all columns render with default toString.
        Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
        TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID).addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT_TIME).addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_DATA_FILE).addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES).addTableHeaderField(HoodieTableHeaderFields.HEADER_VALID).addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
        output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
    } finally {
        // Delete tmp file used to serialize result
        if (HoodieCLI.fs.exists(outputPath)) {
            HoodieCLI.fs.delete(outputPath, false);
        }
    }
    return output;
}
Also used : Path(org.apache.hadoop.fs.Path) TableHeader(org.apache.hudi.cli.TableHeader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) OperationResult(org.apache.hudi.table.action.compact.OperationResult) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) BiFunction(java.util.function.BiFunction) Function(java.util.function.Function) ValidationOpResult(org.apache.hudi.client.CompactionAdminClient.ValidationOpResult) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Aggregations

Path (org.apache.hadoop.fs.Path)2 ValidationOpResult (org.apache.hudi.client.CompactionAdminClient.ValidationOpResult)2 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 BiFunction (java.util.function.BiFunction)1 Function (java.util.function.Function)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 TableHeader (org.apache.hudi.cli.TableHeader)1 CompactionAdminClient (org.apache.hudi.client.CompactionAdminClient)1 RenameOpResult (org.apache.hudi.client.CompactionAdminClient.RenameOpResult)1 HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext)1 HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)1 OperationResult (org.apache.hudi.table.action.compact.OperationResult)1 SparkLauncher (org.apache.spark.launcher.SparkLauncher)1 CliCommand (org.springframework.shell.core.annotation.CliCommand)1