use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.
the class HoodieCompactionAdminTool method run.
/**
* Executes one of compaction admin operations.
*/
public void run(JavaSparkContext jsc) throws Exception {
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath).build();
try (CompactionAdminClient admin = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), cfg.basePath)) {
final FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration());
if (cfg.outputPath != null && fs.exists(new Path(cfg.outputPath))) {
throw new IllegalStateException("Output File Path already exists");
}
switch(cfg.operation) {
case VALIDATE:
List<ValidationOpResult> res = admin.validateCompactionPlan(metaClient, cfg.compactionInstantTime, cfg.parallelism);
if (cfg.printOutput) {
printOperationResult("Result of Validation Operation :", res);
}
serializeOperationResult(fs, res);
break;
case UNSCHEDULE_FILE:
List<RenameOpResult> r = admin.unscheduleCompactionFileId(new HoodieFileGroupId(cfg.partitionPath, cfg.fileId), cfg.skipValidation, cfg.dryRun);
if (cfg.printOutput) {
System.out.println(r);
}
serializeOperationResult(fs, r);
break;
case UNSCHEDULE_PLAN:
List<RenameOpResult> r2 = admin.unscheduleCompactionPlan(cfg.compactionInstantTime, cfg.skipValidation, cfg.parallelism, cfg.dryRun);
if (cfg.printOutput) {
printOperationResult("Result of Unscheduling Compaction Plan :", r2);
}
serializeOperationResult(fs, r2);
break;
case REPAIR:
List<RenameOpResult> r3 = admin.repairCompaction(cfg.compactionInstantTime, cfg.parallelism, cfg.dryRun);
if (cfg.printOutput) {
printOperationResult("Result of Repair Operation :", r3);
}
serializeOperationResult(fs, r3);
break;
default:
throw new IllegalStateException("Not yet implemented !!");
}
}
}
use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.
the class CompactionCommand method unscheduleCompaction.
@CliCommand(value = "compaction unschedule", help = "Unschedule Compaction")
public String unscheduleCompaction(@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant, @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory, @CliOption(key = { "skipValidation" }, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV, @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
Path outputPath = new Path(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_PLAN.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism, Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for " + compactionInstant;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule pending compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.fs.exists(outputPath)) {
HoodieCLI.fs.delete(outputPath, false);
}
}
return output;
}
use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.
the class CompactionCommand method unscheduleCompactFile.
@CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId")
public String unscheduleCompactFile(@CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId, @CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory, @CliOption(key = { "skipValidation" }, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV, @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending, @CliOption(key = { "headeronly" }, help = "Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
Path outputPath = new Path(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(), master, sparkMemory, client.getBasePath(), fileId, partitionPath, outputPathStr, "1", Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for file " + fileId;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule file from pending compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.fs.exists(outputPath)) {
HoodieCLI.fs.delete(outputPath, false);
}
}
return output;
}
use of org.apache.hudi.client.CompactionAdminClient.RenameOpResult in project hudi by apache.
the class CompactionCommand method repairCompaction.
@CliCommand(value = "compaction repair", help = "Renames the files to make them consistent with the timeline as " + "dictated by Hoodie metadata. Use when compaction unschedule fails partially.")
public String repairCompaction(@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant, @CliOption(key = { "parallelism" }, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory, @CliOption(key = { "dryRun" }, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly) throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
String outputPathStr = getTmpSerializerFile();
Path outputPath = new Path(outputPathStr);
String output;
try {
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_REPAIR.toString(), master, sparkMemory, client.getBasePath(), compactionInstant, outputPathStr, parallelism, Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to unschedule compaction for " + compactionInstant;
}
List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "repair compaction");
} finally {
// Delete tmp file used to serialize result
if (HoodieCLI.fs.exists(outputPath)) {
HoodieCLI.fs.delete(outputPath, false);
}
}
return output;
}
Aggregations