Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class SavepointsCommand, method deleteSavepoint.
@CliCommand(value = "savepoint delete", help = "Delete the savepoint")
public String deleteSavepoint(
    @CliOption(key = {"commit"}, help = "Delete a savepoint") final String instantTime,
    @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath,
    @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master,
    @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory)
    throws Exception {
  HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
  HoodieTimeline completedInstants = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
  if (completedInstants.empty()) {
    throw new HoodieException("There are no completed savepoints to delete");
  }
  HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, instantTime);
  if (!completedInstants.containsInstant(savePoint)) {
    return "Commit " + instantTime + " not found in Commits " + completedInstants;
  }
  SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
  sparkLauncher.addAppArgs(SparkMain.SparkCommand.DELETE_SAVEPOINT.toString(), master, sparkMemory, instantTime,
      metaClient.getBasePath());
  Process process = sparkLauncher.launch();
  InputStreamConsumer.captureOutput(process);
  int exitCode = process.waitFor();
  // Refresh the current table metadata so the CLI sees the updated timeline
  HoodieCLI.refreshTableMetadata();
  if (exitCode != 0) {
    return String.format("Failed: Could not delete savepoint \"%s\".", instantTime);
  }
  return String.format("Savepoint \"%s\" deleted.", instantTime);
}
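The timeline idiom to note here: the command builds a HoodieInstant keyed by action and timestamp, then probes the completed savepoint timeline with containsInstant before launching any Spark job. A minimal standalone sketch of that check, assuming a hypothetical base path and savepoint time:

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class SavepointCheckSketch {
  public static void main(String[] args) {
    // Hypothetical base path; point this at a real Hudi table.
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("/tmp/hudi_trips_cow")
        .build();
    HoodieTimeline savepoints = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
    // Completed instant: inflight flag false, action "savepoint", hypothetical timestamp.
    HoodieInstant candidate = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, "20220101000000");
    System.out.println("Savepoint present? " + savepoints.containsInstant(candidate));
  }
}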
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class SavepointsCommand, method showSavepoints.
@CliCommand(value = "savepoints show", help = "Show the savepoints")
public String showSavepoints() {
  HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
  HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants();
  List<HoodieInstant> commits = timeline.getReverseOrderedInstants().collect(Collectors.toList());
  String[][] rows = new String[commits.size()][];
  for (int i = 0; i < commits.size(); i++) {
    HoodieInstant commit = commits.get(i);
    rows[i] = new String[] {commit.getTimestamp()};
  }
  return HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_SAVEPOINT_TIME}, rows);
}
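getReverseOrderedInstants() is what puts the newest savepoint first in the printed table. The same traversal without the CLI rendering, as a sketch over a metaClient built as in the previous example:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;

static List<String> savepointTimesNewestFirst(HoodieTableMetaClient metaClient) {
  // Reverse order = most recent instant first, matching the CLI output.
  return metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants()
      .getReverseOrderedInstants()
      .map(HoodieInstant::getTimestamp)
      .collect(Collectors.toList());
}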
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class StatsCommand, method writeAmplificationStats.
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were actually written to "
    + "how many records were upserted")
public String writeAmplificationStats(
    @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
    @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
    @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
    @CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
    throws IOException {
  long totalRecordsUpserted = 0;
  long totalRecordsWritten = 0;
  HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
  HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
  List<Comparable[]> rows = new ArrayList<>();
  DecimalFormat df = new DecimalFormat("#.00");
  for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
    String waf = "0";
    HoodieCommitMetadata commit =
        HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(), HoodieCommitMetadata.class);
    if (commit.fetchTotalUpdateRecordsWritten() > 0) {
      waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
    }
    rows.add(new Comparable[] {instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
        commit.fetchTotalRecordsWritten(), waf});
    totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
    totalRecordsWritten += commit.fetchTotalRecordsWritten();
  }
  String waf = "0";
  if (totalRecordsUpserted > 0) {
    waf = df.format((float) totalRecordsWritten / totalRecordsUpserted);
  }
  rows.add(new Comparable[] {"Total", totalRecordsUpserted, totalRecordsWritten, waf});
  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
  return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
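Per commit, the write amplification factor is fetchTotalRecordsWritten() divided by fetchTotalUpdateRecordsWritten(): a commit that updates 100 existing records but must rewrite files holding 1,000 records in total reports a WAF of 10.00. A sketch of that per-commit computation in isolation:

import java.text.DecimalFormat;
import org.apache.hudi.common.model.HoodieCommitMetadata;

static String writeAmplification(HoodieCommitMetadata commit) {
  DecimalFormat df = new DecimalFormat("#.00");
  long upserted = commit.fetchTotalUpdateRecordsWritten();
  long written = commit.fetchTotalRecordsWritten();
  // Pure-insert commits have no updates; avoid dividing by zero, as the command does.
  return upserted > 0 ? df.format((float) written / upserted) : "0";
}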
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class ExportCommand, method exportInstants.
@CliCommand(value = "export instants", help = "Export Instants and their metadata from the Timeline")
public String exportInstants(
    @CliOption(key = {"limit"}, help = "Limit Instants", unspecifiedDefaultValue = "-1") final Integer limit,
    @CliOption(key = {"actions"}, help = "Comma separated list of Instant actions to export",
        unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter,
    @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
    @CliOption(key = {"localFolder"}, help = "Local Folder to export to", mandatory = true) String localFolder)
    throws Exception {
  final String basePath = HoodieCLI.getTableMetaClient().getBasePath();
  final Path archivePath = new Path(basePath + "/.hoodie/.commits_.archive*");
  final Set<String> actionSet = new HashSet<>(Arrays.asList(filter.split(",")));
  int numExports = limit == -1 ? Integer.MAX_VALUE : limit;
  int numCopied = 0;
  if (!new File(localFolder).isDirectory()) {
    throw new HoodieException(localFolder + " is not a valid local directory");
  }
  // The non-archived instants can be listed from the Timeline.
  HoodieTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline().filterCompletedInstants()
      .filter(i -> actionSet.contains(i.getAction()));
  List<HoodieInstant> nonArchivedInstants = timeline.getInstants().collect(Collectors.toList());
  // Archived instants live in the commit archive files; sort them oldest-first by modification time.
  // Long.compare avoids the overflow that casting the difference to int could cause.
  FileStatus[] statuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath);
  List<FileStatus> archivedStatuses = Arrays.stream(statuses)
      .sorted((f1, f2) -> Long.compare(f1.getModificationTime(), f2.getModificationTime()))
      .collect(Collectors.toList());
  if (descending) {
    Collections.reverse(nonArchivedInstants);
    numCopied = copyNonArchivedInstants(nonArchivedInstants, numExports, localFolder);
    if (numCopied < numExports) {
      Collections.reverse(archivedStatuses);
      numCopied += copyArchivedInstants(archivedStatuses, actionSet, numExports - numCopied, localFolder);
    }
  } else {
    numCopied = copyArchivedInstants(archivedStatuses, actionSet, numExports, localFolder);
    if (numCopied < numExports) {
      numCopied += copyNonArchivedInstants(nonArchivedInstants, numExports - numCopied, localFolder);
    }
  }
  return "Exported " + numCopied + " Instants to " + localFolder;
}
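The export reads from two places: active-timeline instants, restricted to the requested actions, and the archived commits matched by the .commits_.archive* glob. The action filter on its own looks like this, as a sketch (the action list mirrors the command's default):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

static HoodieTimeline completedInstantsForActions(HoodieTableMetaClient metaClient, String csvActions) {
  Set<String> actionSet = new HashSet<>(Arrays.asList(csvActions.split(",")));
  // filter(...) returns a new timeline restricted to instants whose action matches.
  return metaClient.getActiveTimeline().filterCompletedInstants()
      .filter(i -> actionSet.contains(i.getAction()));
}

// e.g. completedInstantsForActions(metaClient, "clean,commit,deltacommit,rollback,savepoint,restore")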
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class FileSystemViewCommand, method buildFileSystemView.
/**
 * Build File System View.
 *
 * @param globRegex Path regex to match files under the base path
 * @param maxInstant Max instant time to be used for displaying file-instants
 * @param basefileOnly Include only the base file view
 * @param includeMaxInstant Include the max instant itself
 * @param includeInflight Include inflight instants
 * @param excludeCompaction Exclude compaction instants
 * @return the file system view built over the matched files and the filtered timeline
 * @throws IOException if listing files on the underlying filesystem fails
 */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean basefileOnly,
    boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException {
  HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(client.getHadoopConf())
      .setBasePath(client.getBasePath())
      .setLoadActiveTimelineOnLoad(true)
      .build();
  FileSystem fs = HoodieCLI.fs;
  String globPath = String.format("%s/%s/*", client.getBasePath(), globRegex);
  List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
  Stream<HoodieInstant> instantsStream;
  HoodieTimeline timeline;
  if (basefileOnly) {
    timeline = metaClient.getActiveTimeline().getCommitTimeline();
  } else if (excludeCompaction) {
    timeline = metaClient.getActiveTimeline().getCommitsTimeline();
  } else {
    timeline = metaClient.getActiveTimeline().getWriteTimeline();
  }
  if (!includeInflight) {
    timeline = timeline.filterCompletedInstants();
  }
  instantsStream = timeline.getInstants();
  if (!maxInstant.isEmpty()) {
    final BiPredicate<String, String> predicate;
    if (includeMaxInstant) {
      predicate = HoodieTimeline.GREATER_THAN_OR_EQUALS;
    } else {
      predicate = HoodieTimeline.GREATER_THAN;
    }
    instantsStream = instantsStream.filter(is -> predicate.test(maxInstant, is.getTimestamp()));
  }
  HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream,
      (Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
  return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses.toArray(new FileStatus[0]));
}
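One subtlety above: the predicate is applied as predicate.test(maxInstant, instantTimestamp), so GREATER_THAN_OR_EQUALS keeps the cut-off instant itself while GREATER_THAN drops it. A sketch isolating that cut-off filter:

import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

static Stream<HoodieInstant> instantsUpTo(Stream<HoodieInstant> instants, String maxInstant, boolean inclusive) {
  BiPredicate<String, String> predicate =
      inclusive ? HoodieTimeline.GREATER_THAN_OR_EQUALS : HoodieTimeline.GREATER_THAN;
  // Keep instants at or below maxInstant (strictly below when not inclusive).
  return instants.filter(i -> predicate.test(maxInstant, i.getTimestamp()));
}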