Search in sources :

Example 36 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class SavepointsCommand method deleteSavepoint.

/**
 * Deletes a completed savepoint by launching the {@code DELETE_SAVEPOINT} Spark command.
 *
 * @param instantTime instant time of the savepoint to delete
 * @param sparkPropertiesPath path of the Spark properties file handed to the launcher
 * @param master Spark master, forwarded to the launched application as an argument
 * @param sparkMemory Spark executor memory, forwarded to the launched application as an argument
 * @return a message describing the outcome (not found, failed, or deleted)
 * @throws HoodieException if the savepoint timeline has no completed instants
 * @throws Exception if launching or waiting on the Spark process fails
 */
@CliCommand(value = "savepoint delete", help = "Delete the savepoint")
public String deleteSavepoint(@CliOption(key = { "commit" }, help = "Delete a savepoint") final String instantTime, @CliOption(key = { "sparkProperties" }, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory) throws Exception {
    HoodieTableMetaClient tableClient = HoodieCLI.getTableMetaClient();
    HoodieTimeline completedSavepoints = tableClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();

    // Guard: nothing to delete when there is no completed savepoint at all.
    if (completedSavepoints.empty()) {
        throw new HoodieException("There are no completed savepoint to run delete");
    }

    // Guard: the requested instant must be one of the completed savepoints.
    HoodieInstant requested = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, instantTime);
    boolean known = completedSavepoints.containsInstant(requested);
    if (!known) {
        return "Commit " + instantTime + " not found in Commits " + completedSavepoints;
    }

    // Hand the actual deletion over to a separately launched Spark application.
    SparkLauncher launcher = SparkUtil.initLauncher(sparkPropertiesPath);
    launcher.addAppArgs(SparkMain.SparkCommand.DELETE_SAVEPOINT.toString(), master, sparkMemory, instantTime, tableClient.getBasePath());
    Process sparkProcess = launcher.launch();
    InputStreamConsumer.captureOutput(sparkProcess);
    int status = sparkProcess.waitFor();

    // Refresh the CLI's table metadata before reporting the outcome, whatever the exit code was.
    HoodieCLI.refreshTableMetadata();

    return status == 0
        ? String.format("Savepoint \"%s\" deleted.", instantTime)
        : String.format("Failed: Could not delete savepoint \"%s\".", instantTime);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieException(org.apache.hudi.exception.HoodieException) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 37 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class SavepointsCommand method showSavepoints.

/**
 * Prints the timestamps of all completed savepoints, in the order produced by
 * {@code getReverseOrderedInstants()}, as a single-column table.
 *
 * @return the rendered table
 */
@CliCommand(value = "savepoints show", help = "Show the savepoints")
public String showSavepoints() {
    HoodieTimeline completedSavepoints = HoodieCLI.getTableMetaClient().getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
    // One row per savepoint; each row holds only the instant timestamp.
    String[][] rows = completedSavepoints.getReverseOrderedInstants()
        .map(savepoint -> new String[] { savepoint.getTimestamp() })
        .toArray(String[][]::new);
    String[] header = new String[] { HoodieTableHeaderFields.HEADER_SAVEPOINT_TIME };
    return HoodiePrintHelper.print(header, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 38 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class StatsCommand method writeAmplificationStats.

/**
 * Reports, for every completed instant of the commit timeline, the ratio of total records written
 * to update records written, followed by a final "Total" row with the same ratio over the sums.
 * A ratio is reported as "0" whenever the corresponding update-record count is not positive.
 *
 * @param limit maximum number of rows, passed through to the print helper
 * @param sortByField field to sort on, passed through to the print helper
 * @param descending sort direction, passed through to the print helper
 * @param headerOnly whether to print only the header, passed through to the print helper
 * @return the rendered table
 * @throws IOException declared for the commit-metadata deserialization
 */
@CliCommand(value = "stats wa", help = "Write Amplification. Ratio of how many records were upserted to how many " + "records were actually written")
public String writeAmplificationStats(@CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    long upsertedSum = 0;
    long writtenSum = 0;

    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline completedCommits = activeTimeline.getCommitTimeline().filterCompletedInstants();

    List<Comparable[]> rows = new ArrayList<>();
    DecimalFormat ratioFormat = new DecimalFormat("#.00");

    List<HoodieInstant> instants = completedCommits.getInstants().collect(Collectors.toList());
    for (HoodieInstant instant : instants) {
        HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
        // Only divide when there were updates; otherwise keep the "0" placeholder.
        String ratio = metadata.fetchTotalUpdateRecordsWritten() > 0
            ? ratioFormat.format((float) metadata.fetchTotalRecordsWritten() / metadata.fetchTotalUpdateRecordsWritten())
            : "0";
        rows.add(new Comparable[] { instant.getTimestamp(), metadata.fetchTotalUpdateRecordsWritten(), metadata.fetchTotalRecordsWritten(), ratio });
        upsertedSum += metadata.fetchTotalUpdateRecordsWritten();
        writtenSum += metadata.fetchTotalRecordsWritten();
    }

    // Summary row over all instants, appended to the same row list as the per-instant rows.
    String overallRatio = upsertedSum > 0 ? ratioFormat.format((float) writtenSum / upsertedSum) : "0";
    rows.add(new Comparable[] { "Total", upsertedSum, writtenSum, overallRatio });

    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_UPSERTED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_WRITTEN)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_WRITE_AMPLIFICATION_FACTOR);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) DecimalFormat(java.text.DecimalFormat) ArrayList(java.util.ArrayList) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 39 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class ExportCommand method exportInstants.

/**
 * Exports instants and their metadata to a local folder, drawing from both the active timeline
 * and the archive files matching {@code <basePath>/.hoodie/.commits_.archive*}.
 *
 * <p>In ascending order the archived instants are copied first, then the non-archived ones; in
 * descending order the non-archived instants are copied first (reversed), then the archive files
 * (reversed). Copying stops once {@code limit} instants have been exported.
 *
 * @param limit maximum number of instants to export; {@code -1} means no limit
 * @param filter comma separated list of instant actions to export
 * @param descending whether to export in descending order
 * @param localFolder existing local directory to export into
 * @return a summary message with the number of exported instants
 * @throws HoodieException if {@code localFolder} is not a directory
 * @throws Exception if listing or copying instants fails
 */
@CliCommand(value = "export instants", help = "Export Instants and their metadata from the Timeline")
public String exportInstants(@CliOption(key = { "limit" }, help = "Limit Instants", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "actions" }, help = "Comma separated list of Instant actions to export", unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "localFolder" }, help = "Local Folder to export to", mandatory = true) String localFolder) throws Exception {
    final String basePath = HoodieCLI.getTableMetaClient().getBasePath();
    final Path archivePath = new Path(basePath + "/.hoodie/.commits_.archive*");
    final Set<String> actionSet = new HashSet<>(Arrays.asList(filter.split(",")));
    int numExports = limit == -1 ? Integer.MAX_VALUE : limit;
    int numCopied = 0;
    if (!new File(localFolder).isDirectory()) {
        throw new HoodieException(localFolder + " is not a valid local directory");
    }
    // The non archived instants can be listed from the Timeline.
    HoodieTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline().filterCompletedInstants().filter(i -> actionSet.contains(i.getAction()));
    List<HoodieInstant> nonArchivedInstants = timeline.getInstants().collect(Collectors.toList());
    // Archived instants are in the commit archive files
    FileStatus[] statuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath);
    // Order archive files by modification time. Long.compare is used instead of casting the
    // difference of two longs to int, which can overflow and yield a wrong (even inconsistent) order.
    List<FileStatus> archivedStatuses = Arrays.stream(statuses).sorted((f1, f2) -> Long.compare(f1.getModificationTime(), f2.getModificationTime())).collect(Collectors.toList());
    if (descending) {
        // Newest first: active timeline (reversed), then archive files (reversed) if the limit allows.
        Collections.reverse(nonArchivedInstants);
        numCopied = copyNonArchivedInstants(nonArchivedInstants, numExports, localFolder);
        if (numCopied < numExports) {
            Collections.reverse(archivedStatuses);
            numCopied += copyArchivedInstants(archivedStatuses, actionSet, numExports - numCopied, localFolder);
        }
    } else {
        // Oldest first: archive files, then the active timeline if the limit allows.
        numCopied = copyArchivedInstants(archivedStatuses, actionSet, numExports, localFolder);
        if (numCopied < numExports) {
            numCopied += copyNonArchivedInstants(nonArchivedInstants, numExports - numCopied, localFolder);
        }
    }
    return "Exported " + numCopied + " Instants to " + localFolder;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Arrays(java.util.Arrays) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) HoodieArchivedMetaEntry(org.apache.hudi.avro.model.HoodieArchivedMetaEntry) Reader(org.apache.hudi.common.table.log.HoodieLogFormat.Reader) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) SpecificData(org.apache.avro.specific.SpecificData) CommandMarker(org.springframework.shell.core.CommandMarker) GenericRecord(org.apache.avro.generic.GenericRecord) CliCommand(org.springframework.shell.core.annotation.CliCommand) FileOutputStream(java.io.FileOutputStream) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) Collectors(java.util.stream.Collectors) File(java.io.File) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) 
LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieException(org.apache.hudi.exception.HoodieException) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) File(java.io.File) HashSet(java.util.HashSet) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 40 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class FileSystemViewCommand method buildFileSystemView.

/**
 * Builds a file system view over the files matching {@code <basePath>/<globRegex>/*}, backed by
 * a timeline filtered according to the given flags.
 *
 * @param globRegex path pattern, relative to the table base path, whose children are listed
 * @param maxInstant upper bound instant used to filter the timeline; ignored when empty
 *                   (NOTE(review): presumably keeps instants at or before this time - confirm
 *                   against the HoodieTimeline comparison predicates)
 * @param basefileOnly when true, the commit timeline is used
 * @param includeMaxInstant when true, the GREATER_THAN_OR_EQUALS predicate is applied to
 *                          {@code maxInstant}; otherwise GREATER_THAN
 * @param includeInflight when false, only completed instants are kept
 * @param excludeCompaction when true (and {@code basefileOnly} is false), the commits timeline is
 *                          used; otherwise the write timeline
 * @return a {@link HoodieTableFileSystemView} over the listed files and the filtered timeline
 * @throws IOException declared for the file listing
 */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean basefileOnly, boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException {
    HoodieTableMetaClient cliClient = HoodieCLI.getTableMetaClient();
    // Build a separate meta client for the same table, with the active timeline loaded on load.
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(cliClient.getHadoopConf())
        .setBasePath(cliClient.getBasePath())
        .setLoadActiveTimelineOnLoad(true)
        .build();

    FileSystem fileSystem = HoodieCLI.fs;
    Path globPath = new Path(String.format("%s/%s/*", cliClient.getBasePath(), globRegex));
    List<FileStatus> matchedFiles = FSUtils.getGlobStatusExcludingMetaFolder(fileSystem, globPath);

    // Pick the base timeline from the flags; basefileOnly takes precedence over excludeCompaction.
    HoodieTimeline baseTimeline;
    if (basefileOnly) {
        baseTimeline = metaClient.getActiveTimeline().getCommitTimeline();
    } else if (excludeCompaction) {
        baseTimeline = metaClient.getActiveTimeline().getCommitsTimeline();
    } else {
        baseTimeline = metaClient.getActiveTimeline().getWriteTimeline();
    }
    HoodieTimeline timeline = includeInflight ? baseTimeline : baseTimeline.filterCompletedInstants();

    Stream<HoodieInstant> instants = timeline.getInstants();
    if (!maxInstant.isEmpty()) {
        // The predicate is evaluated as test(maxInstant, instantTimestamp).
        final BiPredicate<String, String> bound = includeMaxInstant ? HoodieTimeline.GREATER_THAN_OR_EQUALS : HoodieTimeline.GREATER_THAN;
        instants = instants.filter(instant -> bound.test(maxInstant, instant.getTimestamp()));
    }

    HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instants, (Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
    FileStatus[] fileArray = statuses(matchedFiles);
    return new HoodieTableFileSystemView(metaClient, filteredTimeline, fileArray);
}

/** Copies the listed file statuses into an array. */
private static FileStatus[] statuses(List<FileStatus> files) {
    return files.toArray(new FileStatus[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) CliOption(org.springframework.shell.core.annotation.CliOption) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CommandMarker(org.springframework.shell.core.CommandMarker) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) IOException(java.io.IOException) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) Collectors(java.util.stream.Collectors) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) Serializable(java.io.Serializable) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) Stream(java.util.stream.Stream) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) FSUtils(org.apache.hudi.common.fs.FSUtils) NumericUtils(org.apache.hudi.common.util.NumericUtils) HoodieDefaultTimeline(org.apache.hudi.common.table.timeline.HoodieDefaultTimeline) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) FileSystem(org.apache.hadoop.fs.FileSystem) Option(org.apache.hudi.common.util.Option) 
CliOption(org.springframework.shell.core.annotation.CliOption) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)

Aggregations

HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)118 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)74 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)39 List (java.util.List)36 IOException (java.io.IOException)34 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)34 ArrayList (java.util.ArrayList)32 Option (org.apache.hudi.common.util.Option)30 Collectors (java.util.stream.Collectors)29 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)29 HoodieException (org.apache.hudi.exception.HoodieException)26 Map (java.util.Map)25 FileStatus (org.apache.hadoop.fs.FileStatus)24 Path (org.apache.hadoop.fs.Path)24 Set (java.util.Set)22 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)22 FileSlice (org.apache.hudi.common.model.FileSlice)21 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)21 Pair (org.apache.hudi.common.util.collection.Pair)21 FSUtils (org.apache.hudi.common.fs.FSUtils)20