Search in sources :

Example 41 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class HoodieSyncCommand method validateSync.

@CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records")
public String validateSync(@CliOption(key = { "mode" }, unspecifiedDefaultValue = "complete", help = "Check mode") final String mode, @CliOption(key = { "sourceDb" }, unspecifiedDefaultValue = "rawdata", help = "source database") final String srcDb, @CliOption(key = { "targetDb" }, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") final String tgtDb, @CliOption(key = { "partitionCount" }, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate") final int partitionCount, @CliOption(key = { "hiveServerUrl" }, mandatory = true, help = "hiveServerURL to connect to") final String hiveServerUrl, @CliOption(key = { "hiveUser" }, unspecifiedDefaultValue = "", help = "hive username to connect to") final String hiveUser, @CliOption(key = { "hivePass" }, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") final String hivePass) throws Exception {
    if (HoodieCLI.syncTableMetadata == null) {
        throw new HoodieException("Sync validate request target table not null.");
    }
    HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
    HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline();
    HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
    HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline();
    long sourceCount = 0;
    long targetCount = 0;
    if ("complete".equals(mode)) {
        sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
        targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
    } else if ("latestPartitions".equals(mode)) {
        sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
        targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
    }
    String targetLatestCommit = targetTimeline.getInstants().iterator().hasNext() ? targetTimeline.lastInstant().get().getTimestamp() : "0";
    String sourceLatestCommit = sourceTimeline.getInstants().iterator().hasNext() ? sourceTimeline.lastInstant().get().getTimestamp() : "0";
    if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, HoodieTimeline.GREATER_THAN, sourceLatestCommit)) {
        // source is behind the target
        return getString(target, targetTimeline, source, sourceCount, targetCount, sourceLatestCommit);
    } else {
        return getString(source, sourceTimeline, target, targetCount, sourceCount, targetLatestCommit);
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieException(org.apache.hudi.exception.HoodieException) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 42 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class CommitsCommand method rollbackCommit.

@CliCommand(value = "commit rollback", help = "Rollback a commit")
public String rollbackCommit(@CliOption(key = { "commit" }, help = "Commit to rollback") final String instantTime, @CliOption(key = { "sparkProperties" }, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory, @CliOption(key = "rollbackUsingMarkers", unspecifiedDefaultValue = "true", help = "Enabling marker based rollback") final String rollbackUsingMarkers) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
    HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(instantTime));
    if (filteredTimeline.empty()) {
        return "Commit " + instantTime + " not found in Commits " + completedTimeline;
    }
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), master, sparkMemory, instantTime, HoodieCLI.getTableMetaClient().getBasePath(), rollbackUsingMarkers);
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the current
    HoodieCLI.refreshTableMetadata();
    if (exitCode != 0) {
        return "Commit " + instantTime + " failed to roll back";
    }
    return "Commit " + instantTime + " rolled back";
}
Also used : HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 43 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class CommitsCommand method compareCommits.

@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie table")
public String compareCommits(@CliOption(key = { "path" }, help = "Path of the table to compare to") final String path) {
    HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
    HoodieTableMetaClient target = HoodieTableMetaClient.builder().setConf(HoodieCLI.conf).setBasePath(path).build();
    HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    String targetLatestCommit = targetTimeline.getInstants().iterator().hasNext() ? targetTimeline.lastInstant().get().getTimestamp() : "0";
    String sourceLatestCommit = sourceTimeline.getInstants().iterator().hasNext() ? sourceTimeline.lastInstant().get().getTimestamp() : "0";
    if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, HoodieTimeline.GREATER_THAN, sourceLatestCommit)) {
        // source is behind the target
        List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
        return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
    } else {
        List<String> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
        return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 44 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class CleansCommand method showCleanPartitions.

@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(@CliOption(key = { "clean" }, help = "clean to show") final String instantTime, @CliOption(key = { "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
    HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
    if (!timeline.containsInstant(cleanInstant)) {
        return "Clean " + instantTime + " not found in metadata " + timeline;
    }
    HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
    List<Comparable[]> rows = new ArrayList<>();
    for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
        String path = entry.getKey();
        HoodieCleanPartitionMetadata stats = entry.getValue();
        String policy = stats.getPolicy();
        int totalSuccessDeletedFiles = stats.getSuccessDeleteFiles().size();
        int totalFailedDeletedFiles = stats.getFailedDeleteFiles().size();
        rows.add(new Comparable[] { path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles });
    }
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH).addTableHeaderField(HoodieTableHeaderFields.HEADER_CLEANING_POLICY).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_SUCCESSFULLY_DELETED).addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FAILED_DELETIONS);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) ArrayList(java.util.ArrayList) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HashMap(java.util.HashMap) Map(java.util.Map) CliCommand(org.springframework.shell.core.annotation.CliCommand)

Example 45 with HoodieTimeline

use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

the class CommitUtil method countNewRecords.

public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup) throws IOException {
    long totalNew = 0;
    HoodieTimeline timeline = target.reloadActiveTimeline().getCommitTimeline().filterCompletedInstants();
    for (String commit : commitsToCatchup) {
        HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)).get(), HoodieCommitMetadata.class);
        totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
    }
    return totalNew;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)

Aggregations

HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)118 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)74 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)39 List (java.util.List)36 IOException (java.io.IOException)34 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)34 ArrayList (java.util.ArrayList)32 Option (org.apache.hudi.common.util.Option)30 Collectors (java.util.stream.Collectors)29 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)29 HoodieException (org.apache.hudi.exception.HoodieException)26 Map (java.util.Map)25 FileStatus (org.apache.hadoop.fs.FileStatus)24 Path (org.apache.hadoop.fs.Path)24 Set (java.util.Set)22 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)22 FileSlice (org.apache.hudi.common.model.FileSlice)21 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)21 Pair (org.apache.hudi.common.util.collection.Pair)21 FSUtils (org.apache.hudi.common.fs.FSUtils)20