
Example 31 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class TestHoodieHFileInputFormat method testPendingCompactionWithActiveCommits.

// Verify that the HFile input format does not return instants after a pending compaction
@Test
public void testPendingCompactionWithActiveCommits() throws IOException {
    // set up six sample instants in the timeline (two of them pending compactions)
    List<HoodieInstant> instants = new ArrayList<>();
    HoodieInstant t1 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "1");
    HoodieInstant t2 = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "2");
    HoodieInstant t3 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "3");
    HoodieInstant t4 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "4");
    HoodieInstant t5 = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "5");
    HoodieInstant t6 = new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "6");
    instants.add(t1);
    instants.add(t2);
    instants.add(t3);
    instants.add(t4);
    instants.add(t5);
    instants.add(t6);
    HoodieTableMetaClient metaClient = HoodieTestUtils.init(basePath.toString(), HoodieFileFormat.HFILE);
    HoodieActiveTimeline timeline = new HoodieActiveTimeline(metaClient);
    timeline.setInstants(instants);
    // Verify that the filtered timeline excludes the first pending compaction instant (t3) and everything after it
    HoodieTimeline filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
    assertTrue(filteredTimeline.containsInstant(t1));
    assertTrue(filteredTimeline.containsInstant(t2));
    assertFalse(filteredTimeline.containsInstant(t3));
    assertFalse(filteredTimeline.containsInstant(t4));
    assertFalse(filteredTimeline.containsInstant(t5));
    assertFalse(filteredTimeline.containsInstant(t6));
    // remove the first pending compaction instant (t3) and set up the timeline again
    instants.remove(t3);
    timeline = new HoodieActiveTimeline(metaClient);
    timeline.setInstants(instants);
    filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
    // verify that only instants before the remaining pending compaction (t5) are returned
    assertTrue(filteredTimeline.containsInstant(t1));
    assertTrue(filteredTimeline.containsInstant(t2));
    assertFalse(filteredTimeline.containsInstant(t3));
    assertTrue(filteredTimeline.containsInstant(t4));
    assertFalse(filteredTimeline.containsInstant(t5));
    assertFalse(filteredTimeline.containsInstant(t6));
    // remove the remaining pending compaction instant (t5) and set up the timeline again
    instants.remove(t5);
    timeline = new HoodieActiveTimeline(metaClient);
    timeline.setInstants(instants);
    filteredTimeline = inputFormat.filterInstantsTimeline(timeline);
    // verify that all remaining instants are returned, since no pending compaction is left
    assertTrue(filteredTimeline.containsInstant(t1));
    assertTrue(filteredTimeline.containsInstant(t2));
    assertFalse(filteredTimeline.containsInstant(t3));
    assertTrue(filteredTimeline.containsInstant(t4));
    assertFalse(filteredTimeline.containsInstant(t5));
    assertTrue(filteredTimeline.containsInstant(t6));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)
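
The assertions above all encode one rule: the returned timeline stops at the earliest pending compaction. A minimal sketch of that rule, assuming the HoodieTimeline helpers filterPendingCompactionTimeline(), firstInstant() and findInstantsBefore() behave as their names suggest; this is not the actual body of filterInstantsTimeline.

// Sketch: hide everything at or after the earliest pending compaction.
private static HoodieTimeline commitsBeforePendingCompaction(HoodieActiveTimeline timeline) {
    // First instant of any compaction that is still requested or inflight.
    Option<HoodieInstant> firstPendingCompaction =
        timeline.filterPendingCompactionTimeline().firstInstant();
    if (firstPendingCompaction.isPresent()) {
        // Keep only instants strictly before that compaction's timestamp.
        return timeline.findInstantsBefore(firstPendingCompaction.get().getTimestamp());
    }
    // No pending compaction: expose the full commits timeline.
    return timeline.getCommitsTimeline();
}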

Example 32 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class RollbacksCommand method showRollback.

@CliCommand(value = "show rollback", help = "Show details of a rollback instant")
public String showRollback(@CliOption(key = { "instant" }, help = "Rollback instant", mandatory = true) String rollbackInstant, @CliOption(key = { "limit" }, help = "Limit  #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException {
    HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.getTableMetaClient());
    final List<Comparable[]> rows = new ArrayList<>();
    HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, rollbackInstant)).get(), HoodieRollbackMetadata.class);
    metadata.getPartitionMetadata().forEach((key, value) -> Stream.concat(value.getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)), value.getFailedDeleteFiles().stream().map(f -> Pair.of(f, false))).forEach(fileWithDeleteStatus -> {
        Comparable[] row = new Comparable[5];
        row[0] = metadata.getStartRollbackTime();
        row[1] = metadata.getCommitsRollback().toString();
        row[2] = key;
        row[3] = fileWithDeleteStatus.getLeft();
        row[4] = fileWithDeleteStatus.getRight();
        rows.add(row);
    }));
    TableHeader header = new TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT).addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION).addTableHeaderField(HoodieTableHeaderFields.HEADER_DELETED_FILE).addTableHeaderField(HoodieTableHeaderFields.HEADER_SUCCEEDED);
    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CliCommand(org.springframework.shell.core.annotation.CliCommand) TableHeader(org.apache.hudi.cli.TableHeader) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) HashMap(java.util.HashMap) CliOption(org.springframework.shell.core.annotation.CliOption) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Component(org.springframework.stereotype.Component) List(java.util.List) Stream(java.util.stream.Stream) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) ROLLBACK_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair) CommandMarker(org.springframework.shell.core.CommandMarker)
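
RollbackTimeline is referenced but not defined in the snippet; in the Hudi CLI it is a thin HoodieActiveTimeline subclass restricted to rollback instants, which also explains the CollectionUtils import above. A sketch of such a subclass, assuming HoodieActiveTimeline exposes a (metaClient, includedExtensions) constructor and HoodieTimeline defines ROLLBACK_EXTENSION:

// Sketch of an active timeline that only loads rollback instants.
public static class RollbackTimeline extends HoodieActiveTimeline {
    public RollbackTimeline(HoodieTableMetaClient metaClient) {
        super(metaClient, CollectionUtils.createImmutableSet(HoodieTimeline.ROLLBACK_EXTENSION));
    }
}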

Example 33 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class SavepointsCommand method rollbackToSavepoint.

@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
public String rollbackToSavepoint(@CliOption(key = { "savepoint" }, help = "Savepoint to rollback") final String instantTime, @CliOption(key = { "sparkProperties" }, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory) throws Exception {
    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
    if (metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants().empty()) {
        throw new HoodieException("There are no completed instants to run rollback");
    }
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
    List<HoodieInstant> instants = timeline.getInstants().filter(instant -> instant.getTimestamp().equals(instantTime)).collect(Collectors.toList());
    if (instants.isEmpty()) {
        return "Commit " + instantTime + " not found in Commits " + timeline;
    }
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), master, sparkMemory, instantTime, metaClient.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the CLI's cached view of the table metadata
    HoodieCLI.refreshTableMetadata();
    if (exitCode != 0) {
        return String.format("Savepoint \"%s\" failed to roll back", instantTime);
    }
    return String.format("Savepoint \"%s\" rolled back", instantTime);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CliCommand(org.springframework.shell.core.annotation.CliCommand) SparkLauncher(org.apache.spark.launcher.SparkLauncher) HoodieException(org.apache.hudi.exception.HoodieException) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) SparkUtil(org.apache.hudi.cli.utils.SparkUtil) CliOption(org.springframework.shell.core.annotation.CliOption) Collectors(java.util.stream.Collectors) HoodieCLI(org.apache.hudi.cli.HoodieCLI) InputStreamConsumer(org.apache.hudi.cli.utils.InputStreamConsumer) Component(org.springframework.stereotype.Component) List(java.util.List) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CommandMarker(org.springframework.shell.core.CommandMarker)
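
The existence check in this command collects matching instants into a list only to test membership. Assuming the String overload of containsInstant that Example 35 below uses on the commits timeline, the same guard can be written more directly:

// Same check without materializing a list (relies on containsInstant(String),
// the overload Example 35 uses).
if (!timeline.containsInstant(instantTime)) {
    return "Commit " + instantTime + " not found in Commits " + timeline;
}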

Example 34 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class SavepointsCommand method showSavepoints.

@CliCommand(value = "savepoints show", help = "Show the savepoints")
public String showSavepoints() {
    HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants();
    List<HoodieInstant> commits = timeline.getReverseOrderedInstants().collect(Collectors.toList());
    String[][] rows = new String[commits.size()][];
    for (int i = 0; i < commits.size(); i++) {
        HoodieInstant commit = commits.get(i);
        rows[i] = new String[] { commit.getTimestamp() };
    }
    return HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_SAVEPOINT_TIME }, rows);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) CliCommand(org.springframework.shell.core.annotation.CliCommand)
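
For comparison, the index loop above can be collapsed into a single stream pipeline; a minimal equivalent sketch using only the calls already shown in this example:

// Equivalent rendering of the savepoint timestamps with a stream instead of an index loop.
String[][] rows = activeTimeline.getSavePointTimeline().filterCompletedInstants()
        .getReverseOrderedInstants()
        .map(instant -> new String[] { instant.getTimestamp() })
        .toArray(String[][]::new);
return HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_SAVEPOINT_TIME }, rows);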

Example 35 with HoodieActiveTimeline

use of org.apache.hudi.common.table.timeline.HoodieActiveTimeline in project hudi by apache.

the class SavepointsCommand method savepoint.

@CliCommand(value = "savepoint create", help = "Savepoint a commit")
public String savepoint(@CliOption(key = { "commit" }, help = "Commit to savepoint") final String commitTime, @CliOption(key = { "user" }, unspecifiedDefaultValue = "default", help = "User who is creating the savepoint") final String user, @CliOption(key = { "comments" }, unspecifiedDefaultValue = "default", help = "Comments for creating the savepoint") final String comments, @CliOption(key = { "sparkProperties" }, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory") final String sparkMemory) throws Exception {
    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    if (!activeTimeline.getCommitsTimeline().filterCompletedInstants().containsInstant(commitTime)) {
        return "Commit " + commitTime + " not found in Commits " + activeTimeline;
    }
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.SAVEPOINT.toString(), master, sparkMemory, commitTime, user, comments, metaClient.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the CLI's cached view of the table metadata
    HoodieCLI.refreshTableMetadata();
    if (exitCode != 0) {
        return String.format("Failed: Could not create savepoint \"%s\".", commitTime);
    }
    return String.format("The commit \"%s\" has been savepointed.", commitTime);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) SparkLauncher(org.apache.spark.launcher.SparkLauncher) CliCommand(org.springframework.shell.core.annotation.CliCommand)
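
Examples 33 and 35 repeat the same launch-and-wait sequence. A hypothetical helper (not part of Hudi) that factors it out, using only the calls shown above:

// Hypothetical helper: submit a Spark job for a CLI command and report its exit code.
private static int runSparkJob(String sparkPropertiesPath, String... appArgs) throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(appArgs);
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the CLI's cached table metadata regardless of the outcome, as both commands do.
    HoodieCLI.refreshTableMetadata();
    return exitCode;
}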

Aggregations

HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline) 95 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 70 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 47 usages
Test (org.junit.jupiter.api.Test) 45 usages
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata) 37 usages
ArrayList (java.util.ArrayList) 36 usages
IOException (java.io.IOException) 32 usages
List (java.util.List) 30 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 30 usages
HashMap (java.util.HashMap) 28 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 26 usages
Map (java.util.Map) 25 usages
Option (org.apache.hudi.common.util.Option) 22 usages
Pair (org.apache.hudi.common.util.collection.Pair) 22 usages
Collectors (java.util.stream.Collectors) 21 usages
Path (org.apache.hadoop.fs.Path) 21 usages
Logger (org.apache.log4j.Logger) 21 usages
LogManager (org.apache.log4j.LogManager) 20 usages
Stream (java.util.stream.Stream) 19 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 19 usages