
Example 1 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

From class TestCleaner, method testFailedInsertAndCleanByCommits.

/**
 * Test helper for the clean-failed-commits-by-commits logic, from the HoodieWriteClient API perspective.
 *
 * @param insertFn Insert API to be tested
 * @param isPreppedAPI Flag to indicate if a prepped-version is used. If true, a wrapper function will be used during
 *        record generation to also tag the records (de-dupe is implicit as we use unique record-gen APIs)
 * @throws Exception in case of errors
 */
private void testFailedInsertAndCleanByCommits(Function3<JavaRDD<WriteStatus>, SparkRDDWriteClient, JavaRDD<HoodieRecord>, String> insertFn, boolean isPreppedAPI) throws Exception {
    // keep up to 3 commits from the past
    int maxCommits = 3;
    HoodieWriteConfig cfg = getConfigBuilder()
        .withAutoCommit(false)
        .withHeartbeatIntervalInMs(3000)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
            .retainCommits(maxCommits)
            .build())
        .withParallelism(1, 1)
        .withBulkInsertParallelism(1)
        .withFinalizeWriteParallelism(1)
        .withDeleteParallelism(1)
        .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder()
            .withConsistencyCheckEnabled(true)
            .build())
        .build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    final Function2<List<HoodieRecord>, String, Integer> recordInsertGenWrappedFunction = generateWrapRecordsFn(isPreppedAPI, cfg, dataGen::generateInserts);
    Pair<String, JavaRDD<WriteStatus>> result = insertFirstBigBatchForClientCleanerTest(cfg, client, recordInsertGenWrappedFunction, insertFn, HoodieCleaningPolicy.KEEP_LATEST_COMMITS);
    client.commit(result.getLeft(), result.getRight());
    HoodieTable table = HoodieSparkTable.create(client.getConfig(), context, metaClient);
    assertTrue(table.getCompletedCleanTimeline().empty());
    insertFirstFailedBigBatchForClientCleanerTest(cfg, client, recordInsertGenWrappedFunction, insertFn, HoodieCleaningPolicy.KEEP_LATEST_COMMITS);
    insertFirstFailedBigBatchForClientCleanerTest(cfg, client, recordInsertGenWrappedFunction, insertFn, HoodieCleaningPolicy.KEEP_LATEST_COMMITS);
    Pair<String, JavaRDD<WriteStatus>> ret = insertFirstFailedBigBatchForClientCleanerTest(cfg, client, recordInsertGenWrappedFunction, insertFn, HoodieCleaningPolicy.KEEP_LATEST_COMMITS);
    // Wait until enough time passes that the last failed commit's heartbeat expires
    await().atMost(10, TimeUnit.SECONDS).until(() -> client.getHeartbeatClient().isHeartbeatExpired(ret.getLeft()));
    List<HoodieCleanStat> cleanStats = runCleaner(cfg);
    assertEquals(0, cleanStats.size(), "Must not clean any files");
    HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline();
    assertEquals(3, timeline.getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION))
        .filterCompletedInstants().countInstants());
    Option<HoodieInstant> rollBackInstantForFailedCommit = timeline.getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).filterCompletedInstants().lastInstant();
    HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.deserializeAvroMetadata(timeline.getInstantDetails(rollBackInstantForFailedCommit.get()).get(), HoodieRollbackMetadata.class);
    // Rollback of one of the failed writes should have deleted 3 files
    assertEquals(3, rollbackMetadata.getTotalFilesDeleted());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) ArrayList(java.util.ArrayList) List(java.util.List)
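
The closing assertions rely on a pattern that recurs throughout these examples: select the completed rollback instants from the timeline, take one, fetch its raw instant file, and decode it with TimelineMetadataUtils. A minimal sketch of that pattern in isolation (variable names follow the example above; the printed summary is illustrative only, and deserializeAvroMetadata throws IOException, so a real caller handles or declares it):

HoodieTimeline rollbacks = timeline
        .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION))
        .filterCompletedInstants();
Option<HoodieInstant> latest = rollbacks.lastInstant();
if (latest.isPresent()) {
    // getInstantDetails returns the serialized Avro payload of the instant file
    HoodieRollbackMetadata md = TimelineMetadataUtils.deserializeAvroMetadata(
            timeline.getInstantDetails(latest.get()).get(), HoodieRollbackMetadata.class);
    System.out.println("Rollback " + latest.get().getTimestamp()
            + " deleted " + md.getTotalFilesDeleted() + " files");
}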

Example 2 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

From class TestRollbacksCommand, method testShowRollback.

/**
 * Test case for command 'show rollback'.
 */
@Test
public void testShowRollback() throws IOException {
    // get instant
    HoodieActiveTimeline activeTimeline = new RollbacksCommand.RollbackTimeline(HoodieCLI.getTableMetaClient());
    Stream<HoodieInstant> rollback = activeTimeline.getRollbackTimeline().filterCompletedInstants().getInstants();
    HoodieInstant instant = rollback.findFirst().orElse(null);
    assertNotNull(instant, "The instant cannot be null.");
    CommandResult cr = shell().executeCommand("show rollback --instant " + instant.getTimestamp());
    assertTrue(cr.isSuccess());
    List<Comparable[]> rows = new ArrayList<>();
    // get metadata of instant
    HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
    // generate expected result
    metadata.getPartitionMetadata().forEach((key, value) -> Stream
        .concat(value.getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)),
            value.getFailedDeleteFiles().stream().map(f -> Pair.of(f, false)))
        .forEach(fileWithDeleteStatus -> {
        Comparable[] row = new Comparable[5];
        row[0] = metadata.getStartRollbackTime();
        row[1] = metadata.getCommitsRollback().toString();
        row[2] = key;
        row[3] = fileWithDeleteStatus.getLeft();
        row[4] = fileWithDeleteStatus.getRight();
        rows.add(row);
    }));
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_DELETED_FILE)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_SUCCEEDED);
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BeforeEach(org.junit.jupiter.api.BeforeEach) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields) HashMap(java.util.HashMap) DEFAULT_FIRST_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) ArrayList(java.util.ArrayList) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) DEFAULT_PARTITION_PATHS(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Tag(org.junit.jupiter.api.Tag) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) CLIFunctionalTestHarness(org.apache.hudi.cli.functional.CLIFunctionalTestHarness) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) BaseHoodieWriteClient(org.apache.hudi.client.BaseHoodieWriteClient) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) DEFAULT_SECOND_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH) TableHeader(org.apache.hudi.cli.TableHeader) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieCLI(org.apache.hudi.cli.HoodieCLI) Test(org.junit.jupiter.api.Test) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) CommandResult(org.springframework.shell.core.CommandResult) HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper) Pair(org.apache.hudi.common.util.collection.Pair) DEFAULT_THIRD_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH)
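
The per-partition walk above is the general way to inspect what a rollback touched. A hypothetical helper distilled from the same getters (countSuccessfulDeletes is not a Hudi API; it is sketched here only to name the pattern):

// Hypothetical helper: total files a rollback deleted successfully, across partitions.
static long countSuccessfulDeletes(HoodieRollbackMetadata metadata) {
    return metadata.getPartitionMetadata().values().stream()
            .mapToLong(pm -> pm.getSuccessDeleteFiles().size())
            .sum();
}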

Example 3 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

From class TestRollbacksCommand, method testShowRollbacks.

/**
 * Test case for command 'show rollbacks'.
 */
@Test
public void testShowRollbacks() {
    CommandResult cr = shell().executeCommand("show rollbacks");
    assertTrue(cr.isSuccess());
    // get rollback instants
    HoodieActiveTimeline activeTimeline = new RollbacksCommand.RollbackTimeline(HoodieCLI.getTableMetaClient());
    Stream<HoodieInstant> rollback = activeTimeline.getRollbackTimeline().filterCompletedInstants().getInstants();
    List<Comparable[]> rows = new ArrayList<>();
    rollback.sorted().forEach(instant -> {
        try {
            // build one expected row per commit rolled back by this instant
            HoodieRollbackMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
            metadata.getCommitsRollback().forEach(c -> {
                Comparable[] row = new Comparable[5];
                row[0] = metadata.getStartRollbackTime();
                row[1] = c;
                // expected values from the test fixture: 3 files deleted in total
                row[2] = 3;
                row[3] = metadata.getTimeTakenInMillis();
                // ...spread across 3 partitions
                row[4] = 3;
                rows.add(row);
            });
        } catch (IOException e) {
            e.printStackTrace();
        }
    });
    TableHeader header = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_DELETED)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TIME_TOKEN_MILLIS)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_PARTITIONS);
    String expected = HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) TableHeader(org.apache.hudi.cli.TableHeader) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CommandResult(org.springframework.shell.core.CommandResult) Test(org.junit.jupiter.api.Test)

Example 4 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

From class TestTimelineUtils, method getRestoreMetadata.

private byte[] getRestoreMetadata(String basePath, String partition, String commitTs, int count, String actionType) throws IOException {
    List<HoodieRollbackMetadata> rollbackM = new ArrayList<>();
    rollbackM.add(getRollbackMetadataInstance(basePath, partition, commitTs, count, actionType));
    List<HoodieInstant> rollbackInstants = new ArrayList<>();
    rollbackInstants.add(new HoodieInstant(false, commitTs, actionType));
    // the second argument (200) is the restore duration in milliseconds
    HoodieRestoreMetadata metadata = TimelineMetadataUtils.convertRestoreMetadata(commitTs, 200, rollbackInstants, CollectionUtils.createImmutableMap(commitTs, rollbackM));
    return TimelineMetadataUtils.serializeRestoreMetadata(metadata).get();
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) ArrayList(java.util.ArrayList) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata)
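
Because HoodieRestoreMetadata is itself an Avro model, the bytes this helper returns can be decoded with the same utility the earlier examples use. A round-trip sketch (the partition path and timestamps are placeholder values, not taken from the original test):

// Deserialize the bytes produced above back into the Avro model (throws IOException).
HoodieRestoreMetadata restored = TimelineMetadataUtils.deserializeAvroMetadata(
        getRestoreMetadata(basePath, "2020/01/01", "001", 2, HoodieTimeline.ROLLBACK_ACTION),
        HoodieRestoreMetadata.class);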

Example 5 with HoodieRollbackMetadata

Use of org.apache.hudi.avro.model.HoodieRollbackMetadata in project hudi by apache.

From class HoodieTestTable, method getRollbackMetadata.

public HoodieRollbackMetadata getRollbackMetadata(String instantTimeToDelete, Map<String, List<String>> partitionToFilesMeta) throws Exception {
    HoodieRollbackMetadata rollbackMetadata = new HoodieRollbackMetadata();
    rollbackMetadata.setCommitsRollback(Collections.singletonList(instantTimeToDelete));
    rollbackMetadata.setStartRollbackTime(instantTimeToDelete);
    Map<String, HoodieRollbackPartitionMetadata> partitionMetadataMap = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : partitionToFilesMeta.entrySet()) {
        HoodieRollbackPartitionMetadata rollbackPartitionMetadata = new HoodieRollbackPartitionMetadata();
        rollbackPartitionMetadata.setPartitionPath(entry.getKey());
        rollbackPartitionMetadata.setSuccessDeleteFiles(entry.getValue());
        rollbackPartitionMetadata.setFailedDeleteFiles(new ArrayList<>());
        // synthesize a rollback log file of random size (50-549 bytes) for this partition
        long rollbackLogFileSize = 50 + RANDOM.nextInt(500);
        String fileId = UUID.randomUUID().toString();
        String logFileName = logFileName(instantTimeToDelete, fileId, 0);
        FileCreateUtils.createLogFile(basePath, entry.getKey(), instantTimeToDelete, fileId, 0, (int) rollbackLogFileSize);
        rollbackPartitionMetadata.setRollbackLogFiles(createImmutableMap(logFileName, rollbackLogFileSize));
        partitionMetadataMap.put(entry.getKey(), rollbackPartitionMetadata);
    }
    rollbackMetadata.setPartitionMetadata(partitionMetadataMap);
    rollbackMetadata.setInstantsRollback(Collections.singletonList(new HoodieInstantInfo(instantTimeToDelete, HoodieTimeline.ROLLBACK_ACTION)));
    return rollbackMetadata;
}
Also used : HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HashMap(java.util.HashMap) HoodieInstantInfo(org.apache.hudi.avro.model.HoodieInstantInfo) HoodieRollbackPartitionMetadata(org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) CollectionUtils.createImmutableMap(org.apache.hudi.common.util.CollectionUtils.createImmutableMap) HashMap(java.util.HashMap)
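
A hypothetical call site for this builder, constructing rollback metadata for a single partition (the HoodieTestTable instance, instant time, partition path, and file names are placeholders; note the method is declared to throw Exception):

Map<String, List<String>> partitionToFiles = new HashMap<>();
partitionToFiles.put("2016/03/15", Arrays.asList("file-1.parquet", "file-2.parquet"));
// testTable is assumed to be an initialized HoodieTestTable for the table under test.
HoodieRollbackMetadata md = testTable.getRollbackMetadata("001", partitionToFiles);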

Aggregations

HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata): 23 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 15 usages
ArrayList (java.util.ArrayList): 14 usages
List (java.util.List): 14 usages
IOException (java.io.IOException): 12 usages
HashMap (java.util.HashMap): 12 usages
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 12 usages
Map (java.util.Map): 10 usages
HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata): 8 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 8 usages
Collections (java.util.Collections): 7 usages
Collectors (java.util.stream.Collectors): 7 usages
Option (org.apache.hudi.common.util.Option): 7 usages
ValidationUtils (org.apache.hudi.common.util.ValidationUtils): 7 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 7 usages
Logger (org.apache.log4j.Logger): 7 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 6 usages
LogManager (org.apache.log4j.LogManager): 6 usages
HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata): 5 usages
HoodieInstantInfo (org.apache.hudi.avro.model.HoodieInstantInfo): 5 usages