
Example 11 with HoodieRollbackStat

Use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

The class TimelineMetadataUtils, method convertRollbackMetadata.

public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, Option<Long> durationInMs,
                                                             List<HoodieInstant> instants, List<HoodieRollbackStat> rollbackStats) {
    Map<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder = new HashMap<>();
    int totalDeleted = 0;
    for (HoodieRollbackStat stat : rollbackStats) {
        // Map each log file that received a rollback command block to its length, keyed by full path.
        Map<String, Long> rollbackLogFiles = stat.getCommandBlocksCount().keySet().stream()
            .collect(Collectors.toMap(f -> f.getPath().toString(), FileStatus::getLen));
        HoodieRollbackPartitionMetadata metadata = new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
            stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles(), rollbackLogFiles);
        partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
        totalDeleted += stat.getSuccessDeleteFiles().size();
    }
    return new HoodieRollbackMetadata(startRollbackTime,
        durationInMs.orElseGet(() -> -1L),
        totalDeleted,
        instants.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()),
        Collections.unmodifiableMap(partitionMetadataBuilder),
        DEFAULT_VERSION,
        instants.stream()
            .map(instant -> new HoodieInstantInfo(instant.getTimestamp(), instant.getAction()))
            .collect(Collectors.toList()));
}
Also used : HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HoodieRestorePlan(org.apache.hudi.avro.model.HoodieRestorePlan) FileReader(org.apache.avro.file.FileReader) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieSavepointMetadata(org.apache.hudi.avro.model.HoodieSavepointMetadata) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieRollbackPartitionMetadata(org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata) SpecificData(org.apache.avro.specific.SpecificData) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Schema(org.apache.avro.Schema) DatumWriter(org.apache.avro.io.DatumWriter) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) SpecificRecordBase(org.apache.avro.specific.SpecificRecordBase) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) HoodieInstantInfo(org.apache.hudi.avro.model.HoodieInstantInfo) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) DatumReader(org.apache.avro.io.DatumReader) List(java.util.List) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieReplaceCommitMetadata(org.apache.hudi.avro.model.HoodieReplaceCommitMetadata) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) HoodieSavepointPartitionMetadata(org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Collections(java.util.Collections) DataFileReader(org.apache.avro.file.DataFileReader)
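For orientation, a minimal calling sketch (not taken from the Hudi sources): the partition path, file name, and instant times below are hypothetical, and commitInstant stands for whatever HoodieInstant is being rolled back.

// Hypothetical inputs: one partition, one successfully deleted base file.
List<HoodieInstant> instantsToRollback = Collections.singletonList(commitInstant);
List<HoodieRollbackStat> stats = Collections.singletonList(
    new HoodieRollbackStat(
        "2021/01/01",                    // partition path (hypothetical)
        Arrays.asList("file1.parquet"),  // files deleted successfully
        Collections.emptyList(),         // failed deletes
        Collections.emptyMap()));        // no rollback command blocks appended
HoodieRollbackMetadata metadata = TimelineMetadataUtils.convertRollbackMetadata(
    "20210101120000",  // rollback instant time (hypothetical)
    Option.of(1500L),  // rollback duration in ms
    instantsToRollback,
    stats);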

Example 12 with HoodieRollbackStat

Use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

The class TestRollbackUtils, method testMergeRollbackStat.

@Test
public void testMergeRollbackStat() {
    String partitionPath1 = "/partitionPath1/";
    String partitionPath2 = "/partitionPath2/";
    // prepare HoodieRollbackStat instances for different partitions
    Map<FileStatus, Boolean> dataFilesOnlyStat1Files = new HashMap<>();
    dataFilesOnlyStat1Files.put(generateFileStatus(partitionPath1 + "dataFile1" + BASE_FILE_EXTENSION), true);
    dataFilesOnlyStat1Files.put(generateFileStatus(partitionPath1 + "dataFile2" + BASE_FILE_EXTENSION), true);
    HoodieRollbackStat dataFilesOnlyStat1 = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath1).withDeletedFileResults(dataFilesOnlyStat1Files).build();
    Map<FileStatus, Boolean> dataFilesOnlyStat2Files = new HashMap<>();
    dataFilesOnlyStat2Files.put(generateFileStatus(partitionPath2 + "dataFile1" + BASE_FILE_EXTENSION), true);
    dataFilesOnlyStat2Files.put(generateFileStatus(partitionPath2 + "dataFile2" + BASE_FILE_EXTENSION), true);
    HoodieRollbackStat dataFilesOnlyStat2 = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath2).withDeletedFileResults(dataFilesOnlyStat2Files).build();
    // 1. merging stats from different partition paths must fail
    assertThrows(IllegalArgumentException.class, () -> {
        RollbackUtils.mergeRollbackStat(dataFilesOnlyStat1, dataFilesOnlyStat2);
    }, "merging rollback stats from different partitions should fail");
    // prepare HoodieRollbackStat instances covering failed deletes and log-block appends
    Map<FileStatus, Boolean> dataFilesOnlyStat3Files = new HashMap<>();
    dataFilesOnlyStat3Files.put(generateFileStatus(partitionPath1 + "dataFile1.log"), true);
    dataFilesOnlyStat3Files.put(generateFileStatus(partitionPath1 + "dataFile3" + BASE_FILE_EXTENSION), false);
    HoodieRollbackStat dataFilesOnlyStat3 = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath1).withDeletedFileResults(dataFilesOnlyStat3Files).build();
    Map<FileStatus, Long> dataFilesOnlyStat4Files = new HashMap<>();
    dataFilesOnlyStat4Files.put(generateFileStatus(partitionPath1 + "dataFile1.log"), 10L);
    HoodieRollbackStat dataFilesOnlyStat4 = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath1).withRollbackBlockAppendResults(dataFilesOnlyStat4Files).build();
    // 2. test merge dataFilesOnlyStat1 and dataFilesOnlyStat3
    HoodieRollbackStat dataFilesOnlyStatMerge1 = RollbackUtils.mergeRollbackStat(dataFilesOnlyStat1, dataFilesOnlyStat3);
    assertEquals(partitionPath1, dataFilesOnlyStatMerge1.getPartitionPath());
    assertIterableEquals(CollectionUtils.createImmutableList(partitionPath1 + "dataFile3" + BASE_FILE_EXTENSION), dataFilesOnlyStatMerge1.getFailedDeleteFiles());
    assertIterableEquals(CollectionUtils.createImmutableList(partitionPath1 + "dataFile1" + BASE_FILE_EXTENSION, partitionPath1 + "dataFile2" + BASE_FILE_EXTENSION, partitionPath1 + "dataFile1.log").stream().sorted().collect(Collectors.toList()), dataFilesOnlyStatMerge1.getSuccessDeleteFiles().stream().sorted().collect(Collectors.toList()));
    assertEquals(0, dataFilesOnlyStatMerge1.getCommandBlocksCount().size());
    // 3. test merge dataFilesOnlyStatMerge1 and dataFilesOnlyStat4
    HoodieRollbackStat dataFilesOnlyStatMerge2 = RollbackUtils.mergeRollbackStat(dataFilesOnlyStatMerge1, dataFilesOnlyStat4);
    assertEquals(partitionPath1, dataFilesOnlyStatMerge2.getPartitionPath());
    assertIterableEquals(CollectionUtils.createImmutableList(partitionPath1 + "dataFile3" + BASE_FILE_EXTENSION).stream().sorted().collect(Collectors.toList()), dataFilesOnlyStatMerge2.getFailedDeleteFiles().stream().sorted().collect(Collectors.toList()));
    assertIterableEquals(CollectionUtils.createImmutableList(partitionPath1 + "dataFile1" + BASE_FILE_EXTENSION, partitionPath1 + "dataFile2" + BASE_FILE_EXTENSION, partitionPath1 + "dataFile1.log").stream().sorted().collect(Collectors.toList()), dataFilesOnlyStatMerge2.getSuccessDeleteFiles().stream().sorted().collect(Collectors.toList()));
    assertEquals(CollectionUtils.createImmutableMap(generateFileStatus(partitionPath1 + "dataFile1.log"), 10L), dataFilesOnlyStatMerge2.getCommandBlocksCount());
}
Also used : HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Test(org.junit.jupiter.api.Test)
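The contract this test pins down: mergeRollbackStat rejects stats from different partitions, and for the same partition it unions the success-delete list, the failed-delete list, and the command-block map. A standalone sketch of that contract under those assumptions (mergeSketch is illustrative, not Hudi's RollbackUtils implementation):

// Sketch of the merge semantics the assertions above verify.
static HoodieRollbackStat mergeSketch(HoodieRollbackStat a, HoodieRollbackStat b) {
    // Case 1 above: stats from different partitions must not be merged.
    if (!a.getPartitionPath().equals(b.getPartitionPath())) {
        throw new IllegalArgumentException("Cannot merge stats of different partitions");
    }
    List<String> success = new ArrayList<>(a.getSuccessDeleteFiles());
    success.addAll(b.getSuccessDeleteFiles());
    List<String> failed = new ArrayList<>(a.getFailedDeleteFiles());
    failed.addAll(b.getFailedDeleteFiles());
    Map<FileStatus, Long> commandBlocks = new HashMap<>(a.getCommandBlocksCount());
    commandBlocks.putAll(b.getCommandBlocksCount());
    return new HoodieRollbackStat(a.getPartitionPath(), success, failed, commandBlocks);
}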

Example 13 with HoodieRollbackStat

Use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

The class TestMarkerBasedRollbackStrategy, method testMergeOnReadRollback.

@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testMergeOnReadRollback(boolean useFileListingMetadata) throws Exception {
    // re-initialize the test harness as a MERGE_ON_READ table
    tearDown();
    tableType = HoodieTableType.MERGE_ON_READ;
    setUp();
    HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(true).withAutoCommit(false).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(useFileListingMetadata).build()).withPath(basePath).build();
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, writeConfig)) {
        // roll back the 2nd commit and ensure the stats reflect it.
        List<HoodieRollbackStat> stats = testRun(useFileListingMetadata, writeConfig, writeClient);
        assertEquals(3, stats.size());
        for (HoodieRollbackStat stat : stats) {
            assertEquals(0, stat.getSuccessDeleteFiles().size());
            assertEquals(0, stat.getFailedDeleteFiles().size());
            assertEquals(1, stat.getCommandBlocksCount().size());
            stat.getCommandBlocksCount().forEach((fileStatus, len) -> assertTrue(fileStatus.getPath().getName().contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())));
        }
    }
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 14 with HoodieRollbackStat

Use of org.apache.hudi.common.HoodieRollbackStat in project hudi by apache.

The class TestIncrementalFSViewSync, method performRestore.

/**
 * Simulate restore (or rollback) of an instant in the timeline and the file-system view.
 *
 * @param instant Instant to be rolled back
 * @param files List of files to be deleted as part of the rollback
 * @param rollbackInstant Rollback/restore instant time
 * @param isRestore Whether to record the operation as a restore (true) or a plain rollback (false)
 */
private void performRestore(HoodieInstant instant, List<String> files, String rollbackInstant, boolean isRestore) throws IOException {
    Map<String, List<String>> partitionToFiles = deleteFiles(files);
    List<HoodieRollbackStat> rollbackStats = partitionToFiles.entrySet().stream().map(e -> new HoodieRollbackStat(e.getKey(), e.getValue(), new ArrayList<>(), new HashMap<>())).collect(Collectors.toList());
    List<HoodieInstant> rollbacks = new ArrayList<>();
    rollbacks.add(instant);
    HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.convertRollbackMetadata(rollbackInstant, Option.empty(), rollbacks, rollbackStats);
    if (isRestore) {
        List<HoodieRollbackMetadata> rollbackM = new ArrayList<>();
        rollbackM.add(rollbackMetadata);
        HoodieRestoreMetadata metadata = TimelineMetadataUtils.convertRestoreMetadata(rollbackInstant, 100, Collections.singletonList(instant), CollectionUtils.createImmutableMap(rollbackInstant, rollbackM));
        HoodieInstant restoreInstant = new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, rollbackInstant);
        metaClient.getActiveTimeline().createNewInstant(restoreInstant);
        metaClient.getActiveTimeline().saveAsComplete(restoreInstant, TimelineMetadataUtils.serializeRestoreMetadata(metadata));
    } else {
        metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, rollbackInstant));
        metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, rollbackInstant), TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata));
    }
    boolean deleted = metaClient.getFs().delete(new Path(metaClient.getMetaPath(), instant.getFileName()), false);
    assertTrue(deleted);
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
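To sanity-check what performRestore writes, the serialized rollback payload can be read back; a short sketch, assuming TimelineMetadataUtils.deserializeAvroMetadata(byte[], Class) is available as the reader counterpart (treat that exact signature as an assumption):

// serializeRollbackMetadata returns Option<byte[]>, as passed to saveAsComplete above.
Option<byte[]> bytes = TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata);
HoodieRollbackMetadata readBack =
    TimelineMetadataUtils.deserializeAvroMetadata(bytes.get(), HoodieRollbackMetadata.class);
// readBack should mirror the metadata built by convertRollbackMetadata in Example 11.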

Aggregations

HoodieRollbackStat (org.apache.hudi.common.HoodieRollbackStat) 14
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 9
ArrayList (java.util.ArrayList) 5
FileStatus (org.apache.hadoop.fs.FileStatus) 5
IOException (java.io.IOException) 4
HashMap (java.util.HashMap) 4
Test (org.junit.jupiter.api.Test) 4
Collections (java.util.Collections) 3
List (java.util.List) 3
Map (java.util.Map) 3
Collectors (java.util.stream.Collectors) 3
HoodieRollbackMetadata (org.apache.hudi.avro.model.HoodieRollbackMetadata) 3
HoodieTimer (org.apache.hudi.common.util.HoodieTimer) 3
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 3
HoodieIOException (org.apache.hudi.exception.HoodieIOException) 3
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 3
HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata) 2
HoodieCompactionPlan (org.apache.hudi.avro.model.HoodieCompactionPlan) 2
HoodieRequestedReplaceMetadata (org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) 2
HoodieRestoreMetadata (org.apache.hudi.avro.model.HoodieRestoreMetadata) 2