Search in sources :

Example 1 with CleanMetadataMigrator

use of org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator in project hudi by apache.

In the class TestCleaner, method testCleanMetadataUpgradeDowngrade:

/**
 * Round-trips clean metadata through the migrator: starts from version-1 metadata, upgrades it to the
 * latest version, downgrades it back to version 1 and upgrades it once more, checking after every step
 * that the per-partition paths match the expected absolute (version 1) or relative (latest) form.
 */
@Test
public void testCleanMetadataUpgradeDowngrade() {
    final String cleanInstantTime = "000";
    final String firstPartition = DEFAULT_PARTITION_PATHS[0];
    final String secondPartition = DEFAULT_PARTITION_PATHS[1];
    final String baseFileExtension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();

    // Both data files live in the first partition; only their names differ.
    final String deletedFileName = "data1_1_000" + baseFileExtension;
    final String undeletedFileName = "data2_1_000" + baseFileExtension;
    final String firstPartitionDir = metaClient.getBasePath() + "/" + firstPartition + "/";
    final String deletedFilePath = firstPartitionDir + deletedFileName;
    final String undeletedFilePath = firstPartitionDir + undeletedFileName;

    // Clean stat for the first partition: two files targeted, one removed, one failed.
    final List<String> firstPatterns = Arrays.asList(deletedFilePath, undeletedFilePath);
    final List<String> firstSucceeded = Collections.singletonList(deletedFilePath);
    final List<String> firstFailed = Collections.singletonList(undeletedFilePath);
    final HoodieCleanStat firstStat = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
        firstPartition,
        firstPatterns,
        firstSucceeded,
        firstFailed,
        cleanInstantTime);

    // Clean stat for the second partition: nothing to clean at all.
    final List<String> secondPatterns = new ArrayList<>();
    final List<String> secondSucceeded = new ArrayList<>();
    final List<String> secondFailed = new ArrayList<>();
    final HoodieCleanStat secondStat = new HoodieCleanStat(
        HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
        secondPartition,
        secondPatterns,
        secondSucceeded,
        secondFailed,
        cleanInstantTime);

    // Expected content keyed by partition when paths are absolute.
    final Map<String, Tuple3> absolutePathExpectation = new HashMap<>();
    absolutePathExpectation.put(firstPartition, new Tuple3<>(firstPatterns, firstSucceeded, firstFailed));
    absolutePathExpectation.put(secondPartition, new Tuple3<>(secondPatterns, secondSucceeded, secondFailed));

    // Expected content keyed by partition when paths are relative (file names only).
    final Map<String, Tuple3> relativePathExpectation = new HashMap<>();
    relativePathExpectation.put(
        firstPartition,
        new Tuple3<>(
            Arrays.asList(deletedFileName, undeletedFileName),
            Collections.singletonList(deletedFileName),
            Collections.singletonList(undeletedFileName)));
    relativePathExpectation.put(secondPartition, new Tuple3<>(secondPatterns, secondSucceeded, secondFailed));

    // Build the metadata and stamp it as version 1 so the upgrade below has work to do.
    HoodieCleanMetadata metadata =
        CleanerUtils.convertCleanMetadata(cleanInstantTime, Option.of(0L), Arrays.asList(firstStat, secondStat));
    metadata.setVersion(CleanerUtils.CLEAN_METADATA_VERSION_1);

    // Step 1: upgrade to the latest version and verify the relative paths.
    metadata = new CleanMetadataMigrator(metaClient).upgradeToLatest(metadata, metadata.getVersion());
    assertCleanMetadataPathEquals(relativePathExpectation, metadata);

    // Step 2: downgrade to version 1 and verify version, content and absolute paths.
    final CleanMetadataMigrator roundTripMigrator = new CleanMetadataMigrator(metaClient);
    final HoodieCleanMetadata downgraded =
        roundTripMigrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1);
    assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, downgraded.getVersion());
    assertCleanMetadataEquals(metadata, downgraded);
    assertCleanMetadataPathEquals(absolutePathExpectation, downgraded);

    // Step 3: upgrade again and verify version, content and relative paths.
    final HoodieCleanMetadata upgradedAgain = roundTripMigrator.upgradeToLatest(downgraded, downgraded.getVersion());
    assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, upgradedAgain.getVersion());
    assertCleanMetadataEquals(downgraded, upgradedAgain);
    assertCleanMetadataPathEquals(relativePathExpectation, upgradedAgain);
    // The downgraded metadata is re-checked after the second upgrade.
    assertCleanMetadataPathEquals(absolutePathExpectation, downgraded);
}
Also used : HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HashMap(java.util.HashMap) Tuple3(scala.Tuple3) CleanMetadataMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator) ArrayList(java.util.ArrayList) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 2 with CleanMetadataMigrator

use of org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator in project hudi by apache.

In the class CleanerUtils, method getCleanerMetadata:

/**
 * Loads the clean metadata recorded for a clean instant and returns it upgraded to the latest
 * clean-metadata version.
 *
 * <p>The stored bytes are read from the active timeline of {@code metaClient}, deserialized, and then
 * passed through a {@link CleanMetadataMigrator} starting from the version recorded in the metadata.
 *
 * @param metaClient Hoodie table meta client used to reach the active timeline
 * @param cleanInstant instant referring to the clean action whose metadata is wanted
 * @return the clean metadata for {@code cleanInstant}, upgraded to the latest version
 * @throws IOException if raised by the underlying calls (presumably while deserializing the stored
 *     metadata; confirm against TimelineMetadataUtils)
 */
public static HoodieCleanMetadata getCleanerMetadata(HoodieTableMetaClient metaClient, HoodieInstant cleanInstant) throws IOException {
    CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient);
    // NOTE(review): get() is called without a presence check; behaviour for an instant with no
    // stored content depends on Option.get() and should be confirmed.
    byte[] serializedMetadata = metaClient.getActiveTimeline().readCleanerInfoAsBytes(cleanInstant).get();
    HoodieCleanMetadata storedMetadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(serializedMetadata);
    Integer storedVersion = storedMetadata.getVersion();
    return migrator.upgradeToLatest(storedMetadata, storedVersion);
}
Also used : CleanMetadataMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata)

Aggregations

HoodieCleanMetadata (org.apache.hudi.avro.model.HoodieCleanMetadata)2 CleanMetadataMigrator (org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HoodieCleanStat (org.apache.hudi.common.HoodieCleanStat)1 Test (org.junit.jupiter.api.Test)1 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)1 Tuple3 (scala.Tuple3)1