Example 1 with HoodieTableVersion

Use of org.apache.hudi.common.table.HoodieTableVersion in project hudi by apache.

From class TestUpgradeDowngrade, method testDowngrade:

@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
    MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    if (fromVersion == HoodieTableVersion.TWO) {
        addNewTableParamsToProps(params);
    }
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    if (fromVersion == HoodieTableVersion.TWO) {
        // set table configs
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
        tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
        tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
        tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
    }
    // prepare data: make two commits, leaving the 2nd one uncommitted (inflight).
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
    // delete one of the marker files of the 2nd commit, if requested.
    WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
    List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
    if (deletePartialMarkerFiles) {
        String toDeleteMarkerFile = markerPaths.get(0);
        table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
        markerPaths.remove(toDeleteMarkerFile);
    }
    // set hoodie.table.version to fromVersion in hoodie.properties file
    HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
    if (fromVersion == HoodieTableVersion.TWO) {
        prepForDowngradeFromTwoToOne();
        toVersion = HoodieTableVersion.ONE;
    } else {
        prepForDowngradeFromOneToZero();
    }
    // perform the downgrade; all marker files should be deleted.
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
    if (fromVersion == HoodieTableVersion.TWO) {
        // assert marker files
        assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
    }
    // verify hoodie.table.version got downgraded
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(toVersion.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
    assertTableVersionFromPropertyFile(toVersion);
    // trigger 3rd commit with marker-based rollback disabled.
/* HUDI-2310
    List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);

    // Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
    assertRows(inputRecords.getKey(), thirdBatch);
     */
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), Path (org.apache.hadoop.fs.Path), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HashMap (java.util.HashMap), FileSlice (org.apache.hudi.common.model.FileSlice), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig), HoodieTable (org.apache.hudi.table.HoodieTable), WriteMarkers (org.apache.hudi.table.marker.WriteMarkers), MarkerType (org.apache.hudi.common.table.marker.MarkerType), TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion), List (java.util.List), HoodieTableVersion (org.apache.hudi.common.table.HoodieTableVersion), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), MethodSource (org.junit.jupiter.params.provider.MethodSource)
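
For orientation, below is a minimal sketch of driving the same downgrade entry point outside of a test. It assumes an existing Hudi table at an illustrative base path and a HoodieSparkEngineContext already in scope as engineContext; UpgradeDowngrade, SparkUpgradeDowngradeHelper, HoodieTableMetaClient and HoodieWriteConfig are the classes used in the example above, but the setup values are assumptions, not prescribed configuration.

// A hedged sketch, not the canonical invocation: base path and table name are illustrative.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    // assumed org.apache.hadoop.conf.Configuration in scope
    .setConf(hadoopConf)
    // illustrative base path
    .setBasePath("/tmp/hudi_table")
    .build();
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hudi_table")
    .forTable("hudi_table")
    .build();
// Downgrade the table to version ONE; instantTime is null, matching the call in the test.
new UpgradeDowngrade(metaClient, cfg, engineContext, SparkUpgradeDowngradeHelper.getInstance())
    .run(HoodieTableVersion.ONE, null);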

Example 2 with HoodieTableVersion

Use of org.apache.hudi.common.table.HoodieTableVersion in project hudi by apache.

From class UpgradeDowngrade, method run:

/**
 * Perform upgrade or downgrade steps if required, and update the table version if need be.
 * <p>
 * Starting from version 0.6.0, this upgrade/downgrade step runs in all write paths.
 * <p>
 * Essentially, if a dataset was created with a previous table version in an older release,
 * and Hudi was then upgraded to a new release that supports a newer table version,
 * the Hoodie table version gets bumped to the new version, and some upgrade steps need
 * to be executed before doing any writes.
 * <p>
 * Similarly, if a dataset was created with a newer table version in a newer release,
 * and Hudi was then downgraded to an older release or to an older Hoodie table version,
 * then some downgrade steps need to be executed before proceeding with any writes.
 * <p>
 * Below is the table version corresponding to each Hudi release:
 * Hudi release -> table version
 * pre 0.6.0 -> v0
 * 0.6.0 to 0.8.0 -> v1
 * 0.9.0 -> v2
 * 0.10.0 to current -> v3
 * <p>
 * At a high level, these are the steps performed:
 * <p>
 * Step 1: Determine the current Hoodie table version and the table version recorded in the hoodie.properties file.
 * Step 2: Delete any leftover hoodie.properties.updated file from a previous upgrade/downgrade.
 * Step 3: If the versions differ, perform the upgrade/downgrade.
 * Step 4: Copy hoodie.properties to hoodie.properties.updated with the version updated.
 * Step 5: Rename hoodie.properties.updated to hoodie.properties.
 * </p>
 *
 * @param toVersion   version to which upgrade or downgrade has to be done.
 * @param instantTime current instant time that should not be touched.
 */
public void run(HoodieTableVersion toVersion, String instantTime) {
    // Fetch the current table version from the property file.
    HoodieTableVersion fromVersion = metaClient.getTableConfig().getTableVersion();
    if (!needsUpgradeOrDowngrade(toVersion)) {
        return;
    }
    // Perform the actual upgrade/downgrade; this has to be idempotent, for now.
    LOG.info("Attempting to move table from version " + fromVersion + " to " + toVersion);
    Map<ConfigProperty, String> tableProps = new Hashtable<>();
    if (fromVersion.versionCode() < toVersion.versionCode()) {
        // upgrade
        while (fromVersion.versionCode() < toVersion.versionCode()) {
            HoodieTableVersion nextVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() + 1);
            tableProps.putAll(upgrade(fromVersion, nextVersion, instantTime));
            fromVersion = nextVersion;
        }
    } else {
        // downgrade
        while (fromVersion.versionCode() > toVersion.versionCode()) {
            HoodieTableVersion prevVersion = HoodieTableVersion.versionFromCode(fromVersion.versionCode() - 1);
            tableProps.putAll(downgrade(fromVersion, prevVersion, instantTime));
            fromVersion = prevVersion;
        }
    }
    // Persist the collected table properties and the new table version to hoodie.properties.
    for (Map.Entry<ConfigProperty, String> entry : tableProps.entrySet()) {
        metaClient.getTableConfig().setValue(entry.getKey(), entry.getValue());
    }
    metaClient.getTableConfig().setTableVersion(toVersion);
    HoodieTableConfig.update(metaClient.getFs(), new Path(metaClient.getMetaPath()), metaClient.getTableConfig().getProps());
}
Also used: Path (org.apache.hadoop.fs.Path), Hashtable (java.util.Hashtable), ConfigProperty (org.apache.hudi.common.config.ConfigProperty), HoodieTableVersion (org.apache.hudi.common.table.HoodieTableVersion), Map (java.util.Map)
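
The loop in run walks one table version at a time, so each upgrade or downgrade handler only needs to translate between adjacent versions. Below is a small self-contained sketch of that stepping logic; it uses only HoodieTableVersion and versionFromCode from the example above, and the printed output is illustrative.

// Illustrates the one-step-at-a-time walk performed by run(): upgrades climb the
// version codes, downgrades descend them, visiting every intermediate version.
HoodieTableVersion from = HoodieTableVersion.ZERO;
HoodieTableVersion to = HoodieTableVersion.THREE;
while (from.versionCode() < to.versionCode()) {
    HoodieTableVersion next = HoodieTableVersion.versionFromCode(from.versionCode() + 1);
    // e.g. "upgrade step: ZERO -> ONE"
    System.out.println("upgrade step: " + from + " -> " + next);
    from = next;
}

Run end to end, this prints three steps (ZERO to ONE, ONE to TWO, TWO to THREE), mirroring how the upgrade branch above accumulates tableProps across each intermediate upgrade call.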

Aggregations

Path (org.apache.hadoop.fs.Path): 2
HoodieTableVersion (org.apache.hudi.common.table.HoodieTableVersion): 2
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Hashtable (java.util.Hashtable): 1
List (java.util.List): 1
Map (java.util.Map): 1
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 1
ConfigProperty (org.apache.hudi.common.config.ConfigProperty): 1
FileSlice (org.apache.hudi.common.model.FileSlice): 1
HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig): 1
MarkerType (org.apache.hudi.common.table.marker.MarkerType): 1
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 1
TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion): 1
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 1
HoodieTable (org.apache.hudi.table.HoodieTable): 1
WriteMarkers (org.apache.hudi.table.marker.WriteMarkers): 1
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 1
MethodSource (org.junit.jupiter.params.provider.MethodSource): 1