Example 1 with TimelineLayoutVersion

Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.

From class TestUpgradeDowngrade, method testUpgradeZeroToOneInternal:

public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    // Prepare data: make two commits, the second of which is left uncommitted.
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
    // delete one of the marker files in 2nd commit if need be.
    WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
    List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
    if (deletePartialMarkerFiles) {
        String toDeleteMarkerFile = markerPaths.get(0);
        table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
        markerPaths.remove(toDeleteMarkerFile);
    }
    // set hoodie.table.version to 0 in hoodie.properties file
    metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ZERO);
    if (induceResiduesFromPrevUpgrade) {
        createResidualFile();
    }
    // Should re-create marker files for the 2nd commit since it's still pending.
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.ONE, null);
    // assert marker files
    assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
    // verify hoodie.table.version got upgraded
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ONE.versionCode());
    assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
    // trigger 3rd commit with marker based rollback enabled.
    /* HUDI-2310
    List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);

    // Check that the dataset contains records only from the 1st and 3rd commits, since the 2nd is expected to be rolled back.
    assertRows(inputRecords.getKey(), thirdBatch);
    if (induceResiduesFromPrevUpgrade) {
      assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
    }*/
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTable(org.apache.hudi.table.HoodieTable) WriteMarkers(org.apache.hudi.table.marker.WriteMarkers) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) List(java.util.List)
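
A minimal sketch distilled from the test above, isolating the upgrade-and-verify pattern (assuming the same Hudi APIs as the test; cfg, metaClient, and context are set up as there, and the null second argument to run is the optional instant time, unused in this flow):

// Run the upgrade to table version ONE, then reload the meta client with an explicit
// timeline layout version so the assertion reads hoodie.properties fresh from storage.
new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance())
    .run(HoodieTableVersion.ONE, null);
HoodieTableMetaClient reloaded = HoodieTableMetaClient.builder()
    .setConf(context.getHadoopConf().get())
    .setBasePath(cfg.getBasePath())
    .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
    .build();
assertEquals(HoodieTableVersion.ONE.versionCode(),
    reloaded.getTableConfig().getTableVersion().versionCode());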

Example 2 with TimelineLayoutVersion

Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.

From class TestUpgradeDowngrade, method testUpgradeOneToTwo:

@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException {
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    addNewTableParamsToProps(params);
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    // Write inserts
    doInsert(client);
    // downgrade table props
    downgradeTableConfigsFromTwoToOne(cfg);
    // perform upgrade
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.TWO, null);
    // verify hoodie.table.version got upgraded
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.TWO.versionCode());
    assertTableVersionFromPropertyFile(HoodieTableVersion.TWO);
    // verify table props
    assertTableProps(cfg);
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
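
For readers unfamiliar with the parameterization used here, below is a self-contained JUnit 5 sketch of how @EnumSource invokes a test once per HoodieTableType constant (the class and method names are hypothetical; junit-jupiter-params is assumed on the classpath):

import org.apache.hudi.common.model.HoodieTableType;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import static org.junit.jupiter.api.Assertions.assertNotNull;

class TableTypeParamSketch {

    @ParameterizedTest
    @EnumSource(value = HoodieTableType.class)
    void runsOncePerTableType(HoodieTableType tableType) {
        // One invocation per enum constant, e.g. COPY_ON_WRITE and MERGE_ON_READ.
        assertNotNull(tableType);
    }
}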

Example 3 with TimelineLayoutVersion

Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.

From class TestUpgradeDowngrade, method testDowngrade:

@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
    MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    if (fromVersion == HoodieTableVersion.TWO) {
        addNewTableParamsToProps(params);
    }
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    if (fromVersion == HoodieTableVersion.TWO) {
        // set table configs
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
        tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
        tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
        tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
    }
    // Prepare data: make two commits, the second of which is left uncommitted.
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
    // delete one of the marker files in 2nd commit if need be.
    WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
    List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
    if (deletePartialMarkerFiles) {
        String toDeleteMarkerFile = markerPaths.get(0);
        table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
        markerPaths.remove(toDeleteMarkerFile);
    }
    // set hoodie.table.version to fromVersion in hoodie.properties file
    HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
    if (fromVersion == HoodieTableVersion.TWO) {
        prepForDowngradeFromTwoToOne();
        toVersion = HoodieTableVersion.ONE;
    } else {
        prepForDowngradeFromOneToZero();
    }
    // Downgrade should be performed; all marker files should be deleted.
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
    if (fromVersion == HoodieTableVersion.TWO) {
        // assert marker files
        assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
    }
    // verify hoodie.table.version got downgraded
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
    assertTableVersionFromPropertyFile(toVersion);
    // trigger 3rd commit with marker based rollback disabled.
    /* HUDI-2310
    List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);

    // Check that the dataset contains records only from the 1st and 3rd commits, since the 2nd is expected to be rolled back.
    assertRows(inputRecords.getKey(), thirdBatch);
     */
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTable(org.apache.hudi.table.HoodieTable) WriteMarkers(org.apache.hudi.table.marker.WriteMarkers) MarkerType(org.apache.hudi.common.table.marker.MarkerType) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) List(java.util.List) HoodieTableVersion(org.apache.hudi.common.table.HoodieTableVersion) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)
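
The markerType ternary at the top of this test reflects that timeline-server-based markers only exist from table version TWO onward, so older versions must fall back to direct markers. A small sketch of that selection, using the same factory call as the test (the comments on marker behavior are an assumption about Hudi's marker mechanism, not taken from this page):

// Direct markers write one marker file per data file straight to storage;
// timeline-server-based markers batch marker requests through the embedded timeline server.
MarkerType markerType = fromVersion == HoodieTableVersion.TWO
    ? MarkerType.TIMELINE_SERVER_BASED
    : MarkerType.DIRECT;
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());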

Example 4 with TimelineLayoutVersion

Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.

From class TestTimelineLayout, method testTimelineLayoutFilter:

@Test
public void testTimelineLayoutFilter() {
    List<HoodieInstant> rawInstants = Arrays.asList(
        new HoodieInstant(State.REQUESTED, HoodieTimeline.CLEAN_ACTION, "001"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "001"),
        new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, "001"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
        new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "003"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "003"),
        new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "003"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.CLEAN_ACTION, "004"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "004"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
        new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "006"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "006"),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "007"),
        new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "007"));
    List<HoodieInstant> layout0Instants = TimelineLayout.getLayout(new TimelineLayoutVersion(0)).filterHoodieInstants(rawInstants.stream()).collect(Collectors.toList());
    assertEquals(rawInstants, layout0Instants);
    List<HoodieInstant> layout1Instants = TimelineLayout.getLayout(TimelineLayoutVersion.CURR_LAYOUT_VERSION).filterHoodieInstants(rawInstants.stream()).collect(Collectors.toList());
    assertEquals(7, layout1Instants.size());
    assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "007")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "006")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "005")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "004")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "003")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "002")));
    assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, "001")));
}
Also used : TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) Test(org.junit.jupiter.api.Test)
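
The filter collapses the 18 raw instants above to 7 because layout version 1 keeps a single instant per instant time, preferring the most progressed state (COMPLETED over INFLIGHT over REQUESTED), while layout version 0 passes everything through unchanged. A minimal sketch of that behavior, assuming the same TimelineLayout API as the test:

// Three states of the same instant time; the current layout should keep only COMPLETED.
List<HoodieInstant> instants = Arrays.asList(
    new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "010"),
    new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "010"),
    new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "010"));
List<HoodieInstant> filtered = TimelineLayout.getLayout(TimelineLayoutVersion.CURR_LAYOUT_VERSION)
    .filterHoodieInstants(instants.stream())
    .collect(Collectors.toList());
assertEquals(1, filtered.size());
assertEquals(State.COMPLETED, filtered.get(0).getState());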

Example 5 with TimelineLayoutVersion

Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.

From class TestUpgradeDowngrade, method testUpgradeDowngradeBetweenThreeAndCurrentVersion:

@Test
public void testUpgradeDowngradeBetweenThreeAndCurrentVersion() throws IOException {
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    addNewTableParamsToProps(params);
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
    // write inserts
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    doInsert(client);
    // current version should have TABLE_CHECKSUM key
    assertEquals(HoodieTableVersion.current(), metaClient.getTableConfig().getTableVersion());
    assertTableVersionFromPropertyFile(HoodieTableVersion.current());
    assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
    String checksum = metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key());
    // downgrade to version 3 and check TABLE_CHECKSUM is still present
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.THREE, null);
    assertEquals(HoodieTableVersion.THREE.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
    assertTableVersionFromPropertyFile(HoodieTableVersion.THREE);
    assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
    assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
    // remove TABLE_CHECKSUM and upgrade to current version
    metaClient.getTableConfig().getProps().remove(HoodieTableConfig.TABLE_CHECKSUM.key());
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.current(), null);
    // verify upgrade and TABLE_CHECKSUM
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(HoodieTableVersion.current().versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
    assertTableVersionFromPropertyFile(HoodieTableVersion.current());
    assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
    assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion): 7 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 5 usages
HashMap (java.util.HashMap): 4 usages
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 4 usages
Path (org.apache.hadoop.fs.Path): 3 usages
Test (org.junit.jupiter.api.Test): 3 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 3 usages
ArrayList (java.util.ArrayList): 2 usages
List (java.util.List): 2 usages
FileSlice (org.apache.hudi.common.model.FileSlice): 2 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 2 usages
HoodieTable (org.apache.hudi.table.HoodieTable): 2 usages
WriteMarkers (org.apache.hudi.table.marker.WriteMarkers): 2 usages
IOException (java.io.IOException): 1 usage
HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext): 1 usage
HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig): 1 usage
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 1 usage
HoodieTableVersion (org.apache.hudi.common.table.HoodieTableVersion): 1 usage
MarkerType (org.apache.hudi.common.table.marker.MarkerType): 1 usage
HoodieSavepointException (org.apache.hudi.exception.HoodieSavepointException): 1 usage
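
As a footnote to the aggregation counts, TimelineLayoutVersion itself is a small value object. The sketch below shows typical construction and comparison; the int constructor and the CURR_LAYOUT_VERSION constant appear in the examples above, while the Comparable behavior is an assumption about the Hudi release in use:

// Construct layout versions explicitly, as the meta-client builders above do.
TimelineLayoutVersion v0 = new TimelineLayoutVersion(0);
TimelineLayoutVersion current = TimelineLayoutVersion.CURR_LAYOUT_VERSION;
// Assumption: TimelineLayoutVersion implements Comparable, so ordering checks work.
if (v0.compareTo(current) < 0) {
    System.out.println("Table uses a pre-current timeline layout; an upgrade may be needed.");
}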