Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.
The class TestUpgradeDowngrade, method testUpgradeZeroToOneInternal.
public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
  // Init config, table and client.
  Map<String, String> params = new HashMap<>();
  if (tableType == HoodieTableType.MERGE_ON_READ) {
    params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
    metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
  }
  HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  // Prepare data: make two commits, the second of which is left uncommitted.
  List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
  List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
  Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords =
      twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
  HoodieTable table = this.getHoodieTable(metaClient, cfg);
  HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
  // Delete one of the marker files of the second commit, if requested.
  WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
  List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
  if (deletePartialMarkerFiles) {
    String toDeleteMarkerFile = markerPaths.get(0);
    table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/"
        + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
    markerPaths.remove(toDeleteMarkerFile);
  }
  // Set hoodie.table.version to 0 in the hoodie.properties file.
  metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ZERO);
  if (induceResiduesFromPrevUpgrade) {
    createResidualFile();
  }
  // The upgrade should re-create marker files for the second commit, since it is still pending.
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.ONE, null);
  // Assert marker files.
  assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
  // Verify that hoodie.table.version got upgraded.
  metaClient = HoodieTableMetaClient.builder()
      .setConf(context.getHadoopConf().get())
      .setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
      .build();
  assertEquals(HoodieTableVersion.ONE.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
  // Trigger a third commit with marker-based rollback enabled.
  /* Disabled pending HUDI-2310:
  List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);
  // Check that the dataset has records only from the 1st and 3rd commits,
  // since the 2nd is expected to be rolled back.
  assertRows(inputRecords.getKey(), thirdBatch);
  if (induceResiduesFromPrevUpgrade) {
    assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
  }
  */
}
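For context, every snippet on this page constructs the same small value type. Below is a minimal sketch of it in isolation: new TimelineLayoutVersion(0) and TimelineLayoutVersion.CURR_LAYOUT_VERSION appear verbatim in these tests, while the Comparable ordering in the last line is an assumption about the class, not something the tests above assert.

TimelineLayoutVersion layout0 = new TimelineLayoutVersion(0); // the pre-upgrade timeline layout
TimelineLayoutVersion current = TimelineLayoutVersion.CURR_LAYOUT_VERSION; // the latest known layout
// Assumption: versions compare by their numeric code, so the current layout sorts highest.
assertTrue(layout0.compareTo(current) <= 0);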
Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.
The class TestUpgradeDowngrade, method testUpgradeOneToTwo.
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException {
  // Init config, table and client.
  Map<String, String> params = new HashMap<>();
  addNewTableParamsToProps(params);
  if (tableType == HoodieTableType.MERGE_ON_READ) {
    params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
    metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
  }
  HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  // Write inserts.
  doInsert(client);
  // Downgrade table props.
  downgradeTableConfigsFromTwoToOne(cfg);
  // Perform the upgrade.
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.TWO, null);
  // Verify that hoodie.table.version got upgraded.
  metaClient = HoodieTableMetaClient.builder()
      .setConf(context.getHadoopConf().get())
      .setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
      .build();
  assertEquals(HoodieTableVersion.TWO.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.TWO);
  // Verify table props.
  assertTableProps(cfg);
}
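Every upgrade and downgrade test on this page verifies the persisted version the same way: by rebuilding the meta client with an explicit layout version taken from the write config. Distilled into a minimal sketch, reusing the cfg and context fixtures from the snippets above:

HoodieTableMetaClient refreshed = HoodieTableMetaClient.builder()
    .setConf(context.getHadoopConf().get())
    .setBasePath(cfg.getBasePath())
    .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
    .build();
// A freshly built meta client re-reads hoodie.properties, so its table config reflects
// the version persisted by UpgradeDowngrade rather than any previously cached state.
assertEquals(HoodieTableVersion.TWO.versionCode(), refreshed.getTableConfig().getTableVersion().versionCode());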
Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.
The class TestUpgradeDowngrade, method testDowngrade.
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
  // Timeline-server-based markers only exist from table version TWO onward.
  MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
  // Init config, table and client.
  Map<String, String> params = new HashMap<>();
  if (fromVersion == HoodieTableVersion.TWO) {
    addNewTableParamsToProps(params);
  }
  if (tableType == HoodieTableType.MERGE_ON_READ) {
    params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
    metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
  }
  HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  if (fromVersion == HoodieTableVersion.TWO) {
    // Set table configs.
    HoodieTableConfig tableConfig = metaClient.getTableConfig();
    tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
    tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
    tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
    tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
  }
  // Prepare data: make two commits, the second of which is left uncommitted.
  List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
  List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
  Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords =
      twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
  HoodieTable table = this.getHoodieTable(metaClient, cfg);
  HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
  // Delete one of the marker files of the second commit, if requested.
  WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
  List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
  if (deletePartialMarkerFiles) {
    String toDeleteMarkerFile = markerPaths.get(0);
    table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/"
        + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
    markerPaths.remove(toDeleteMarkerFile);
  }
  // Set hoodie.table.version to fromVersion in the hoodie.properties file.
  HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
  if (fromVersion == HoodieTableVersion.TWO) {
    prepForDowngradeFromTwoToOne();
    toVersion = HoodieTableVersion.ONE;
  } else {
    prepForDowngradeFromOneToZero();
  }
  // The downgrade should be performed and all marker files should be deleted.
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
  if (fromVersion == HoodieTableVersion.TWO) {
    // Assert marker files.
    assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
  }
  // Verify that hoodie.table.version got downgraded.
  metaClient = HoodieTableMetaClient.builder()
      .setConf(context.getHadoopConf().get())
      .setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
      .build();
  assertEquals(toVersion.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(toVersion);
  // Trigger a third commit with marker-based rollback disabled.
  /* Disabled pending HUDI-2310:
  List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
  // Check that the dataset has records only from the 1st and 3rd commits,
  // since the 2nd is expected to be rolled back.
  assertRows(inputRecords.getKey(), thirdBatch);
  */
}
Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.
The class TestTimelineLayout, method testTimelineLayoutFilter.
@Test
public void testTimelineLayoutFilter() {
  List<HoodieInstant> rawInstants = Arrays.asList(
      new HoodieInstant(State.REQUESTED, HoodieTimeline.CLEAN_ACTION, "001"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "001"),
      new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, "001"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
      new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "002"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "003"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "003"),
      new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "003"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.CLEAN_ACTION, "004"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "004"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
      new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "005"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "006"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "006"),
      new HoodieInstant(State.REQUESTED, HoodieTimeline.DELTA_COMMIT_ACTION, "007"),
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "007"));
  // Layout version 0 keeps every instant unchanged.
  List<HoodieInstant> layout0Instants = TimelineLayout.getLayout(new TimelineLayoutVersion(0))
      .filterHoodieInstants(rawInstants.stream()).collect(Collectors.toList());
  assertEquals(rawInstants, layout0Instants);
  // Layout version 1 keeps only the most advanced instant per timestamp.
  List<HoodieInstant> layout1Instants = TimelineLayout.getLayout(TimelineLayoutVersion.CURR_LAYOUT_VERSION)
      .filterHoodieInstants(rawInstants.stream()).collect(Collectors.toList());
  assertEquals(7, layout1Instants.size());
  assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "007")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "006")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "005")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, "004")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "003")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "002")));
  assertTrue(layout1Instants.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, "001")));
}
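The assertions above pin down the filtering rule: layout version 0 passes every instant through unchanged, while layout version 1 keeps, for each timestamp, only the most advanced instant, treating a pending compaction and its resulting commit as one group (see timestamp "003"). A minimal sketch of the same behavior in isolation, using only the API calls exercised above; the "010" instants are a hypothetical input, not part of the original test:

List<HoodieInstant> pending = Arrays.asList(
    new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "010"),
    new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "010"));
List<HoodieInstant> visible = TimelineLayout.getLayout(TimelineLayoutVersion.CURR_LAYOUT_VERSION)
    .filterHoodieInstants(pending.stream())
    .collect(Collectors.toList());
// Only the INFLIGHT instant survives: for a single timestamp, the REQUESTED state
// is shadowed by the more advanced state under layout version 1.
assertEquals(1, visible.size());
assertEquals(State.INFLIGHT, visible.get(0).getState());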
Use of org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion in project hudi by apache.
The class TestUpgradeDowngrade, method testUpgradeDowngradeBetweenThreeAndCurrentVersion.
@Test
public void testUpgradeDowngradeBetweenThreeAndCurrentVersion() throws IOException {
  // Init config, table and client.
  Map<String, String> params = new HashMap<>();
  addNewTableParamsToProps(params);
  HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
  // Write inserts.
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  doInsert(client);
  // The current version should have the TABLE_CHECKSUM key.
  assertEquals(HoodieTableVersion.current(), metaClient.getTableConfig().getTableVersion());
  assertTableVersionFromPropertyFile(HoodieTableVersion.current());
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  String checksum = metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key());
  // Downgrade to version 3 and check that TABLE_CHECKSUM is still present.
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.THREE, null);
  assertEquals(HoodieTableVersion.THREE.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.THREE);
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
  // Remove TABLE_CHECKSUM and upgrade to the current version.
  metaClient.getTableConfig().getProps().remove(HoodieTableConfig.TABLE_CHECKSUM.key());
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.current(), null);
  // Verify the upgrade and TABLE_CHECKSUM.
  metaClient = HoodieTableMetaClient.builder()
      .setConf(context.getHadoopConf().get())
      .setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
      .build();
  assertEquals(HoodieTableVersion.current().versionCode(), metaClient.getTableConfig().getTableVersion().versionCode());
  assertTableVersionFromPropertyFile(HoodieTableVersion.current());
  assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key()));
  assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()));
}