use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class CompactionTestBase method moveCompactionFromRequestedToInflight.
protected void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteConfig cfg) {
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants().filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
assertTrue(instant.isInflight(), "Instant must be marked inflight");
}
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class TestUpgradeDowngrade method testUpgradeZeroToOneInternal.
public void testUpgradeZeroToOneInternal(boolean induceResiduesFromPrevUpgrade, boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException {
// init config, table and client.
Map<String, String> params = new HashMap<>();
if (tableType == HoodieTableType.MERGE_ON_READ) {
params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
}
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
// prepare data. Make 2 commits, in which 2nd is not committed.
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
HoodieTable table = this.getHoodieTable(metaClient, cfg);
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
// delete one of the marker files in 2nd commit if need be.
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
if (deletePartialMarkerFiles) {
String toDeleteMarkerFile = markerPaths.get(0);
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
markerPaths.remove(toDeleteMarkerFile);
}
// set hoodie.table.version to 0 in hoodie.properties file
metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ZERO);
if (induceResiduesFromPrevUpgrade) {
createResidualFile();
}
// should re-create marker files for 2nd commit since its pending.
new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.ONE, null);
// assert marker files
assertMarkerFilesForUpgrade(table, commitInstant, firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices);
// verify hoodie.table.version got upgraded
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ONE.versionCode());
assertTableVersionFromPropertyFile(HoodieTableVersion.ONE);
// trigger 3rd commit with marker based rollback enabled.
/* HUDI-2310
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, true);
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
assertRows(inputRecords.getKey(), thirdBatch);
if (induceResiduesFromPrevUpgrade) {
assertFalse(dfs.exists(new Path(metaClient.getMetaPath(), SparkUpgradeDowngrade.HOODIE_UPDATED_PROPERTY_FILE)));
}*/
}
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class TestUpgradeDowngrade method testDowngrade.
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
// init config, table and client.
Map<String, String> params = new HashMap<>();
if (fromVersion == HoodieTableVersion.TWO) {
addNewTableParamsToProps(params);
}
if (tableType == HoodieTableType.MERGE_ON_READ) {
params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
}
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
SparkRDDWriteClient client = getHoodieWriteClient(cfg);
if (fromVersion == HoodieTableVersion.TWO) {
// set table configs
HoodieTableConfig tableConfig = metaClient.getTableConfig();
tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
}
// prepare data. Make 2 commits, in which 2nd is not committed.
List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
HoodieTable table = this.getHoodieTable(metaClient, cfg);
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
// delete one of the marker files in 2nd commit if need be.
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
if (deletePartialMarkerFiles) {
String toDeleteMarkerFile = markerPaths.get(0);
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
markerPaths.remove(toDeleteMarkerFile);
}
// set hoodie.table.version to fromVersion in hoodie.properties file
HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
if (fromVersion == HoodieTableVersion.TWO) {
prepForDowngradeFromTwoToOne();
toVersion = HoodieTableVersion.ONE;
} else {
prepForDowngradeFromOneToZero();
}
// downgrade should be performed. all marker files should be deleted
new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
if (fromVersion == HoodieTableVersion.TWO) {
// assert marker files
assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
}
// verify hoodie.table.version got downgraded
metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
assertTableVersionFromPropertyFile(toVersion);
// trigger 3rd commit with marker based rollback disabled.
/* HUDI-2310
List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
// Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
assertRows(inputRecords.getKey(), thirdBatch);
*/
}
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class HoodieClientTestUtils method getLatestFileIDsToFullPath.
private static HashMap<String, String> getLatestFileIDsToFullPath(String basePath, HoodieTimeline commitTimeline, List<HoodieInstant> commitsToReturn) throws IOException {
HashMap<String, String> fileIdToFullPath = new HashMap<>();
for (HoodieInstant commit : commitsToReturn) {
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get(), HoodieCommitMetadata.class);
fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths(basePath));
}
return fileIdToFullPath;
}
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class HoodieClientTestUtils method readCommit.
public static Dataset<Row> readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, String instantTime, boolean filterByCommitTime) {
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
if (!commitTimeline.containsInstant(commitInstant)) {
throw new HoodieException("No commit exists at " + instantTime);
}
try {
HashMap<String, String> paths = getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
LOG.info("Path :" + paths.values());
Dataset<Row> unFilteredRows = null;
if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.PARQUET)) {
unFilteredRows = sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]));
} else if (HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.ORC)) {
unFilteredRows = sqlContext.read().orc(paths.values().toArray(new String[paths.size()]));
}
if (unFilteredRows != null) {
if (filterByCommitTime) {
return unFilteredRows.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime));
} else {
return unFilteredRows;
}
} else {
return sqlContext.emptyDataFrame();
}
} catch (Exception e) {
throw new HoodieException("Error reading commit " + instantTime, e);
}
}
Aggregations