Use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.
The class TestUpgradeDowngrade, method testDowngrade.
@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
  MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
  // Init config, table and client.
  Map<String, String> params = new HashMap<>();
  if (fromVersion == HoodieTableVersion.TWO) {
    addNewTableParamsToProps(params);
  }
  if (tableType == HoodieTableType.MERGE_ON_READ) {
    params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
    metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
  }
  HoodieWriteConfig cfg = getConfigBuilder()
      .withAutoCommit(false)
      .withRollbackUsingMarkers(true)
      .withMarkersType(markerType.name())
      .withProps(params)
      .build();
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);
  if (fromVersion == HoodieTableVersion.TWO) {
    // Set table configs.
    HoodieTableConfig tableConfig = metaClient.getTableConfig();
    tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
    tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
    tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
    tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
  }
  // Prepare data: make two commits, of which the second is left uncommitted.
  List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
  List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
  Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
  HoodieTable table = this.getHoodieTable(metaClient, cfg);
  HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
  // Delete one of the marker files of the second commit, if requested.
  WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
  List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
  if (deletePartialMarkerFiles) {
    String toDeleteMarkerFile = markerPaths.get(0);
    table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
    markerPaths.remove(toDeleteMarkerFile);
  }
  // Set hoodie.table.version to fromVersion in the hoodie.properties file.
  HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
  if (fromVersion == HoodieTableVersion.TWO) {
    prepForDowngradeFromTwoToOne();
    toVersion = HoodieTableVersion.ONE;
  } else {
    prepForDowngradeFromOneToZero();
  }
  // The downgrade should be performed; all marker files should be deleted.
  new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
  if (fromVersion == HoodieTableVersion.TWO) {
    // Assert marker files.
    assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
  }
  // Verify that hoodie.table.version was downgraded.
  metaClient = HoodieTableMetaClient.builder()
      .setConf(context.getHadoopConf().get())
      .setBasePath(cfg.getBasePath())
      .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion())))
      .build();
  assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
  assertTableVersionFromPropertyFile(toVersion);
  // Trigger a third commit with marker-based rollback disabled.
  /* HUDI-2310
  List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);
  // Check that the entire dataset has records only from the 1st and 3rd commits, since the 2nd is expected to be rolled back.
  assertRows(inputRecords.getKey(), thirdBatch);
  */
}
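The conditional at the top of the test encodes the convention the rest of this page revolves around: tables at version TWO write timeline-server-based markers, while older table versions use direct markers. A minimal sketch of that mapping follows; the wrapper class and main harness are illustrative, not part of Hudi, and it assumes HoodieTableVersion lives in org.apache.hudi.common.table.

import org.apache.hudi.common.table.HoodieTableVersion;
import org.apache.hudi.common.table.marker.MarkerType;

public class MarkerTypeForVersion {

  // Mirrors the test's selection logic: version TWO implies timeline-server-based
  // markers; every earlier version implies direct markers.
  static MarkerType markerTypeFor(HoodieTableVersion version) {
    return version == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
  }

  public static void main(String[] args) {
    System.out.println(markerTypeFor(HoodieTableVersion.TWO)); // TIMELINE_SERVER_BASED
    System.out.println(markerTypeFor(HoodieTableVersion.ONE)); // DIRECT
  }
}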
Use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.
The class MarkerUtils, method readMarkerType.
/**
 * Reads the marker type from the `MARKERS.type` file.
 *
 * @param fileSystem file system to use.
 * @param markerDir  marker directory.
 * @return the marker type, or empty if the marker type file does not exist.
 */
public static Option<MarkerType> readMarkerType(FileSystem fileSystem, String markerDir) {
  Path markerTypeFilePath = new Path(markerDir, MARKER_TYPE_FILENAME);
  FSDataInputStream fsDataInputStream = null;
  Option<MarkerType> content = Option.empty();
  try {
    if (!doesMarkerTypeFileExist(fileSystem, markerDir)) {
      return Option.empty();
    }
    fsDataInputStream = fileSystem.open(markerTypeFilePath);
    content = Option.of(MarkerType.valueOf(FileIOUtils.readAsUTFString(fsDataInputStream)));
  } catch (IOException e) {
    throw new HoodieIOException("Cannot read marker type file " + markerTypeFilePath.toString() + "; " + e.getMessage(), e);
  } finally {
    closeQuietly(fsDataInputStream);
  }
  return content;
}
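A hedged usage sketch of readMarkerType: resolve the effective marker type for a marker directory, falling back to DIRECT when `MARKERS.type` is absent, mirroring the fallback in MarkerBasedRollbackUtils below. The wrapper class and main harness are ours, as is the assumption that MarkerUtils sits in org.apache.hudi.common.util.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.common.util.Option;

public class ReadMarkerTypeExample {

  // MARKERS.type is only written for timeline-server-based markers, so its
  // absence is interpreted as direct markers.
  static MarkerType effectiveMarkerType(FileSystem fs, String markerDir) {
    Option<MarkerType> markerType = MarkerUtils.readMarkerType(fs, markerDir);
    return markerType.isPresent() ? markerType.get() : MarkerType.DIRECT;
  }

  public static void main(String[] args) throws Exception {
    // args[0]: marker directory, e.g. <base path>/.hoodie/.temp/<instant time>
    FileSystem fs = new Path(args[0]).getFileSystem(new Configuration());
    System.out.println(effectiveMarkerType(fs, args[0]));
  }
}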
Use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.
The class MarkerBasedRollbackUtils, method getAllMarkerPaths.
/**
 * Gets all marker paths.
 *
 * @param table       instance of {@code HoodieTable} to use
 * @param context     instance of {@code HoodieEngineContext} to use
 * @param instant     instant of interest to rollback
 * @param parallelism parallelism to use
 * @return a list of all markers
 * @throws IOException upon errors while reading the marker files
 */
public static List<String> getAllMarkerPaths(HoodieTable table, HoodieEngineContext context, String instant, int parallelism) throws IOException {
  String markerDir = table.getMetaClient().getMarkerFolderPath(instant);
  FileSystem fileSystem = table.getMetaClient().getFs();
  Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
  // If there is no marker type file "MARKERS.type", we assume "DIRECT" markers are used
  if (!markerTypeOption.isPresent()) {
    WriteMarkers writeMarkers = WriteMarkersFactory.get(MarkerType.DIRECT, table, instant);
    return new ArrayList<>(writeMarkers.allMarkerFilePaths());
  }
  switch (markerTypeOption.get()) {
    case TIMELINE_SERVER_BASED:
      // Reads all markers written by the timeline server
      Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
      return markersMap.values().stream().flatMap(Collection::stream).collect(Collectors.toCollection(ArrayList::new));
    default:
      throw new HoodieException("The marker type \"" + markerTypeOption.get().name() + "\" is not supported.");
  }
}
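A short usage sketch: collect every marker path for a pending instant before rolling it back, independent of which marker mechanism wrote the markers. The wrapper class and the parallelism value are illustrative, and the package paths for HoodieTable, HoodieEngineContext, and MarkerBasedRollbackUtils are our assumption of where they live in recent Hudi releases.

import java.io.IOException;
import java.util.List;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.rollback.MarkerBasedRollbackUtils;

public class ListMarkersExample {

  // Prints one line per marker; each marker identifies a data file written
  // under the given pending instant.
  static void printMarkers(HoodieTable table, HoodieEngineContext context, String instantTime) throws IOException {
    List<String> markerPaths = MarkerBasedRollbackUtils.getAllMarkerPaths(table, context, instantTime, 100);
    markerPaths.forEach(System.out::println);
  }
}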
Use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.
The class TwoToOneDowngradeHandler, method convertToDirectMarkers.
/**
 * Converts the markers in the new format (timeline-server-based) to the old format of
 * direct markers, i.e., one marker file per data file, without the MARKERS.type file.
 * This operation needs to be idempotent:
 * 1. read all markers from the timeline-server-based marker files
 * 2. create direct-style markers
 * 3. delete the marker type file
 * 4. delete the timeline-server-based marker files
 *
 * @param commitInstantTime instant of interest for marker conversion.
 * @param table             instance of {@link HoodieTable} to use
 * @param context           instance of {@link HoodieEngineContext} to use
 * @param parallelism       parallelism to use
 */
private void convertToDirectMarkers(final String commitInstantTime, HoodieTable table, HoodieEngineContext context, int parallelism) throws IOException {
  String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime);
  FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy());
  Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
  if (markerTypeOption.isPresent()) {
    switch (markerTypeOption.get()) {
      case TIMELINE_SERVER_BASED:
        // Reads all markers written by the timeline server
        Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
        DirectWriteMarkers directWriteMarkers = new DirectWriteMarkers(table, commitInstantTime);
        // Recreates the markers in the direct format
        markersMap.values().stream().flatMap(Collection::stream).forEach(directWriteMarkers::create);
        // Deletes the marker type file
        MarkerUtils.deleteMarkerTypeFile(fileSystem, markerDir);
        // Deletes the timeline-server-based markers
        deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
        break;
      default:
        throw new HoodieException("The marker type \"" + markerTypeOption.get().name() + "\" is not supported for rollback.");
    }
  } else if (fileSystem.exists(new Path(markerDir))) {
    // In case of partial failures during downgrade, the marker type file may have been
    // deleted while timeline-server-based marker files remain, so delete them if present.
    deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
  }
}
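Because the conversion must be idempotent, a caller can sanity-check its outcome by re-reading the marker directory. Below is a hedged verification sketch that reuses only calls appearing on this page; the wrapper class is illustrative, and the package paths for WriteMarkers and WriteMarkersFactory are assumptions on our part.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;

public class VerifyDirectMarkersExample {

  static void verifyDirectMarkers(HoodieTable table, String instantTime) throws IOException {
    String markerDir = table.getMetaClient().getMarkerFolderPath(instantTime);
    FileSystem fs = table.getMetaClient().getFs();
    // Step 3 of the conversion deleted MARKERS.type, so no marker type should be readable.
    Option<MarkerType> markerType = MarkerUtils.readMarkerType(fs, markerDir);
    if (markerType.isPresent()) {
      throw new IllegalStateException("MARKERS.type still present after downgrade");
    }
    // The recreated direct markers should be listable through the direct-marker reader.
    WriteMarkers directMarkers = WriteMarkersFactory.get(MarkerType.DIRECT, table, instantTime);
    System.out.println("direct markers: " + directMarkers.allMarkerFilePaths().size());
  }
}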