Search in sources :

Example 1 with MarkerType

use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.

Source: class TestUpgradeDowngrade, method testDowngrade.

@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
    // Table version TWO writes markers through the timeline server; earlier versions use
    // direct (one-file-per-data-file) markers, so the test config must match the start version.
    MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    if (fromVersion == HoodieTableVersion.TWO) {
        addNewTableParamsToProps(params);
    }
    // MOR tables need a re-initialized meta client; the default fixture is presumably COW.
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    // Marker-based rollback is enabled so the downgrade path has marker files to clean up.
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    if (fromVersion == HoodieTableVersion.TWO) {
        // set table configs
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
        tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
        tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
        tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
    }
    // prepare data. Make 2 commits, in which 2nd is not committed.
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    // The pending (uncommitted) 2nd commit is the one whose markers the downgrade must handle.
    HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
    // delete one of the marker files in 2nd commit if need be.
    // Simulates a partially-written marker directory to exercise downgrade idempotency.
    WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
    List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
    if (deletePartialMarkerFiles) {
        String toDeleteMarkerFile = markerPaths.get(0);
        table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
        markerPaths.remove(toDeleteMarkerFile);
    }
    // set hoodie.table.version to fromVersion in hoodie.properties file
    // Downgrade target is one step below the starting version: TWO -> ONE, ONE -> ZERO.
    HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
    if (fromVersion == HoodieTableVersion.TWO) {
        prepForDowngradeFromTwoToOne();
        toVersion = HoodieTableVersion.ONE;
    } else {
        prepForDowngradeFromOneToZero();
    }
    // downgrade should be performed. all marker files should be deleted
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
    if (fromVersion == HoodieTableVersion.TWO) {
        // assert marker files
        // TWO -> ONE must convert timeline-server-based markers to direct markers.
        assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
    }
    // verify hoodie.table.version got downgraded
    // Rebuild the meta client from disk so the assertion reads the persisted hoodie.properties.
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
    assertTableVersionFromPropertyFile(toVersion);
// trigger 3rd commit with marker based rollback disabled.
/* HUDI-2310
    List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);

    // Check the entire dataset has all records only from 1st commit and 3rd commit since 2nd is expected to be rolledback.
    assertRows(inputRecords.getKey(), thirdBatch);
     */
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTable(org.apache.hudi.table.HoodieTable) WriteMarkers(org.apache.hudi.table.marker.WriteMarkers) MarkerType(org.apache.hudi.common.table.marker.MarkerType) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) List(java.util.List) ArrayList(java.util.ArrayList) HoodieTableVersion(org.apache.hudi.common.table.HoodieTableVersion) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 2 with MarkerType

use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.

Source: class MarkerUtils, method readMarkerType.

/**
 * Reads the marker type recorded in the {@code MARKERS.type} file under the marker directory.
 *
 * @param fileSystem file system to use.
 * @param markerDir  marker directory.
 * @return the marker type, or empty if the marker type file does not exist.
 * @throws HoodieIOException if the marker type file exists but cannot be read.
 */
public static Option<MarkerType> readMarkerType(FileSystem fileSystem, String markerDir) {
    Path typeFilePath = new Path(markerDir, MARKER_TYPE_FILENAME);
    FSDataInputStream inputStream = null;
    Option<MarkerType> markerType = Option.empty();
    try {
        // Absent file simply means no type was recorded; leave the result empty.
        if (doesMarkerTypeFileExist(fileSystem, markerDir)) {
            inputStream = fileSystem.open(typeFilePath);
            String typeName = FileIOUtils.readAsUTFString(inputStream);
            markerType = Option.of(MarkerType.valueOf(typeName));
        }
    } catch (IOException e) {
        throw new HoodieIOException("Cannot read marker type file " + typeFilePath.toString() + "; " + e.getMessage(), e);
    } finally {
        // Close failures are deliberately swallowed; the content (or error) is already decided.
        closeQuietly(inputStream);
    }
    return markerType;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) MarkerType(org.apache.hudi.common.table.marker.MarkerType) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 3 with MarkerType

use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.

Source: class MarkerBasedRollbackUtils, method getAllMarkerPaths.

/**
 * Gets all marker paths written for the given instant, handling both direct and
 * timeline-server-based marker layouts.
 *
 * @param table       instance of {@code HoodieTable} to use
 * @param context     instance of {@code HoodieEngineContext} to use
 * @param instant     instant of interest to rollback
 * @param parallelism parallelism to use
 * @return a list of all markers
 * @throws IOException if marker files cannot be listed or read
 * @throws HoodieException if an unsupported marker type is recorded
 */
public static List<String> getAllMarkerPaths(HoodieTable table, HoodieEngineContext context, String instant, int parallelism) throws IOException {
    String markerDir = table.getMetaClient().getMarkerFolderPath(instant);
    FileSystem fileSystem = table.getMetaClient().getFs();
    Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
    // No "MARKERS.type" file: assume the legacy DIRECT layout (one marker file per data file).
    if (!markerTypeOption.isPresent()) {
        return new ArrayList<>(WriteMarkersFactory.get(MarkerType.DIRECT, table, instant).allMarkerFilePaths());
    }
    MarkerType markerType = markerTypeOption.get();
    if (markerType == MarkerType.TIMELINE_SERVER_BASED) {
        // Markers are batched into per-server files; flatten all entries into one list.
        Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
        return markersMap.values().stream().flatMap(Collection::stream).collect(Collectors.toCollection(ArrayList::new));
    }
    throw new HoodieException("The marker type \"" + markerType.name() + "\" is not supported.");
}
Also used : Set(java.util.Set) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieException(org.apache.hudi.exception.HoodieException)

Example 4 with MarkerType

use of org.apache.hudi.common.table.marker.MarkerType in project hudi by apache.

Source: class TwoToOneDowngradeHandler, method convertToDirectMarkers.

/**
 * Converts markers from the new format (timeline server based) to the old format of
 * direct markers, i.e., one marker file per data file, without a MARKERS.type file.
 * This needs to be idempotent:
 * 1. read all markers from timeline-server-based marker files
 * 2. create direct-style markers
 * 3. delete the marker type file
 * 4. delete the timeline-server-based marker files
 *
 * @param commitInstantTime instant of interest for marker conversion.
 * @param table             instance of {@link HoodieTable} to use
 * @param context           instance of {@link HoodieEngineContext} to use
 * @param parallelism       parallelism to use
 * @throws IOException if marker files cannot be read or deleted
 * @throws HoodieException if an unsupported marker type is recorded
 */
private void convertToDirectMarkers(final String commitInstantTime, HoodieTable table, HoodieEngineContext context, int parallelism) throws IOException {
    String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime);
    FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy());
    Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
    if (!markerTypeOption.isPresent()) {
        // In case of partial failures during downgrade, there is a chance that the marker type
        // file was deleted but timeline-server-based marker files are left.  Delete them if any.
        if (fileSystem.exists(new Path(markerDir))) {
            deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
        }
        return;
    }
    MarkerType recordedType = markerTypeOption.get();
    if (recordedType != MarkerType.TIMELINE_SERVER_BASED) {
        throw new HoodieException("The marker type \"" + recordedType.name() + "\" is not supported for rollback.");
    }
    // Reads all markers written by the timeline server
    Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
    // Recreates the markers in the direct format
    DirectWriteMarkers directWriteMarkers = new DirectWriteMarkers(table, commitInstantTime);
    markersMap.values().stream().flatMap(Collection::stream).forEach(directWriteMarkers::create);
    // Deletes marker type file, then the timeline-server-based markers themselves
    MarkerUtils.deleteMarkerTypeFile(fileSystem, markerDir);
    deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
}
Also used : DirectWriteMarkers(org.apache.hudi.table.marker.DirectWriteMarkers) Path(org.apache.hadoop.fs.Path) Set(java.util.Set) FileSystem(org.apache.hadoop.fs.FileSystem) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieException(org.apache.hudi.exception.HoodieException)

Aggregations

MarkerType (org.apache.hudi.common.table.marker.MarkerType)4 Path (org.apache.hadoop.fs.Path)3 ArrayList (java.util.ArrayList)2 Set (java.util.Set)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 HoodieException (org.apache.hudi.exception.HoodieException)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 List (java.util.List)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient)1 FileSlice (org.apache.hudi.common.model.FileSlice)1 HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig)1 HoodieTableVersion (org.apache.hudi.common.table.HoodieTableVersion)1 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)1 TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion)1 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)1 HoodieIOException (org.apache.hudi.exception.HoodieIOException)1 HoodieTable (org.apache.hudi.table.HoodieTable)1 DirectWriteMarkers (org.apache.hudi.table.marker.DirectWriteMarkers)1