
Example 1 with HoodieTableConfig

Use of org.apache.hudi.common.table.HoodieTableConfig in the Apache Hudi project.

From the class HoodieRowDataCreateHandle, method makeNewPath:

private Path makeNewPath(String partitionPath) {
    Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
    try {
        if (!fs.exists(path)) {
            // create a new partition as needed.
            fs.mkdirs(path);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to make dir " + path, e);
    }
    HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
    return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId, tableConfig.getBaseFileFormat().getFileExtension()));
}
Also used : Path(org.apache.hadoop.fs.Path), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), HoodieIOException(org.apache.hudi.exception.HoodieIOException), IOException(java.io.IOException)
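For reference, the same lookup can be reproduced outside a write handle. The sketch below is illustrative only: it assumes an existing table at basePath and an available Hadoop Configuration, and it reuses only the builder and accessor calls that already appear in these examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;

public class BaseFileExtensionSketch {

    // Illustrative helper: load the table config of an existing table and return
    // the data file extension (e.g. ".parquet") that makeNewPath appends above.
    static String baseFileExtension(Configuration hadoopConf, String basePath) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setConf(hadoopConf)
            .setBasePath(basePath)
            .build();
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        return tableConfig.getBaseFileFormat().getFileExtension();
    }
}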

Example 2 with HoodieTableConfig

Use of org.apache.hudi.common.table.HoodieTableConfig in the Apache Hudi project.

From the class TestUpgradeDowngrade, method testDowngrade:

@ParameterizedTest(name = TEST_NAME_WITH_DOWNGRADE_PARAMS)
@MethodSource("downGradeConfigParams")
public void testDowngrade(boolean deletePartialMarkerFiles, HoodieTableType tableType, HoodieTableVersion fromVersion) throws IOException {
    MarkerType markerType = fromVersion == HoodieTableVersion.TWO ? MarkerType.TIMELINE_SERVER_BASED : MarkerType.DIRECT;
    // init config, table and client.
    Map<String, String> params = new HashMap<>();
    if (fromVersion == HoodieTableVersion.TWO) {
        addNewTableParamsToProps(params);
    }
    if (tableType == HoodieTableType.MERGE_ON_READ) {
        params.put(TYPE.key(), HoodieTableType.MERGE_ON_READ.name());
        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
    }
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(true).withMarkersType(markerType.name()).withProps(params).build();
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    if (fromVersion == HoodieTableVersion.TWO) {
        // set table configs
        HoodieTableConfig tableConfig = metaClient.getTableConfig();
        tableConfig.setValue(HoodieTableConfig.NAME, cfg.getTableName());
        tableConfig.setValue(HoodieTableConfig.PARTITION_FIELDS, cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
        tableConfig.setValue(HoodieTableConfig.RECORDKEY_FIELDS, cfg.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()));
        tableConfig.setValue(BASE_FILE_FORMAT, cfg.getString(BASE_FILE_FORMAT));
    }
    // Prepare data: make 2 commits, where the 2nd one is left uncommitted.
    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, secondPartitionCommit2FileSlices, cfg, client, false);
    HoodieTable table = this.getHoodieTable(metaClient, cfg);
    HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
    // Delete one of the marker files of the 2nd commit, if requested.
    WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstant.getTimestamp());
    List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
    if (deletePartialMarkerFiles) {
        String toDeleteMarkerFile = markerPaths.get(0);
        table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
        markerPaths.remove(toDeleteMarkerFile);
    }
    // set hoodie.table.version to fromVersion in hoodie.properties file
    HoodieTableVersion toVersion = HoodieTableVersion.ZERO;
    if (fromVersion == HoodieTableVersion.TWO) {
        prepForDowngradeFromTwoToOne();
        toVersion = HoodieTableVersion.ONE;
    } else {
        prepForDowngradeFromOneToZero();
    }
    // Downgrade should be performed and all marker files should be deleted.
    new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(toVersion, null);
    if (fromVersion == HoodieTableVersion.TWO) {
        // assert marker files
        assertMarkerFilesForDowngrade(table, commitInstant, toVersion == HoodieTableVersion.ONE);
    }
    // verify hoodie.table.version got downgraded
    metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()).setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build();
    assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), toVersion.versionCode());
    assertTableVersionFromPropertyFile(toVersion);
    // trigger 3rd commit with marker based rollback disabled.
    /* HUDI-2310
    List<HoodieRecord> thirdBatch = triggerCommit("003", tableType, false);

    // Check that the entire dataset has records only from the 1st and 3rd commits, since the 2nd is expected to be rolled back.
    assertRows(inputRecords.getKey(), thirdBatch);
    */
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant), Path(org.apache.hadoop.fs.Path), SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient), HashMap(java.util.HashMap), FileSlice(org.apache.hudi.common.model.FileSlice), ArrayList(java.util.ArrayList), HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), HoodieTable(org.apache.hudi.table.HoodieTable), WriteMarkers(org.apache.hudi.table.marker.WriteMarkers), MarkerType(org.apache.hudi.common.table.marker.MarkerType), TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion), List(java.util.List), HoodieTableVersion(org.apache.hudi.common.table.HoodieTableVersion), ParameterizedTest(org.junit.jupiter.params.ParameterizedTest), MethodSource(org.junit.jupiter.params.provider.MethodSource)
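The final assertions of this test reduce to reading hoodie.properties back through the table config. A minimal, illustrative sketch of that check (the helper name isAtVersion is not part of the Hudi API) could look like this:

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.HoodieTableVersion;

public class TableVersionCheckSketch {

    // Illustrative helper: reload the meta client so that changes written by
    // UpgradeDowngrade are picked up, then compare the persisted table version.
    static boolean isAtVersion(HoodieTableMetaClient metaClient, HoodieTableVersion expected) {
        HoodieTableMetaClient reloaded = HoodieTableMetaClient.reload(metaClient);
        return reloaded.getTableConfig().getTableVersion().versionCode() == expected.versionCode();
    }
}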

Example 3 with HoodieTableConfig

Use of org.apache.hudi.common.table.HoodieTableConfig in the Apache Hudi project.

From the class TestRepairsCommand, method testOverwriteHoodieProperties:

/**
 * Test case for 'repair overwrite-hoodie-props'.
 */
@Test
public void testOverwriteHoodieProperties() throws IOException {
    URL newProps = this.getClass().getClassLoader().getResource("table-config.properties");
    assertNotNull(newProps, "New property file must exist");
    CommandResult cr = shell().executeCommand("repair overwrite-hoodie-props --new-props-file " + newProps.getPath());
    assertTrue(cr.isSuccess());
    Map<String, String> oldProps = HoodieCLI.getTableMetaClient().getTableConfig().propsMap();
    // After overwrite, the values stored in .hoodie should equal those read from the new properties file.
    HoodieTableConfig tableConfig = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig();
    Map<String, String> result = tableConfig.propsMap();
    // validate table checksum
    assertTrue(result.containsKey(TABLE_CHECKSUM.key()));
    assertTrue(validateChecksum(tableConfig.getProps()));
    Properties expectProps = new Properties();
    expectProps.load(new FileInputStream(newProps.getPath()));
    Map<String, String> expected = expectProps.entrySet().stream().collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())));
    expected.putIfAbsent(TABLE_CHECKSUM.key(), String.valueOf(generateChecksum(tableConfig.getProps())));
    assertEquals(expected, result);
    // check result
    List<String> allPropsStr = Arrays.asList(NAME.key(), TYPE.key(), VERSION.key(), ARCHIVELOG_FOLDER.key(), TIMELINE_LAYOUT_VERSION.key(), TABLE_CHECKSUM.key());
    String[][] rows = allPropsStr.stream().sorted().map(key -> new String[] { key, oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null") }).toArray(String[][]::new);
    String expect = HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_HOODIE_PROPERTY, HoodieTableHeaderFields.HEADER_OLD_VALUE, HoodieTableHeaderFields.HEADER_NEW_VALUE }, rows);
    expect = removeNonWordAndStripSpace(expect);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expect, got);
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach), ARCHIVELOG_FOLDER(org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER), Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull), Arrays(java.util.Arrays), FileSystem(org.apache.hadoop.fs.FileSystem), URL(java.net.URL), VERSION(org.apache.hudi.common.table.HoodieTableConfig.VERSION), TYPE(org.apache.hudi.common.table.HoodieTableConfig.TYPE), HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator), HoodieTableHeaderFields(org.apache.hudi.cli.HoodieTableHeaderFields), HoodieTableType(org.apache.hudi.common.model.HoodieTableType), HoodieTableConfig.generateChecksum(org.apache.hudi.common.table.HoodieTableConfig.generateChecksum), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient), Configuration(org.apache.hadoop.conf.Configuration), Map(java.util.Map), Path(org.apache.hadoop.fs.Path), NAME(org.apache.hudi.common.table.HoodieTableConfig.NAME), Tag(org.junit.jupiter.api.Tag), Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals), CLIFunctionalTestHarness(org.apache.hudi.cli.functional.CLIFunctionalTestHarness), TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion), Properties(java.util.Properties), Files(java.nio.file.Files), TABLE_CHECKSUM(org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM), HoodieTestCommitMetadataGenerator(org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator), IOException(java.io.IOException), FileInputStream(java.io.FileInputStream), Collectors(java.util.stream.Collectors), HoodieCLI(org.apache.hudi.cli.HoodieCLI), Test(org.junit.jupiter.api.Test), AfterEach(org.junit.jupiter.api.AfterEach), List(java.util.List), Paths(java.nio.file.Paths), Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue), CommandResult(org.springframework.shell.core.CommandResult), HoodiePrintHelper(org.apache.hudi.cli.HoodiePrintHelper), TIMELINE_LAYOUT_VERSION(org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION), HoodieTableConfig.validateChecksum(org.apache.hudi.common.table.HoodieTableConfig.validateChecksum), FSUtils(org.apache.hudi.common.fs.FSUtils)
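The comparison performed by this test is essentially a diff over two propsMap() snapshots taken before and after the overwrite. The illustrative helper below (diffProps is not a Hudi API; it only uses the propsMap() accessor shown above) sketches that idea:

import java.util.HashMap;
import java.util.Map;
import org.apache.hudi.common.table.HoodieTableConfig;

public class TableConfigDiffSketch {

    // Illustrative helper: return the keys whose values changed between two
    // table config snapshots, mapped to "oldValue -> newValue" strings.
    static Map<String, String> diffProps(HoodieTableConfig before, HoodieTableConfig after) {
        Map<String, String> oldProps = before.propsMap();
        Map<String, String> newProps = after.propsMap();
        Map<String, String> changed = new HashMap<>();
        newProps.forEach((key, newValue) -> {
            String oldValue = oldProps.getOrDefault(key, "null");
            if (!oldValue.equals(newValue)) {
                changed.put(key, oldValue + " -> " + newValue);
            }
        });
        return changed;
    }
}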

Example 4 with HoodieTableConfig

Use of org.apache.hudi.common.table.HoodieTableConfig in the Apache Hudi project.

From the class MultipleSparkJobExecutionStrategy, method readRecordsForGroupWithLogs:

/**
 * Read records from baseFiles, apply updates and convert to RDD.
 */
private HoodieData<HoodieRecord<T>> readRecordsForGroupWithLogs(JavaSparkContext jsc, List<ClusteringOperation> clusteringOps, String instantTime) {
    HoodieWriteConfig config = getWriteConfig();
    HoodieTable table = getHoodieTable();
    return HoodieJavaRDD.of(jsc.parallelize(clusteringOps, clusteringOps.size()).mapPartitions(clusteringOpsPartition -> {
        List<Iterator<HoodieRecord<T>>> recordIterators = new ArrayList<>();
        clusteringOpsPartition.forEachRemaining(clusteringOp -> {
            long maxMemoryPerCompaction = IOUtils.getMaxMemoryPerCompaction(new SparkTaskContextSupplier(), config);
            LOG.info("MaxMemoryPerCompaction run as part of clustering => " + maxMemoryPerCompaction);
            try {
                Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
                HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder().withFileSystem(table.getMetaClient().getFs()).withBasePath(table.getMetaClient().getBasePath()).withLogFilePaths(clusteringOp.getDeltaFilePaths()).withReaderSchema(readerSchema).withLatestInstantTime(instantTime).withMaxMemorySizeInBytes(maxMemoryPerCompaction).withReadBlocksLazily(config.getCompactionLazyBlockReadEnabled()).withReverseReader(config.getCompactionReverseLogReadEnabled()).withBufferSize(config.getMaxDFSStreamBufferSize()).withSpillableMapBasePath(config.getSpillableMapBasePath()).withPartition(clusteringOp.getPartitionPath()).build();
                Option<HoodieFileReader> baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath()) ? Option.empty() : Option.of(HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())));
                HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
                recordIterators.add(getFileSliceReader(baseFileReader, scanner, readerSchema, tableConfig.getPayloadClass(), tableConfig.getPreCombineField(), tableConfig.populateMetaFields() ? Option.empty() : Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()))));
            } catch (IOException e) {
                throw new HoodieClusteringException("Error reading input data for " + clusteringOp.getDataFilePath() + " and " + clusteringOp.getDeltaFilePaths(), e);
            }
        });
        return new ConcatenatingIterator<>(recordIterators);
    }));
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) RewriteAvroPayload(org.apache.hudi.common.model.RewriteAvroPayload) ConcatenatingIterator(org.apache.hudi.client.utils.ConcatenatingIterator) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) SparkTaskContextSupplier(org.apache.hudi.client.SparkTaskContextSupplier) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) Logger(org.apache.log4j.Logger) RDDSpatialCurveSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDSpatialCurveSortPartitioner) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) PLAN_STRATEGY_SORT_COLUMNS(org.apache.hudi.config.HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) ClusteringOperation(org.apache.hudi.common.model.ClusteringOperation) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) KeyGenUtils(org.apache.hudi.keygen.KeyGenUtils) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieFileSliceReader.getFileSliceReader(org.apache.hudi.common.table.log.HoodieFileSliceReader.getFileSliceReader) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) ClusteringExecutionStrategy(org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy) IndexedRecord(org.apache.avro.generic.IndexedRecord) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieMergedLogRecordScanner(org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner) RDDCustomColumnsSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Iterator(java.util.Iterator) FutureUtils(org.apache.hudi.common.util.FutureUtils) HoodieClusteringException(org.apache.hudi.exception.HoodieClusteringException) IOException(java.io.IOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair) Path(org.apache.hadoop.fs.Path) 
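The getFileSliceReader call above pulls all merge-related settings straight from HoodieTableConfig. The sketch below isolates just that lookup; the helper and its printing are illustrative additions, not Hudi code, but every accessor it calls appears in the example above.

import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

public class MergeSettingsSketch {

    // Illustrative helper: read the payload class, the precombine field and, when
    // meta fields are not populated, the record key / partition path fields, as the
    // clustering strategy does before building the file slice reader.
    static void printMergeSettings(HoodieTableConfig tableConfig) {
        String payloadClass = tableConfig.getPayloadClass();
        String preCombineField = tableConfig.getPreCombineField();
        Option<Pair<String, String>> keyFields = tableConfig.populateMetaFields()
            ? Option.empty()
            : Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()));
        System.out.println("payload=" + payloadClass
            + ", precombine=" + preCombineField
            + ", keyFields=" + keyFields);
    }
}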

Example 5 with HoodieTableConfig

Use of org.apache.hudi.common.table.HoodieTableConfig in the Apache Hudi project.

From the class HoodieRowCreateHandle, method makeNewPath:

private Path makeNewPath(String partitionPath) {
    Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
    try {
        if (!fs.exists(path)) {
            // create a new partition as needed.
            fs.mkdirs(path);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to make dir " + path, e);
    }
    HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
    return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId, tableConfig.getBaseFileFormat().getFileExtension()));
}
Also used : Path(org.apache.hadoop.fs.Path), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), HoodieIOException(org.apache.hudi.exception.HoodieIOException), IOException(java.io.IOException)

Aggregations

HoodieTableConfig (org.apache.hudi.common.table.HoodieTableConfig): 8
IOException (java.io.IOException): 6
Path (org.apache.hadoop.fs.Path): 6
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 4
ArrayList (java.util.ArrayList): 3
List (java.util.List): 3
Schema (org.apache.avro.Schema): 3
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 3
HoodieTable (org.apache.hudi.table.HoodieTable): 3
Map (java.util.Map): 2
Properties (java.util.Properties): 2
Collectors (java.util.stream.Collectors): 2
TimelineLayoutVersion (org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion): 2
FileInputStream (java.io.FileInputStream): 1
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1
URL (java.net.URL): 1
Files (java.nio.file.Files): 1
Paths (java.nio.file.Paths): 1
Arrays (java.util.Arrays): 1
HashMap (java.util.HashMap): 1