Search in sources :

Example 91 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestAvroDFSSource method prepareDFSSource.

/**
 * Builds the {@link AvroDFSSource} used by this test, with
 * {@code hoodie.deltastreamer.source.dfs.root} set to {@code dfsRoot}.
 *
 * @return the newly constructed source; never {@code null}
 * @throws java.io.UncheckedIOException if constructing the source raises an {@link IOException}
 */
@Override
protected Source prepareDFSSource() {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot);
    try {
        return new AvroDFSSource(props, jsc, sparkSession, schemaProvider);
    } catch (IOException e) {
        // Fail here with the original cause rather than returning null, which would
        // only surface later as an unrelated NullPointerException in the caller.
        throw new java.io.UncheckedIOException("Failed to create AvroDFSSource for root " + dfsRoot, e);
    }
}
Also used : IOException(java.io.IOException) TypedProperties(org.apache.hudi.common.config.TypedProperties)

Example 92 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestHoodieIncrSource method readAndAssert.

/**
 * Creates a {@link HoodieIncrSource} over {@code basePath}, fetches one batch and checks
 * the result against the expected row count and checkpoint.
 *
 * @param missingCheckpointStrategy strategy whose name is written to
 *        {@code hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy}
 * @param checkpointToPull checkpoint passed to {@code fetchNextBatch}
 * @param expectedCount expected number of rows in the batch; {@code 0} means the batch
 *        is expected to be absent
 * @param expectedCheckpoint checkpoint string the fetch is expected to return
 */
private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option<String> checkpointToPull, int expectedCount, String expectedCheckpoint) {
    Properties properties = new Properties();
    properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath);
    properties.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name());
    TypedProperties typedProperties = new TypedProperties(properties);
    HoodieIncrSource incrSource = new HoodieIncrSource(typedProperties, jsc, sparkSession, new TestSchemaProvider(HoodieTestDataGenerator.AVRO_SCHEMA));
    // read everything until latest
    Pair<Option<Dataset<Row>>, String> batchCheckPoint = incrSource.fetchNextBatch(checkpointToPull, 500);
    Assertions.assertNotNull(batchCheckPoint.getValue());
    if (expectedCount == 0) {
        assertFalse(batchCheckPoint.getKey().isPresent());
    } else {
        // JUnit takes (expected, actual); this order keeps failure messages accurate.
        assertEquals(expectedCount, batchCheckPoint.getKey().get().count());
    }
    Assertions.assertEquals(expectedCheckpoint, batchCheckPoint.getRight());
}
Also used : Option(org.apache.hudi.common.util.Option) Row(org.apache.spark.sql.Row) Properties(java.util.Properties) TypedProperties(org.apache.hudi.common.config.TypedProperties)

Example 93 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class HoodieTableConfig method modify.

/**
 * Rewrites the table properties file under {@code metadataFolder} by applying
 * {@code modifyFn} to its current contents, using a backup copy while the rewrite
 * is in progress.
 *
 * <p>Sequence: run {@code recoverIfNeeded}, copy the properties file to the backup path,
 * delete the properties file, load properties from the backup, apply {@code modifyFn},
 * store the result to the properties file, reload it and compare its
 * {@code TABLE_CHECKSUM} entry with the value returned by {@code storeProperties},
 * and finally delete the backup.
 *
 * @param fs file system used for all reads, writes and deletes
 * @param metadataFolder folder containing the properties file and its backup
 * @param modifyProps passed as the second argument to {@code modifyFn}
 * @param modifyFn invoked as {@code modifyFn.accept(currentProps, modifyProps)} before saving
 * @throws HoodieIOException if any step raises an {@link IOException}, or if the rewritten
 *         file has no checksum property or one that differs from the stored checksum
 */
private static void modify(FileSystem fs, Path metadataFolder, Properties modifyProps, BiConsumer<Properties, Properties> modifyFn) {
    Path cfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    Path backupCfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP);
    try {
        // 0. do any recovery from prior attempts.
        recoverIfNeeded(fs, cfgPath, backupCfgPath);
        // 1. backup the existing properties.
        // The backup is created with overwrite=false.
        try (FSDataInputStream in = fs.open(cfgPath);
            FSDataOutputStream out = fs.create(backupCfgPath, false)) {
            FileIOUtils.copy(in, out);
        }
        // 2. delete the properties file, reads will go to the backup, until we are done.
        fs.delete(cfgPath, false);
        // 3. read current props, upsert and save back.
        String checksum;
        try (FSDataInputStream in = fs.open(backupCfgPath);
            FSDataOutputStream out = fs.create(cfgPath, true)) {
            Properties props = new TypedProperties();
            props.load(in);
            modifyFn.accept(props, modifyProps);
            // The returned value is what step 4 expects to find under TABLE_CHECKSUM.
            checksum = storeProperties(props, out);
        }
        // 4. verify and remove backup.
        try (FSDataInputStream in = fs.open(cfgPath)) {
            Properties props = new TypedProperties();
            props.load(in);
            if (!props.containsKey(TABLE_CHECKSUM.key()) || !props.getProperty(TABLE_CHECKSUM.key()).equals(checksum)) {
                // delete the properties file and throw exception indicating update failure
                // subsequent writes will recover and update, reads will go to the backup until then
                fs.delete(cfgPath, false);
                throw new HoodieIOException("Checksum property missing or does not match.");
            }
        }
        // Only reached when verification passed; the backup is no longer needed.
        fs.delete(backupCfgPath, false);
    } catch (IOException e) {
        throw new HoodieIOException("Error updating table configs.", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) Properties(java.util.Properties) TypedProperties(org.apache.hudi.common.config.TypedProperties) OrderedProperties(org.apache.hudi.common.config.OrderedProperties)

Example 94 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestHoodieTestSuiteJob method getProperties.

/**
 * Builds the properties used by the test suite job: write/key-generator settings,
 * schema and input locations under {@code dfsBasePath}, index and shuffle parallelism,
 * Hive sync options and lock-provider settings.
 *
 * @return a new {@link TypedProperties} instance populated with the test configuration
 */
private static TypedProperties getProperties() {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    props.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", "UNIX_TIMESTAMP");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd");
    props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
    props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/source.avsc");
    props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsBasePath + "/input");
    props.setProperty("hoodie.datasource.hive_sync.assume_date_partitioning", "true");
    props.setProperty("hoodie.datasource.hive_sync.skip_ro_suffix", "true");
    props.setProperty("hoodie.datasource.write.keytranslator.class", "org.apache.hudi.DayBasedPartitionPathKeyTranslator");
    props.setProperty("hoodie.parquet.max.file.size", "1024000");
    props.setProperty("hoodie.index.type", HoodieIndex.IndexType.GLOBAL_SIMPLE.name());
    props.setProperty("hoodie.global.simple.index.parallelism", "2");
    // Reduce shuffle parallelism, spark hangs when numPartitions >> numRecords to process
    props.setProperty("hoodie.insert.shuffle.parallelism", "10");
    props.setProperty("hoodie.upsert.shuffle.parallelism", "10");
    props.setProperty("hoodie.bulkinsert.shuffle.parallelism", "10");
    // Set exactly once; earlier duplicate assignments ("3", then "0") were overwritten
    // by this value and have been removed.
    props.setProperty("hoodie.compact.inline.max.delta.commits", "0");
    // Make path selection test suite specific
    props.setProperty("hoodie.deltastreamer.source.input.selector", DFSTestSuitePathSelector.class.getName());
    // Hive Configs
    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1");
    props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "table1");
    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
    props.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), TimestampBasedKeyGenerator.class.getName());
    // Lock provider configs
    props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider");
    props.setProperty("hoodie.write.lock.hivemetastore.database", "testdb1");
    props.setProperty("hoodie.write.lock.hivemetastore.table", "table1");
    props.setProperty("hoodie.write.lock.zookeeper.url", "127.0.0.1");
    props.setProperty("hoodie.write.lock.zookeeper.port", "2828");
    props.setProperty("hoodie.write.lock.wait_time_ms", "1200000");
    props.setProperty("hoodie.write.lock.num_retries", "10");
    props.setProperty("hoodie.write.lock.zookeeper.lock_key", "test_table");
    props.setProperty("hoodie.write.lock.zookeeper.zk_base_path", "/test");
    return props;
}
Also used : TimestampBasedKeyGenerator(org.apache.hudi.keygen.TimestampBasedKeyGenerator) DFSTestSuitePathSelector(org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector) TypedProperties(org.apache.hudi.common.config.TypedProperties)

Example 95 with TypedProperties

use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

the class TestHoodieTestSuiteJob method initClass.

/**
 * One-time setup for the test class: initializes the shared test base, copies the base
 * properties, schemas and DAG definitions to {@code dfsBasePath}, writes the source and
 * downstream property files there, and lowers Spark's log level.
 *
 * @throws Exception if base initialization or any copy/save to DFS fails
 */
@BeforeAll
public static void initClass() throws Exception {
    UtilitiesTestBase.initClass();
    // All local resources are resolved relative to the parent of the working directory.
    final String parentDir = System.getProperty("user.dir") + "/..";
    // prepare the configs.
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + BASE_PROPERTIES_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/base.properties");
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + SOURCE_SCHEMA_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/source.avsc");
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + TARGET_SCHEMA_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/target.avsc");
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + COW_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + COW_DAG_FILE_NAME);
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + MOR_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + MOR_DAG_FILE_NAME);
    // Written once; a second identical save to the same path was redundant and has been removed.
    TypedProperties props = getProperties();
    UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-source.properties");
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + COW_DAG_SPARK_DATASOURCE_NODES_RELATIVE_PATH, dfs, dfsBasePath + "/" + COW_DAG_FILE_NAME_SPARK_DATASOURCE_NODES);
    UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(parentDir + SPARK_SQL_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + SPARK_SQL_DAG_FILE_NAME);
    // Properties used for the delta-streamer which incrementally pulls from upstream DFS Avro source and
    // writes to downstream hudi table
    TypedProperties downstreamProps = new TypedProperties();
    downstreamProps.setProperty("include", "base.properties");
    downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
    downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp");
    // Source schema is the target schema of upstream table
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
    downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/source.avsc");
    UtilitiesTestBase.Helpers.savePropsToDFS(downstreamProps, dfs, dfsBasePath + "/test-downstream-source.properties");
    // these tests cause a lot of log verbosity from spark, turning it down
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN);
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties) BeforeAll(org.junit.jupiter.api.BeforeAll)

Aggregations

TypedProperties (org.apache.hudi.common.config.TypedProperties)143 Test (org.junit.jupiter.api.Test)47 HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator)22 JavaRDD (org.apache.spark.api.java.JavaRDD)16 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)15 IOException (java.io.IOException)14 Path (org.apache.hadoop.fs.Path)14 Properties (java.util.Properties)13 GenericRecord (org.apache.avro.generic.GenericRecord)13 SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter)12 Row (org.apache.spark.sql.Row)12 BeforeEach (org.junit.jupiter.api.BeforeEach)11 ArrayList (java.util.ArrayList)10 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)10 HoodieKey (org.apache.hudi.common.model.HoodieKey)9 DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration)8 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)8 HoodieIOException (org.apache.hudi.exception.HoodieIOException)8 Dataset (org.apache.spark.sql.Dataset)8 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)7