Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestAvroDFSSource, method prepareDFSSource:
@Override
protected Source prepareDFSSource() {
  TypedProperties props = new TypedProperties();
  props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot);
  try {
    return new AvroDFSSource(props, jsc, sparkSession, schemaProvider);
  } catch (IOException e) {
    return null;
  }
}
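For context, TypedProperties extends java.util.Properties and adds typed getters with defaults. Below is a minimal, hedged sketch of reading such properties back; the keys other than hoodie.deltastreamer.source.dfs.root are hypothetical, and the getter signatures are assumed from the Hudi API rather than copied from this test.

import org.apache.hudi.common.config.TypedProperties;

public class TypedPropertiesSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.dfs.root", "/tmp/hudi-input"); // hypothetical path
    props.setProperty("hoodie.deltastreamer.source.dfs.batch.size", "500");       // hypothetical key

    // Typed getters with defaults (assumed API): missing keys fall back instead of failing.
    String root = props.getString("hoodie.deltastreamer.source.dfs.root");
    int batchSize = props.getInteger("hoodie.deltastreamer.source.dfs.batch.size", 100);
    boolean enabled = props.getBoolean("hoodie.deltastreamer.source.dfs.enabled", true); // hypothetical key

    System.out.println(root + " " + batchSize + " " + enabled);
  }
}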
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieIncrSource, method readAndAssert:
private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option<String> checkpointToPull, int expectedCount, String expectedCheckpoint) {
  Properties properties = new Properties();
  properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath);
  properties.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name());
  TypedProperties typedProperties = new TypedProperties(properties);
  HoodieIncrSource incrSource = new HoodieIncrSource(typedProperties, jsc, sparkSession, new TestSchemaProvider(HoodieTestDataGenerator.AVRO_SCHEMA));
  // read everything until latest
  Pair<Option<Dataset<Row>>, String> batchCheckPoint = incrSource.fetchNextBatch(checkpointToPull, 500);
  Assertions.assertNotNull(batchCheckPoint.getValue());
  if (expectedCount == 0) {
    assertFalse(batchCheckPoint.getKey().isPresent());
  } else {
    assertEquals(batchCheckPoint.getKey().get().count(), expectedCount);
  }
  Assertions.assertEquals(batchCheckPoint.getRight(), expectedCheckpoint);
}
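For illustration, a test could drive this helper roughly as below. This is a fragment inside the same test class, and the enum constant, record count, and commit time are hypothetical values, not taken from the original test.

// Hypothetical invocation: read from the beginning when no checkpoint is supplied,
// then pull again from a known commit time and expect nothing new.
String firstCommit = "20220101000000"; // hypothetical instant time
readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT,
    Option.empty(), 100, firstCommit);
readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT,
    Option.of(firstCommit), 0, firstCommit);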
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class HoodieTableConfig, method modify:
private static void modify(FileSystem fs, Path metadataFolder, Properties modifyProps, BiConsumer<Properties, Properties> modifyFn) {
  Path cfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
  Path backupCfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP);
  try {
    // 0. do any recovery from prior attempts.
    recoverIfNeeded(fs, cfgPath, backupCfgPath);
    // 1. backup the existing properties.
    try (FSDataInputStream in = fs.open(cfgPath);
         FSDataOutputStream out = fs.create(backupCfgPath, false)) {
      FileIOUtils.copy(in, out);
    }
    // 2. delete the properties file; reads will go to the backup until we are done.
    fs.delete(cfgPath, false);
    // 3. read current props, upsert and save back.
    String checksum;
    try (FSDataInputStream in = fs.open(backupCfgPath);
         FSDataOutputStream out = fs.create(cfgPath, true)) {
      Properties props = new TypedProperties();
      props.load(in);
      modifyFn.accept(props, modifyProps);
      checksum = storeProperties(props, out);
    }
    // 4. verify and remove backup.
    try (FSDataInputStream in = fs.open(cfgPath)) {
      Properties props = new TypedProperties();
      props.load(in);
      if (!props.containsKey(TABLE_CHECKSUM.key()) || !props.getProperty(TABLE_CHECKSUM.key()).equals(checksum)) {
        // delete the properties file and throw exception indicating update failure
        // subsequent writes will recover and update, reads will go to the backup until then
        fs.delete(cfgPath, false);
        throw new HoodieIOException("Checksum property missing or does not match.");
      }
    }
    fs.delete(backupCfgPath, false);
  } catch (IOException e) {
    throw new HoodieIOException("Error updating table configs.", e);
  }
}
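The modifyFn parameter lets callers reuse this backup/verify sequence for both property updates and deletions. A hedged sketch of what such callers could look like follows; the wrapper method names and lambdas are illustrative, not the exact entry points in HoodieTableConfig.

// Hypothetical wrappers around modify(); in Hudi the real entry points live in HoodieTableConfig.
static void updateSketch(FileSystem fs, Path metadataFolder, Properties updatedProps) {
  // Upsert: copy every new entry over the current properties before storing them back.
  modify(fs, metadataFolder, updatedProps, (current, updates) -> current.putAll(updates));
}

static void deleteSketch(FileSystem fs, Path metadataFolder, Properties deletedProps) {
  // Delete: drop every listed key from the current properties.
  modify(fs, metadataFolder, deletedProps, (current, deletes) -> deletes.keySet().forEach(current::remove));
}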
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieTestSuiteJob, method getProperties:
private static TypedProperties getProperties() {
  TypedProperties props = new TypedProperties();
  props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
  props.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp");
  props.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", "UNIX_TIMESTAMP");
  props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd");
  props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
  props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/source.avsc");
  props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsBasePath + "/input");
  props.setProperty("hoodie.datasource.hive_sync.assume_date_partitioning", "true");
  props.setProperty("hoodie.datasource.hive_sync.skip_ro_suffix", "true");
  props.setProperty("hoodie.datasource.write.keytranslator.class", "org.apache.hudi" + ".DayBasedPartitionPathKeyTranslator");
  props.setProperty("hoodie.compact.inline.max.delta.commits", "3");
  props.setProperty("hoodie.parquet.max.file.size", "1024000");
  props.setProperty("hoodie.compact.inline.max.delta.commits", "0");
  props.setProperty("hoodie.index.type", HoodieIndex.IndexType.GLOBAL_SIMPLE.name());
  props.setProperty("hoodie.global.simple.index.parallelism", "2");
  // Reduce shuffle parallelism, spark hangs when numPartitions >> numRecords to process
  props.setProperty("hoodie.insert.shuffle.parallelism", "10");
  props.setProperty("hoodie.upsert.shuffle.parallelism", "10");
  props.setProperty("hoodie.bulkinsert.shuffle.parallelism", "10");
  props.setProperty("hoodie.compact.inline.max.delta.commits", "0");
  // Make path selection test suite specific
  props.setProperty("hoodie.deltastreamer.source.input.selector", DFSTestSuitePathSelector.class.getName());
  // Hive Configs
  props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
  props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1");
  props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "table1");
  props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
  props.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), TimestampBasedKeyGenerator.class.getName());
  props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider");
  props.setProperty("hoodie.write.lock.hivemetastore.database", "testdb1");
  props.setProperty("hoodie.write.lock.hivemetastore.table", "table1");
  props.setProperty("hoodie.write.lock.zookeeper.url", "127.0.0.1");
  props.setProperty("hoodie.write.lock.zookeeper.port", "2828");
  props.setProperty("hoodie.write.lock.wait_time_ms", "1200000");
  props.setProperty("hoodie.write.lock.num_retries", "10");
  props.setProperty("hoodie.write.lock.zookeeper.lock_key", "test_table");
  props.setProperty("hoodie.write.lock.zookeeper.zk_base_path", "/test");
  return props;
}
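The test suite persists this property set so the job can load it later (see initClass below, which writes it to DFS). A minimal, local-filesystem sketch of that round trip is shown next; it uses plain java.util.Properties I/O, the file path is hypothetical, and the DFS helpers used by the real test are not shown.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.hudi.common.config.TypedProperties;

public class PropsRoundTripSketch {
  public static void main(String[] args) throws Exception {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.insert.shuffle.parallelism", "10");
    props.setProperty("hoodie.upsert.shuffle.parallelism", "10");

    // Store to a local file (the test writes to DFS instead; this path is hypothetical).
    try (FileOutputStream out = new FileOutputStream("/tmp/test-source.properties")) {
      props.store(out, "test suite configs");
    }

    // Reload into a fresh TypedProperties and read a value back with a typed getter (assumed API).
    TypedProperties loaded = new TypedProperties();
    try (FileInputStream in = new FileInputStream("/tmp/test-source.properties")) {
      loaded.load(in);
    }
    System.out.println(loaded.getInteger("hoodie.insert.shuffle.parallelism", 1));
  }
}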
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
From the class TestHoodieTestSuiteJob, method initClass:
@BeforeAll
public static void initClass() throws Exception {
  UtilitiesTestBase.initClass();
  // prepare the configs.
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + BASE_PROPERTIES_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/base.properties");
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + SOURCE_SCHEMA_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/source.avsc");
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + TARGET_SCHEMA_DOCKER_DEMO_RELATIVE_PATH, dfs, dfsBasePath + "/target.avsc");
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + COW_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + COW_DAG_FILE_NAME);
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + MOR_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + MOR_DAG_FILE_NAME);
  TypedProperties props = getProperties();
  UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-source" + ".properties");
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + COW_DAG_SPARK_DATASOURCE_NODES_RELATIVE_PATH, dfs, dfsBasePath + "/" + COW_DAG_FILE_NAME_SPARK_DATASOURCE_NODES);
  UtilitiesTestBase.Helpers.savePropsToDFS(getProperties(), dfs, dfsBasePath + "/test-source" + ".properties");
  UtilitiesTestBase.Helpers.copyToDFSFromAbsolutePath(System.getProperty("user.dir") + "/.." + SPARK_SQL_DAG_SOURCE_PATH, dfs, dfsBasePath + "/" + SPARK_SQL_DAG_FILE_NAME);
  // Properties used for the delta-streamer which incrementally pulls from upstream DFS Avro source and
  // writes to downstream hudi table
  TypedProperties downstreamProps = new TypedProperties();
  downstreamProps.setProperty("include", "base.properties");
  downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
  downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "timestamp");
  // Source schema is the target schema of upstream table
  downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
  downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/source.avsc");
  UtilitiesTestBase.Helpers.savePropsToDFS(downstreamProps, dfs, dfsBasePath + "/test-downstream-source.properties");
  // these tests cause a lot of log verbosity from spark, turning it down
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN);
}
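The include entry in downstreamProps points at base.properties; at runtime Hudi's property loading resolves such includes before applying the file's own keys. Below is a conceptual sketch of that layering using plain java.util.Properties; the file names match the ones written above, the local paths are hypothetical, and the merge is a simplification of what the real loader does.

import java.io.FileInputStream;
import java.util.Properties;
import org.apache.hudi.common.config.TypedProperties;

public class IncludeOverlaySketch {
  public static void main(String[] args) throws Exception {
    // Load the included base file first ...
    Properties base = new Properties();
    try (FileInputStream in = new FileInputStream("/tmp/base.properties")) { // hypothetical local copy
      base.load(in);
    }
    // ... then overlay the downstream file so its keys win on conflicts.
    Properties downstream = new Properties();
    try (FileInputStream in = new FileInputStream("/tmp/test-downstream-source.properties")) {
      downstream.load(in);
    }
    TypedProperties merged = new TypedProperties(base);
    merged.putAll(downstream);
    System.out.println(merged.getString("hoodie.datasource.write.recordkey.field", "uuid")); // assumed getter
  }
}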