Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class HoodieMultiTableDeltaStreamer, method populateTableExecutionContextList.
// commonProps are passed in as a parameter and contain the table-to-config-file mapping
private void populateTableExecutionContextList(TypedProperties properties, String configFolder, FileSystem fs, Config config) throws IOException {
  List<String> tablesToBeIngested = getTablesToBeIngested(properties);
  logger.info("tables to be ingested via MultiTableDeltaStreamer : " + tablesToBeIngested);
  TableExecutionContext executionContext;
  for (String table : tablesToBeIngested) {
    String[] tableWithDatabase = table.split("\\.");
    String database = tableWithDatabase.length > 1 ? tableWithDatabase[0] : "default";
    String currentTable = tableWithDatabase.length > 1 ? tableWithDatabase[1] : table;
    String configProp = Constants.INGESTION_PREFIX + database + Constants.DELIMITER + currentTable + Constants.INGESTION_CONFIG_SUFFIX;
    String configFilePath = properties.getString(configProp, Helpers.getDefaultConfigFilePath(configFolder, database, currentTable));
    checkIfTableConfigFileExists(configFolder, fs, configFilePath);
    TypedProperties tableProperties = UtilHelpers.readConfig(fs.getConf(), new Path(configFilePath), new ArrayList<String>()).getProps();
    // fill in common properties without overriding table-level settings
    properties.forEach((k, v) -> {
      if (tableProperties.get(k) == null) {
        tableProperties.setProperty(k.toString(), v.toString());
      }
    });
    final HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
    // copy all the values from config to cfg
    String targetBasePath = resetTarget(config, database, currentTable);
    Helpers.deepCopyConfigs(config, cfg);
    String overriddenTargetBasePath = tableProperties.getString(Constants.TARGET_BASE_PATH_PROP, "");
    cfg.targetBasePath = StringUtils.isNullOrEmpty(overriddenTargetBasePath) ? targetBasePath : overriddenTargetBasePath;
    if (cfg.enableMetaSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE().key(), ""))) {
      throw new HoodieException("Meta sync table field not provided!");
    }
    populateSchemaProviderProps(cfg, tableProperties);
    executionContext = new TableExecutionContext();
    executionContext.setProperties(tableProperties);
    executionContext.setConfig(cfg);
    executionContext.setDatabase(database);
    executionContext.setTableName(currentTable);
    this.tableExecutionContexts.add(executionContext);
  }
}
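For context, a minimal self-contained sketch of the merge pattern used above: common properties are overlaid onto per-table TypedProperties so that table-level keys always win. The Hudi write-option keys and values below are purely illustrative.

import org.apache.hudi.common.config.TypedProperties;

public class TablePropsMergeSketch {
  public static void main(String[] args) {
    TypedProperties commonProps = new TypedProperties();
    commonProps.setProperty("hoodie.datasource.write.recordkey.field", "id");
    commonProps.setProperty("hoodie.datasource.write.partitionpath.field", "ts");

    TypedProperties tableProps = new TypedProperties();
    // table-level override; must survive the merge
    tableProps.setProperty("hoodie.datasource.write.recordkey.field", "uuid");

    // same pattern as populateTableExecutionContextList: only copy keys
    // that the table config did not set itself
    commonProps.forEach((k, v) -> {
      if (tableProps.get(k) == null) {
        tableProps.setProperty(k.toString(), v.toString());
      }
    });

    System.out.println(tableProps.getString("hoodie.datasource.write.recordkey.field"));     // uuid
    System.out.println(tableProps.getString("hoodie.datasource.write.partitionpath.field")); // ts
  }
}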
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class HoodieFlinkStreamer, method main.
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  final FlinkStreamerConfig cfg = new FlinkStreamerConfig();
  JCommander cmd = new JCommander(cfg, null, args);
  if (cfg.help || args.length == 0) {
    cmd.usage();
    System.exit(1);
  }
  env.enableCheckpointing(cfg.checkpointInterval);
  env.getConfig().setGlobalJobParameters(cfg);
  // We use checkpoints to trigger the write operation, including instant generation and committing.
  // There can only be one checkpoint at a time.
  env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
  env.setStateBackend(cfg.stateBackend);
  if (cfg.flinkCheckPointPath != null) {
    env.getCheckpointConfig().setCheckpointStorage(cfg.flinkCheckPointPath);
  }
  TypedProperties kafkaProps = DFSPropertiesConfiguration.getGlobalProps();
  kafkaProps.putAll(StreamerUtil.appendKafkaProps(cfg));
  // Read from the Kafka source
  RowType rowType = (RowType) AvroSchemaConverter.convertToDataType(StreamerUtil.getSourceSchema(cfg)).getLogicalType();
  Configuration conf = FlinkStreamerConfig.toFlinkConfig(cfg);
  long ckpTimeout = env.getCheckpointConfig().getCheckpointTimeout();
  int parallelism = env.getParallelism();
  conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, ckpTimeout);
  DataStream<RowData> dataStream = env.addSource(new FlinkKafkaConsumer<>(
          cfg.kafkaTopic,
          new JsonRowDataDeserializationSchema(rowType, InternalTypeInfo.of(rowType), false, true, TimestampFormat.ISO_8601),
          kafkaProps))
      .name("kafka_source")
      .uid("uid_kafka_source");
  if (cfg.transformerClassNames != null && !cfg.transformerClassNames.isEmpty()) {
    Option<Transformer> transformer = StreamerUtil.createTransformer(cfg.transformerClassNames);
    if (transformer.isPresent()) {
      dataStream = transformer.get().apply(dataStream);
    }
  }
  DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream);
  DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream);
  if (StreamerUtil.needsAsyncCompaction(conf)) {
    Pipelines.compact(conf, pipeline);
  } else {
    Pipelines.clean(conf, pipeline);
  }
  env.execute(cfg.targetTableName);
}
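The kafkaProps built above are plain TypedProperties, which extend java.util.Properties and can therefore be handed straight to the Kafka consumer. A minimal self-contained sketch, with broker address, group id, and offset policy as placeholder values:

import org.apache.hudi.common.config.TypedProperties;

public class KafkaPropsSketch {
  public static void main(String[] args) {
    TypedProperties kafkaProps = new TypedProperties();
    kafkaProps.setProperty("bootstrap.servers", "localhost:9092"); // standard Kafka client key, placeholder value
    kafkaProps.setProperty("group.id", "hudi-flink-streamer");     // placeholder consumer group
    kafkaProps.setProperty("auto.offset.reset", "earliest");

    // typed read with a default instead of raw Properties.getProperty
    String offsetReset = kafkaProps.getString("auto.offset.reset", "latest");
    System.out.println(offsetReset); // earliest
  }
}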
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestHiveMetastoreBasedLockProvider, method init.
@BeforeEach
public void init() throws Exception {
  TypedProperties properties = new TypedProperties();
  properties.setProperty(HIVE_DATABASE_NAME_PROP_KEY, TEST_DB_NAME);
  properties.setProperty(HIVE_TABLE_NAME_PROP_KEY, TEST_TABLE_NAME);
  properties.setProperty(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY, DEFAULT_LOCK_ACQUIRE_NUM_RETRIES);
  properties.setProperty(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY, DEFAULT_LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS);
  properties.setProperty(ZK_CONNECT_URL_PROP_KEY, zkService().connectString());
  properties.setProperty(ZK_PORT_PROP_KEY, hiveConf().get("hive.zookeeper.client.port"));
  properties.setProperty(ZK_SESSION_TIMEOUT_MS_PROP_KEY, hiveConf().get("hive.zookeeper.session.timeout"));
  properties.setProperty(ZK_CONNECTION_TIMEOUT_MS_PROP_KEY, String.valueOf(DEFAULT_ZK_CONNECTION_TIMEOUT_MS));
  properties.setProperty(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, String.valueOf(1000));
  lockConfiguration = new LockConfiguration(properties);
  lockComponent.setTablename(TEST_TABLE_NAME);
}
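A short fragment, not part of the test above, showing how the same keys can be read back as typed values once wrapped in LockConfiguration. It assumes the same static imports as the test and that LockConfiguration exposes its TypedProperties via a getConfig() accessor; the default values are arbitrary.

TypedProperties props = lockConfiguration.getConfig();                        // assumed accessor
String db = props.getString(HIVE_DATABASE_NAME_PROP_KEY);                     // typed getter, throws if the key is missing
int retries = props.getInteger(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY, 3);         // default used when the key is absent
long waitMs = props.getLong(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, 60_000L);  // default used when the key is absent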
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestComplexKeyGenerator, method getCommonProps.
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
  TypedProperties properties = new TypedProperties();
  if (getComplexRecordKey) {
    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key, pii_col");
  } else {
    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
  }
  properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key(), "true");
  return properties;
}
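A short fragment showing how these common props are typically completed and handed to a key generator; the partition path field added here is for illustration only.

TypedProperties props = getCommonProps(true);
props.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
ComplexKeyGenerator keyGen = new ComplexKeyGenerator(props);
// record keys are built from "_row_key, pii_col", partition paths from "timestamp",
// with hive-style partitioning enabled by the flag set above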
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestComplexKeyGenerator, method testSingleValueKeyGenerator.
@Test
public void testSingleValueKeyGenerator() {
  TypedProperties properties = new TypedProperties();
  properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
  properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
  ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
  assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1);
  assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1);
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
  GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
  String rowKey = record.get("_row_key").toString();
  String partitionPath = record.get("timestamp").toString();
  HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
  assertEquals("_row_key:" + rowKey, hoodieKey.getRecordKey());
  assertEquals(partitionPath, hoodieKey.getPartitionPath());
  Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA,
      AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
  Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(row), partitionPath);
  InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
  Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(internalRow, row.schema()), partitionPath);
}
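As a counterpart to the single-field case above, a hedged sketch of a multi-field configuration. The expected key shape (comma-joined field:value pairs and slash-joined partition values) is inferred from the single-field assertion above, not verified here, and the rider/driver fields are assumed to exist in the test data generator's trip schema.

TypedProperties props = new TypedProperties();
props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key,timestamp");
props.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "rider,driver");
ComplexKeyGenerator keyGen = new ComplexKeyGenerator(props);
GenericRecord record = new HoodieTestDataGenerator().generateGenericRecords(1).get(0);
HoodieKey key = keyGen.getKey(record);
// expected shape: record key "_row_key:<val>,timestamp:<val>", partition path "<rider>/<driver>"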