Example 6 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From the class HoodieMultiTableDeltaStreamer, method populateTableExecutionContextList.

// The common properties passed in contain the table-to-config-file mapping.
private void populateTableExecutionContextList(TypedProperties properties, String configFolder, FileSystem fs, Config config) throws IOException {
    List<String> tablesToBeIngested = getTablesToBeIngested(properties);
    logger.info("tables to be ingested via MultiTableDeltaStreamer : " + tablesToBeIngested);
    TableExecutionContext executionContext;
    for (String table : tablesToBeIngested) {
        String[] tableWithDatabase = table.split("\\.");
        String database = tableWithDatabase.length > 1 ? tableWithDatabase[0] : "default";
        String currentTable = tableWithDatabase.length > 1 ? tableWithDatabase[1] : table;
        String configProp = Constants.INGESTION_PREFIX + database + Constants.DELIMITER + currentTable + Constants.INGESTION_CONFIG_SUFFIX;
        String configFilePath = properties.getString(configProp, Helpers.getDefaultConfigFilePath(configFolder, database, currentTable));
        checkIfTableConfigFileExists(configFolder, fs, configFilePath);
        TypedProperties tableProperties = UtilHelpers.readConfig(fs.getConf(), new Path(configFilePath), new ArrayList<String>()).getProps();
        // Fall back to the common properties for any key the table-level config does not define.
        properties.forEach((k, v) -> {
            if (tableProperties.get(k) == null) {
                tableProperties.setProperty(k.toString(), v.toString());
            }
        });
        final HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
        String targetBasePath = resetTarget(config, database, currentTable);
        // Copy all the values from config to cfg.
        Helpers.deepCopyConfigs(config, cfg);
        String overriddenTargetBasePath = tableProperties.getString(Constants.TARGET_BASE_PATH_PROP, "");
        cfg.targetBasePath = StringUtils.isNullOrEmpty(overriddenTargetBasePath) ? targetBasePath : overriddenTargetBasePath;
        if (cfg.enableMetaSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE().key(), ""))) {
            throw new HoodieException("Meta sync table field not provided!");
        }
        populateSchemaProviderProps(cfg, tableProperties);
        executionContext = new TableExecutionContext();
        executionContext.setProperties(tableProperties);
        executionContext.setConfig(cfg);
        executionContext.setDatabase(database);
        executionContext.setTableName(currentTable);
        this.tableExecutionContexts.add(executionContext);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), HoodieException (org.apache.hudi.exception.HoodieException), TypedProperties (org.apache.hudi.common.config.TypedProperties)
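
The forEach loop above is the standard TypedProperties layering idiom: table-level values win, and any key the table config misses falls back to the common properties. A minimal standalone sketch of that idiom and of the typed getters TypedProperties adds on top of java.util.Properties (the keys and values below are illustrative, not taken from a real deployment):

import org.apache.hudi.common.config.TypedProperties;

public class TypedPropertiesMergeSketch {
    public static void main(String[] args) {
        // Common properties, e.g. loaded from a shared config file.
        TypedProperties commonProps = new TypedProperties();
        commonProps.setProperty("hoodie.datasource.write.operation", "upsert");
        commonProps.setProperty("hoodie.upsert.shuffle.parallelism", "200");

        // Table-level properties override only the keys they define.
        TypedProperties tableProps = new TypedProperties();
        tableProps.setProperty("hoodie.upsert.shuffle.parallelism", "50");

        // Same idiom as populateTableExecutionContextList: copy a common
        // value only when the table-level config does not define the key.
        commonProps.forEach((k, v) -> {
            if (tableProps.get(k) == null) {
                tableProps.setProperty(k.toString(), v.toString());
            }
        });

        // Typed getters with defaults avoid manual string parsing.
        int parallelism = tableProps.getInteger("hoodie.upsert.shuffle.parallelism", 100);
        String operation = tableProps.getString("hoodie.datasource.write.operation", "upsert");
        System.out.println(operation + " with parallelism " + parallelism); // prints: upsert with parallelism 50
    }
}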

Example 7 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From the class HoodieFlinkStreamer, method main.

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final FlinkStreamerConfig cfg = new FlinkStreamerConfig();
    JCommander cmd = new JCommander(cfg, null, args);
    if (cfg.help || args.length == 0) {
        cmd.usage();
        System.exit(1);
    }
    env.enableCheckpointing(cfg.checkpointInterval);
    env.getConfig().setGlobalJobParameters(cfg);
    // We use checkpoints to trigger the write operation, including instant generation and committing;
    // there can only be one checkpoint in flight at a time.
    env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
    env.setStateBackend(cfg.stateBackend);
    if (cfg.flinkCheckPointPath != null) {
        env.getCheckpointConfig().setCheckpointStorage(cfg.flinkCheckPointPath);
    }
    TypedProperties kafkaProps = DFSPropertiesConfiguration.getGlobalProps();
    kafkaProps.putAll(StreamerUtil.appendKafkaProps(cfg));
    // Read from kafka source
    RowType rowType = (RowType) AvroSchemaConverter.convertToDataType(StreamerUtil.getSourceSchema(cfg)).getLogicalType();
    Configuration conf = FlinkStreamerConfig.toFlinkConfig(cfg);
    long ckpTimeout = env.getCheckpointConfig().getCheckpointTimeout();
    int parallelism = env.getParallelism();
    conf.setLong(FlinkOptions.WRITE_COMMIT_ACK_TIMEOUT, ckpTimeout);
    DataStream<RowData> dataStream = env.addSource(new FlinkKafkaConsumer<>(cfg.kafkaTopic, new JsonRowDataDeserializationSchema(rowType, InternalTypeInfo.of(rowType), false, true, TimestampFormat.ISO_8601), kafkaProps)).name("kafka_source").uid("uid_kafka_source");
    if (cfg.transformerClassNames != null && !cfg.transformerClassNames.isEmpty()) {
        Option<Transformer> transformer = StreamerUtil.createTransformer(cfg.transformerClassNames);
        if (transformer.isPresent()) {
            dataStream = transformer.get().apply(dataStream);
        }
    }
    DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream);
    DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream);
    if (StreamerUtil.needsAsyncCompaction(conf)) {
        Pipelines.compact(conf, pipeline);
    } else {
        Pipelines.clean(conf, pipeline);
    }
    env.execute(cfg.targetTableName);
}
Also used: Transformer (org.apache.hudi.sink.transform.Transformer), Configuration (org.apache.flink.configuration.Configuration), DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), RowType (org.apache.flink.table.types.logical.RowType), TypedProperties (org.apache.hudi.common.config.TypedProperties), JsonRowDataDeserializationSchema (org.apache.flink.formats.json.JsonRowDataDeserializationSchema), RowData (org.apache.flink.table.data.RowData), JCommander (com.beust.jcommander.JCommander), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
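
The Kafka consumer properties are assembled the same way: Hudi's global properties (loaded by DFSPropertiesConfiguration, typically from an external hudi-defaults.conf) form the base, and the streamer's own Kafka settings are layered on top with putAll. A minimal sketch under those assumptions; the consumer keys and values below are hypothetical placeholders:

import java.util.Properties;
import org.apache.hudi.common.config.DFSPropertiesConfiguration;
import org.apache.hudi.common.config.TypedProperties;

public class KafkaPropsSketch {
    public static void main(String[] args) {
        // Base layer: Hudi's global properties (empty if no defaults file is configured).
        TypedProperties kafkaProps = DFSPropertiesConfiguration.getGlobalProps();

        // Overlay: streamer-specific consumer settings (hypothetical values).
        Properties streamerProps = new Properties();
        streamerProps.setProperty("bootstrap.servers", "localhost:9092");
        streamerProps.setProperty("group.id", "hoodie-flink-streamer");
        kafkaProps.putAll(streamerProps);

        // Typed getter with a default, as a consumer of these props might read it.
        System.out.println(kafkaProps.getString("bootstrap.servers", "localhost:9092"));
    }
}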

Example 8 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From the class TestHiveMetastoreBasedLockProvider, method init.

@BeforeEach
public void init() throws Exception {
    TypedProperties properties = new TypedProperties();
    properties.setProperty(HIVE_DATABASE_NAME_PROP_KEY, TEST_DB_NAME);
    properties.setProperty(HIVE_TABLE_NAME_PROP_KEY, TEST_TABLE_NAME);
    properties.setProperty(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY, DEFAULT_LOCK_ACQUIRE_NUM_RETRIES);
    properties.setProperty(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY, DEFAULT_LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS);
    properties.setProperty(ZK_CONNECT_URL_PROP_KEY, zkService().connectString());
    properties.setProperty(ZK_PORT_PROP_KEY, hiveConf().get("hive.zookeeper.client.port"));
    properties.setProperty(ZK_SESSION_TIMEOUT_MS_PROP_KEY, hiveConf().get("hive.zookeeper.session.timeout"));
    properties.setProperty(ZK_CONNECTION_TIMEOUT_MS_PROP_KEY, String.valueOf(DEFAULT_ZK_CONNECTION_TIMEOUT_MS));
    properties.setProperty(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, String.valueOf(1000));
    lockConfiguration = new LockConfiguration(properties);
    lockComponent.setTablename(TEST_TABLE_NAME);
}
Also used: LockConfiguration (org.apache.hudi.common.config.LockConfiguration), TypedProperties (org.apache.hudi.common.config.TypedProperties), BeforeEach (org.junit.jupiter.api.BeforeEach)
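
LockConfiguration is a thin wrapper around the TypedProperties it is constructed with, so a lock provider can read the same keys back through typed getters. A minimal sketch, assuming the getConfig() accessor and the key constants used in the test above:

import org.apache.hudi.common.config.LockConfiguration;
import org.apache.hudi.common.config.TypedProperties;

import static org.apache.hudi.common.config.LockConfiguration.LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY;
import static org.apache.hudi.common.config.LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY;

public class LockConfigSketch {
    public static void main(String[] args) {
        // All values go in as strings, exactly as in the @BeforeEach above.
        TypedProperties props = new TypedProperties();
        props.setProperty(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY, "3");
        props.setProperty(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, String.valueOf(1000));

        LockConfiguration lockConfiguration = new LockConfiguration(props);

        // The typed getters parse the stored strings on read.
        int retries = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY, 3);
        long waitMs = lockConfiguration.getConfig().getLong(LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, 1000L);
        System.out.println(retries + " retries, " + waitMs + " ms wait timeout");
    }
}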

Example 9 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From the class TestComplexKeyGenerator, method getCommonProps.

private TypedProperties getCommonProps(boolean getComplexRecordKey) {
    TypedProperties properties = new TypedProperties();
    if (getComplexRecordKey) {
        properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key, pii_col");
    } else {
        properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
    }
    properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key(), "true");
    return properties;
}
Also used: TypedProperties (org.apache.hudi.common.config.TypedProperties)
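
A quick usage sketch for this helper, as a hypothetical companion test (not part of the original class): with getComplexRecordKey=true, the comma-separated value is parsed into two record key fields. The partition path field is set explicitly because the helper does not provide one:

@Test
public void testComplexRecordKeyFromCommonProps() {
    // Assumes the same class scope and imports as the examples above.
    TypedProperties properties = getCommonProps(true);
    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
    ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(properties);
    // "_row_key, pii_col" yields two record key fields.
    assertEquals(2, keyGenerator.getRecordKeyFields().size());
}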

Example 10 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

From the class TestComplexKeyGenerator, method testSingleValueKeyGenerator.

@Test
public void testSingleValueKeyGenerator() {
    TypedProperties properties = new TypedProperties();
    properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
    ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
    assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1);
    assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1);
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
    GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
    String rowKey = record.get("_row_key").toString();
    String partitionPath = record.get("timestamp").toString();
    HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
    assertEquals("_row_key:" + rowKey, hoodieKey.getRecordKey());
    assertEquals(partitionPath, hoodieKey.getPartitionPath());
    Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA, AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
    Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(row), partitionPath);
    InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
    Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(internalRow, row.schema()), partitionPath);
}
Also used: HoodieKey (org.apache.hudi.common.model.HoodieKey), InternalRow (org.apache.spark.sql.catalyst.InternalRow), Row (org.apache.spark.sql.Row), TypedProperties (org.apache.hudi.common.config.TypedProperties), GenericRecord (org.apache.avro.generic.GenericRecord), HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator), Test (org.junit.jupiter.api.Test)
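
For completeness, a hedged sketch of the multi-field case as another hypothetical companion test. It assumes the composite key format that ComplexKeyGenerator produces in the Hudi version these examples come from: each record key field rendered as name:value, joined by commas:

@Test
public void testMultiFieldKeyShape() {
    // Assumes the same class scope and imports as the test above.
    TypedProperties properties = new TypedProperties();
    properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key,timestamp");
    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
    ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(properties);
    GenericRecord record = new HoodieTestDataGenerator().generateGenericRecords(1).get(0);
    HoodieKey hoodieKey = keyGenerator.getKey(record);
    // Expected shape: "_row_key:<value>,timestamp:<value>".
    assertEquals("_row_key:" + record.get("_row_key") + ",timestamp:" + record.get("timestamp"), hoodieKey.getRecordKey());
}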

Aggregations

TypedProperties (org.apache.hudi.common.config.TypedProperties): 143
Test (org.junit.jupiter.api.Test): 47
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 22
JavaRDD (org.apache.spark.api.java.JavaRDD): 16
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 15
IOException (java.io.IOException): 14
Path (org.apache.hadoop.fs.Path): 14
Properties (java.util.Properties): 13
GenericRecord (org.apache.avro.generic.GenericRecord): 13
SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter): 12
Row (org.apache.spark.sql.Row): 12
BeforeEach (org.junit.jupiter.api.BeforeEach): 11
ArrayList (java.util.ArrayList): 10
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 10
HoodieKey (org.apache.hudi.common.model.HoodieKey): 9
DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration): 8
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 8
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 8
Dataset (org.apache.spark.sql.Dataset): 8
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 7