Example 16 with HiveSyncConfig

Use of org.apache.hudi.hive.HiveSyncConfig in project hudi by apache.

From class TestDataSourceUtils, method testBuildHiveSyncConfig.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testBuildHiveSyncConfig(boolean useSyncMode) {
    TypedProperties props = new TypedProperties();
    if (useSyncMode) {
        props.setProperty(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), HMS.name());
        props.setProperty(DataSourceWriteOptions.HIVE_USE_JDBC().key(), String.valueOf(false));
    }
    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), HIVE_DATABASE);
    props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), HIVE_TABLE);
    HiveSyncConfig hiveSyncConfig = DataSourceUtils.buildHiveSyncConfig(props, config.getBasePath(), PARQUET.name());
    if (useSyncMode) {
        assertFalse(hiveSyncConfig.useJdbc);
        assertEquals(HMS.name(), hiveSyncConfig.syncMode);
    } else {
        assertTrue(hiveSyncConfig.useJdbc);
        assertNull(hiveSyncConfig.syncMode);
    }
    assertEquals(HIVE_DATABASE, hiveSyncConfig.databaseName);
    assertEquals(HIVE_TABLE, hiveSyncConfig.tableName);
}
Also used : TypedProperties(org.apache.hudi.common.config.TypedProperties) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
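
For reference, a minimal caller-side sketch of the two branches the test exercises. This is a sketch only; it assumes HMS in the test is the statically imported org.apache.hudi.hive.ddl.HiveSyncMode.HMS and that boolean options accept string values, as in the test itself.

TypedProperties props = new TypedProperties();
// sync-mode branch: explicit HMS sync with JDBC turned off
props.setProperty(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), HiveSyncMode.HMS.name());
props.setProperty(DataSourceWriteOptions.HIVE_USE_JDBC().key(), "false");
// legacy branch: set neither option; useJdbc then defaults to true and syncMode stays null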

Example 17 with HiveSyncConfig

Use of org.apache.hudi.hive.HiveSyncConfig in project hudi by apache.

From class DataSourceUtils, method buildHiveSyncConfig.

public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath, String baseFileFormat) {
    checkRequiredProperties(props, Collections.singletonList(DataSourceWriteOptions.HIVE_TABLE().key()));
    HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
    hiveSyncConfig.basePath = basePath;
    hiveSyncConfig.usePreApacheInputFormat = props.getBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT().key(), Boolean.parseBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT().defaultValue()));
    hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE().key(), DataSourceWriteOptions.HIVE_DATABASE().defaultValue());
    hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE().key());
    hiveSyncConfig.baseFileFormat = baseFileFormat;
    hiveSyncConfig.hiveUser = props.getString(DataSourceWriteOptions.HIVE_USER().key(), DataSourceWriteOptions.HIVE_USER().defaultValue());
    hiveSyncConfig.hivePass = props.getString(DataSourceWriteOptions.HIVE_PASS().key(), DataSourceWriteOptions.HIVE_PASS().defaultValue());
    hiveSyncConfig.jdbcUrl = props.getString(DataSourceWriteOptions.HIVE_URL().key(), DataSourceWriteOptions.HIVE_URL().defaultValue());
    hiveSyncConfig.metastoreUris = props.getString(DataSourceWriteOptions.METASTORE_URIS().key(), DataSourceWriteOptions.METASTORE_URIS().defaultValue());
    hiveSyncConfig.partitionFields = props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), ",", new ArrayList<>());
    hiveSyncConfig.partitionValueExtractorClass = props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), SlashEncodedDayPartitionValueExtractor.class.getName());
    hiveSyncConfig.useJdbc = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_USE_JDBC().key(), DataSourceWriteOptions.HIVE_USE_JDBC().defaultValue()));
    if (props.containsKey(DataSourceWriteOptions.HIVE_SYNC_MODE().key())) {
        hiveSyncConfig.syncMode = props.getString(DataSourceWriteOptions.HIVE_SYNC_MODE().key());
    }
    hiveSyncConfig.autoCreateDatabase = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE().key(), DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE().defaultValue()));
    hiveSyncConfig.ignoreExceptions = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().key(), DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().defaultValue()));
    hiveSyncConfig.skipROSuffix = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE().key(), DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE().defaultValue()));
    hiveSyncConfig.supportTimestamp = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP_TYPE().key(), DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP_TYPE().defaultValue()));
    hiveSyncConfig.isConditionalSync = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_CONDITIONAL_SYNC().key(), DataSourceWriteOptions.HIVE_CONDITIONAL_SYNC().defaultValue()));
    hiveSyncConfig.bucketSpec = props.getBoolean(DataSourceWriteOptions.HIVE_SYNC_BUCKET_SYNC().key(), (boolean) DataSourceWriteOptions.HIVE_SYNC_BUCKET_SYNC().defaultValue())
        ? HiveSyncConfig.getBucketSpec(props.getString(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key()), props.getInteger(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key()))
        : null;
    if (props.containsKey(HiveExternalCatalog.CREATED_SPARK_VERSION())) {
        hiveSyncConfig.sparkVersion = props.getString(HiveExternalCatalog.CREATED_SPARK_VERSION());
    }
    hiveSyncConfig.syncComment = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SYNC_COMMENT().key(), DataSourceWriteOptions.HIVE_SYNC_COMMENT().defaultValue()));
    return hiveSyncConfig;
}
Also used : ArrayList(java.util.ArrayList) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig)
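
A minimal usage sketch for the builder above. The database, table, and base path values are hypothetical; the base file format string mirrors PARQUET.name() as used in the test in Example 16, and HIVE_TABLE is the one required property.

TypedProperties props = new TypedProperties();
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "analytics"); // hypothetical database
props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "trips"); // hypothetical table (required)
HiveSyncConfig syncConfig = DataSourceUtils.buildHiveSyncConfig(props, "/tmp/hoodie/trips", "PARQUET");
// every option left unset falls back to the DataSourceWriteOptions default shown in the method above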

Example 18 with HiveSyncConfig

Use of org.apache.hudi.hive.HiveSyncConfig in project hudi by apache.

From class KafkaConnectTransactionServices, method syncHive.

private void syncHive() {
    HiveSyncConfig hiveSyncConfig = KafkaConnectUtils.buildSyncConfig(new TypedProperties(connectConfigs.getProps()), tableBasePath);
    String url;
    if (!StringUtils.isNullOrEmpty(hiveSyncConfig.syncMode) && HiveSyncMode.of(hiveSyncConfig.syncMode) == HiveSyncMode.HMS) {
        url = hadoopConf.get(KafkaConnectConfigs.HIVE_METASTORE_URIS);
    } else {
        url = hiveSyncConfig.jdbcUrl;
    }
    LOG.info("Syncing target hoodie table with hive table(" + hiveSyncConfig.tableName + "). Hive URL :" + url + ", basePath :" + tableBasePath);
    LOG.info("Hive Sync Conf => " + hiveSyncConfig);
    FileSystem fs = FSUtils.getFs(tableBasePath, hadoopConf);
    HiveConf hiveConf = new HiveConf();
    hiveConf.addResource(fs.getConf());
    LOG.info("Hive Conf => " + hiveConf.getAllProperties().toString());
    new HiveSyncTool(hiveSyncConfig, hiveConf, fs).syncHoodieTable();
}
Also used : HiveSyncTool(org.apache.hudi.hive.HiveSyncTool) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig) TypedProperties(org.apache.hudi.common.config.TypedProperties)
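
The last few lines of syncHive are the generic pattern for triggering a sync outside Kafka Connect as well. A standalone sketch, with hypothetical path and table values:

// build a config by hand instead of via KafkaConnectUtils.buildSyncConfig (values are hypothetical)
HiveSyncConfig cfg = new HiveSyncConfig();
cfg.basePath = "/tmp/hoodie/trips";
cfg.tableName = "trips";
cfg.syncMode = "hms"; // talk to the metastore directly rather than over JDBC
FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration()); // org.apache.hadoop.conf.Configuration
HiveConf hiveConf = new HiveConf();
hiveConf.addResource(fs.getConf());
new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable();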

Example 19 with HiveSyncConfig

Use of org.apache.hudi.hive.HiveSyncConfig in project hudi by apache.

From class KafkaConnectUtils, method buildSyncConfig.

/**
 * Builds the Hive sync config.
 * Note: this method is a temporary solution; a longer-term approach is tracked at
 * https://issues.apache.org/jira/browse/HUDI-3199.
 */
public static HiveSyncConfig buildSyncConfig(TypedProperties props, String tableBasePath) {
    HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
    hiveSyncConfig.basePath = tableBasePath;
    hiveSyncConfig.usePreApacheInputFormat = props.getBoolean(KafkaConnectConfigs.HIVE_USE_PRE_APACHE_INPUT_FORMAT, false);
    hiveSyncConfig.databaseName = props.getString(KafkaConnectConfigs.HIVE_DATABASE, "default");
    hiveSyncConfig.tableName = props.getString(KafkaConnectConfigs.HIVE_TABLE, "");
    hiveSyncConfig.hiveUser = props.getString(KafkaConnectConfigs.HIVE_USER, "");
    hiveSyncConfig.hivePass = props.getString(KafkaConnectConfigs.HIVE_PASS, "");
    hiveSyncConfig.jdbcUrl = props.getString(KafkaConnectConfigs.HIVE_URL, "");
    hiveSyncConfig.partitionFields = props.getStringList(KafkaConnectConfigs.HIVE_PARTITION_FIELDS, ",", Collections.emptyList());
    hiveSyncConfig.partitionValueExtractorClass = props.getString(KafkaConnectConfigs.HIVE_PARTITION_EXTRACTOR_CLASS, SlashEncodedDayPartitionValueExtractor.class.getName());
    hiveSyncConfig.useJdbc = props.getBoolean(KafkaConnectConfigs.HIVE_USE_JDBC, true);
    if (props.containsKey(KafkaConnectConfigs.HIVE_SYNC_MODE)) {
        hiveSyncConfig.syncMode = props.getString(KafkaConnectConfigs.HIVE_SYNC_MODE);
    }
    hiveSyncConfig.autoCreateDatabase = props.getBoolean(KafkaConnectConfigs.HIVE_AUTO_CREATE_DATABASE, true);
    hiveSyncConfig.ignoreExceptions = props.getBoolean(KafkaConnectConfigs.HIVE_IGNORE_EXCEPTIONS, false);
    hiveSyncConfig.skipROSuffix = props.getBoolean(KafkaConnectConfigs.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE, false);
    hiveSyncConfig.supportTimestamp = props.getBoolean(KafkaConnectConfigs.HIVE_SUPPORT_TIMESTAMP_TYPE, false);
    return hiveSyncConfig;
}
Also used : HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig)
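
A usage sketch for the Kafka Connect builder. The database, table, and base path are hypothetical; any key left unset falls back to the default shown in the method above.

TypedProperties props = new TypedProperties();
props.setProperty(KafkaConnectConfigs.HIVE_DATABASE, "analytics"); // hypothetical; defaults to "default"
props.setProperty(KafkaConnectConfigs.HIVE_TABLE, "trips"); // hypothetical; defaults to ""
props.setProperty(KafkaConnectConfigs.HIVE_SYNC_MODE, "hms"); // optional; syncMode stays null when absent
HiveSyncConfig syncConfig = KafkaConnectUtils.buildSyncConfig(props, "/tmp/hoodie/trips");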

Example 20 with HiveSyncConfig

Use of org.apache.hudi.hive.HiveSyncConfig in project hudi by apache.

From class HiveSyncContext, method create.

public static HiveSyncContext create(Configuration conf) {
    HiveSyncConfig syncConfig = buildSyncConfig(conf);
    org.apache.hadoop.conf.Configuration hadoopConf = StreamerUtil.getHadoopConf();
    String path = conf.getString(FlinkOptions.PATH);
    FileSystem fs = FSUtils.getFs(path, hadoopConf);
    HiveConf hiveConf = new HiveConf();
    if (!FlinkOptions.isDefaultValueDefined(conf, FlinkOptions.HIVE_SYNC_METASTORE_URIS)) {
        hadoopConf.set(HiveConf.ConfVars.METASTOREURIS.varname, conf.getString(FlinkOptions.HIVE_SYNC_METASTORE_URIS));
    }
    hiveConf.addResource(hadoopConf);
    return new HiveSyncContext(syncConfig, hiveConf, fs);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveSyncConfig(org.apache.hudi.hive.HiveSyncConfig)
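
A usage sketch for the Flink path. It assumes conf is an org.apache.flink.configuration.Configuration and that FlinkOptions.PATH and FlinkOptions.HIVE_SYNC_METASTORE_URIS are string options, consistent with the conf.getString calls above; the path and URI values are hypothetical.

Configuration conf = new Configuration(); // org.apache.flink.configuration.Configuration
conf.setString(FlinkOptions.PATH.key(), "/tmp/hoodie/trips"); // hypothetical table path
conf.setString(FlinkOptions.HIVE_SYNC_METASTORE_URIS.key(), "thrift://localhost:9083"); // hypothetical metastore
HiveSyncContext syncContext = HiveSyncContext.create(conf);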

Aggregations

HiveSyncConfig (org.apache.hudi.hive.HiveSyncConfig) - 20
HiveConf (org.apache.hadoop.hive.conf.HiveConf) - 5
HiveSyncTool (org.apache.hudi.hive.HiveSyncTool) - 3
HiveQueryDDLExecutor (org.apache.hudi.hive.ddl.HiveQueryDDLExecutor) - 3
FileSystem (org.apache.hadoop.fs.FileSystem) - 2
TypedProperties (org.apache.hudi.common.config.TypedProperties) - 2
HoodieHiveClient (org.apache.hudi.hive.HoodieHiveClient) - 2
Test (org.junit.jupiter.api.Test) - 2
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) - 2
Connection (java.sql.Connection) - 1
ResultSet (java.sql.ResultSet) - 1
Statement (java.sql.Statement) - 1
ArrayList (java.util.ArrayList) - 1
Configuration (org.apache.hadoop.conf.Configuration) - 1
Path (org.apache.hadoop.fs.Path) - 1
ZookeeperTestService (org.apache.hudi.common.testutils.minicluster.ZookeeperTestService) - 1
JDBCExecutor (org.apache.hudi.hive.ddl.JDBCExecutor) - 1
QueryBasedDDLExecutor (org.apache.hudi.hive.ddl.QueryBasedDDLExecutor) - 1
DummySchemaProvider (org.apache.hudi.utilities.DummySchemaProvider) - 1
HoodieDeltaStreamer (org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer) - 1