Search in sources :

Example 1 with HoodieCompactionConfig

Use of org.apache.hudi.config.HoodieCompactionConfig in the Apache Hudi project.

Class UtilHelpers, method createHoodieClient.

/**
 * Creates a {@link SparkRDDWriteClient} writing to the table at {@code basePath}.
 *
 * <p>Inline compaction is always disabled. If {@code compactionStrategyClass} is present, the
 * named class is loaded through {@code ReflectionUtils.loadClass} and set as the compaction
 * strategy; otherwise the compaction config is built without an explicit strategy. The index
 * type is set to {@code BLOOM}.
 *
 * @param jsc                     Java Spark context wrapped into the engine context
 * @param basePath                base path passed to the write config
 * @param schemaStr               schema string passed to the write config
 * @param parallelism             value used for the general, bulk-insert and delete parallelism
 * @param compactionStrategyClass optional class name of the compaction strategy to load
 * @param properties              extra properties handed to the write config builder
 * @return a write client built from the assembled {@link HoodieWriteConfig}
 */
public static SparkRDDWriteClient<HoodieRecordPayload> createHoodieClient(JavaSparkContext jsc, String basePath, String schemaStr, int parallelism, Option<String> compactionStrategyClass, TypedProperties properties) {
    // Compaction config that carries the caller-supplied strategy, when one was named.
    Option<HoodieCompactionConfig> withStrategy = compactionStrategyClass.map(strategyName ->
        HoodieCompactionConfig.newBuilder()
            .withInlineCompaction(false)
            .withCompactionStrategy(ReflectionUtils.loadClass(strategyName))
            .build());
    // Fallback used when no strategy class was given.
    HoodieCompactionConfig withoutStrategy = HoodieCompactionConfig.newBuilder()
        .withInlineCompaction(false)
        .build();
    HoodieCompactionConfig compaction = withStrategy.orElse(withoutStrategy);

    HoodieIndexConfig bloomIndex = HoodieIndexConfig.newBuilder()
        .withIndexType(HoodieIndex.IndexType.BLOOM)
        .build();

    // NOTE(review): withProps is deliberately kept as the last builder call, as in the
    // original ordering — presumably so supplied properties can override the explicit
    // settings; confirm against HoodieWriteConfig.Builder before reordering.
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withParallelism(parallelism, parallelism)
        .withBulkInsertParallelism(parallelism)
        .withDeleteParallelism(parallelism)
        .withSchema(schemaStr)
        .combineInput(true, true)
        .withCompactionConfig(compaction)
        .withIndexConfig(bloomIndex)
        .withProps(properties)
        .build();

    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    return new SparkRDDWriteClient<>(engineContext, writeConfig);
}
Also used : Arrays(java.util.Arrays) SchemaProviderWithPostProcessor(org.apache.hudi.utilities.schema.SchemaProviderWithPostProcessor) Connection(java.sql.Connection) Enumeration(java.util.Enumeration) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieException(org.apache.hudi.exception.HoodieException) ByteBuffer(java.nio.ByteBuffer) Logger(org.apache.log4j.Logger) DFSPropertiesConfiguration(org.apache.hudi.common.config.DFSPropertiesConfiguration) HoodieSourcePostProcessException(org.apache.hudi.utilities.exception.HoodieSourcePostProcessException) Source(org.apache.hudi.utilities.sources.Source) ResultSet(java.sql.ResultSet) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Transformer(org.apache.hudi.utilities.transform.Transformer) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) DriverRegistry(org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) InitialCheckPointProvider(org.apache.hudi.utilities.checkpointing.InitialCheckPointProvider) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) StructType(org.apache.spark.sql.types.StructType) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) SchemaProvider(org.apache.hudi.utilities.schema.SchemaProvider) Schema(org.apache.avro.Schema) SparkAvroPostProcessor(org.apache.hudi.utilities.schema.SparkAvroPostProcessor) JdbcUtils(org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils) Function1(org.apache.hudi.common.util.Functions.Function1) PreparedStatement(java.sql.PreparedStatement) HoodieIndex(org.apache.hudi.index.HoodieIndex) Objects(java.util.Objects) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) RowBasedSchemaProvider(org.apache.hudi.utilities.schema.RowBasedSchemaProvider) ReflectionUtils(org.apache.hudi.common.util.ReflectionUtils) 
SchemaPostProcessor(org.apache.hudi.utilities.schema.SchemaPostProcessor) JdbcDialects(org.apache.spark.sql.jdbc.JdbcDialects) JdbcDialect(org.apache.spark.sql.jdbc.JdbcDialect) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) AvroConversionUtils(org.apache.hudi.AvroConversionUtils) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieDeltaStreamerMetrics(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics) ArrayList(java.util.ArrayList) StringUtils(org.apache.hudi.common.util.StringUtils) SQLException(java.sql.SQLException) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) DelegatingSchemaProvider(org.apache.hudi.utilities.schema.DelegatingSchemaProvider) ChainedSchemaPostProcessor(org.apache.hudi.utilities.schema.ChainedSchemaPostProcessor) JsonKafkaSourcePostProcessor(org.apache.hudi.utilities.sources.processor.JsonKafkaSourcePostProcessor) JavaRDD(org.apache.spark.api.java.JavaRDD) DriverWrapper(org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper) SparkSession(org.apache.spark.sql.SparkSession) HoodieSchemaPostProcessException(org.apache.hudi.utilities.exception.HoodieSchemaPostProcessException) TableSchemaResolver(org.apache.hudi.common.table.TableSchemaResolver) Properties(java.util.Properties) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) SparkLauncher(org.apache.spark.launcher.SparkLauncher) SparkConf(org.apache.spark.SparkConf) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) LongAccumulator(org.apache.spark.util.LongAccumulator) Config(org.apache.hudi.utilities.schema.SchemaPostProcessor.Config) 
ChainedJsonKafkaSourcePostProcessor(org.apache.hudi.utilities.sources.processor.ChainedJsonKafkaSourcePostProcessor) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) StringReader(java.io.StringReader) Driver(java.sql.Driver) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) ChainedTransformer(org.apache.hudi.utilities.transform.ChainedTransformer) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) DriverManager(java.sql.DriverManager) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig)

Aggregations

BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 StringReader (java.io.StringReader)1 ByteBuffer (java.nio.ByteBuffer)1 Connection (java.sql.Connection)1 Driver (java.sql.Driver)1 DriverManager (java.sql.DriverManager)1 PreparedStatement (java.sql.PreparedStatement)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 Enumeration (java.util.Enumeration)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Properties (java.util.Properties)1 Schema (org.apache.avro.Schema)1