Example 51 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

The class TestJsonKafkaSourcePostProcessor, method testSampleJsonKafkaSourcePostProcessor.

@Test
public void testSampleJsonKafkaSourcePostProcessor() {
    // topic setup.
    final String topic = TEST_TOPIC_PREFIX + "testSampleJsonKafkaSourcePostProcessor";
    testUtils.createTopic(topic, 2);
    HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
    TypedProperties props = createPropsForJsonSource(topic, null, "earliest");
    // processor class name setup
    props.setProperty(JSON_KAFKA_PROCESSOR_CLASS_OPT.key(), SampleJsonKafkaSourcePostProcessor.class.getName());
    Source jsonSource = new JsonKafkaSource(props, jsc(), spark(), schemaProvider, metrics);
    SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);
    testUtils.sendMessages(topic, jsonifyRecords(dataGenerator.generateInserts("000", 1000)));
    InputBatch<JavaRDD<GenericRecord>> fetch1 = kafkaSource.fetchNewDataInAvroFormat(Option.empty(), 900);
    // the post processor rewrites the batch, so the fetched count no longer matches the 900-record source limit
    assertNotEquals(900, fetch1.getBatch().get().count());
}
Also used: TypedProperties (org.apache.hudi.common.config.TypedProperties), HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator), SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter), JavaRDD (org.apache.spark.api.java.JavaRDD), Test (org.junit.jupiter.api.Test)
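
The test registers SampleJsonKafkaSourcePostProcessor through JSON_KAFKA_PROCESSOR_CLASS_OPT, so records pulled from Kafka are rewritten before they land in the batch; that is why the assertion expects the fetched count to differ from the 900-record source limit. Below is a minimal sketch of such a processor, assuming the abstract base class takes TypedProperties in its constructor and exposes a single process(JavaRDD<String>) hook; the class name and the filter predicate are purely illustrative, not the actual SampleJsonKafkaSourcePostProcessor.

// Illustrative sketch only; the predicate below stands in for whatever filtering the real
// SampleJsonKafkaSourcePostProcessor performs.
public class DropSomeRecordsPostProcessor extends JsonKafkaSourcePostProcessor {

    public DropSomeRecordsPostProcessor(TypedProperties props) {
        super(props);
    }

    @Override
    public JavaRDD<String> process(JavaRDD<String> inputJsonRecords) {
        // Drop roughly half of the incoming JSON strings so the downstream count
        // no longer matches the requested source limit.
        return inputJsonRecords.filter(json -> json.hashCode() % 2 == 0);
    }
}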

Example 52 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

The class TestParquetDFSSource, method prepareDFSSource.

@Override
public Source prepareDFSSource() {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot);
    return new ParquetDFSSource(props, jsc, sparkSession, schemaProvider);
}
Also used: TypedProperties (org.apache.hudi.common.config.TypedProperties)
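
The DFS source only needs the root-path property shown above; the example does not show how it is exercised. A hedged sketch of pulling data from it, mirroring the SourceFormatAdapter usage from Example 51 (the unbounded limit and the println are placeholder choices, not taken from the original test):

// Hypothetical usage of the prepared source; assumes the same adapter API as in Example 51.
Source parquetSource = prepareDFSSource();
SourceFormatAdapter dfsAdapter = new SourceFormatAdapter(parquetSource);
// An empty checkpoint plus an effectively unbounded limit reads every parquet file
// currently sitting under hoodie.deltastreamer.source.dfs.root.
InputBatch<JavaRDD<GenericRecord>> batch = dfsAdapter.fetchNewDataInAvroFormat(Option.empty(), Long.MAX_VALUE);
if (batch.getBatch().isPresent()) {
    System.out.println("records read: " + batch.getBatch().get().count());
}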

Example 53 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

The class TestAbstractDebeziumSource, method createPropsForJsonSource.

private TypedProperties createPropsForJsonSource() {
    TypedProperties props = new TypedProperties();
    props.setProperty("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME);
    props.setProperty("bootstrap.servers", testUtils.brokerAddress());
    props.setProperty("auto.offset.reset", "earliest");
    props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    props.setProperty("hoodie.deltastreamer.schemaprovider.registry.url", "localhost");
    props.setProperty("hoodie.deltastreamer.source.kafka.value.deserializer.class", StringDeserializer.class.getName());
    props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    return props;
}
Also used: StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer), TypedProperties (org.apache.hudi.common.config.TypedProperties)
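
The method above only writes string values; what makes TypedProperties useful is that consumers can read them back with typed getters. A small illustrative sketch (the getString/getBoolean calls are assumed to be the standard typed accessors; the fallback values are made up):

// Illustrative only: reading the same keys back with typed getters.
TypedProperties props = createPropsForJsonSource();
String topic = props.getString("hoodie.deltastreamer.source.kafka.topic");
boolean autoCommit = props.getBoolean(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
String groupId = props.getString(ConsumerConfig.GROUP_ID_CONFIG, "fallback-group");
// A missing key either falls back to the supplied default or fails when no default is given.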

Example 54 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

The class TestSqlFileBasedTransformer, method setup.

@Override
@BeforeEach
public void setup() throws Exception {
    super.setup();
    props = new TypedProperties();
    sqlFileTransformer = new SqlFileBasedTransformer();
    inputDatasetRows = getInputDatasetRows();
    emptyDatasetRow = getEmptyDatasetRow();
}
Also used: TypedProperties (org.apache.hudi.common.config.TypedProperties), BeforeEach (org.junit.jupiter.api.BeforeEach)
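
The setup only instantiates the transformer and an empty TypedProperties; a test case would still need to point the transformer at a SQL file before calling apply. A hedged continuation (the property key, the file path, and the jsc/sparkSession handles are assumptions about the surrounding harness, not taken from the example):

// Hypothetical continuation of setup(): the key name and path are assumptions.
props.setProperty("hoodie.deltastreamer.transformer.sql.file", "/tmp/sql-file-transformer.sql");
Dataset<Row> transformed = sqlFileTransformer.apply(jsc, sparkSession, inputDatasetRows, props);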

Example 55 with TypedProperties

Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.

The class TestSqlQueryBasedTransformer, method testSqlQuery.

@Test
public void testSqlQuery() {
    SparkSession spark = SparkSession.builder().master("local[2]").appName(TestSqlQueryBasedTransformer.class.getName()).getOrCreate();
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    // prepare test data
    String testData = "{\n" + "  \"ts\": 1622126968000,\n" + "  \"uuid\": \"c978e157-72ee-4819-8f04-8e46e1bb357a\",\n" + "  \"rider\": \"rider-213\",\n" + "  \"driver\": \"driver-213\",\n" + "  \"begin_lat\": 0.4726905879569653,\n" + "  \"begin_lon\": 0.46157858450465483,\n" + "  \"end_lat\": 0.754803407008858,\n" + "  \"end_lon\": 0.9671159942018241,\n" + "  \"fare\": 34.158284716382845,\n" + "  \"partitionpath\": \"americas/brazil/sao_paulo\"\n" + "}";
    JavaRDD<String> testRdd = jsc.parallelize(Collections.singletonList(testData), 2);
    Dataset<Row> ds = spark.read().json(testRdd);
    // create a new column dt, whose value is transformed from ts, format is yyyyMMdd
    String transSql = "select\n" + "\tuuid,\n" + "\tbegin_lat,\n" + "\tbegin_lon,\n" + "\tdriver,\n" + "\tend_lat,\n" + "\tend_lon,\n" + "\tfare,\n" + "\tpartitionpath,\n" + "\trider,\n" + "\tts,\n" + "\tFROM_UNIXTIME(ts / 1000, 'yyyyMMdd') as dt\n" + "from\n" + "\t<SRC>";
    TypedProperties props = new TypedProperties();
    props.put("hoodie.deltastreamer.transformer.sql", transSql);
    // transform
    SqlQueryBasedTransformer transformer = new SqlQueryBasedTransformer();
    Dataset<Row> result = transformer.apply(jsc, spark, ds, props);
    // check result
    assertEquals(11, result.columns().length);
    assertNotNull(result.col("dt"));
    assertEquals("20210527", result.first().get(10).toString());
    spark.close();
}
Also used: SparkSession (org.apache.spark.sql.SparkSession), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Row (org.apache.spark.sql.Row), TypedProperties (org.apache.hudi.common.config.TypedProperties), Test (org.junit.jupiter.api.Test)
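
Two details are worth calling out: the <SRC> token in the SQL is a placeholder that SqlQueryBasedTransformer substitutes with a temporary view over the incoming dataset, and FROM_UNIXTIME works in seconds, which is why the millisecond ts is divided by 1000. A standalone sanity check of that conversion, using plain Spark SQL outside the transformer:

// Standalone check, not part of the test above: the same expression the transform SQL uses.
SparkSession spark = SparkSession.builder().master("local[2]").appName("from-unixtime-check").getOrCreate();
String dt = spark.sql("select FROM_UNIXTIME(1622126968000 / 1000, 'yyyyMMdd') as dt").first().getString(0);
// dt evaluates to "20210527", matching the assertion in testSqlQuery.
spark.close();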

Aggregations

Types most frequently used together with TypedProperties across the examples in this series, with their usage counts:

TypedProperties (org.apache.hudi.common.config.TypedProperties): 143
Test (org.junit.jupiter.api.Test): 47
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 22
JavaRDD (org.apache.spark.api.java.JavaRDD): 16
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 15
IOException (java.io.IOException): 14
Path (org.apache.hadoop.fs.Path): 14
Properties (java.util.Properties): 13
GenericRecord (org.apache.avro.generic.GenericRecord): 13
SourceFormatAdapter (org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter): 12
Row (org.apache.spark.sql.Row): 12
BeforeEach (org.junit.jupiter.api.BeforeEach): 11
ArrayList (java.util.ArrayList): 10
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 10
HoodieKey (org.apache.hudi.common.model.HoodieKey): 9
DFSPropertiesConfiguration (org.apache.hudi.common.config.DFSPropertiesConfiguration): 8
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 8
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 8
Dataset (org.apache.spark.sql.Dataset): 8
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 7