Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestJsonKafkaSourcePostProcessor, method testSampleJsonKafkaSourcePostProcessor.
@Test
public void testSampleJsonKafkaSourcePostProcessor() {
  // topic setup
  final String topic = TEST_TOPIC_PREFIX + "testSampleJsonKafkaSourcePostProcessor";
  testUtils.createTopic(topic, 2);
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
  TypedProperties props = createPropsForJsonSource(topic, null, "earliest");

  // register the post processor class that will rewrite the fetched JSON records
  props.setProperty(JSON_KAFKA_PROCESSOR_CLASS_OPT.key(), SampleJsonKafkaSourcePostProcessor.class.getName());

  Source jsonSource = new JsonKafkaSource(props, jsc(), spark(), schemaProvider, metrics);
  SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);

  testUtils.sendMessages(topic, jsonifyRecords(dataGenerator.generateInserts("000", 1000)));
  InputBatch<JavaRDD<GenericRecord>> fetch1 = kafkaSource.fetchNewDataInAvroFormat(Option.empty(), 900);

  // the post processor alters the record set, so the count must differ from the 900 requested
  assertNotEquals(900, fetch1.getBatch().get().count());
}
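The processor registered above decides which records survive the fetch, which is why the test expects a count different from 900. A minimal sketch of such a processor, assuming the JsonKafkaSourcePostProcessor base class in Hudi's utilities module takes TypedProperties in its constructor and exposes a single process(JavaRDD<String>) hook; the class name and filter condition below are illustrative:

import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.utilities.sources.processor.JsonKafkaSourcePostProcessor;
import org.apache.spark.api.java.JavaRDD;

// illustrative processor: silently drops records containing a marker string
public class DropMarkedRecordsPostProcessor extends JsonKafkaSourcePostProcessor {

  public DropMarkedRecordsPostProcessor(TypedProperties props) {
    super(props);
  }

  @Override
  public JavaRDD<String> process(JavaRDD<String> inputJsonRecords) {
    // records filtered out here never reach the downstream source
    return inputJsonRecords.filter(json -> !json.contains("\"to_be_dropped\""));
  }
}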
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestParquetDFSSource, method prepareDFSSource.
@Override
public Source prepareDFSSource() {
  TypedProperties props = new TypedProperties();
  // point the source at the DFS directory holding the parquet test files
  props.setProperty("hoodie.deltastreamer.source.dfs.root", dfsRoot);
  return new ParquetDFSSource(props, jsc, sparkSession, schemaProvider);
}
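Once prepared, the source is typically wrapped in a SourceFormatAdapter and polled, just as the Kafka test above does. A hedged sketch of such a fetch; fetchNewDataInRowFormat is assumed to mirror the fetchNewDataInAvroFormat call shown earlier:

Source parquetSource = prepareDFSSource();
SourceFormatAdapter adapter = new SourceFormatAdapter(parquetSource);

// no prior checkpoint (Option.empty()), at most 100 records
InputBatch<Dataset<Row>> batch = adapter.fetchNewDataInRowFormat(Option.empty(), 100);
if (batch.getBatch().isPresent()) {
  batch.getBatch().get().show(10, false);
}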
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestAbstractDebeziumSource, method createPropsForJsonSource.
private TypedProperties createPropsForJsonSource() {
  TypedProperties props = new TypedProperties();
  props.setProperty("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME);
  props.setProperty("bootstrap.servers", testUtils.brokerAddress());
  props.setProperty("auto.offset.reset", "earliest");
  props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
  props.setProperty("hoodie.deltastreamer.schemaprovider.registry.url", "localhost");
  props.setProperty("hoodie.deltastreamer.source.kafka.value.deserializer.class", StringDeserializer.class.getName());
  // random group id so every test run starts from a fresh consumer group
  props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
  return props;
}
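A note on reading these values back: TypedProperties extends java.util.Properties with typed getters, which is the main reason these tests prefer it over a plain Properties. A short sketch, assuming the usual getString/getBoolean accessors with default-value overloads (present in recent Hudi releases, but verify against yours):

TypedProperties props = createPropsForJsonSource();

// typed reads; keys mirror the ones set above
String topic = props.getString("hoodie.deltastreamer.source.kafka.topic");
boolean autoCommit = props.getBoolean(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
String registryUrl = props.getString("hoodie.deltastreamer.schemaprovider.registry.url", "localhost");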
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestSqlFileBasedTransformer, method setup.
@Override
@BeforeEach
public void setup() throws Exception {
  super.setup();
  props = new TypedProperties();
  sqlFileTransformer = new SqlFileBasedTransformer();
  inputDatasetRows = getInputDatasetRows();
  emptyDatasetRow = getEmptyDatasetRow();
}
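SqlFileBasedTransformer reads its statements from a SQL file rather than an inline query. A hedged sketch of how the fixture above would typically be exercised, assuming the transformer is pointed at its script via a hoodie.deltastreamer.transformer.sql.file property and applied through the same four-argument apply used by SqlQueryBasedTransformer below; the file path and the jsc/sparkSession handles from the test base class are assumptions to verify against your Hudi version:

// point the transformer at a script; the path is illustrative
props.setProperty("hoodie.deltastreamer.transformer.sql.file", "/tmp/transformer.sql");
Dataset<Row> transformed = sqlFileTransformer.apply(jsc, sparkSession, inputDatasetRows, props);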
Use of org.apache.hudi.common.config.TypedProperties in project hudi by apache.
The class TestSqlQueryBasedTransformer, method testSqlQuery.
@Test
public void testSqlQuery() {
  SparkSession spark = SparkSession.builder()
      .master("local[2]")
      .appName(TestSqlQueryBasedTransformer.class.getName())
      .getOrCreate();
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

  // prepare test data
  String testData = "{\n"
      + " \"ts\": 1622126968000,\n"
      + " \"uuid\": \"c978e157-72ee-4819-8f04-8e46e1bb357a\",\n"
      + " \"rider\": \"rider-213\",\n"
      + " \"driver\": \"driver-213\",\n"
      + " \"begin_lat\": 0.4726905879569653,\n"
      + " \"begin_lon\": 0.46157858450465483,\n"
      + " \"end_lat\": 0.754803407008858,\n"
      + " \"end_lon\": 0.9671159942018241,\n"
      + " \"fare\": 34.158284716382845,\n"
      + " \"partitionpath\": \"americas/brazil/sao_paulo\"\n"
      + "}";
  JavaRDD<String> testRdd = jsc.parallelize(Collections.singletonList(testData), 2);
  Dataset<Row> ds = spark.read().json(testRdd);

  // create a new column dt, derived from ts in yyyyMMdd format
  String transSql = "select\n"
      + "\tuuid,\n"
      + "\tbegin_lat,\n"
      + "\tbegin_lon,\n"
      + "\tdriver,\n"
      + "\tend_lat,\n"
      + "\tend_lon,\n"
      + "\tfare,\n"
      + "\tpartitionpath,\n"
      + "\trider,\n"
      + "\tts,\n"
      + "\tFROM_UNIXTIME(ts / 1000, 'yyyyMMdd') as dt\n"
      + "from\n"
      + "\t<SRC>";
  TypedProperties props = new TypedProperties();
  props.put("hoodie.deltastreamer.transformer.sql", transSql);

  // transform
  SqlQueryBasedTransformer transformer = new SqlQueryBasedTransformer();
  Dataset<Row> result = transformer.apply(jsc, spark, ds, props);

  // check result: 11 columns (10 original plus dt) and the expected date value
  assertEquals(11, result.columns().length);
  assertNotNull(result.col("dt"));
  assertEquals("20210527", result.first().get(10).toString());
  spark.close();
}
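Two details carry this test: ts is in milliseconds, so the query divides by 1000 before FROM_UNIXTIME formats it (1622126968000 ms falls on 2021-05-27, hence the expected dt of 20210527), and <SRC> is a placeholder the transformer resolves at run time against the incoming dataset. Conceptually, SqlQueryBasedTransformer does something like the following; the temp view naming is illustrative:

// register the incoming batch under a unique temp view, then splice its name into the query
String tmpTable = "HOODIE_SRC_TMP_TABLE_" + UUID.randomUUID().toString().replace("-", "_");
ds.createOrReplaceTempView(tmpTable);
Dataset<Row> out = spark.sql(transSql.replaceAll("<SRC>", tmpTable));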