Use of org.apache.hudi.utilities.sources.helpers.AvroConvertor in project hudi by apache:
the writeRecord method of the class AbstractConnectWriter.
@Override
public void writeRecord(SinkRecord record) throws IOException {
  AvroConvertor convertor = new AvroConvertor(schemaProvider.getSourceSchema());
  Option<GenericRecord> avroRecord;
  switch (connectConfigs.getKafkaValueConverter()) {
    case KAFKA_AVRO_CONVERTER:
      avroRecord = Option.of((GenericRecord) record.value());
      break;
    case KAFKA_STRING_CONVERTER:
      avroRecord = Option.of(convertor.fromJson((String) record.value()));
      break;
    case KAFKA_JSON_CONVERTER:
      throw new UnsupportedEncodingException("Currently JSON objects are not supported");
    default:
      throw new IOException("Unsupported Kafka Format type (" + connectConfigs.getKafkaValueConverter() + ")");
  }
  // Tag records with a file ID based on kafka partition and hudi partition.
  HoodieRecord<?> hoodieRecord = new HoodieAvroRecord<>(keyGenerator.getKey(avroRecord.get()), new HoodieAvroPayload(avroRecord));
  String fileId = KafkaConnectUtils.hashDigest(String.format("%s-%s", record.kafkaPartition(), hoodieRecord.getPartitionPath()));
  hoodieRecord.unseal();
  hoodieRecord.setCurrentLocation(new HoodieRecordLocation(instantTime, fileId));
  hoodieRecord.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
  hoodieRecord.seal();
  writeHudiRecord(hoodieRecord);
}
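For context, the KAFKA_STRING_CONVERTER branch above relies on AvroConvertor to turn a JSON string payload into a GenericRecord that matches the source schema. Below is a minimal standalone sketch of that conversion; the schema string and JSON payload are made-up illustration values (in the real method the schema comes from schemaProvider.getSourceSchema() and the payload from record.value()).

import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.utilities.sources.helpers.AvroConvertor;

public class AvroConvertorJsonExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical Avro schema string, standing in for schemaProvider.getSourceSchema().
    String schemaStr = "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
        + "{\"name\":\"id\",\"type\":\"long\"},"
        + "{\"name\":\"name\",\"type\":\"string\"}]}";

    AvroConvertor convertor = new AvroConvertor(schemaStr);

    // A JSON payload shaped like the schema above, standing in for (String) record.value().
    GenericRecord avroRecord = convertor.fromJson("{\"id\": 1, \"name\": \"alice\"}");
    System.out.println(avroRecord);
  }
}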
Use of org.apache.hudi.utilities.sources.helpers.AvroConvertor in project hudi by apache:
the toDataset method of the class DebeziumSource.
/**
 * Converts a Kafka Topic offset into a Spark dataset.
 *
 * @param offsetRanges Offset ranges
 * @param offsetGen KafkaOffsetGen
 * @param schemaStr Avro schema string for the topic
 * @return Spark dataset
 */
private Dataset<Row> toDataset(OffsetRange[] offsetRanges, KafkaOffsetGen offsetGen, String schemaStr) {
  AvroConvertor convertor = new AvroConvertor(schemaStr);
  Dataset<Row> kafkaData;
  if (deserializerClassName.equals(StringDeserializer.class.getName())) {
    kafkaData = AvroConversionUtils.createDataFrame(
        KafkaUtils.<String, String>createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent())
            .map(obj -> convertor.fromJson(obj.value()))
            .rdd(),
        schemaStr, sparkSession);
  } else {
    kafkaData = AvroConversionUtils.createDataFrame(
        KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent())
            .map(obj -> (GenericRecord) obj.value())
            .rdd(),
        schemaStr, sparkSession);
  }
  // Flatten debezium payload, specific to each DB type (postgres/ mysql/ etc..)
  Dataset<Row> debeziumDataset = processDataset(kafkaData);
  // Some required transformations to ensure debezium data types are converted to spark supported types.
  return convertArrayColumnsToString(convertColumnToNullable(sparkSession,
      convertDateColumns(debeziumDataset, new Schema.Parser().parse(schemaStr))));
}
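The key detail in the StringDeserializer branch is that a single AvroConvertor, built on the driver from the schema string, is captured by the map closure and applied to every JSON message on the executors before the RDD of GenericRecords is turned into a DataFrame. Below is a stripped-down sketch of just that step; the helper name JsonToDatasetSketch and the jsonMessages RDD are hypothetical stand-ins for the Kafka RDD produced by KafkaUtils.createRDD in the real method.

import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.utilities.sources.helpers.AvroConvertor;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class JsonToDatasetSketch {

  // jsonMessages stands in for the JSON-valued Kafka RDD consumed in toDataset.
  static Dataset<Row> toRows(JavaRDD<String> jsonMessages, String schemaStr, SparkSession spark) {
    AvroConvertor convertor = new AvroConvertor(schemaStr);
    // Convert each JSON message into a GenericRecord conforming to the Avro schema.
    JavaRDD<GenericRecord> genericRecords = jsonMessages.map(convertor::fromJson);
    // Build the Row schema from the Avro schema string and convert the records into a DataFrame.
    return AvroConversionUtils.createDataFrame(genericRecords.rdd(), schemaStr, spark);
  }
}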