Search in sources :

Example 1 with SchemaProvider

Use of org.apache.hudi.schema.SchemaProvider in the Apache Hudi project.

From the class TestAbstractConnectWriter, method testAbstractWriterForAllFormats.

/**
 * Checks that the connect writer produces the expected records for each supported input format.
 *
 * <p>For the given format the test generates {@code NUM_RECORDS} input records, builds the expected
 * {@code HoodieRecord}s from those same inputs (rewritten against the source schema), writes every
 * input through an {@code AbstractHudiConnectWriterTestWrapper} configured with the matching Kafka
 * value converter, and finally compares what the writer captured with the expected records.
 *
 * @param inputFormats the input format under test; one invocation per {@code TestInputFormats} value
 * @throws Exception if record generation, writing, or validation fails
 */
@ParameterizedTest
@EnumSource(value = TestInputFormats.class)
public void testAbstractWriterForAllFormats(TestInputFormats inputFormats) throws Exception {
    Schema schema = schemaProvider.getSourceSchema();
    List<?> inputRecords;
    List<HoodieRecord> expectedRecords;
    String formatConverter;
    switch(inputFormats) {
        case JSON_STRING:
            formatConverter = AbstractConnectWriter.KAFKA_STRING_CONVERTER;
            // Writer and reader schema are the same: the JSON is decoded straight into the source schema.
            GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(schema, schema);
            inputRecords = SchemaTestUtil.generateTestJsonRecords(0, NUM_RECORDS);
            expectedRecords = ((List<String>) inputRecords).stream().map(s -> {
                try {
                    return HoodieAvroUtils.rewriteRecord((GenericRecord) reader.read(null, DecoderFactory.get().jsonDecoder(schema, s)), schema);
                } catch (IOException exception) {
                    // Keep the original IOException as the cause so a decoding failure stays diagnosable.
                    throw new HoodieException("Error converting JSON records to AVRO", exception);
                }
            }).map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
            break;
        case AVRO:
            formatConverter = AbstractConnectWriter.KAFKA_AVRO_CONVERTER;
            inputRecords = SchemaTestUtil.generateTestRecords(0, NUM_RECORDS);
            expectedRecords = inputRecords.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, schema)).map(p -> convertToHoodieRecords(p, p.get(RECORD_KEY_INDEX).toString(), "000/00/00")).collect(Collectors.toList());
            break;
        default:
            throw new HoodieException("Unknown test scenario " + inputFormats);
    }
    // The value converter is the only property set; it selects how the writer interprets each Kafka record value.
    configs = KafkaConnectConfigs.newBuilder().withProperties(Collections.singletonMap(KafkaConnectConfigs.KAFKA_VALUE_CONVERTER, formatConverter)).build();
    AbstractHudiConnectWriterTestWrapper writer = new AbstractHudiConnectWriterTestWrapper(configs, keyGenerator, schemaProvider);
    for (int i = 0; i < NUM_RECORDS; i++) {
        writer.writeRecord(getNextKafkaRecord(inputRecords.get(i)));
    }
    validateRecords(writer.getWrittenRecords(), expectedRecords);
}
Also used : HoodieAvroPayload(org.apache.hudi.common.model.HoodieAvroPayload) BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) EnumSource(org.junit.jupiter.params.provider.EnumSource) AbstractConnectWriter(org.apache.hudi.connect.writers.AbstractConnectWriter) ArrayList(java.util.ArrayList) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) SchemaTestUtil(org.apache.hudi.common.testutils.SchemaTestUtil) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) TypedProperties(org.apache.hudi.common.config.TypedProperties) Iterator(java.util.Iterator) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) WriteStatus(org.apache.hudi.client.WriteStatus) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) List(java.util.List) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) SchemaProvider(org.apache.hudi.schema.SchemaProvider) Comparator(java.util.Comparator) Collections(java.util.Collections) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) DecoderFactory(org.apache.avro.io.DecoderFactory) KafkaConnectConfigs(org.apache.hudi.connect.writers.KafkaConnectConfigs) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) ArrayList(java.util.ArrayList) List(java.util.List) GenericRecord(org.apache.avro.generic.GenericRecord) 
EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 Schema (org.apache.avro.Schema)1 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 DecoderFactory (org.apache.avro.io.DecoderFactory)1 HoodieAvroUtils (org.apache.hudi.avro.HoodieAvroUtils)1 WriteStatus (org.apache.hudi.client.WriteStatus)1 TypedProperties (org.apache.hudi.common.config.TypedProperties)1 HoodieAvroPayload (org.apache.hudi.common.model.HoodieAvroPayload)1 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)1 HoodieKey (org.apache.hudi.common.model.HoodieKey)1 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)1 SchemaTestUtil (org.apache.hudi.common.testutils.SchemaTestUtil)1