Search in sources :

Example 1 with AvroData

use of io.confluent.connect.avro.AvroData in project kafka-connect-storage-cloud by confluentinc.

In class S3SinkTaskTest, method calcByteSize:

/**
 * Appends the values of the given sink records to an in-memory Avro container file and reports
 * the size of the backing buffer once the last record has been appended.
 *
 * <p>The Avro file is created lazily from the value schema of the first record; every record
 * value is then converted with {@link AvroData#fromConnectData} and appended.
 *
 * @param sinkRecords records whose values are written, in iteration order
 * @return number of bytes present in the in-memory buffer after the last append; {@code 0} when
 *     {@code sinkRecords} is empty because the file is never created
 * @throws IOException if creating the file, appending a record, or closing the writer fails
 */
private int calcByteSize(List<SinkRecord> sinkRecords) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    AvroData avroData = new AvroData(1);
    // try-with-resources guarantees the writer is released on every path, including failures
    // while converting or appending a record.
    try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
        boolean writerInit = false;
        for (SinkRecord sinkRecord : sinkRecords) {
            if (!writerInit) {
                // The container file needs a schema up front; take it from the first record.
                writer.create(avroData.fromConnectSchema(sinkRecord.valueSchema()), baos);
                writerInit = true;
            }
            writer.append(avroData.fromConnectData(sinkRecord.valueSchema(), sinkRecord.value()));
        }
        // The return expression is evaluated BEFORE the writer is closed, so the reported size
        // is exactly what this method returned before it started closing the writer.
        // NOTE(review): the writer is not flushed before measuring, so records it still buffers
        // internally are presumably not counted - confirm whether callers want writer.flush()
        // here before changing the returned value.
        return baos.size();
    }
}
Also used : DataFileWriter(org.apache.avro.file.DataFileWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) AvroData(io.confluent.connect.avro.AvroData) SinkRecord(org.apache.kafka.connect.sink.SinkRecord)

Example 2 with AvroData

use of io.confluent.connect.avro.AvroData in project ksql by confluentinc.

In class DataGenProducer, method populateTopic:

/**
 * Generates {@code messageCount} random Avro records and publishes each one to a Kafka topic as
 * a {@link GenericRow}, pausing for a random interval after every send.
 *
 * <p>For every field of the generator's schema the row value is chosen as follows:
 * <ul>
 *   <li>fields whose schema has a {@code session} property are passed through
 *       {@code handleSessionisationOfValue};</li>
 *   <li>fields whose schema has a {@code session-sibling-int-hash} property are delegated to
 *       {@code handleSessionSiblingField}, provided a session value was already seen in the
 *       same record;</li>
 *   <li>fields whose schema has a {@code format_as_time} property receive the current time,
 *       either as epoch milliseconds ({@code unix_long}) or formatted with the property value
 *       as a {@link SimpleDateFormat} pattern;</li>
 *   <li>all other fields keep the generated value.</li>
 * </ul>
 *
 * <p>The producer is flushed and closed before the method returns, and is also closed when the
 * method exits with an exception. If the thread is interrupted while pausing, all remaining
 * messages are still sent and the interrupt status is restored just before returning.
 *
 * @param props configuration handed to the {@link KafkaProducer}
 * @param generator source of the Avro schema and of the random records
 * @param kafkaTopicName topic the records are sent to
 * @param key name of the Avro field whose string form becomes the Kafka record key
 * @param messageCount number of records to generate and send
 * @param maxInterval upper bound, in milliseconds, of the random pause after each message; a
 *     negative value selects {@code INTER_MESSAGE_MAX_INTERVAL}
 * @throws RuntimeException if the generator yields something other than a
 *     {@link GenericRecord}
 */
public void populateTopic(Properties props, Generator generator, String kafkaTopicName, String key, int messageCount, long maxInterval) {
    // Resolve the pause bound once instead of overwriting the parameter.
    final long pauseCeiling = maxInterval < 0 ? INTER_MESSAGE_MAX_INTERVAL : maxInterval;
    Schema avroSchema = generator.schema();
    org.apache.kafka.connect.data.Schema kafkaSchema = new AvroData(1).toConnectSchema(avroSchema);
    Serializer<GenericRow> serializer = getSerializer(avroSchema, kafkaSchema, kafkaTopicName);
    // Remembers an interrupt received while pausing so it can be re-asserted after the
    // producer has been flushed and closed.
    boolean interrupted = false;
    // try-with-resources closes the producer even when generating or sending fails.
    try (KafkaProducer<String, GenericRow> producer = new KafkaProducer<>(props, new StringSerializer(), serializer)) {
        SessionManager sessionManager = new SessionManager();
        for (int i = 0; i < messageCount; i++) {
            Object generatedObject = generator.generate();
            if (!(generatedObject instanceof GenericRecord)) {
                throw new RuntimeException(String.format("Expected Avro Random Generator to return instance of GenericRecord, found %s instead", generatedObject.getClass().getName()));
            }
            GenericRecord randomAvroMessage = (GenericRecord) generatedObject;
            List<Object> genericRowValues = new ArrayList<>();
            // Populate the record entries, one row value per schema field.
            String sessionisationValue = null;
            for (Schema.Field field : avroSchema.getFields()) {
                boolean isSession = field.schema().getProp("session") != null;
                boolean isSessionSiblingIntHash = field.schema().getProp("session-sibling-int-hash") != null;
                String timeFormatFromLong = field.schema().getProp("format_as_time");
                if (isSession) {
                    String currentValue = (String) randomAvroMessage.get(field.name());
                    String newCurrentValue = handleSessionisationOfValue(sessionManager, currentValue);
                    // Remember the session value so sibling fields later in this record can
                    // be linked to it.
                    sessionisationValue = newCurrentValue;
                    genericRowValues.add(newCurrentValue);
                } else if (isSessionSiblingIntHash && sessionisationValue != null) {
                    // Hack to link int-ids to session values - if anything fails the
                    // 'avro-gen' randomised version is used instead.
                    handleSessionSiblingField(randomAvroMessage, genericRowValues, sessionisationValue, field);
                } else if (timeFormatFromLong != null) {
                    Date date = new Date(System.currentTimeMillis());
                    if (timeFormatFromLong.equals("unix_long")) {
                        genericRowValues.add(date.getTime());
                    } else {
                        // Build the formatter from THIS field's pattern. Reusing one formatter
                        // for the whole record would apply the first field's pattern to every
                        // later time field, even when their patterns differ.
                        genericRowValues.add(new SimpleDateFormat(timeFormatFromLong).format(date));
                    }
                } else {
                    genericRowValues.add(randomAvroMessage.get(field.name()));
                }
            }
            GenericRow genericRow = new GenericRow(genericRowValues);
            String keyString = randomAvroMessage.get(key).toString();
            ProducerRecord<String, GenericRow> producerRecord = new ProducerRecord<>(kafkaTopicName, keyString, genericRow);
            producer.send(producerRecord);
            System.err.println(keyString + " --> (" + genericRow + ")");
            try {
                Thread.sleep((long) (pauseCeiling * Math.random()));
            } catch (InterruptedException e) {
                // Keep producing (best effort, as before) but do not lose the interrupt: it
                // is re-asserted once the producer has been flushed and closed.
                interrupted = true;
            }
        }
        producer.flush();
    } finally {
        // Runs after the producer has been closed, so the restored interrupt status cannot
        // disturb the producer's own flush/close.
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) AvroData(io.confluent.connect.avro.AvroData) GenericRow(io.confluent.ksql.GenericRow) GenericRecord(org.apache.avro.generic.GenericRecord) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) Date(java.util.Date) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) SimpleDateFormat(java.text.SimpleDateFormat)

Aggregations

AvroData (io.confluent.connect.avro.AvroData): 2, GenericRow (io.confluent.ksql.GenericRow): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, SimpleDateFormat (java.text.SimpleDateFormat): 1, ArrayList (java.util.ArrayList): 1, Date (java.util.Date): 1, Schema (org.apache.avro.Schema): 1, DataFileWriter (org.apache.avro.file.DataFileWriter): 1, GenericRecord (org.apache.avro.generic.GenericRecord): 1, KafkaProducer (org.apache.kafka.clients.producer.KafkaProducer): 1, ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord): 1, StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 1, SinkRecord (org.apache.kafka.connect.sink.SinkRecord): 1