
Example 1 with EncoderFactory

use of org.apache.avro.io.EncoderFactory in project pinot by linkedin.

the class BaseClusterIntegrationTest, method pushAvroIntoKafka. It reads each record from the given Avro files, serializes it to Avro binary (optionally prefixed with a header), and pushes the resulting messages into the given Kafka topic, either singly or in batches.

public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            // EncoderFactory.get() returns the shared default factory; a direct
            // binary encoder writes straight to the underlying stream, so one
            // instance can be reused for every record.
            BinaryEncoder binaryEncoder = EncoderFactory.get().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
Also used : ByteArrayOutputStream (java.io.ByteArrayOutputStream), File (java.io.File), IOException (java.io.IOException), SQLException (java.sql.SQLException), ArrayList (java.util.ArrayList), Properties (java.util.Properties), Producer (kafka.javaapi.producer.Producer), KeyedMessage (kafka.producer.KeyedMessage), ProducerConfig (kafka.producer.ProducerConfig), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), GenericRecord (org.apache.avro.generic.GenericRecord), BinaryEncoder (org.apache.avro.io.BinaryEncoder), EncoderFactory (org.apache.avro.io.EncoderFactory), ArchiveException (org.apache.commons.compress.archivers.ArchiveException), JSONException (org.json.JSONException)
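
The serialization pattern used above can be isolated into a minimal, self-contained sketch. The Event schema, the field values, and the class name below are hypothetical stand-ins, not part of the pinot code; the EncoderFactory, GenericDatumWriter, and BinaryEncoder calls follow the standard Avro API.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AvroBinarySerializationSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical schema, for illustration only.
        Schema schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Event\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"name\",\"type\":\"string\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 42L);
        record.put("name", "example");
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
        // A direct binary encoder writes straight to the stream; the second
        // argument lets the factory reuse an existing encoder (none here).
        BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(outputStream, null);
        GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        writer.write(record, encoder);
        // flush() is required: the encoder may buffer the tail of a record.
        encoder.flush();
        byte[] payload = outputStream.toByteArray();
        System.out.println("Serialized " + payload.length + " bytes");
    }
}

To serialize many records with one encoder, reset the output stream between writes and keep the same encoder instance, exactly as the examples on this page do inside their loops.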

Example 2 with EncoderFactory

use of org.apache.avro.io.EncoderFactory in project pinot by linkedin.

the class BaseClusterIntegrationTest, method pushRandomAvroIntoKafka. It generates rowCount random records conforming to the schema of the given Avro file and pushes them into the given Kafka topic in batches, sleeping briefly after each full batch.

public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic, int rowCount, Random random) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<String, byte[]> producer = new Producer<String, byte[]>(producerConfig);
    try {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
        DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
        // Use the shared default factory; the direct binary encoder writes
        // straight to outputStream, so one instance serves all records.
        BinaryEncoder binaryEncoder = EncoderFactory.get().directBinaryEncoder(outputStream, null);
        Schema avroSchema = reader.getSchema();
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
        int recordCount = 0;
        int rowsRemaining = rowCount;
        int messagesInThisBatch = 0;
        while (rowsRemaining > 0) {
            int rowsInThisBatch = Math.min(rowsRemaining, MAX_MESSAGES_PER_BATCH);
            List<KeyedMessage<String, byte[]>> messagesToWrite = new ArrayList<KeyedMessage<String, byte[]>>(rowsInThisBatch);
            GenericRecord genericRecord = new GenericData.Record(avroSchema);
            for (int i = 0; i < rowsInThisBatch; ++i) {
                generateRandomRecord(genericRecord, avroSchema, random);
                outputStream.reset();
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<String, byte[]> data = new KeyedMessage<String, byte[]>(kafkaTopic, bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                        Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                producer.send(messagesToWrite);
            }
            rowsRemaining -= rowsInThisBatch;
        }
        outputStream.close();
        reader.close();
        LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic);
        int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
        LOGGER.info("Total records written so far " + totalRecordCount);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : Schema (org.apache.avro.Schema), ByteArrayOutputStream (java.io.ByteArrayOutputStream), IOException (java.io.IOException), SQLException (java.sql.SQLException), ArrayList (java.util.ArrayList), Properties (java.util.Properties), Producer (kafka.javaapi.producer.Producer), KeyedMessage (kafka.producer.KeyedMessage), ProducerConfig (kafka.producer.ProducerConfig), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), GenericRecord (org.apache.avro.generic.GenericRecord), BinaryEncoder (org.apache.avro.io.BinaryEncoder), EncoderFactory (org.apache.avro.io.EncoderFactory), ArchiveException (org.apache.commons.compress.archivers.ArchiveException), JSONException (org.json.JSONException)
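
generateRandomRecord is a helper defined elsewhere in BaseClusterIntegrationTest and not shown on this page. As a rough, hypothetical sketch of what such a helper could look like, the method below fills each top-level field with a random value matching its Avro type; the real pinot implementation may handle more types (unions, arrays, nested records) and differ in detail.

import java.util.Random;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

public class RandomRecordSketch {
    // Hypothetical stand-in for BaseClusterIntegrationTest.generateRandomRecord.
    public static void generateRandomRecord(GenericRecord record, Schema schema, Random random) {
        for (Schema.Field field : schema.getFields()) {
            switch (field.schema().getType()) {
                case INT:
                    record.put(field.name(), random.nextInt());
                    break;
                case LONG:
                    record.put(field.name(), random.nextLong());
                    break;
                case FLOAT:
                    record.put(field.name(), random.nextFloat());
                    break;
                case DOUBLE:
                    record.put(field.name(), random.nextDouble());
                    break;
                case STRING:
                    record.put(field.name(), "str-" + random.nextInt(1000));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported field type: " + field.schema().getType());
            }
        }
    }
}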

Aggregations

ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2
IOException (java.io.IOException): 2
SQLException (java.sql.SQLException): 2
ArrayList (java.util.ArrayList): 2
Properties (java.util.Properties): 2
Producer (kafka.javaapi.producer.Producer): 2
KeyedMessage (kafka.producer.KeyedMessage): 2
ProducerConfig (kafka.producer.ProducerConfig): 2
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 2
GenericRecord (org.apache.avro.generic.GenericRecord): 2
BinaryEncoder (org.apache.avro.io.BinaryEncoder): 2
EncoderFactory (org.apache.avro.io.EncoderFactory): 2
ArchiveException (org.apache.commons.compress.archivers.ArchiveException): 2
JSONException (org.json.JSONException): 2
File (java.io.File): 1
Schema (org.apache.avro.Schema): 1