Search in sources :

Example 6 with Producer

use of kafka.javaapi.producer.Producer in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method pushRandomAvroIntoKafka.

/**
 * Publishes {@code rowCount} generated Avro records to a Kafka topic.
 *
 * <p>The schema is read from {@code avroFile}; each record is filled in by
 * {@code generateRandomRecord}, binary-encoded with Avro and sent as the payload of a
 * {@link KeyedMessage}. When {@code BATCH_KAFKA_MESSAGES} is set, messages are sent in lists of at
 * most {@code MAX_MESSAGES_PER_BATCH}, with a one second pause after every full batch; otherwise
 * each message is sent individually.
 *
 * @param avroFile Avro file whose schema the generated records follow
 * @param kafkaBroker value used for the producer's {@code metadata.broker.list} property
 * @param kafkaTopic topic the messages are published to
 * @param rowCount number of records to generate and publish
 * @param random source of randomness handed to {@code generateRandomRecord}
 * @throws RuntimeException wrapping any exception raised while reading the schema, encoding or sending
 */
public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic, int rowCount, Random random) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<String, byte[]> producer = new Producer<String, byte[]>(producerConfig);
    try {
        // The Avro file is only consulted for its schema, so release it as soon as the schema is read
        // (and do so even if reading the schema fails).
        Schema avroSchema;
        DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
        try {
            avroSchema = reader.getSchema();
        } finally {
            reader.close();
        }

        // In-memory buffer reused for every record; it holds no external resource, so it is not closed.
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
        BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
        int recordCount = 0;
        int rowsRemaining = rowCount;
        int messagesInThisBatch = 0;
        while (rowsRemaining > 0) {
            int rowsInThisBatch = Math.min(rowsRemaining, MAX_MESSAGES_PER_BATCH);
            List<KeyedMessage<String, byte[]>> messagesToWrite = new ArrayList<KeyedMessage<String, byte[]>>(rowsInThisBatch);
            // One record instance is reused for the batch; it is serialized to bytes before being refilled.
            GenericRecord genericRecord = new GenericData.Record(avroSchema);
            for (int i = 0; i < rowsInThisBatch; ++i) {
                generateRandomRecord(genericRecord, avroSchema, random);
                outputStream.reset();
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<String, byte[]> data = new KeyedMessage<String, byte[]>(kafkaTopic, bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                        // Pause after each full batch before producing more.
                        Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            // Flush a trailing partial batch; skip the send when the last full batch already emptied the list.
            if (BATCH_KAFKA_MESSAGES && !messagesToWrite.isEmpty()) {
                producer.send(messagesToWrite);
            }
            rowsRemaining -= rowsInThisBatch;
        }
        LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic);
        int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
        LOGGER.info("Total records written so far " + totalRecordCount);
    } catch (Exception e) {
        // The cause is preserved in the rethrown exception, so no separate stack trace print is needed.
        throw new RuntimeException(e);
    } finally {
        // Always release the producer, whether or not sending succeeded.
        producer.close();
    }
}
Also used : EncoderFactory(org.apache.avro.io.EncoderFactory) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) Producer(kafka.javaapi.producer.Producer) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) KeyedMessage(kafka.producer.KeyedMessage)

Example 7 with Producer

use of kafka.javaapi.producer.Producer in project avro-kafka-storm by ransilberman.

From the class MainTest, method testGenericRecord.

/**
 * Builds an {@code LPEvent} generic record with a nested {@code pline} sub-record, encodes it with
 * Avro's binary encoder and sends the bytes to Kafka as a single {@link Message}.
 *
 * <p>The schema is parsed from the {@code LPEvent.avsc} classpath resource. The {@code subrecord}
 * field is a union, so its member schemas are indexed by name to find the {@code pline} branch.
 *
 * @throws IOException if the schema cannot be parsed or the record cannot be encoded
 * @throws InterruptedException declared by the test signature; nothing in this method is seen to throw it
 */
@Test
public void testGenericRecord() throws IOException, InterruptedException {
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");

    // Index the union branches of "subrecord" by schema name so the "pline" branch can be looked up.
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);

    // Serialize the record to Avro binary; the encoder must be flushed before the bytes are read.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    writer.write(datum, encoder);
    encoder.flush();
    out.close();
    Message message = new Message(out.toByteArray());

    Properties props = new Properties();
    props.put("zk.connect", zkConnection);
    Producer<Message, Message> producer = new kafka.javaapi.producer.Producer<Message, Message>(new ProducerConfig(props));
    try {
        producer.send(new ProducerData<Message, Message>(topic, message));
    } finally {
        // Release the producer even if the send fails, so the test does not leak it.
        producer.close();
    }
}
Also used : Message(kafka.message.Message) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) Producer(kafka.javaapi.producer.Producer) Encoder(org.apache.avro.io.Encoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

Properties (java.util.Properties)7 Producer (kafka.javaapi.producer.Producer)7 ProducerConfig (kafka.producer.ProducerConfig)7 IOException (java.io.IOException)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 GenericRecord (org.apache.avro.generic.GenericRecord)4 KeyedMessage (kafka.producer.KeyedMessage)3 Schema (org.apache.avro.Schema)3 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)3 Test (org.junit.Test)3 Closeable (java.io.Closeable)2 File (java.io.File)2 SQLException (java.sql.SQLException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Message (kafka.message.Message)2 BinaryEncoder (org.apache.avro.io.BinaryEncoder)2 Encoder (org.apache.avro.io.Encoder)2 EncoderFactory (org.apache.avro.io.EncoderFactory)2 ArchiveException (org.apache.commons.compress.archivers.ArchiveException)2