
Example 1 with KeyedMessage

Use of kafka.producer.KeyedMessage in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method pushAvroIntoKafka.

public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                // Key each message with the current timestamp; the payload is the serialized Avro record
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
Also used : EncoderFactory(org.apache.avro.io.EncoderFactory) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) Producer(kafka.javaapi.producer.Producer) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KeyedMessage(kafka.producer.KeyedMessage) File(java.io.File)
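
For reference, a minimal standalone sketch of the same send path against the old kafka.javaapi.producer API used throughout these examples, stripped of the Avro plumbing. The broker address, topic, key, and value below are placeholders, not values from the project:

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class MinimalKeyedMessageSend {
    public static void main(String[] args) {
        Properties properties = new Properties();
        // The same three settings the pinot helper uses
        properties.put("metadata.broker.list", "localhost:9092");
        properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
        properties.put("request.required.acks", "1");
        Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(new ProducerConfig(properties));
        try {
            // Three-argument constructor: topic, key, payload
            KeyedMessage<byte[], byte[]> message = new KeyedMessage<byte[], byte[]>(
                "someTopic", "someKey".getBytes(), "someValue".getBytes());
            producer.send(message);
        } finally {
            producer.close();
        }
    }
}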

Example 2 with KeyedMessage

Use of kafka.producer.KeyedMessage in project jdepth by Crab2died.

From the class Kafka, method main.

public static void main(String... args) {
    String brokerList = "xjtz234:9091,xjtz234:9092,xjtz234:9093";
    Properties props = new Properties();
    props.put("metadata.broker.list", brokerList);
    /*
     * request.required.acks:
     * 0  = do not wait for any acknowledgement
     * 1  = wait until at least one server has acknowledged the write
     * -1 = wait until all servers have acknowledged, i.e. a synchronous write
     */
    props.put("request.required.acks", "1");
    /*
     * producer.type: whether sends are synchronous or asynchronous
     * sync:  synchronous (the default)
     * async: asynchronous
     */
    props.put("producer.type", "async");
    /*
     * serializer.class: the message serializer
     * option:  kafka.serializer.StringEncoder
     * default: kafka.serializer.DefaultEncoder
     */
    props.put("serializer.class", "kafka.serializer.StringEncoder");
    /*
     * partitioner.class: partitions messages by key
     * default: kafka.producer.DefaultPartitioner ==> partitions by the hash of the key
     * option:  kafka.serializer.ByteArrayPartitioner ==> hashes the key's byte-array form
     */
    // props.put("partitioner.class", "com.github.jms.kafka.KafkaProducerPartitioner");
    // (a sketch of such a custom partitioner follows this example)
    // message.send.max.retries: how many times a failed send is retried
    props.put("message.send.max.retries", "3");
    // batch.num.messages: messages sent per batch in async mode
    props.put("batch.num.messages", "200");
    // send.buffer.bytes: socket send buffer size in bytes
    props.put("send.buffer.bytes", "102400");
    // Build the ProducerConfig from the properties
    ProducerConfig config = new ProducerConfig(props);
    // Create the Producer
    final Producer<String, String> producer = new Producer<>(config);
    int numThreads = 10;
    ExecutorService pool = Executors.newFixedThreadPool(numThreads);
    CountDownLatch latch = new CountDownLatch(5);
    for (int i = 0; i < 5; i++) {
        int finalI = i;
        // Submit plain Runnables; the pool supplies its own worker threads,
        // so wrapping each task in a Thread object is unnecessary
        pool.submit(() -> {
            // Send one message per task
            KeyedMessage<String, String> message = new KeyedMessage<>("topic1", "key1" + finalI, "v1");
            producer.send(message);
            System.out.println("Sent message: " + message);
            latch.countDown();
        });
    }
    try {
        latch.await();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
    producer.close();
    // Shut the pool down so the JVM can exit
    pool.shutdown();
}
Also used : Producer(kafka.javaapi.producer.Producer) ExecutorService(java.util.concurrent.ExecutorService) ProducerConfig(kafka.producer.ProducerConfig) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) KeyedMessage(kafka.producer.KeyedMessage)
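
The commented-out partitioner.class property above points at a custom partitioner. A minimal sketch of what such a class could look like against the old producer API; the class name is taken from the commented-out property, and the hashing scheme is illustrative, not the project's actual implementation:

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

public class KafkaProducerPartitioner implements Partitioner {

    // The old producer instantiates partitioners reflectively and passes
    // the producer configuration through this constructor
    public KafkaProducerPartitioner(VerifiableProperties props) {
    }

    @Override
    public int partition(Object key, int numPartitions) {
        // Illustrative scheme: non-negative hash of the key, modulo the partition count
        // (masking with Integer.MAX_VALUE avoids the Math.abs(Integer.MIN_VALUE) pitfall)
        return key == null ? 0 : (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}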

Example 3 with KeyedMessage

Use of kafka.producer.KeyedMessage in project apex-malhar by apache.

From the class ApplicationTest, method writeToTopic.

private void writeToTopic() {
    KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
    ku.createTopic(TOPIC);
    for (String line : lines) {
        // Two-argument constructor: no key, so the producer chooses the partition
        KeyedMessage<String, String> kMsg = new KeyedMessage<>(TOPIC, line);
        ku.sendMessages(kMsg);
    }
    LOG.debug("Sent messages to topic {}", TOPIC);
}
Also used : KafkaUnit(info.batey.kafka.unit.KafkaUnit) KeyedMessage(kafka.producer.KeyedMessage)
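
For context, a sketch of how a test around this helper might be wired with the kafka-unit library. The ports, topic, and assertion are assumptions for illustration; KafkaUnitRule, createTopic, sendMessages, and readMessages are the library's documented entry points:

import java.util.List;

import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;

import info.batey.kafka.unit.KafkaUnit;
import info.batey.kafka.unit.KafkaUnitRule;
import kafka.producer.KeyedMessage;

public class KafkaUnitRoundTripTest {

    // Assumed ports: the rule starts an embedded ZooKeeper and broker around each test
    @Rule
    public KafkaUnitRule kafkaUnitRule = new KafkaUnitRule(2181, 9092);

    @Test
    public void roundTrip() throws Exception {
        KafkaUnit ku = kafkaUnitRule.getKafkaUnit();
        ku.createTopic("testTopic");
        ku.sendMessages(new KeyedMessage<>("testTopic", "hello"));
        // readMessages waits for the expected number of messages before returning
        List<String> messages = ku.readMessages("testTopic", 1);
        Assert.assertEquals("hello", messages.get(0));
    }
}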

Example 4 with KeyedMessage

Use of kafka.producer.KeyedMessage in project pinot by linkedin.

From the class MeetupRsvpStream, method run.

public void run() {
    try {
        final ClientEndpointConfig cec = ClientEndpointConfig.Builder.create().build();
        final KafkaJSONMessageDecoder decoder = new KafkaJSONMessageDecoder();
        decoder.init(null, schema, null);
        client = ClientManager.createClient();
        client.connectToServer(new Endpoint() {

            @Override
            public void onOpen(Session session, EndpointConfig config) {
                try {
                    session.addMessageHandler(new MessageHandler.Whole<String>() {

                        @Override
                        public void onMessage(String message) {
                            try {
                                JSONObject messageJSON = new JSONObject(message);
                                JSONObject extracted = new JSONObject();
                                if (messageJSON.has("venue")) {
                                    JSONObject venue = messageJSON.getJSONObject("venue");
                                    extracted.put("venue_name", venue.getString("venue_name"));
                                }
                                if (messageJSON.has("event")) {
                                    JSONObject event = messageJSON.getJSONObject("event");
                                    extracted.put("event_name", event.getString("event_name"));
                                    extracted.put("event_id", event.getString("event_id"));
                                    extracted.put("event_time", event.getLong("time"));
                                }
                                if (messageJSON.has("group")) {
                                    JSONObject group = messageJSON.getJSONObject("group");
                                    extracted.put("group_city", group.getString("group_city"));
                                    extracted.put("group_country", group.getString("group_country"));
                                    extracted.put("group_id", group.getLong("group_id"));
                                    extracted.put("group_name", group.getString("group_name"));
                                }
                                extracted.put("mtime", messageJSON.getLong("mtime"));
                                extracted.put("rsvp_count", 1);
                                if (keepPublishing) {
                                    KeyedMessage<String, byte[]> data = new KeyedMessage<String, byte[]>("meetupRSVPEvents", extracted.toString().getBytes("UTF-8"));
                                    producer.send(data);
                                }
                            } catch (Exception e) {
                                // LOGGER.error("error processing raw event ", e);
                            }
                        }
                    });
                    session.getBasicRemote().sendText("");
                } catch (IOException e) {
                    // LOGGER.error("found an event where data did not have all the fields, don't care about for quickstart");
                }
            }
        }, cec, new URI("ws://stream.meetup.com/2/rsvps"));
    } catch (Exception e) {
        // e.printStackTrace();
    }
}
Also used : KafkaJSONMessageDecoder(com.linkedin.pinot.core.realtime.impl.kafka.KafkaJSONMessageDecoder) IOException(java.io.IOException) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) Endpoint(javax.websocket.Endpoint) JSONObject(org.json.JSONObject) ClientEndpointConfig(javax.websocket.ClientEndpointConfig) KeyedMessage(kafka.producer.KeyedMessage) EndpointConfig(javax.websocket.EndpointConfig) ClientEndpointConfig(javax.websocket.ClientEndpointConfig) Session(javax.websocket.Session)
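
The handler sends through a producer field initialized elsewhere in MeetupRsvpStream. A plausible setup consistent with the Producer<String, byte[]> type used above; this is an assumption, not the project's exact code, and the broker address is a placeholder:

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.ProducerConfig;

final class MeetupProducerSetup {

    // Assumed initialization for the producer field used by the handler above
    static Producer<String, byte[]> createProducer() {
        Properties properties = new Properties();
        properties.put("metadata.broker.list", "localhost:9092");
        // byte[] payloads pass through the DefaultEncoder unchanged;
        // String keys need the StringEncoder
        properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
        properties.put("key.serializer.class", "kafka.serializer.StringEncoder");
        properties.put("request.required.acks", "1");
        return new Producer<String, byte[]>(new ProducerConfig(properties));
    }
}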

Example 5 with KeyedMessage

Use of kafka.producer.KeyedMessage in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method pushRandomAvroIntoKafka.

public static void pushRandomAvroIntoKafka(File avroFile, String kafkaBroker, String kafkaTopic, int rowCount, Random random) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<String, byte[]> producer = new Producer<String, byte[]>(producerConfig);
    try {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
        DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
        BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
        Schema avroSchema = reader.getSchema();
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
        int recordCount = 0;
        int rowsRemaining = rowCount;
        int messagesInThisBatch = 0;
        while (rowsRemaining > 0) {
            int rowsInThisBatch = Math.min(rowsRemaining, MAX_MESSAGES_PER_BATCH);
            List<KeyedMessage<String, byte[]>> messagesToWrite = new ArrayList<KeyedMessage<String, byte[]>>(rowsInThisBatch);
            GenericRecord genericRecord = new GenericData.Record(avroSchema);
            for (int i = 0; i < rowsInThisBatch; ++i) {
                generateRandomRecord(genericRecord, avroSchema, random);
                outputStream.reset();
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<String, byte[]> data = new KeyedMessage<String, byte[]>(kafkaTopic, bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                        // Throttle: pause between full batches
                        Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                producer.send(messagesToWrite);
                // Reset the counter so a partial batch does not shrink the next one
                messagesInThisBatch = 0;
            }
            //        System.out.println("rowsRemaining = " + rowsRemaining);
            rowsRemaining -= rowsInThisBatch;
        }
        outputStream.close();
        reader.close();
        LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic);
        int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
        LOGGER.info("Total records written so far " + totalRecordCount);
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
Also used : EncoderFactory(org.apache.avro.io.EncoderFactory) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) Producer(kafka.javaapi.producer.Producer) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KeyedMessage(kafka.producer.KeyedMessage)
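
Both pinot helpers share the same serialization core: reset the stream, write the record through the BinaryEncoder, flush, and take the bytes as the Kafka payload. A self-contained sketch of just that routine (the schema below is a stand-in, not one of the project's schemas):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AvroToBytes {
    public static void main(String[] args) throws IOException {
        // Stand-in schema with a single string field
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Rsvp\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("name", "example");

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
        BinaryEncoder binaryEncoder = EncoderFactory.get().directBinaryEncoder(outputStream, null);
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);

        outputStream.reset();                       // reuse the buffer between records
        datumWriter.write(record, binaryEncoder);   // serialize the record
        binaryEncoder.flush();                      // push buffered bytes into the stream
        byte[] bytes = outputStream.toByteArray();  // this is the Kafka message payload
        System.out.println("Serialized " + bytes.length + " bytes");
    }
}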

Aggregations

KeyedMessage (kafka.producer.KeyedMessage): 8 usages
Properties (java.util.Properties): 5 usages
IOException (java.io.IOException): 4 usages
Producer (kafka.javaapi.producer.Producer): 4 usages
ProducerConfig (kafka.producer.ProducerConfig): 4 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 3 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2 usages
File (java.io.File): 2 usages
SQLException (java.sql.SQLException): 2 usages
ArrayList (java.util.ArrayList): 2 usages
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 2 usages
BinaryEncoder (org.apache.avro.io.BinaryEncoder): 2 usages
EncoderFactory (org.apache.avro.io.EncoderFactory): 2 usages
ArchiveException (org.apache.commons.compress.archivers.ArchiveException): 2 usages
JSONException (org.json.JSONException): 2 usages
ImmutableList (com.google.common.collect.ImmutableList): 1 usage
KafkaJSONMessageDecoder (com.linkedin.pinot.core.realtime.impl.kafka.KafkaJSONMessageDecoder): 1 usage
KafkaUnit (info.batey.kafka.unit.KafkaUnit): 1 usage
URI (java.net.URI): 1 usage
URISyntaxException (java.net.URISyntaxException): 1 usage