
Example 1 with MessageAndMetadata

Use of kafka.message.MessageAndMetadata in project druid by druid-io.

The class KafkaLookupExtractorFactory, method start:

@Override
public boolean start() {
    synchronized (started) {
        if (started.get()) {
            LOG.warn("Already started, not starting again");
            return started.get();
        }
        if (executorService.isShutdown()) {
            LOG.warn("Already shut down, not starting again");
            return false;
        }
        final Properties kafkaProperties = new Properties();
        kafkaProperties.putAll(getKafkaProperties());
        if (kafkaProperties.containsKey("group.id")) {
            throw new IAE("Cannot set kafka property [group.id]. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty("group.id"));
        }
        if (kafkaProperties.containsKey("auto.offset.reset")) {
            throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
        }
        Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
        kafkaProperties.setProperty("group.id", factoryId);
        final String topic = getKafkaTopic();
        LOG.debug("About to listen to topic [%s] with group.id [%s]", topic, factoryId);
        cacheHandler = cacheManager.createCache();
        final Map<String, String> map = cacheHandler.getCache();
        mapRef.set(map);
        // Enable publish-subscribe
        kafkaProperties.setProperty("auto.offset.reset", "smallest");
        final CountDownLatch startingReads = new CountDownLatch(1);
        final ListenableFuture<?> future = executorService.submit(new Runnable() {

            @Override
            public void run() {
                while (!executorService.isShutdown()) {
                    consumerConnector = buildConnector(kafkaProperties);
                    try {
                        if (executorService.isShutdown()) {
                            break;
                        }
                        final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
                        if (streams == null || streams.isEmpty()) {
                            throw new IAE("Topic [%s] had no streams", topic);
                        }
                        if (streams.size() > 1) {
                            throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
                        }
                        final KafkaStream<String, String> kafkaStream = streams.get(0);
                        startingReads.countDown();
                        for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
                            final String key = messageAndMetadata.key();
                            final String message = messageAndMetadata.message();
                            if (key == null || message == null) {
                                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                                continue;
                            }
                            doubleEventCount.incrementAndGet();
                            map.put(key, message);
                            doubleEventCount.incrementAndGet();
                            LOG.trace("Placed key[%s] val[%s]", key, message);
                        }
                    } catch (Exception e) {
                        LOG.error(e, "Error reading stream for topic [%s]", topic);
                    } finally {
                        consumerConnector.shutdown();
                    }
                }
            }
        });
        Futures.addCallback(future, new FutureCallback<Object>() {

            @Override
            public void onSuccess(Object result) {
                LOG.debug("Success listening to [%s]", topic);
            }

            @Override
            public void onFailure(Throwable t) {
                if (t instanceof CancellationException) {
                    LOG.debug("Topic [%s] cancelled", topic);
                } else {
                    LOG.error(t, "Error in listening to [%s]", topic);
                }
            }
        }, MoreExecutors.sameThreadExecutor());
        this.future = future;
        final Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
                // Don't return until we have actually connected
                if (future.isDone()) {
                    future.get();
                } else {
                    if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
                        throw new TimeoutException("Failed to connect to kafka in sufficient time");
                    }
                }
            }
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            executorService.shutdown();
            if (!future.isDone() && !future.cancel(false)) {
                LOG.warn("Could not cancel kafka listening thread");
            }
            LOG.error(e, "Failed to start kafka extraction factory");
            cacheHandler.close();
            return false;
        }
        started.set(true);
        return true;
    }
}
Also used : MessageAndMetadata(kafka.message.MessageAndMetadata) Stopwatch(com.google.common.base.Stopwatch) KafkaStream(kafka.consumer.KafkaStream) Whitelist(kafka.consumer.Whitelist) Properties(java.util.Properties) List(java.util.List) IAE(io.druid.java.util.common.IAE) ISE(io.druid.java.util.common.ISE) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) CancellationException(java.util.concurrent.CancellationException) ExecutionException(java.util.concurrent.ExecutionException)
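
For orientation, the consumer plumbing this factory wraps reduces to a few calls. The sketch below is a hypothetical, self-contained tailer rather than Druid code: the ZooKeeper address and topic name are placeholder values, and it uses only the old high-level consumer API seen above (createJavaConsumerConnector, createMessageStreamsByFilter, and iteration over MessageAndMetadata).

import java.util.List;
import java.util.Properties;
import java.util.UUID;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.consumer.Whitelist;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

public class LookupTailer {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Hypothetical connection settings; adjust for your cluster.
        props.setProperty("zookeeper.connect", "localhost:2181");
        // A fresh, random group id so this consumer sees the full topic, as the factory does.
        props.setProperty("group.id", UUID.randomUUID().toString());
        // Start from the earliest offset so the whole topic is replayed.
        props.setProperty("auto.offset.reset", "smallest");

        ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
        try {
            StringDecoder decoder = new StringDecoder(new VerifiableProperties());
            // Request a single stream over the topic whitelist, decoding keys and values as strings.
            List<KafkaStream<String, String>> streams = connector.createMessageStreamsByFilter(
                    new Whitelist("lookups"), 1, decoder, decoder);
            // With one requested stream there is exactly one entry to iterate.
            for (MessageAndMetadata<String, String> mam : streams.get(0)) {
                System.out.printf("key=%s value=%s offset=%d%n", mam.key(), mam.message(), mam.offset());
            }
        } finally {
            connector.shutdown();
        }
    }
}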

Example 2 with MessageAndMetadata

Use of kafka.message.MessageAndMetadata in project flink by apache.

The class KafkaConsumerTestBase, method readTopicToList:

// ------------------------------------------------------------------------
//  Debugging utilities
// ------------------------------------------------------------------------
/**
 * Read a topic into a list, using only Kafka consumer code.
 */
private static List<MessageAndMetadata<byte[], byte[]>> readTopicToList(String topicName, ConsumerConfig config, final int stopAfter) {
    ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(config);
    // we request only one stream per consumer instance. Kafka will make sure that each consumer group
    // will see each message only once.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumerConnector.createMessageStreams(topicCountMap);
    if (streams.size() != 1) {
        throw new RuntimeException("Expected only one message stream but got " + streams.size());
    }
    List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
    if (kafkaStreams == null) {
        throw new RuntimeException("Requested stream not available. Available streams: " + streams.toString());
    }
    if (kafkaStreams.size() != 1) {
        throw new RuntimeException("Requested 1 stream from Kafka, bot got " + kafkaStreams.size() + " streams");
    }
    LOG.info("Opening Consumer instance for topic '{}' on group '{}'", topicName, config.groupId());
    ConsumerIterator<byte[], byte[]> iteratorToRead = kafkaStreams.get(0).iterator();
    List<MessageAndMetadata<byte[], byte[]>> result = new ArrayList<>();
    int read = 0;
    while (iteratorToRead.hasNext()) {
        read++;
        result.add(iteratorToRead.next());
        if (read == stopAfter) {
            LOG.info("Read " + read + " elements");
            return result;
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) MessageAndMetadata(kafka.message.MessageAndMetadata) ConsumerConnector(kafka.javaapi.consumer.ConsumerConnector) KafkaStream(kafka.consumer.KafkaStream) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint)
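
A hypothetical caller of this helper, assuming it lives in the same test class, could look like the following; the ZooKeeper address and topic name are placeholders. Note that ConsumerIterator.hasNext() blocks until data arrives unless consumer.timeout.ms is set (in which case it throws ConsumerTimeoutException once the timeout elapses), so stopAfter should match the number of records the test expects.

Properties props = new Properties();
props.setProperty("zookeeper.connect", "localhost:2181");      // placeholder address
props.setProperty("group.id", "reader-" + UUID.randomUUID());  // fresh group so offsets start clean
props.setProperty("auto.offset.reset", "smallest");            // read the topic from the beginning

// Read exactly 100 records; the iterator blocks until they have all arrived.
List<MessageAndMetadata<byte[], byte[]>> records =
        readTopicToList("test-topic", new ConsumerConfig(props), 100);
for (MessageAndMetadata<byte[], byte[]> mam : records) {
    LOG.info("partition={} offset={}", mam.partition(), mam.offset());
}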

Example 3 with MessageAndMetadata

Use of kafka.message.MessageAndMetadata in project apex-malhar by apache.

The class HighlevelKafkaConsumer, method start:

@Override
public void start() {
    super.start();
    // Share the other properties among all connectors, but set the zookeeper address per cluster, since different clusters use different zookeepers
    for (String cluster : zookeeperMap.keySet()) {
        // create high level consumer for every cluster
        Properties config = new Properties();
        config.putAll(consumerConfig);
        config.setProperty("zookeeper.connect", zookeeperMap.get(cluster).iterator().next());
        // creating the consumer connector starts a daemon thread that monitors metadata changes;
        // we don't want to start that thread until the operator is activated
        standardConsumer.put(cluster, kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(config)));
    }
    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
    if (numStream == null || numStream.size() == 0) {
        if (numStream == null) {
            numStream = new HashMap<String, Integer>();
        }
        // get metadata from kafka and initialize streams accordingly
        for (Entry<String, List<PartitionMetadata>> e : KafkaMetadataUtil.getPartitionsForTopic(brokers, topic).entrySet()) {
            numStream.put(e.getKey(), e.getValue().size());
        }
    }
    int totalNumStream = 0;
    for (int delta : numStream.values()) {
        totalNumStream += delta;
    }
    // start $totalNumStream anonymous threads to consume the data from all clusters
    if (totalNumStream <= 0) {
        logger.warn("No more job needed to consume data ");
        return;
    }
    consumerThreadExecutor = Executors.newFixedThreadPool(totalNumStream);
    for (final Entry<String, Integer> e : numStream.entrySet()) {
        int realNumStream = e.getValue();
        topicCountMap.put(topic, realNumStream);
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = standardConsumer.get(e.getKey()).createMessageStreams(topicCountMap);
        for (final KafkaStream<byte[], byte[]> stream : consumerMap.get(topic)) {
            consumerThreadExecutor.submit(new Runnable() {

                KafkaPartition kp = new KafkaPartition(e.getKey(), topic, -1);

                public void run() {
                    ConsumerIterator<byte[], byte[]> itr = stream.iterator();
                    logger.debug("Thread {} starts consuming message...", Thread.currentThread().getName());
                    while (itr.hasNext() && isAlive) {
                        MessageAndMetadata<byte[], byte[]> mam = itr.next();
                        try {
                            kp.setPartitionId(mam.partition());
                            putMessage(kp, new Message(mam.message()), mam.offset());
                        } catch (InterruptedException e) {
                            logger.error("Message Enqueue has been interrupted", e);
                        }
                    }
                    logger.debug("Thread {} stops consuming message...", Thread.currentThread().getName());
                }
            });
        }
    }
}
Also used : Message(kafka.message.Message) HashMap(java.util.HashMap) MessageAndMetadata(kafka.message.MessageAndMetadata) Properties(java.util.Properties) ConsumerIterator(kafka.consumer.ConsumerIterator) ConsumerConfig(kafka.consumer.ConsumerConfig) List(java.util.List)
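
The fan-out above rests on the old consumer's topicCountMap contract: asking createMessageStreams for N streams on a topic yields N KafkaStream handles that Kafka balances across the topic's partitions. Below is a minimal sketch of that contract in isolation, with placeholder topic, group, and ZooKeeper names rather than the operator's configuration.

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

public class FanOutConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("zookeeper.connect", "localhost:2181"); // placeholder
        props.setProperty("group.id", "fan-out-demo");            // placeholder

        ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
        // Ask Kafka for 3 streams on one topic, ideally one per partition.
        Map<String, Integer> topicCountMap = Collections.singletonMap("events", 3);
        Map<String, List<KafkaStream<byte[], byte[]>>> streams = connector.createMessageStreams(topicCountMap);

        // One consumer thread per returned stream, mirroring the operator above.
        ExecutorService pool = Executors.newFixedThreadPool(3);
        for (final KafkaStream<byte[], byte[]> stream : streams.get("events")) {
            pool.submit(new Runnable() {
                @Override
                public void run() {
                    for (MessageAndMetadata<byte[], byte[]> mam : stream) {
                        // partition() and offset() carry the metadata the operator records per message.
                        System.out.printf("partition=%d offset=%d bytes=%d%n",
                                mam.partition(), mam.offset(), mam.message().length);
                    }
                }
            });
        }
    }
}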

Example 4 with MessageAndMetadata

Use of kafka.message.MessageAndMetadata in project graylog2-server by Graylog2.

The class KafkaTransport, method doLaunch:

@Override
public void doLaunch(final MessageInput input) throws MisfireException {
    serverStatus.awaitRunning(new Runnable() {

        @Override
        public void run() {
            lifecycleStateChange(Lifecycle.RUNNING);
        }
    });
    // listen for lifecycle changes
    serverEventBus.register(this);
    final Properties props = new Properties();
    props.put("group.id", GROUP_ID);
    props.put("client.id", "gl2-" + nodeId + "-" + input.getId());
    props.put("fetch.min.bytes", String.valueOf(configuration.getInt(CK_FETCH_MIN_BYTES)));
    props.put("fetch.wait.max.ms", String.valueOf(configuration.getInt(CK_FETCH_WAIT_MAX)));
    props.put("zookeeper.connect", configuration.getString(CK_ZOOKEEPER));
    // Default auto commit interval is 60 seconds. Reduce to 1 second to minimize message duplication
    // if something breaks.
    props.put("auto.commit.interval.ms", "1000");
    // Set a consumer timeout to avoid blocking on the consumer iterator.
    props.put("consumer.timeout.ms", "1000");
    final int numThreads = configuration.getInt(CK_THREADS);
    final ConsumerConfig consumerConfig = new ConsumerConfig(props);
    cc = Consumer.createJavaConsumerConnector(consumerConfig);
    final TopicFilter filter = new Whitelist(configuration.getString(CK_TOPIC_FILTER));
    final List<KafkaStream<byte[], byte[]>> streams = cc.createMessageStreamsByFilter(filter, numThreads);
    final ExecutorService executor = executorService(numThreads);
    // this is being used during shutdown to first stop all submitted jobs before committing the offsets back to zookeeper
    // and then shutting down the connection.
    // this is to avoid yanking away the connection from the consumer runnables
    stopLatch = new CountDownLatch(streams.size());
    for (final KafkaStream<byte[], byte[]> stream : streams) {
        executor.submit(new Runnable() {

            @Override
            public void run() {
                final ConsumerIterator<byte[], byte[]> consumerIterator = stream.iterator();
                boolean retry;
                do {
                    retry = false;
                    try {
                        // noinspection WhileLoopReplaceableByForEach
                        while (consumerIterator.hasNext()) {
                            if (paused) {
                                // we try not to spin here, so we wait until the lifecycle goes back to running.
                                LOG.debug("Message processing is paused, blocking until message processing is turned back on.");
                                Uninterruptibles.awaitUninterruptibly(pausedLatch);
                            }
                            // check for being stopped before actually getting the message, otherwise we could end up losing that message
                            if (stopped) {
                                break;
                            }
                            if (isThrottled()) {
                                blockUntilUnthrottled();
                            }
                            // process the message, this will immediately mark the message as having been processed. this gets tricky
                            // if we get an exception about processing it down below.
                            final MessageAndMetadata<byte[], byte[]> message = consumerIterator.next();
                            final byte[] bytes = message.message();
                            // it is possible that the message is null
                            if (bytes == null) {
                                continue;
                            }
                            totalBytesRead.addAndGet(bytes.length);
                            lastSecBytesReadTmp.addAndGet(bytes.length);
                            final RawMessage rawMessage = new RawMessage(bytes);
                            // TODO implement throttling
                            input.processRawMessage(rawMessage);
                        }
                    } catch (ConsumerTimeoutException e) {
                        // Happens when there is nothing to consume, retry to check again.
                        retry = true;
                    } catch (Exception e) {
                        LOG.error("Kafka consumer error, stopping consumer thread.", e);
                    }
                } while (retry && !stopped);
                // explicitly commit our offsets when stopping.
                // this might trigger a couple of times, but it won't hurt
                cc.commitOffsets();
                stopLatch.countDown();
            }
        });
    }
    scheduler.scheduleAtFixedRate(new Runnable() {

        @Override
        public void run() {
            lastSecBytesRead.set(lastSecBytesReadTmp.getAndSet(0));
        }
    }, 1, 1, TimeUnit.SECONDS);
}
Also used : TopicFilter(kafka.consumer.TopicFilter) MessageAndMetadata(kafka.message.MessageAndMetadata) KafkaStream(kafka.consumer.KafkaStream) Whitelist(kafka.consumer.Whitelist) ConsumerIterator(kafka.consumer.ConsumerIterator) ConsumerConfig(kafka.consumer.ConsumerConfig) ConsumerTimeoutException(kafka.consumer.ConsumerTimeoutException) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) ExecutorService(java.util.concurrent.ExecutorService) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) InstrumentedExecutorService(com.codahale.metrics.InstrumentedExecutorService) MisfireException(org.graylog2.plugin.inputs.MisfireException) RawMessage(org.graylog2.plugin.journal.RawMessage)
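
The retry flag hinges on "consumer.timeout.ms": with that property set, hasNext() throws ConsumerTimeoutException instead of blocking forever, giving the loop a chance to observe the stop flag. A condensed sketch of just that idiom follows; handle() and the stopped flag are hypothetical stand-ins, not Graylog code.

// Poll until asked to stop; only the exception-driven retry is the point here.
static void consumeUntilStopped(ConsumerIterator<byte[], byte[]> iterator, AtomicBoolean stopped) {
    boolean retry;
    do {
        retry = false;
        try {
            while (iterator.hasNext()) {
                final MessageAndMetadata<byte[], byte[]> message = iterator.next();
                if (message.message() != null) {
                    handle(message.message()); // hypothetical per-message callback
                }
            }
        } catch (ConsumerTimeoutException e) {
            // Thrown because "consumer.timeout.ms" is set; nothing arrived within the timeout.
            retry = true;
        }
    } while (retry && !stopped.get());
}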

Example 5 with MessageAndMetadata

Use of kafka.message.MessageAndMetadata in project incubator-atlas by apache.

The class KafkaConsumer, method getNext:

// ----- AbstractNotificationConsumer ------------------------------------
@Override
public String getNext() {
    MessageAndMetadata message = iterator.next();
    LOG.debug("Read message: conumerId: {}, topic - {}, partition - {}, offset - {}, message - {}", consumerId, message.topic(), message.partition(), message.offset(), message.message());
    lastSeenOffset = message.offset();
    return (String) message.message();
}
Also used : MessageAndMetadata(kafka.message.MessageAndMetadata)
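
The raw MessageAndMetadata type and the cast to String imply the underlying stream was created with string decoders. A hypothetical, fully typed equivalent would avoid both the raw type and the cast; the topic name is a placeholder and connector stands for an already-built ConsumerConnector.

StringDecoder decoder = new StringDecoder(new VerifiableProperties());
// Request one stream over a placeholder topic, decoding keys and values as strings.
Map<String, List<KafkaStream<String, String>>> streams =
        connector.createMessageStreams(Collections.singletonMap("notifications", 1), decoder, decoder);
ConsumerIterator<String, String> iterator = streams.get("notifications").get(0).iterator();

MessageAndMetadata<String, String> message = iterator.next();
long lastSeenOffset = message.offset();
String payload = message.message(); // already a String; no cast required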

Aggregations

MessageAndMetadata (kafka.message.MessageAndMetadata): 6 usages
List (java.util.List): 3 usages
Properties (java.util.Properties): 3 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 3 usages
KafkaStream (kafka.consumer.KafkaStream): 3 usages
HashMap (java.util.HashMap): 2 usages
TimeoutException (java.util.concurrent.TimeoutException): 2 usages
ConsumerConfig (kafka.consumer.ConsumerConfig): 2 usages
ConsumerIterator (kafka.consumer.ConsumerIterator): 2 usages
Whitelist (kafka.consumer.Whitelist): 2 usages
InstrumentedExecutorService (com.codahale.metrics.InstrumentedExecutorService): 1 usage
Stopwatch (com.google.common.base.Stopwatch): 1 usage
IAE (io.druid.java.util.common.IAE): 1 usage
ISE (io.druid.java.util.common.ISE): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Map (java.util.Map): 1 usage
UUID (java.util.UUID): 1 usage
CancellationException (java.util.concurrent.CancellationException): 1 usage
ExecutionException (java.util.concurrent.ExecutionException): 1 usage
ExecutorService (java.util.concurrent.ExecutorService): 1 usage