Example 1 with MessageAndMetadata

use of kafka.message.MessageAndMetadata in project druid by druid-io.

the class KafkaLookupExtractorFactory method start.

/**
 * Starts listening to the configured Kafka topic and copies its key/message pairs into a cache map.
 *
 * <p>NOTE(review): this excerpt is incomplete. Closing braces and several statements (for example
 * the bodies of a few {@code if} blocks and of the {@code finally} block) are missing, so it does
 * not compile as shown and the flow described in the comments below is only the best reading of
 * what is visible. Confirm against the full upstream source.
 *
 * @return {@code started.get()} if already started; {@code false} if the executor is already shut
 *     down or start-up fails with an interrupt, an execution error or a timeout; {@code true}
 *     otherwise
 */
public boolean start() {
    // All start-up work happens while holding the monitor of the started flag.
    synchronized (started) {
        if (started.get()) {
            LOG.warn("Already started, not starting again");
            return started.get();
        if (executorService.isShutdown()) {
            LOG.warn("Already shut down, not starting again");
            return false;
        // NOTE(review): the properties object is empty at this point in the excerpt; presumably a
        // statement copying the user-supplied Kafka properties into it is missing - confirm.
        final Properties kafkaProperties = new Properties();
        // NOTE(review): the property name below is an empty string; the error text ("randomly
        // generated for you") suggests the real key was lost from the excerpt - confirm.
        if (kafkaProperties.containsKey("")) {
            throw new IAE("Cannot set kafka property []. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty(""));
        // auto.offset.reset may not be supplied by the user because it is forced further down.
        if (kafkaProperties.containsKey("auto.offset.reset")) {
            throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
        // zookeeper.connect is mandatory.
        Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
        // factoryId becomes the value of the (empty-named, see note above) property.
        kafkaProperties.setProperty("", factoryId);
        final String topic = getKafkaTopic();
        LOG.debug("About to listen to topic [%s] with [%s]", topic, factoryId);
        // Messages read from Kafka are written into the map obtained from a newly created cache.
        cacheHandler = cacheManager.createCache();
        final Map<String, String> map = cacheHandler.getCache();
        // Enable publish-subscribe
        kafkaProperties.setProperty("auto.offset.reset", "smallest");
        // Latch the caller waits on further down.
        // NOTE(review): no countDown() call is visible in this excerpt; presumably it was lost.
        final CountDownLatch startingReads = new CountDownLatch(1);
        // Reader task: loops on executorService until that executor is shut down.
        final ListenableFuture<?> future = executorService.submit(new Runnable() {

            public void run() {
                while (!executorService.isShutdown()) {
                    // A connector is built on every pass of the outer loop.
                    consumerConnector = buildConnector(kafkaProperties);
                    try {
                        // NOTE(review): the body of this if is missing in the excerpt.
                        if (executorService.isShutdown()) {
                        // Ask for a single stream matching exactly this topic (the name is regex-quoted).
                        final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
                        // Exactly one stream is expected; none, or more than one, is an error.
                        if (streams == null || streams.isEmpty()) {
                            throw new IAE("Topic [%s] had no streams", topic);
                        if (streams.size() > 1) {
                            throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
                        final KafkaStream<String, String> kafkaStream = streams.get(0);
                        // Copy every message of the stream into the cache map.
                        for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
                            final String key = messageAndMetadata.key();
                            final String message = messageAndMetadata.message();
                            // NOTE(review): as shown, a null key/message is logged but still reaches
                            // map.put below; presumably a continue statement is missing - confirm.
                            if (key == null || message == null) {
                                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                            map.put(key, message);
                            LOG.trace("Placed key[%s] val[%s]", key, message);
                    } catch (Exception e) {
                        // Failures are logged here, inside the while loop, rather than rethrown.
                        LOG.error(e, "Error reading stream for topic [%s]", topic);
                    // NOTE(review): the body of this finally block is missing in the excerpt.
                    } finally {
        // Log how the reader task ended; the callback is run through a same-thread executor.
        Futures.addCallback(future, new FutureCallback<Object>() {

            public void onSuccess(Object result) {
                LOG.debug("Success listening to [%s]", topic);

            public void onFailure(Throwable t) {
                // Cancellation is logged at debug level; any other failure is logged as an error.
                if (t instanceof CancellationException) {
                    LOG.debug("Topic [%s] cancelled", topic);
                } else {
                    LOG.error(t, "Error in listening to [%s]", topic);
        }, MoreExecutors.sameThreadExecutor());
        // Keep a handle on the reader task in the field.
        this.future = future;
        final Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            // Poll the latch in 100 ms steps. When connectTimeout is not positive the loop ends
            // after the first await.
            while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
                // Don't return until we have actually connected
                // NOTE(review): the body of this if is missing; the ExecutionException in the catch
                // below suggests the future's result is fetched here - confirm.
                if (future.isDone()) {
                } else {
                    // Give up once the elapsed time exceeds connectTimeout (compared in milliseconds).
                    if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
                        throw new TimeoutException("Failed to connect to kafka in sufficient time");
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            // Try to cancel a still-running reader (without interrupting it), then report failure.
            if (!future.isDone() && !future.cancel(false)) {
                LOG.warn("Could not cancel kafka listening thread");
            LOG.error(e, "Failed to start kafka extraction factory");
            return false;
        return true;
Example 2 with MessageAndMetadata

use of kafka.message.MessageAndMetadata in project flink by apache.

the class KafkaConsumerTestBase method readTopicToList.

// ------------------------------------------------------------------------
//  Debugging utilities
// ------------------------------------------------------------------------
/** Read topic to list, only using Kafka code. */
private static List<MessageAndMetadata<byte[], byte[]>> readTopicToList(String topicName, ConsumerConfig config, final int stopAfter) {
    ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(config);
    // we request only one stream per consumer instance. Kafka will make sure that each consumer group
    // will see each message only once.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumerConnector.createMessageStreams(topicCountMap);
    if (streams.size() != 1) {
        throw new RuntimeException("Expected only one message stream but got " + streams.size());
    List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
    if (kafkaStreams == null) {
        throw new RuntimeException("Requested stream not available. Available streams: " + streams.toString());
    if (kafkaStreams.size() != 1) {
        throw new RuntimeException("Requested 1 stream from Kafka, bot got " + kafkaStreams.size() + " streams");
    }"Opening Consumer instance for topic '{}' on group '{}'", topicName, config.groupId());
    ConsumerIterator<byte[], byte[]> iteratorToRead = kafkaStreams.get(0).iterator();
    List<MessageAndMetadata<byte[], byte[]>> result = new ArrayList<>();
    int read = 0;
    while (iteratorToRead.hasNext()) {
        if (read == stopAfter) {
  "Read " + read + " elements");
            return result;
    return result;
Example 3 with MessageAndMetadata

use of kafka.message.MessageAndMetadata in project apex-malhar by apache.

the class HighlevelKafkaConsumer method start.

/**
 * Creates one high-level consumer connector per cluster and submits one consuming task per stream.
 *
 * <p>NOTE(review): this excerpt is incomplete. Closing braces and at least one expression (see the
 * marked assignment inside {@code run()}) are missing, so it does not compile as shown. Confirm
 * details against the full upstream source.
 */
public void start() {
    // Share other properties among all connectors but set zookeepers respectively cause different cluster would use different zookeepers
    for (String cluster : zookeeperMap.keySet()) {
        // create high level consumer for every cluster
        // NOTE(review): only zookeeper.connect is set on this config in the excerpt; the comment
        // above suggests shared properties are copied in as well - presumably that line was lost.
        Properties config = new Properties();
        // Uses the first entry returned when iterating this cluster's zookeeper collection.
        config.setProperty("zookeeper.connect", zookeeperMap.get(cluster).iterator().next());
        // create consumer connector will start a daemon thread to monitor the metadata change
        // we want to start this thread until the operator is activated
        standardConsumer.put(cluster, kafka.consumer.Consumer.createJavaConsumerConnector(new ConsumerConfig(config)));
    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
    // No stream counts configured: fill numStream from Kafka metadata instead.
    if (numStream == null || numStream.size() == 0) {
        if (numStream == null) {
            numStream = new HashMap<String, Integer>();
        // get metadata from kafka and initialize streams accordingly
        for (Entry<String, List<PartitionMetadata>> e : KafkaMetadataUtil.getPartitionsForTopic(brokers, topic).entrySet()) {
            // Stream count for a key = number of PartitionMetadata entries reported for it.
            numStream.put(e.getKey(), e.getValue().size());
    // Sum the per-key stream counts to size the thread pool.
    int totalNumStream = 0;
    for (int delta : numStream.values()) {
        totalNumStream += delta;
    // start $totalNumStream anonymous threads to consume the data from all clusters
    // NOTE(review): as shown, execution continues after this warning, and
    // Executors.newFixedThreadPool rejects a non-positive size with IllegalArgumentException;
    // presumably a return statement is missing from the excerpt - confirm.
    if (totalNumStream <= 0) {
        logger.warn("No more job needed to consume data ");
    consumerThreadExecutor = Executors.newFixedThreadPool(totalNumStream);
    for (final Entry<String, Integer> e : numStream.entrySet()) {
        int realNumStream = e.getValue();
        // NOTE(review): new Integer(int) is deprecated since Java 9; Integer.valueOf or autoboxing
        // is the preferred form.
        topicCountMap.put(topic, new Integer(realNumStream));
        // Ask the connector stored under this entry's key for realNumStream streams of the topic.
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = standardConsumer.get(e.getKey()).createMessageStreams(topicCountMap);
        for (final KafkaStream<byte[], byte[]> stream : consumerMap.get(topic)) {
            // One task per stream.
            consumerThreadExecutor.submit(new Runnable() {

                // NOTE(review): partition id is passed as -1, presumably meaning "not tied to a
                // specific partition" - confirm against KafkaPartition.
                KafkaPartition kp = new KafkaPartition(e.getKey(), topic, -1);

                public void run() {
                    ConsumerIterator<byte[], byte[]> itr = stream.iterator();
                    logger.debug("Thread {} starts consuming message...", Thread.currentThread().getName());
                    // Keep pulling while the stream has messages and the consumer is still alive.
                    while (itr.hasNext() && isAlive) {
                        // NOTE(review): the right-hand side of this assignment is missing in the
                        // excerpt; presumably it takes the next element from itr - confirm.
                        MessageAndMetadata<byte[], byte[]> mam =;
                        try {
                            // Hand the payload and its offset over to the message queue.
                            putMessage(kp, new Message(mam.message()), mam.offset());
                        } catch (InterruptedException e) {
                            // NOTE(review): as shown, the interrupt is only logged; the thread's
                            // interrupt status is not restored.
                            logger.error("Message Enqueue has been interrupted", e);
                    logger.debug("Thread {} stops consuming message...", Thread.currentThread().getName());
Example 4 with MessageAndMetadata

use of kafka.message.MessageAndMetadata in project graylog2-server by Graylog2.

the class KafkaTransport method doLaunch.

/**
 * Configures a Kafka high-level consumer and submits one reader task per message stream.
 *
 * <p>Each reader wraps non-null message payloads in a {@code RawMessage}; what happens to that
 * object afterwards is not visible in this excerpt.
 *
 * <p>NOTE(review): this excerpt is incomplete. Closing braces, the bodies of several {@code if}
 * blocks and runnables, some property names and one assignment's right-hand side are missing, so
 * it does not compile as shown. Confirm details against the full upstream source.
 *
 * @param input the message input this transport is launched for; its id is used in a consumer
 *     property value
 * @throws MisfireException declared by the signature; no throw site is visible in this excerpt
 */
public void doLaunch(final MessageInput input) throws MisfireException {
    // NOTE(review): the body of this runnable is missing in the excerpt.
    serverStatus.awaitRunning(new Runnable() {

        public void run() {
    // listen for lifecycle changes
    final Properties props = new Properties();
    // NOTE(review): several property names below are empty strings; the real keys appear to have
    // been lost from the excerpt. Judging by the values they are presumably the consumer group id,
    // a per-node/per-input consumer or client id, and the fetch max-wait setting - confirm.
    props.put("", GROUP_ID);
    props.put("", "gl2-" + nodeId + "-" + input.getId());
    props.put("fetch.min.bytes", String.valueOf(configuration.getInt(CK_FETCH_MIN_BYTES)));
    props.put("", String.valueOf(configuration.getInt(CK_FETCH_WAIT_MAX)));
    props.put("zookeeper.connect", configuration.getString(CK_ZOOKEEPER));
    // Default auto commit interval is 60 seconds. Reduce to 1 second to minimize message duplication
    // if something breaks.
    // NOTE(review): key missing; per the comment above this is the auto-commit interval setting.
    props.put("", "1000");
    // Set a consumer timeout to avoid blocking on the consumer iterator.
    // NOTE(review): key missing; per the comment above this is the consumer timeout setting.
    props.put("", "1000");
    final int numThreads = configuration.getInt(CK_THREADS);
    final ConsumerConfig consumerConfig = new ConsumerConfig(props);
    cc = Consumer.createJavaConsumerConnector(consumerConfig);
    // Topics are selected by a whitelist built from the configured filter; the number of streams
    // requested and the executor are both sized by numThreads.
    final TopicFilter filter = new Whitelist(configuration.getString(CK_TOPIC_FILTER));
    final List<KafkaStream<byte[], byte[]>> streams = cc.createMessageStreamsByFilter(filter, numThreads);
    final ExecutorService executor = executorService(numThreads);
    // this is being used during shutdown to first stop all submitted jobs before committing the offsets back to zookeeper
    // and then shutting down the connection.
    // this is to avoid yanking away the connection from the consumer runnables
    stopLatch = new CountDownLatch(streams.size());
    for (final KafkaStream<byte[], byte[]> stream : streams) {
        // One reader task per stream.
        executor.submit(new Runnable() {

            public void run() {
                final ConsumerIterator<byte[], byte[]> consumerIterator = stream.iterator();
                // retry is set only when the consumer times out, so the do/while re-enters the
                // read loop after a timeout for as long as the transport is not stopped.
                boolean retry;
                do {
                    retry = false;
                    try {
                        // noinspection WhileLoopReplaceableByForEach
                        while (consumerIterator.hasNext()) {
                            // NOTE(review): the statement that actually blocks while paused is
                            // missing from this excerpt; only the log call is visible.
                            if (paused) {
                                // we try not to spin here, so we wait until the lifecycle goes back to running.
                                LOG.debug("Message processing is paused, blocking until message processing is turned back on.");
                            // check for being stopped before actually getting the message, otherwise we could end up losing that message
                            // NOTE(review): the bodies of the next two if statements are missing
                            // in this excerpt.
                            if (stopped) {
                            if (isThrottled()) {
                            // process the message, this will immediately mark the message as having been processed. this gets tricky
                            // if we get an exception about processing it down below.
                            // NOTE(review): the right-hand side of this assignment is missing;
                            // presumably it takes the next element from consumerIterator - confirm.
                            final MessageAndMetadata<byte[], byte[]> message =;
                            final byte[] bytes = message.message();
                            // it is possible that the message is null
                            // NOTE(review): the body of this if is missing in the excerpt.
                            if (bytes == null) {
                            final RawMessage rawMessage = new RawMessage(bytes);
                            // TODO implement throttling
                    } catch (ConsumerTimeoutException e) {
                        // Happens when there is nothing to consume, retry to check again.
                        retry = true;
                    } catch (Exception e) {
                        // Any other failure is logged; retry stays false, so the do/while ends.
                        LOG.error("Kafka consumer error, stopping consumer thread.", e);
                } while (retry && !stopped);
                // explicitly commit our offsets when stopping.
                // this might trigger a couple of times, but it won't hurt
    // Periodic task: first run after 1 second, then every second.
    // NOTE(review): the body of this runnable is missing in the excerpt.
    scheduler.scheduleAtFixedRate(new Runnable() {

        public void run() {
    }, 1, 1, TimeUnit.SECONDS);
Example 5 with MessageAndMetadata

use of kafka.message.MessageAndMetadata in project incubator-atlas by apache.

the class KafkaConsumer method getNext.

// ----- AbstractNotificationConsumer ------------------------------------
/**
 * Returns the payload of the next consumed message and records that message's offset.
 *
 * <p>NOTE(review): this excerpt is incomplete - the initializer of {@code message} and the
 * method's closing brace are missing, so it does not compile as shown.
 *
 * @return the message payload, cast to {@code String}
 */
public String getNext() {
    // NOTE(review): the right-hand side of this assignment is missing in the excerpt; presumably
    // it fetches the next element from the consumer's iterator - restore from the upstream source.
    MessageAndMetadata message =;
    // NOTE(review): "conumerId" in the log text below looks like a typo for "consumerId".
    LOG.debug("Read message: conumerId: {}, topic - {}, partition - {}, offset - {}, message - {}", consumerId, message.topic(), message.partition(), message.offset(), message.message());
    // Remember the offset of the message just read.
    lastSeenOffset = message.offset();
    // MessageAndMetadata is used as a raw type here, hence the explicit cast.
    return (String) message.message();
