
Example 1 with KafkaStream

Use of kafka.consumer.KafkaStream in project druid by druid-io.

The class KafkaLookupExtractorFactory, method start:

@Override
public boolean start() {
    synchronized (started) {
        if (started.get()) {
            LOG.warn("Already started, not starting again");
            return started.get();
        }
        if (executorService.isShutdown()) {
            LOG.warn("Already shut down, not starting again");
            return false;
        }
        final Properties kafkaProperties = new Properties();
        kafkaProperties.putAll(getKafkaProperties());
        if (kafkaProperties.containsKey("group.id")) {
            throw new IAE("Cannot set kafka property [group.id]. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty("group.id"));
        }
        if (kafkaProperties.containsKey("auto.offset.reset")) {
            throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
        }
        Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
        kafkaProperties.setProperty("group.id", factoryId);
        final String topic = getKafkaTopic();
        LOG.debug("About to listen to topic [%s] with group.id [%s]", topic, factoryId);
        cacheHandler = cacheManager.createCache();
        final Map<String, String> map = cacheHandler.getCache();
        mapRef.set(map);
        // Enable publish-subscribe
        kafkaProperties.setProperty("auto.offset.reset", "smallest");
        final CountDownLatch startingReads = new CountDownLatch(1);
        final ListenableFuture<?> future = executorService.submit(new Runnable() {

            @Override
            public void run() {
                while (!executorService.isShutdown()) {
                    consumerConnector = buildConnector(kafkaProperties);
                    try {
                        if (executorService.isShutdown()) {
                            break;
                        }
                        final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
                        if (streams == null || streams.isEmpty()) {
                            throw new IAE("Topic [%s] had no streams", topic);
                        }
                        if (streams.size() > 1) {
                            throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
                        }
                        final KafkaStream<String, String> kafkaStream = streams.get(0);
                        startingReads.countDown();
                        for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
                            final String key = messageAndMetadata.key();
                            final String message = messageAndMetadata.message();
                            if (key == null || message == null) {
                                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                                continue;
                            }
                            doubleEventCount.incrementAndGet();
                            map.put(key, message);
                            doubleEventCount.incrementAndGet();
                            LOG.trace("Placed key[%s] val[%s]", key, message);
                        }
                    } catch (Exception e) {
                        LOG.error(e, "Error reading stream for topic [%s]", topic);
                    } finally {
                        consumerConnector.shutdown();
                    }
                }
            }
        });
        Futures.addCallback(future, new FutureCallback<Object>() {

            @Override
            public void onSuccess(Object result) {
                LOG.debug("Success listening to [%s]", topic);
            }

            @Override
            public void onFailure(Throwable t) {
                if (t instanceof CancellationException) {
                    LOG.debug("Topic [%s] cancelled", topic);
                } else {
                    LOG.error(t, "Error in listening to [%s]", topic);
                }
            }
        }, MoreExecutors.sameThreadExecutor());
        this.future = future;
        final Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
                // Don't return until we have actually connected
                if (future.isDone()) {
                    future.get();
                } else {
                    if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
                        throw new TimeoutException("Failed to connect to kafka in sufficient time");
                    }
                }
            }
        } catch (InterruptedException | ExecutionException | TimeoutException e) {
            executorService.shutdown();
            if (!future.isDone() && !future.cancel(false)) {
                LOG.warn("Could not cancel kafka listening thread");
            }
            LOG.error(e, "Failed to start kafka extraction factory");
            cacheHandler.close();
            return false;
        }
        started.set(true);
        return true;
    }
}
Also used: MessageAndMetadata(kafka.message.MessageAndMetadata), Stopwatch(com.google.common.base.Stopwatch), KafkaStream(kafka.consumer.KafkaStream), Properties(java.util.Properties), IAE(io.druid.java.util.common.IAE), CountDownLatch(java.util.concurrent.CountDownLatch), TimeoutException(java.util.concurrent.TimeoutException), CancellationException(java.util.concurrent.CancellationException), ExecutionException(java.util.concurrent.ExecutionException), Whitelist(kafka.consumer.Whitelist), List(java.util.List), ISE(io.druid.java.util.common.ISE)
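
The helper buildConnector(...) and the DEFAULT_STRING_DECODER constant are referenced in start() but not shown here. A minimal sketch of what they could look like, assuming they only wrap the standard Kafka 0.8 high-level consumer bootstrap and the stock StringDecoder; the names and any extra wiring in the actual Druid class may differ:

import java.util.Properties;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.Decoder;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

// Hypothetical stand-ins for the members referenced in start() above.
class KafkaConnectorSketch {

    // Stock string decoder, used for both keys and values.
    static final Decoder<String> DEFAULT_STRING_DECODER = new StringDecoder(new VerifiableProperties());

    // Wraps the standard high-level consumer bootstrap; ConsumerConfig validates
    // required properties such as zookeeper.connect and group.id.
    static ConsumerConnector buildConnector(Properties kafkaProperties) {
        return Consumer.createJavaConsumerConnector(new ConsumerConfig(kafkaProperties));
    }
}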

Example 2 with KafkaStream

Use of kafka.consumer.KafkaStream in project voltdb by VoltDB.

The class ExportKafkaOnServerVerifier, method createAndConsumeKafkaStreams:

//Submit consumer tasks to executor and wait for EOS message then continue on.
void createAndConsumeKafkaStreams(String topicPrefix, boolean skinny) throws Exception {
    final String topic = topicPrefix + "EXPORT_PARTITIONED_TABLE";
    final String topic2 = topicPrefix + "EXPORT_PARTITIONED_TABLE2";
    final String doneTopic = topicPrefix + "EXPORT_DONE_TABLE";
    List<Future<Long>> doneFutures = new ArrayList<>();
    Map<String, Integer> topicCountMap = new HashMap<>();
    topicCountMap.put(topic, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = m_kafkaConfig.consumer.createMessageStreams(topicCountMap);
    List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
    ExecutorService executor = Executors.newFixedThreadPool(streams.size());
    // now launch all the threads
    CountDownLatch consumersLatch = new CountDownLatch(streams.size());
    for (final KafkaStream stream : streams) {
        System.out.println("Creating consumer for " + topic);
        ExportConsumer consumer = new ExportConsumer(stream, false, skinny, consumersLatch);
        executor.submit(consumer);
    }
    Map<String, Integer> topicCountMap2 = new HashMap<>();
    topicCountMap2.put(topic2, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap2 = m_kafkaConfig.consumer2.createMessageStreams(topicCountMap2);
    List<KafkaStream<byte[], byte[]>> streams2 = consumerMap2.get(topic2);
    ExecutorService executor2 = Executors.newFixedThreadPool(streams2.size());
    // now launch all the threads
    CountDownLatch consumersLatch2 = new CountDownLatch(streams2.size());
    for (final KafkaStream stream : streams2) {
        System.out.println("Creating consumer for " + topic2);
        ExportConsumer consumer = new ExportConsumer(stream, false, skinny, consumersLatch2);
        executor2.submit(consumer);
    }
    Map<String, Integer> topicDoneCountMap = new HashMap<String, Integer>();
    topicDoneCountMap.put(doneTopic, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> doneConsumerMap = m_kafkaConfig.doneConsumer.createMessageStreams(topicDoneCountMap);
    List<KafkaStream<byte[], byte[]>> doneStreams = doneConsumerMap.get(doneTopic);
    ExecutorService executord2 = Executors.newFixedThreadPool(doneStreams.size());
    CompletionService<Long> ecs = new ExecutorCompletionService<>(executord2);
    CountDownLatch doneLatch = new CountDownLatch(doneStreams.size());
    // now launch all the threads
    for (final KafkaStream stream : doneStreams) {
        System.out.println("Creating consumer for " + doneTopic);
        ExportConsumer consumer = new ExportConsumer(stream, true, true, doneLatch);
        Future<Long> f = ecs.submit(consumer, new Long(0));
        doneFutures.add(f);
    }
    System.out.println("All Consumer Creation Done...Waiting for EOS");
    // Now wait for any executorservice2 completion.
    ecs.take().get();
    System.out.println("Done Consumer Saw EOS...Cancelling rest of the done consumers.");
    for (Future<Long> f : doneFutures) {
        f.cancel(true);
    }
    //Wait for all consumers to consume and timeout.
    System.out.println("Wait for drain of consumers.");
    long cnt = consumedRows.get();
    long wtime = System.currentTimeMillis();
    while (true) {
        Thread.sleep(5000);
        if (cnt != consumedRows.get()) {
            // Rows are still arriving: take a fresh snapshot and reset the idle timer,
            // otherwise the stale snapshot would keep this loop from ever terminating.
            cnt = consumedRows.get();
            wtime = System.currentTimeMillis();
            System.out.println("Train is still running.");
            continue;
        }
        if ((System.currentTimeMillis() - wtime) > 60000) {
            System.out.println("Waited long enough looks like train has stopped.");
            break;
        }
    }
    m_kafkaConfig.stop();
    consumersLatch.await();
    consumersLatch2.await();
    System.out.println("Seen Rows: " + consumedRows.get() + " Expected: " + expectedRows);
    if (consumedRows.get() < expectedRows) {
        System.out.println("ERROR: Exported row count does not match consumed rows.");
        testGood.set(false);
    }
    //For shutdown hook to not stop twice.
    m_kafkaConfig = null;
}
Also used: HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), KafkaStream(kafka.consumer.KafkaStream), ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService), CountDownLatch(java.util.concurrent.CountDownLatch), ExecutorService(java.util.concurrent.ExecutorService), AtomicLong(java.util.concurrent.atomic.AtomicLong), Future(java.util.concurrent.Future), List(java.util.List)
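
The ExportConsumer class submitted above is not shown. A much-simplified, hypothetical sketch of a Runnable over a KafkaStream, assuming it only drains the stream, counts rows, and releases its latch when the consumer times out or is shut down; the real VoltDB verifier also parses and validates each row:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicLong;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;

// Hypothetical, simplified stand-in for the ExportConsumer referenced above.
class SimpleStreamConsumer implements Runnable {
    private final KafkaStream<byte[], byte[]> stream;
    private final CountDownLatch latch;
    private final AtomicLong consumedRows;

    SimpleStreamConsumer(KafkaStream<byte[], byte[]> stream, CountDownLatch latch, AtomicLong consumedRows) {
        this.stream = stream;
        this.latch = latch;
        this.consumedRows = consumedRows;
    }

    @Override
    public void run() {
        try {
            ConsumerIterator<byte[], byte[]> it = stream.iterator();
            // hasNext() blocks until a message arrives or consumer.timeout.ms elapses.
            while (it.hasNext()) {
                it.next();
                consumedRows.incrementAndGet();
            }
        } catch (ConsumerTimeoutException e) {
            // No more messages within the configured timeout; treat as end of stream.
        } finally {
            latch.countDown();
        }
    }
}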

Example 3 with KafkaStream

Use of kafka.consumer.KafkaStream in project flink by apache.

The class KafkaConsumerTestBase, method readTopicToList:

// ------------------------------------------------------------------------
//  Debugging utilities
// ------------------------------------------------------------------------
/**
	 * Read topic to list, only using Kafka code.
	 */
private static List<MessageAndMetadata<byte[], byte[]>> readTopicToList(String topicName, ConsumerConfig config, final int stopAfter) {
    ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(config);
    // we request only one stream per consumer instance. Kafka will make sure that each consumer group
    // will see each message only once.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumerConnector.createMessageStreams(topicCountMap);
    if (streams.size() != 1) {
        throw new RuntimeException("Expected only one message stream but got " + streams.size());
    }
    List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
    if (kafkaStreams == null) {
        throw new RuntimeException("Requested stream not available. Available streams: " + streams.toString());
    }
    if (kafkaStreams.size() != 1) {
        throw new RuntimeException("Requested 1 stream from Kafka, bot got " + kafkaStreams.size() + " streams");
    }
    LOG.info("Opening Consumer instance for topic '{}' on group '{}'", topicName, config.groupId());
    ConsumerIterator<byte[], byte[]> iteratorToRead = kafkaStreams.get(0).iterator();
    List<MessageAndMetadata<byte[], byte[]>> result = new ArrayList<>();
    int read = 0;
    while (iteratorToRead.hasNext()) {
        read++;
        result.add(iteratorToRead.next());
        if (read == stopAfter) {
            LOG.info("Read " + read + " elements");
            return result;
        }
    }
    return result;
}
Also used: ArrayList(java.util.ArrayList), MessageAndMetadata(kafka.message.MessageAndMetadata), ConsumerConnector(kafka.javaapi.consumer.ConsumerConnector), KafkaStream(kafka.consumer.KafkaStream), TypeHint(org.apache.flink.api.common.typeinfo.TypeHint), List(java.util.List)
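
A hedged usage sketch for this helper. The ZooKeeper address, group id, topic name, and dumpFirstHundred method are placeholders, and the property set mirrors the other examples on this page rather than Flink's actual test configuration:

import java.util.List;
import java.util.Properties;
import kafka.consumer.ConsumerConfig;
import kafka.message.MessageAndMetadata;

// Hypothetical caller inside the same test class as readTopicToList(...).
private static void dumpFirstHundred() {
    Properties props = new Properties();
    props.put("zookeeper.connect", "localhost:2181"); // placeholder ZooKeeper address
    props.put("group.id", "debug-topic-reader");      // placeholder group id
    props.put("auto.offset.reset", "smallest");       // read the topic from the beginning
    List<MessageAndMetadata<byte[], byte[]>> messages =
            readTopicToList("test-topic", new ConsumerConfig(props), 100);
    LOG.info("Fetched {} messages", messages.size());
    // Caveat: if the topic holds fewer than 100 messages, hasNext() inside
    // readTopicToList blocks until more arrive, unless consumer.timeout.ms is also set.
}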

Example 4 with KafkaStream

Use of kafka.consumer.KafkaStream in project nifi by apache.

The class PutKafkaTest, method buildConsumer:

private ConsumerIterator<byte[], byte[]> buildConsumer(String topic) {
    Properties props = new Properties();
    props.put("zookeeper.connect", "0.0.0.0:" + kafkaLocal.getZookeeperPort());
    props.put("group.id", "test");
    props.put("consumer.timeout.ms", "5000");
    props.put("auto.offset.reset", "smallest");
    ConsumerConfig consumerConfig = new ConsumerConfig(props);
    ConsumerConnector consumer = Consumer.createJavaConsumerConnector(consumerConfig);
    Map<String, Integer> topicCountMap = new HashMap<>(1);
    topicCountMap.put(topic, 1);
    Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
    List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
    ConsumerIterator<byte[], byte[]> iter = streams.get(0).iterator();
    return iter;
}
Also used: HashMap(java.util.HashMap), ConsumerConfig(kafka.consumer.ConsumerConfig), ConsumerConnector(kafka.javaapi.consumer.ConsumerConnector), List(java.util.List), KafkaStream(kafka.consumer.KafkaStream), Properties(java.util.Properties)
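
A hedged sketch of how a test might drain the iterator this helper returns. The drainTopic name is hypothetical; the only assumption it relies on is the consumer.timeout.ms=5000 set above, which makes hasNext() throw ConsumerTimeoutException after five seconds of silence:

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;

// Hypothetical test helper built on buildConsumer(...); the topic name is passed by the caller.
private List<String> drainTopic(String topic) {
    List<String> messages = new ArrayList<>();
    ConsumerIterator<byte[], byte[]> iter = buildConsumer(topic);
    try {
        // With consumer.timeout.ms=5000, hasNext() throws ConsumerTimeoutException
        // once the topic has been silent for 5 seconds.
        while (iter.hasNext()) {
            messages.add(new String(iter.next().message(), StandardCharsets.UTF_8));
        }
    } catch (ConsumerTimeoutException e) {
        // Expected: nothing more to read.
    }
    return messages;
}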

Example 5 with KafkaStream

Use of kafka.consumer.KafkaStream in project nifi by apache.

The class GetKafka, method createConsumers:

public void createConsumers(final ProcessContext context) {
    final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions().getValue();
    final Properties props = new Properties();
    props.setProperty("zookeeper.connect", context.getProperty(ZOOKEEPER_CONNECTION_STRING).evaluateAttributeExpressions().getValue());
    props.setProperty("group.id", context.getProperty(GROUP_ID).evaluateAttributeExpressions().getValue());
    props.setProperty("client.id", context.getProperty(CLIENT_NAME).getValue());
    props.setProperty("auto.commit.interval.ms", String.valueOf(context.getProperty(ZOOKEEPER_COMMIT_DELAY).asTimePeriod(TimeUnit.MILLISECONDS)));
    props.setProperty("auto.offset.reset", context.getProperty(AUTO_OFFSET_RESET).getValue());
    props.setProperty("zookeeper.connection.timeout.ms", context.getProperty(ZOOKEEPER_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).toString());
    props.setProperty("socket.timeout.ms", context.getProperty(KAFKA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).toString());
    for (final Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
        PropertyDescriptor descriptor = entry.getKey();
        if (descriptor.isDynamic()) {
            if (props.containsKey(descriptor.getName())) {
                this.getLogger().warn("Overriding existing property '" + descriptor.getName() + "' which had value of '" + props.getProperty(descriptor.getName()) + "' with dynamically set value '" + entry.getValue() + "'.");
            }
            props.setProperty(descriptor.getName(), entry.getValue());
        }
    }
    /*
         * Unless user sets it to some explicit value we are setting it to the
         * lowest possible value of 1 millisecond to ensure the
         * consumerStream.hasNext() doesn't block. See
         * http://kafka.apache.org/documentation.html#configuration) as well as
         * comment in 'catch ConsumerTimeoutException' in onTrigger() for more
         * explanation as to the reasoning behind it.
         */
    if (!props.containsKey("consumer.timeout.ms")) {
        this.getLogger().info("Setting 'consumer.timeout.ms' to 1 milliseconds to avoid consumer" + " block in the event when no events are present in Kafka topic. If you wish to change this value " + " set it as dynamic property. If you wish to explicitly enable consumer block (at your own risk)" + " set its value to -1.");
        props.setProperty("consumer.timeout.ms", "1");
    }
    int partitionCount = KafkaUtils.retrievePartitionCountForTopic(context.getProperty(ZOOKEEPER_CONNECTION_STRING).evaluateAttributeExpressions().getValue(), context.getProperty(TOPIC).evaluateAttributeExpressions().getValue());
    final ConsumerConfig consumerConfig = new ConsumerConfig(props);
    consumer = Consumer.createJavaConsumerConnector(consumerConfig);
    final Map<String, Integer> topicCountMap = new HashMap<>(1);
    int concurrentTaskToUse = context.getMaxConcurrentTasks();
    if (context.getMaxConcurrentTasks() < partitionCount) {
        this.getLogger().warn("The amount of concurrent tasks '" + context.getMaxConcurrentTasks() + "' configured for " + "this processor is less than the amount of partitions '" + partitionCount + "' for topic '" + context.getProperty(TOPIC).evaluateAttributeExpressions().getValue() + "'. " + "Consider making it equal to the amount of partition count for most efficient event consumption.");
    } else if (context.getMaxConcurrentTasks() > partitionCount) {
        concurrentTaskToUse = partitionCount;
        this.getLogger().warn("The amount of concurrent tasks '" + context.getMaxConcurrentTasks() + "' configured for " + "this processor is greater than the amount of partitions '" + partitionCount + "' for topic '" + context.getProperty(TOPIC).evaluateAttributeExpressions().getValue() + "'. " + "Therefore those tasks would never see a message. To avoid that the '" + partitionCount + "'(partition count) will be used to consume events");
    }
    topicCountMap.put(topic, concurrentTaskToUse);
    final Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
    final List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
    this.streamIterators.clear();
    for (final KafkaStream<byte[], byte[]> stream : streams) {
        streamIterators.add(stream.iterator());
    }
    this.consumerStreamsReady.set(true);
}
Also used: PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor), HashMap(java.util.HashMap), KafkaStream(kafka.consumer.KafkaStream), Properties(java.util.Properties), ConsumerConfig(kafka.consumer.ConsumerConfig), ArrayList(java.util.ArrayList), List(java.util.List)
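
The long comment about consumer.timeout.ms above refers to how onTrigger() later reads from these iterators. A simplified, hypothetical sketch of such a poll, not the actual NiFi onTrigger() implementation, which also builds FlowFiles and handles message demarcators:

import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.message.MessageAndMetadata;

// Hypothetical single-message poll in the spirit of the onTrigger() logic referenced above.
private byte[] pollOneMessage(ConsumerIterator<byte[], byte[]> iterator) {
    try {
        // With consumer.timeout.ms=1, hasNext() returns almost immediately by throwing
        // ConsumerTimeoutException when no message is available, instead of blocking.
        if (iterator.hasNext()) {
            MessageAndMetadata<byte[], byte[]> mam = iterator.next();
            return mam.message();
        }
    } catch (ConsumerTimeoutException e) {
        // No message available right now; yield and try again on the next trigger.
    }
    return null;
}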

Aggregations

KafkaStream (kafka.consumer.KafkaStream): 12
List (java.util.List): 9
HashMap (java.util.HashMap): 6
Properties (java.util.Properties): 5
ConsumerConfig (kafka.consumer.ConsumerConfig): 5
ConsumerConnector (kafka.javaapi.consumer.ConsumerConnector): 4
ArrayList (java.util.ArrayList): 3
CountDownLatch (java.util.concurrent.CountDownLatch): 3
ExecutorService (java.util.concurrent.ExecutorService): 3
MessageAndMetadata (kafka.message.MessageAndMetadata): 3
ConsumerIterator (kafka.consumer.ConsumerIterator): 2
Whitelist (kafka.consumer.Whitelist): 2
VerifiableProperties (kafka.utils.VerifiableProperties): 2
InstrumentedExecutorService (com.codahale.metrics.InstrumentedExecutorService): 1
Stopwatch (com.google.common.base.Stopwatch): 1
MessageDecoder (com.jeesuite.kafka.serializer.MessageDecoder): 1
ByteBufferInputRowParser (io.druid.data.input.ByteBufferInputRowParser): 1
Firehose (io.druid.data.input.Firehose): 1
IAE (io.druid.java.util.common.IAE): 1
ISE (io.druid.java.util.common.ISE): 1