Use of kafka.consumer.KafkaStream in project druid by druid-io.
The class KafkaLookupExtractorFactory, method start:
@Override
public boolean start() {
  synchronized (started) {
    if (started.get()) {
      LOG.warn("Already started, not starting again");
      return started.get();
    }
    if (executorService.isShutdown()) {
      LOG.warn("Already shut down, not starting again");
      return false;
    }
    final Properties kafkaProperties = new Properties();
    kafkaProperties.putAll(getKafkaProperties());
    if (kafkaProperties.containsKey("group.id")) {
      throw new IAE("Cannot set kafka property [group.id]. Property is randomly generated for you. Found [%s]", kafkaProperties.getProperty("group.id"));
    }
    if (kafkaProperties.containsKey("auto.offset.reset")) {
      throw new IAE("Cannot set kafka property [auto.offset.reset]. Property will be forced to [smallest]. Found [%s]", kafkaProperties.getProperty("auto.offset.reset"));
    }
    Preconditions.checkNotNull(kafkaProperties.getProperty("zookeeper.connect"), "zookeeper.connect required property");
    kafkaProperties.setProperty("group.id", factoryId);
    final String topic = getKafkaTopic();
    LOG.debug("About to listen to topic [%s] with group.id [%s]", topic, factoryId);
    cacheHandler = cacheManager.createCache();
    final Map<String, String> map = cacheHandler.getCache();
    mapRef.set(map);
    // Enable publish-subscribe
    kafkaProperties.setProperty("auto.offset.reset", "smallest");
    final CountDownLatch startingReads = new CountDownLatch(1);
    final ListenableFuture<?> future = executorService.submit(new Runnable() {
      @Override
      public void run() {
        while (!executorService.isShutdown()) {
          consumerConnector = buildConnector(kafkaProperties);
          try {
            if (executorService.isShutdown()) {
              break;
            }
            final List<KafkaStream<String, String>> streams = consumerConnector.createMessageStreamsByFilter(new Whitelist(Pattern.quote(topic)), 1, DEFAULT_STRING_DECODER, DEFAULT_STRING_DECODER);
            if (streams == null || streams.isEmpty()) {
              throw new IAE("Topic [%s] had no streams", topic);
            }
            if (streams.size() > 1) {
              throw new ISE("Topic [%s] has %d streams! expected 1", topic, streams.size());
            }
            final KafkaStream<String, String> kafkaStream = streams.get(0);
            startingReads.countDown();
            for (final MessageAndMetadata<String, String> messageAndMetadata : kafkaStream) {
              final String key = messageAndMetadata.key();
              final String message = messageAndMetadata.message();
              if (key == null || message == null) {
                LOG.error("Bad key/message from topic [%s]: [%s]", topic, messageAndMetadata);
                continue;
              }
              doubleEventCount.incrementAndGet();
              map.put(key, message);
              doubleEventCount.incrementAndGet();
              LOG.trace("Placed key[%s] val[%s]", key, message);
            }
          } catch (Exception e) {
            LOG.error(e, "Error reading stream for topic [%s]", topic);
          } finally {
            consumerConnector.shutdown();
          }
        }
      }
    });
    Futures.addCallback(future, new FutureCallback<Object>() {
      @Override
      public void onSuccess(Object result) {
        LOG.debug("Success listening to [%s]", topic);
      }

      @Override
      public void onFailure(Throwable t) {
        if (t instanceof CancellationException) {
          LOG.debug("Topic [%s] cancelled", topic);
        } else {
          LOG.error(t, "Error in listening to [%s]", topic);
        }
      }
    }, MoreExecutors.sameThreadExecutor());
    this.future = future;
    final Stopwatch stopwatch = Stopwatch.createStarted();
    try {
      while (!startingReads.await(100, TimeUnit.MILLISECONDS) && connectTimeout > 0L) {
        // Don't return until we have actually connected
        if (future.isDone()) {
          future.get();
        } else {
          if (stopwatch.elapsed(TimeUnit.MILLISECONDS) > connectTimeout) {
            throw new TimeoutException("Failed to connect to kafka in sufficient time");
          }
        }
      }
    } catch (InterruptedException | ExecutionException | TimeoutException e) {
      executorService.shutdown();
      if (!future.isDone() && !future.cancel(false)) {
        LOG.warn("Could not cancel kafka listening thread");
      }
      LOG.error(e, "Failed to start kafka extraction factory");
      cacheHandler.close();
      return false;
    }
    started.set(true);
    return true;
  }
}
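For context, a minimal self-contained sketch of the same whitelist-based subscription outside of Druid. The topic name, group id, and ZooKeeper address below are placeholder values, not the factory's own configuration:

import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.consumer.Whitelist;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

public class WhitelistConsumerSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("zookeeper.connect", "localhost:2181"); // placeholder
    props.setProperty("group.id", "example-group");           // placeholder
    props.setProperty("auto.offset.reset", "smallest");       // read from the beginning, as the factory forces above
    ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
    StringDecoder decoder = new StringDecoder(new VerifiableProperties());
    // One stream for the quoted topic name, mirroring createMessageStreamsByFilter above.
    List<KafkaStream<String, String>> streams =
        connector.createMessageStreamsByFilter(new Whitelist(Pattern.quote("example-topic")), 1, decoder, decoder);
    try {
      // Blocks waiting for messages, just like the listener thread above.
      for (MessageAndMetadata<String, String> mm : streams.get(0)) {
        System.out.println(mm.key() + " -> " + mm.message());
      }
    } finally {
      connector.shutdown();
    }
  }
}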
Use of kafka.consumer.KafkaStream in project voltdb by VoltDB.
The class ExportKafkaOnServerVerifier, method createAndConsumeKafkaStreams:
// Submit consumer tasks to executor and wait for EOS message, then continue on.
void createAndConsumeKafkaStreams(String topicPrefix, boolean skinny) throws Exception {
  final String topic = topicPrefix + "EXPORT_PARTITIONED_TABLE";
  final String topic2 = topicPrefix + "EXPORT_PARTITIONED_TABLE2";
  final String doneTopic = topicPrefix + "EXPORT_DONE_TABLE";
  List<Future<Long>> doneFutures = new ArrayList<>();
  Map<String, Integer> topicCountMap = new HashMap<>();
  topicCountMap.put(topic, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = m_kafkaConfig.consumer.createMessageStreams(topicCountMap);
  List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
  ExecutorService executor = Executors.newFixedThreadPool(streams.size());
  // now launch all the threads
  CountDownLatch consumersLatch = new CountDownLatch(streams.size());
  for (final KafkaStream stream : streams) {
    System.out.println("Creating consumer for " + topic);
    ExportConsumer consumer = new ExportConsumer(stream, false, skinny, consumersLatch);
    executor.submit(consumer);
  }
  Map<String, Integer> topicCountMap2 = new HashMap<>();
  topicCountMap2.put(topic2, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap2 = m_kafkaConfig.consumer2.createMessageStreams(topicCountMap2);
  List<KafkaStream<byte[], byte[]>> streams2 = consumerMap2.get(topic2);
  ExecutorService executor2 = Executors.newFixedThreadPool(streams2.size());
  // now launch all the threads
  CountDownLatch consumersLatch2 = new CountDownLatch(streams2.size());
  for (final KafkaStream stream : streams2) {
    System.out.println("Creating consumer for " + topic2);
    ExportConsumer consumer = new ExportConsumer(stream, false, skinny, consumersLatch2);
    executor2.submit(consumer);
  }
  Map<String, Integer> topicDoneCountMap = new HashMap<String, Integer>();
  topicDoneCountMap.put(doneTopic, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> doneConsumerMap = m_kafkaConfig.doneConsumer.createMessageStreams(topicDoneCountMap);
  List<KafkaStream<byte[], byte[]>> doneStreams = doneConsumerMap.get(doneTopic);
  ExecutorService executord2 = Executors.newFixedThreadPool(doneStreams.size());
  CompletionService<Long> ecs = new ExecutorCompletionService<>(executord2);
  CountDownLatch doneLatch = new CountDownLatch(doneStreams.size());
  // now launch all the threads
  for (final KafkaStream stream : doneStreams) {
    System.out.println("Creating consumer for " + doneTopic);
    ExportConsumer consumer = new ExportConsumer(stream, true, true, doneLatch);
    Future<Long> f = ecs.submit(consumer, new Long(0));
    doneFutures.add(f);
  }
  System.out.println("All Consumer Creation Done...Waiting for EOS");
  // Now wait for any executorservice2 completion.
  ecs.take().get();
  System.out.println("Done Consumer Saw EOS...Cancelling rest of the done consumers.");
  for (Future<Long> f : doneFutures) {
    f.cancel(true);
  }
  // Wait for all consumers to consume and timeout.
  System.out.println("Wait for drain of consumers.");
  long cnt = consumedRows.get();
  long wtime = System.currentTimeMillis();
  while (true) {
    Thread.sleep(5000);
    if (cnt != consumedRows.get()) {
      // refresh the snapshot so the stall detector below can eventually fire
      cnt = consumedRows.get();
      wtime = System.currentTimeMillis();
      System.out.println("Train is still running.");
      continue;
    }
    if ((System.currentTimeMillis() - wtime) > 60000) {
      System.out.println("Waited long enough looks like train has stopped.");
      break;
    }
  }
  m_kafkaConfig.stop();
  consumersLatch.await();
  consumersLatch2.await();
  System.out.println("Seen Rows: " + consumedRows.get() + " Expected: " + expectedRows);
  if (consumedRows.get() < expectedRows) {
    System.out.println("ERROR: Exported row count does not match consumed rows.");
    testGood.set(false);
  }
  // For shutdown hook to not stop twice.
  m_kafkaConfig = null;
}
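The done-topic handling above hinges on ExecutorCompletionService.take() returning whichever submitted consumer finishes first (the one that saw EOS), after which the remaining futures are cancelled. A minimal sketch of that wait-for-first pattern in isolation, with hypothetical sleep tasks standing in for ExportConsumer:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class FirstDoneSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(3);
    CompletionService<Long> ecs = new ExecutorCompletionService<>(pool);
    List<Future<Long>> futures = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      final long delay = (i + 1) * 100L; // hypothetical amount of work per task
      futures.add(ecs.submit(() -> { Thread.sleep(delay); return delay; }));
    }
    // take() blocks until the first submitted task completes, like the EOS wait above.
    Long first = ecs.take().get();
    System.out.println("First task finished after " + first + " ms; cancelling the rest");
    for (Future<Long> f : futures) {
      f.cancel(true);
    }
    pool.shutdownNow();
  }
}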
Use of kafka.consumer.KafkaStream in project flink by apache.
The class KafkaConsumerTestBase, method readTopicToList:
// ------------------------------------------------------------------------
// Debugging utilities
// ------------------------------------------------------------------------
/**
* Read topic to list, only using Kafka code.
*/
private static List<MessageAndMetadata<byte[], byte[]>> readTopicToList(String topicName, ConsumerConfig config, final int stopAfter) {
  ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(config);
  // we request only one stream per consumer instance. Kafka will make sure that each consumer group
  // will see each message only once.
  Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumerConnector.createMessageStreams(topicCountMap);
  if (streams.size() != 1) {
    throw new RuntimeException("Expected only one message stream but got " + streams.size());
  }
  List<KafkaStream<byte[], byte[]>> kafkaStreams = streams.get(topicName);
  if (kafkaStreams == null) {
    throw new RuntimeException("Requested stream not available. Available streams: " + streams.toString());
  }
  if (kafkaStreams.size() != 1) {
    throw new RuntimeException("Requested 1 stream from Kafka, but got " + kafkaStreams.size() + " streams");
  }
  LOG.info("Opening Consumer instance for topic '{}' on group '{}'", topicName, config.groupId());
  ConsumerIterator<byte[], byte[]> iteratorToRead = kafkaStreams.get(0).iterator();
  List<MessageAndMetadata<byte[], byte[]>> result = new ArrayList<>();
  int read = 0;
  while (iteratorToRead.hasNext()) {
    read++;
    result.add(iteratorToRead.next());
    if (read == stopAfter) {
      LOG.info("Read " + read + " elements");
      return result;
    }
  }
  return result;
}
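A hedged sketch of a call site for readTopicToList from inside the same test class; the helper name, topic name, group id, and ZooKeeper address are placeholders:

// Hypothetical helper in the same test class; readTopicToList is the method shown above.
private static List<MessageAndMetadata<byte[], byte[]>> readFirstHundred() {
  Properties props = new Properties();
  props.setProperty("zookeeper.connect", "localhost:2181"); // placeholder
  props.setProperty("group.id", "topic-reader");            // placeholder, any fresh group id
  props.setProperty("auto.offset.reset", "smallest");       // start from the beginning of the topic
  // Note: hasNext() in readTopicToList blocks until records arrive, so at least
  // 100 messages must already have been produced to "test-topic".
  return readTopicToList("test-topic", new ConsumerConfig(props), 100);
}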
Use of kafka.consumer.KafkaStream in project nifi by apache.
The class PutKafkaTest, method buildConsumer:
private ConsumerIterator<byte[], byte[]> buildConsumer(String topic) {
  Properties props = new Properties();
  props.put("zookeeper.connect", "0.0.0.0:" + kafkaLocal.getZookeeperPort());
  props.put("group.id", "test");
  props.put("consumer.timeout.ms", "5000");
  props.put("auto.offset.reset", "smallest");
  ConsumerConfig consumerConfig = new ConsumerConfig(props);
  ConsumerConnector consumer = Consumer.createJavaConsumerConnector(consumerConfig);
  Map<String, Integer> topicCountMap = new HashMap<>(1);
  topicCountMap.put(topic, 1);
  Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
  List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
  ConsumerIterator<byte[], byte[]> iter = streams.get(0).iterator();
  return iter;
}
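Because buildConsumer sets consumer.timeout.ms to 5000, a test that drains the returned iterator should expect ConsumerTimeoutException once the topic goes quiet. A minimal, hypothetical drain loop (the topic name and charset handling are assumptions, not taken from the test):

// Hypothetical drain loop for the iterator returned by buildConsumer(topic).
ConsumerIterator<byte[], byte[]> iter = buildConsumer("test-topic"); // placeholder topic
List<String> messages = new ArrayList<>();
try {
  while (iter.hasNext()) {
    messages.add(new String(iter.next().message(), StandardCharsets.UTF_8));
  }
} catch (ConsumerTimeoutException e) {
  // Expected: no message arrived within the 5000 ms window, so the topic is drained.
}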
Use of kafka.consumer.KafkaStream in project nifi by apache.
The class GetKafka, method createConsumers:
public void createConsumers(final ProcessContext context) {
  final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions().getValue();
  final Properties props = new Properties();
  props.setProperty("zookeeper.connect", context.getProperty(ZOOKEEPER_CONNECTION_STRING).evaluateAttributeExpressions().getValue());
  props.setProperty("group.id", context.getProperty(GROUP_ID).evaluateAttributeExpressions().getValue());
  props.setProperty("client.id", context.getProperty(CLIENT_NAME).getValue());
  props.setProperty("auto.commit.interval.ms", String.valueOf(context.getProperty(ZOOKEEPER_COMMIT_DELAY).asTimePeriod(TimeUnit.MILLISECONDS)));
  props.setProperty("auto.offset.reset", context.getProperty(AUTO_OFFSET_RESET).getValue());
  props.setProperty("zookeeper.connection.timeout.ms", context.getProperty(ZOOKEEPER_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).toString());
  props.setProperty("socket.timeout.ms", context.getProperty(KAFKA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).toString());
  for (final Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
    PropertyDescriptor descriptor = entry.getKey();
    if (descriptor.isDynamic()) {
      if (props.containsKey(descriptor.getName())) {
        this.getLogger().warn("Overriding existing property '" + descriptor.getName() + "' which had value of '" + props.getProperty(descriptor.getName()) + "' with dynamically set value '" + entry.getValue() + "'.");
      }
      props.setProperty(descriptor.getName(), entry.getValue());
    }
  }
  /*
   * Unless the user sets it to some explicit value we are setting it to the
   * lowest possible value of 1 millisecond to ensure that
   * consumerStream.hasNext() doesn't block. See
   * http://kafka.apache.org/documentation.html#configuration as well as
   * the comment in 'catch ConsumerTimeoutException' in onTrigger() for more
   * explanation as to the reasoning behind it.
   */
  if (!props.containsKey("consumer.timeout.ms")) {
    this.getLogger().info("Setting 'consumer.timeout.ms' to 1 milliseconds to avoid consumer" + " block in the event when no events are present in Kafka topic. If you wish to change this value " + " set it as dynamic property. If you wish to explicitly enable consumer block (at your own risk)" + " set its value to -1.");
    props.setProperty("consumer.timeout.ms", "1");
  }
  int partitionCount = KafkaUtils.retrievePartitionCountForTopic(context.getProperty(ZOOKEEPER_CONNECTION_STRING).evaluateAttributeExpressions().getValue(), context.getProperty(TOPIC).evaluateAttributeExpressions().getValue());
  final ConsumerConfig consumerConfig = new ConsumerConfig(props);
  consumer = Consumer.createJavaConsumerConnector(consumerConfig);
  final Map<String, Integer> topicCountMap = new HashMap<>(1);
  int concurrentTaskToUse = context.getMaxConcurrentTasks();
  if (context.getMaxConcurrentTasks() < partitionCount) {
    this.getLogger().warn("The amount of concurrent tasks '" + context.getMaxConcurrentTasks() + "' configured for " + "this processor is less than the amount of partitions '" + partitionCount + "' for topic '" + context.getProperty(TOPIC).evaluateAttributeExpressions().getValue() + "'. " + "Consider making it equal to the amount of partition count for most efficient event consumption.");
  } else if (context.getMaxConcurrentTasks() > partitionCount) {
    concurrentTaskToUse = partitionCount;
    this.getLogger().warn("The amount of concurrent tasks '" + context.getMaxConcurrentTasks() + "' configured for " + "this processor is greater than the amount of partitions '" + partitionCount + "' for topic '" + context.getProperty(TOPIC).evaluateAttributeExpressions().getValue() + "'. " + "Therefore those tasks would never see a message. To avoid that the '" + partitionCount + "'(partition count) will be used to consume events");
  }
  topicCountMap.put(topic, concurrentTaskToUse);
  final Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
  final List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
  this.streamIterators.clear();
  for (final KafkaStream<byte[], byte[]> stream : streams) {
    streamIterators.add(stream.iterator());
  }
  this.consumerStreamsReady.set(true);
}
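Once createConsumers has populated streamIterators, each iterator can be polled without blocking because consumer.timeout.ms is forced to 1 ms above. A minimal sketch of such a drain step (not the processor's actual onTrigger logic; handle() is a hypothetical callback):

// Hypothetical drain step; "iterator" is one of the ConsumerIterators added above.
void drainOnce(ConsumerIterator<byte[], byte[]> iterator) {
  try {
    while (iterator.hasNext()) {
      MessageAndMetadata<byte[], byte[]> record = iterator.next();
      handle(record.message()); // hypothetical handler for the raw payload bytes
    }
  } catch (ConsumerTimeoutException e) {
    // Not an error with consumer.timeout.ms=1: it simply means nothing was waiting.
  }
}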