use of kafka.javaapi.consumer.SimpleConsumer in project cdap by caskdata.
the class KafkaOffsetResolver method getStartOffset.
/**
* Check whether the message fetched with the offset {@code checkpoint.getNextOffset() - 1} contains the
* same timestamp as in the given checkpoint. If they match, directly return {@code checkpoint.getNextOffset()}.
* If they don't, search for the smallest offset of a message with the same log event time
* as {@code checkpoint.getNextEventTime()}.
*
* @param checkpoint A {@link Checkpoint} containing the next offset of a message and its log event timestamp.
* {@link Checkpoint#getNextOffset()}, {@link Checkpoint#getNextEventTime()}
* and {@link Checkpoint#getMaxEventTime()} all must return a non-negative long
* @param partition the partition in the topic for searching matching offset
* @return the next offset of the message with smallest offset and log event time equal to
* {@code checkpoint.getNextEventTime()}.
* {@code -1} if no such offset can be found or {@code checkpoint.getNextOffset()} is negative.
*
* @throws LeaderNotAvailableException if there is no Kafka broker to talk to.
* @throws OffsetOutOfRangeException if the given offset is out of range.
* @throws NotLeaderForPartitionException if the broker that the consumer is talking to is not the leader
* for the given topic and partition.
* @throws UnknownTopicOrPartitionException if the topic or partition is not known by the Kafka server.
* @throws UnknownServerException if the Kafka server responded with an error.
*/
long getStartOffset(final Checkpoint checkpoint, final int partition) {
  // This should never happen
  Preconditions.checkArgument(checkpoint.getNextOffset() > 0, "Invalid checkpoint offset");
  // Get BrokerInfo for constructing SimpleConsumer
  String topic = config.getTopic();
  BrokerInfo brokerInfo = brokerService.getLeader(topic, partition);
  if (brokerInfo == null) {
    throw new LeaderNotAvailableException(
        String.format("BrokerInfo from BrokerService is null for topic %s partition %d. Will retry in next run.",
                      topic, partition));
  }
  SimpleConsumer consumer = new SimpleConsumer(brokerInfo.getHost(), brokerInfo.getPort(),
                                               SO_TIMEOUT_MILLIS, BUFFER_SIZE,
                                               "offset-finder-" + topic + "-" + partition);
  // Fetch the message at offset (checkpoint.getNextOffset() - 1) and check whether its
  // timestamp matches the event timestamp recorded in the checkpoint
  long offset = checkpoint.getNextOffset() - 1;
  try {
    long timestamp = getEventTimeByOffset(consumer, partition, offset);
    if (timestamp == checkpoint.getNextEventTime()) {
      return checkpoint.getNextOffset();
    }
    // This can happen in a replicated cluster
    LOG.debug("Event timestamp in {}:{} at offset {} is {}. It doesn't match the checkpoint timestamp {}",
              topic, partition, offset, timestamp, checkpoint.getNextEventTime());
  } catch (NotFoundException | OffsetOutOfRangeException e) {
    // This means we can't find the timestamp. This can happen in a replicated cluster
    LOG.debug("Cannot get valid log event in {}:{} at offset {}", topic, partition, offset);
  }
  // Find the smallest offset whose event timestamp matches the checkpoint
  long nextOffset = findStartOffset(consumer, partition, checkpoint.getNextEventTime());
  LOG.debug("Found new nextOffset {} for topic {} partition {} with existing checkpoint {}.",
            nextOffset, topic, partition, checkpoint);
  return nextOffset;
}
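The helper getEventTimeByOffset referenced above is not shown in this snippet. Below is a minimal sketch of what it plausibly does: fetch the single message at the given offset and decode its log event timestamp. FETCH_SIZE, the serializer, and the NotFoundException constructor are assumptions for illustration, not part of the original code; error handling is simplified.

private long getEventTimeByOffset(SimpleConsumer consumer, int partition, long offset) throws NotFoundException {
  String topic = config.getTopic();
  // Request a small chunk starting exactly at the offset we want to inspect
  FetchRequest request = new FetchRequestBuilder()
      .clientId("offset-finder-" + topic + "-" + partition)
      .addFetch(topic, partition, offset, FETCH_SIZE)  // FETCH_SIZE is a hypothetical constant
      .build();
  FetchResponse response = consumer.fetch(request);
  if (response.hasError()) {
    // Simplified: the real code likely maps the error code to the specific Kafka exception
    throw new RuntimeException("Fetch failed with error code " + response.errorCode(topic, partition));
  }
  for (MessageAndOffset messageAndOffset : response.messageSet(topic, partition)) {
    if (messageAndOffset.offset() == offset) {
      // serializer.decodeEventTimestamp is a hypothetical stand-in for CDAP's log event decoding
      return serializer.decodeEventTimestamp(messageAndOffset.message().payload());
    }
  }
  // Hypothetical constructor; CDAP's NotFoundException signature may differ
  throw new NotFoundException("No message found at offset " + offset);
}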
use of kafka.javaapi.consumer.SimpleConsumer in project apex-malhar by apache.
the class KafkaMetadataUtil method getTopicMetadata.
/**
 * @param brokerSet set of broker addresses in "host:port" form, tried in turn
 * @param topic the topic to fetch metadata for
 * @return TopicMetadata for this specific topic obtained via the given broker set<br>
 * null if the topic is not found
 */
public static TopicMetadata getTopicMetadata(Set<String> brokerSet, String topic) {
  SimpleConsumer mdConsumer = null;
  if (brokerSet == null || brokerSet.isEmpty()) {
    return null;
  }
  try {
    for (String broker : brokerSet) {
      logger.debug("Try to get Metadata for topic {} broker {}", topic, broker);
      try {
        mdConsumer = new SimpleConsumer(broker.split(":")[0], Integer.parseInt(broker.split(":")[1]),
                                        timeout, bufferSize, mdClientId);
        List<String> topics = new ArrayList<String>(1);
        topics.add(topic);
        kafka.javaapi.TopicMetadataRequest req = new kafka.javaapi.TopicMetadataRequest(topics);
        TopicMetadataResponse resp = mdConsumer.send(req);
        List<TopicMetadata> metaData = resp.topicsMetadata();
        for (TopicMetadata item : metaData) {
          // There is at most one topic for this method
          return item;
        }
      } catch (NumberFormatException e) {
        throw new IllegalArgumentException("Wrong format for broker url, should be \"broker1:port1\"");
      } catch (Exception e) {
        logger.warn("Broker {} is unavailable or in bad state!", broker);
        // skip and try next broker
      }
    }
    return null;
  } finally {
    if (mdConsumer != null) {
      mdConsumer.close();
    }
  }
}
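For context, a short usage sketch; the broker addresses and topic name are placeholders. The exactly-once operator below calls this method the same way to resolve the leader broker of each partition:

// Resolve per-partition leaders for a topic from a set of candidate brokers
Set<String> brokers = Sets.newHashSet("broker1:9092", "broker2:9092");
TopicMetadata tm = KafkaMetadataUtil.getTopicMetadata(brokers, "my-topic");
if (tm != null) {
  for (PartitionMetadata pm : tm.partitionsMetadata()) {
    System.out.println(pm.partitionId() + " -> " + pm.leader().host() + ":" + pm.leader().port());
  }
}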
use of kafka.javaapi.consumer.SimpleConsumer in project apex-malhar by apache.
the class AbstractExactlyOnceKafkaOutputOperator method initializeLastProcessingOffset.
private void initializeLastProcessingOffset() {
  // read the last received kafka message for each partition
  TopicMetadata tm = KafkaMetadataUtil.getTopicMetadata(
      Sets.newHashSet((String) getConfigProperties().get(KafkaMetadataUtil.PRODUCER_PROP_BROKERLIST)),
      this.getTopic());
  if (tm == null) {
    throw new RuntimeException("Failed to retrieve topic metadata");
  }
  partitionNum = tm.partitionsMetadata().size();
  lastMsgs = new HashMap<Integer, Pair<byte[], byte[]>>(partitionNum);
  for (PartitionMetadata pm : tm.partitionsMetadata()) {
    String leadBroker = pm.leader().host();
    int port = pm.leader().port();
    String clientName = this.getClass().getName().replace('$', '.') + "_Client_" + tm.topic() + "_" + pm.partitionId();
    SimpleConsumer consumer = new SimpleConsumer(leadBroker, port, 100000, 64 * 1024, clientName);
    // fetch the message just before the latest offset of this partition
    long readOffset = KafkaMetadataUtil.getLastOffset(consumer, tm.topic(), pm.partitionId(),
        kafka.api.OffsetRequest.LatestTime(), clientName);
    FetchRequest req = new FetchRequestBuilder()
        .clientId(clientName)
        .addFetch(tm.topic(), pm.partitionId(), readOffset - 1, 100000)
        .build();
    FetchResponse fetchResponse = consumer.fetch(req);
    for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(tm.topic(), pm.partitionId())) {
      Message m = messageAndOffset.message();
      ByteBuffer payload = m.payload();
      ByteBuffer key = m.key();
      byte[] valueBytes = new byte[payload.limit()];
      byte[] keyBytes = new byte[key.limit()];
      payload.get(valueBytes);
      key.get(keyBytes);
      lastMsgs.put(pm.partitionId(), new Pair<byte[], byte[]>(keyBytes, valueBytes));
    }
  }
}
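KafkaMetadataUtil.getLastOffset, used above, is also not shown in this listing. A sketch modeled on Kafka 0.8's well-known SimpleConsumer example follows; the real utility may differ in details such as error handling:

public static long getLastOffset(SimpleConsumer consumer, String topic, int partition,
                                 long whichTime, String clientName) {
  TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
  Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
      new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
  // ask for a single offset at (or before) the requested time, e.g. LatestTime()
  requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
  kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
      requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
  OffsetResponse response = consumer.getOffsetsBefore(request);
  if (response.hasError()) {
    logger.warn("Error fetching offset from broker: {}", response.errorCode(topic, partition));
    return -1;
  }
  long[] offsets = response.offsets(topic, partition);
  return offsets[0];
}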
use of kafka.javaapi.consumer.SimpleConsumer in project apex-malhar by apache.
the class AbstractKafkaInputOperator method replay.
protected void replay(long windowId) {
  try {
    @SuppressWarnings("unchecked")
    Map<KafkaPartition, MutablePair<Long, Integer>> recoveredData =
        (Map<KafkaPartition, MutablePair<Long, Integer>>) windowDataManager.retrieve(windowId);
    if (recoveredData != null) {
      Map<String, List<PartitionMetadata>> pms =
          KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().topic);
      if (pms != null) {
        SimpleKafkaConsumer cons = (SimpleKafkaConsumer) getConsumer();
        // add all partition requests into one fetch request
        FetchRequestBuilder frb = new FetchRequestBuilder().clientId(cons.getClientId());
        for (Map.Entry<KafkaPartition, MutablePair<Long, Integer>> rc : recoveredData.entrySet()) {
          KafkaPartition kp = rc.getKey();
          // locate the metadata entry for this partition
          List<PartitionMetadata> pmsVal = pms.get(kp.getClusterId());
          Iterator<PartitionMetadata> pmIterator = pmsVal.iterator();
          PartitionMetadata pm = pmIterator.next();
          while (pm.partitionId() != kp.getPartitionId()) {
            if (!pmIterator.hasNext()) {
              break;
            }
            pm = pmIterator.next();
          }
          if (pm.partitionId() != kp.getPartitionId()) {
            continue;
          }
          Broker bk = pm.leader();
          frb.addFetch(consumer.topic, rc.getKey().getPartitionId(), rc.getValue().left, cons.getBufferSize());
          FetchRequest req = frb.build();
          SimpleConsumer ksc = new SimpleConsumer(bk.host(), bk.port(), cons.getTimeout(),
              cons.getBufferSize(), cons.getClientId());
          FetchResponse fetchResponse = ksc.fetch(req);
          // re-emit exactly the number of messages recorded for this window and partition
          int count = 0;
          for (MessageAndOffset msg : fetchResponse.messageSet(consumer.topic, kp.getPartitionId())) {
            KafkaConsumer.KafkaMessage kafkaMessage = new KafkaConsumer.KafkaMessage(kp, msg.message(), msg.offset());
            emitTuple(kafkaMessage);
            offsetStats.put(kp, msg.offset());
            count = count + 1;
            if (count == rc.getValue().right) {
              break;
            }
          }
        }
      }
    }
    if (windowId == windowDataManager.getLargestCompletedWindow()) {
      // Start the consumer at the largest recovery window
      SimpleKafkaConsumer cons = (SimpleKafkaConsumer) getConsumer();
      // Set the offset positions to the consumer
      Map<KafkaPartition, Long> currentOffsets = new HashMap<KafkaPartition, Long>(cons.getCurrentOffsets());
      // Increment the offsets
      for (Map.Entry<KafkaPartition, Long> e : offsetStats.entrySet()) {
        currentOffsets.put(e.getKey(), e.getValue() + 1);
      }
      cons.resetOffset(currentOffsets);
      cons.start();
    }
  } catch (IOException e) {
    throw new RuntimeException("replay", e);
  }
}
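Note that although the comment says all partition requests go into one fetch request, the loop as written builds and sends a separate request per partition. A fetch can only be served by the leader of the partitions it names, so true batching requires grouping by leader first. A hedged sketch of that restructuring follows; leaderFor is a hypothetical stand-in for the PartitionMetadata lookup performed inside the loop above, not an existing method:

// Hypothetical: group recovered partitions by leader broker so each broker
// receives a single batched FetchRequest instead of one request per partition
Map<Broker, FetchRequestBuilder> buildersByLeader = new HashMap<>();
for (Map.Entry<KafkaPartition, MutablePair<Long, Integer>> rc : recoveredData.entrySet()) {
  Broker leader = leaderFor(rc.getKey());  // hypothetical metadata lookup
  buildersByLeader
      .computeIfAbsent(leader, k -> new FetchRequestBuilder().clientId(cons.getClientId()))
      .addFetch(consumer.topic, rc.getKey().getPartitionId(), rc.getValue().left, cons.getBufferSize());
}
for (Map.Entry<Broker, FetchRequestBuilder> e : buildersByLeader.entrySet()) {
  SimpleConsumer ksc = new SimpleConsumer(e.getKey().host(), e.getKey().port(),
      cons.getTimeout(), cons.getBufferSize(), cons.getClientId());
  try {
    FetchResponse fetchResponse = ksc.fetch(e.getValue().build());
    // iterate fetchResponse.messageSet(topic, partitionId) for each batched partition,
    // emitting tuples exactly as in the per-partition loop above
  } finally {
    // close the consumer; the original loop leaves its per-partition consumers open
    ksc.close();
  }
}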
use of kafka.javaapi.consumer.SimpleConsumer in project bagheera by mozilla-metrics.
the class ProducerTest method countMessages.
private int countMessages() throws InvalidProtocolBufferException {
  SimpleConsumer consumer = new SimpleConsumer("localhost", KAFKA_BROKER_PORT, 100, 1024);
  long offset = 0L;
  int messageCount = 0;
  for (int i = 0; i < BATCH_SIZE; i++) {
    ByteBufferMessageSet messageSet = consumer.fetch(new FetchRequest(KAFKA_TOPIC, 0, offset, 1024));
    Iterator<MessageAndOffset> iterator = messageSet.iterator();
    MessageAndOffset msgAndOff;
    while (iterator.hasNext()) {
      messageCount++;
      msgAndOff = iterator.next();
      offset = msgAndOff.offset();
      Message message = msgAndOff.message();
      BagheeraMessage bmsg = BagheeraMessage.parseFrom(ByteString.copyFrom(message.payload()));
      String payload = new String(bmsg.getPayload().toByteArray());
      System.out.println(String.format("Message %d @%d: %s", messageCount, offset, payload));
    }
  }
  consumer.close();
  return messageCount;
}
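Note the four-argument constructor and the (topic, partition, offset, maxSize) FetchRequest here: this test targets the older Kafka 0.7-era API, whereas the cdap and apex-malhar snippets above use the 0.8 javaapi, whose constructor adds a client id. The two forms belong to different Kafka versions and would not compile together; shown side by side for illustration only:

// Kafka 0.7-style, as in this bagheera test: host, port, soTimeout, bufferSize
SimpleConsumer v07 = new SimpleConsumer("localhost", 9092, 100, 1024);
// Kafka 0.8 javaapi, as in the earlier snippets: adds a clientId argument
SimpleConsumer v08 = new SimpleConsumer("localhost", 9092, 100, 1024, "my-client");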