Use of kafka.message.InvalidMessageException in project kafka-spout by HolmesNL.
From the class KafkaSpout, method fillBuffer.
/**
 * Refills the buffer with messages from the configured kafka topic, if available.
 *
 * @return Whether the buffer contains messages to be emitted after this call.
 * @throws IllegalStateException When the current buffer is not empty or emitted messages have not yet been acknowledged by the topology.
 */
protected boolean fillBuffer() {
    if (!_inProgress.isEmpty() || !_queue.isEmpty()) {
        throw new IllegalStateException("cannot fill buffer when buffer or pending messages are non-empty");
    }
    if (_iterator == null) {
        // create a stream of messages from _consumer using the streams as defined on construction
        final Map<String, List<KafkaStream<byte[], byte[]>>> streams = _consumer.createMessageStreams(Collections.singletonMap(_topic, 1));
        _iterator = streams.get(_topic).get(0).iterator();
    }
    // read from the iterator; hasNext() throws a ConsumerTimeoutException when the configured consumer timeout is exceeded
    try {
        int size = 0;
        while (size < _bufSize && _iterator.hasNext()) {
            final MessageAndMetadata<byte[], byte[]> message = _iterator.next();
            final KafkaMessageId id = new KafkaMessageId(message.partition(), message.offset());
            _inProgress.put(id, message.message());
            size++;
        }
    } catch (final InvalidMessageException e) {
        // a message failed its checksum; log it and continue with whatever was read so far
        LOG.warn(e.getMessage(), e);
    } catch (final ConsumerTimeoutException e) {
        // ignore, storm will call nextTuple again at some point in the near future
        // timeout does *not* mean that no messages were read (state is checked below)
    }
    if (_inProgress.size() > 0) {
        // set _queue to all currently pending kafka message ids
        _queue.addAll(_inProgress.keySet());
        LOG.debug("buffer now has {} messages to be emitted", _queue.size());
        // message(s) appended to buffer
        return true;
    } else {
        // no messages appended to buffer
        return false;
    }
}
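For context, a minimal sketch (not the project's actual code) of how a Storm spout's nextTuple might drain the buffer that fillBuffer fills. The _collector field and the Values tuple are assumptions introduced for illustration; the other names come from the snippet above.

// Sketch only: assumes _collector is the spout's SpoutOutputCollector and that
// _queue / _inProgress / fillBuffer() behave as in the snippet above.
@Override
public void nextTuple() {
    // only refill from kafka once all previously emitted messages are done
    if (_queue.isEmpty() && _inProgress.isEmpty() && !fillBuffer()) {
        return;
    }
    final KafkaMessageId id = _queue.poll();
    if (id != null) {
        final byte[] payload = _inProgress.get(id);
        // anchor the tuple with the kafka message id so ack/fail can be correlated later
        _collector.emit(new Values(payload), id);
    }
}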
Use of kafka.message.InvalidMessageException in project druid by druid-io.
From the class KafkaEightFirehoseFactory, method connect.
@Override
public Firehose connect(final ByteBufferInputRowParser firehoseParser) throws IOException {
    Set<String> newDimExclus = Sets.union(firehoseParser.getParseSpec().getDimensionsSpec().getDimensionExclusions(), Sets.newHashSet("feed"));
    final ByteBufferInputRowParser theParser = firehoseParser.withParseSpec(firehoseParser.getParseSpec().withDimensionsSpec(firehoseParser.getParseSpec().getDimensionsSpec().withDimensionExclusions(newDimExclus)));
    final ConsumerConnector connector = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps));
    final Map<String, List<KafkaStream<byte[], byte[]>>> streams = connector.createMessageStreams(ImmutableMap.of(feed, 1));
    final List<KafkaStream<byte[], byte[]>> streamList = streams.get(feed);
    if (streamList == null || streamList.size() != 1) {
        return null;
    }
    final KafkaStream<byte[], byte[]> stream = streamList.get(0);
    final ConsumerIterator<byte[], byte[]> iter = stream.iterator();
    return new Firehose() {
        @Override
        public boolean hasMore() {
            return iter.hasNext();
        }

        @Override
        public InputRow nextRow() {
            try {
                final byte[] message = iter.next().message();
                if (message == null) {
                    return null;
                }
                return theParser.parse(ByteBuffer.wrap(message));
            } catch (InvalidMessageException e) {
                /*
                 * If the CRC error was introduced during wire transfer, this is not the best way to handle it.
                 * It would probably be better to shut down the firehose without committing and start it again.
                 */
                log.error(e, "Message failed its checksum and it is corrupt, will skip it");
                return null;
            }
        }

        @Override
        public Runnable commit() {
            return new Runnable() {
                @Override
                public void run() {
                    /*
                     * This is actually not going to do exactly what we want, because it will be called asynchronously
                     * after the persist is complete. So it is going to commit that it has processed more than was actually
                     * persisted. This is unfortunate, but good enough for now. Should revisit along with an upgrade
                     * of our Kafka version.
                     */
                    log.info("committing offsets");
                    connector.commitOffsets();
                }
            };
        }

        @Override
        public void close() throws IOException {
            connector.shutdown();
        }
    };
}
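For context, a minimal sketch (not Druid's actual ingestion loop) of how the Firehose returned by connect might be driven. The ingest method, its factory and parser arguments, and the persist helper are hypothetical stand-ins introduced for illustration.

// Sketch only: reads rows until the firehose is exhausted, then runs the commit Runnable.
void ingest(KafkaEightFirehoseFactory factory, ByteBufferInputRowParser parser) throws IOException {
    try (Firehose firehose = factory.connect(parser)) {
        if (firehose == null) {
            return;  // connect() returned null because the expected kafka stream was missing
        }
        while (firehose.hasMore()) {
            final InputRow row = firehose.nextRow();
            if (row == null) {
                // a corrupt message (InvalidMessageException) or empty payload was skipped
                continue;
            }
            persist(row);  // hypothetical persistence step
        }
        // run the commit Runnable only after the batch has been persisted
        firehose.commit().run();
    }
}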