Use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
Class CrKafkaConsumerGroupClient, method onPartitionsLost.
/**
* Callback function of the ConsumerRebalanceListener, which is called in the context of KafkaConsumer.poll(...).
* With the eager rebalance protocol, this callback is called when the member has missed a rebalance and fallen out of the
* group. It is invoked on the set of all owned partitions (unless that set is empty). The member then rejoins the group
* through a new onPartitionsRevoked/onPartitionsAssigned cycle.
*
* For consistent region, we intentionally do not support incremental rebalancing, because it can skip the
* {@link #onPartitionsRevoked(Collection)} callback.
*
* @param partitions - the previously owned partitions that have been lost (with the eager rebalance protocol this is the complete former assignment)
* @see org.apache.kafka.clients.consumer.ConsumerRebalanceListener#onPartitionsLost(java.util.Collection)
*/
@Override
public void onPartitionsLost(Collection<TopicPartition> partitions) {
trace.info(MsgFormatter.format("onPartitionsLost() [{0}]: owned partitions = {1}", state, getAssignedPartitions()));
trace.info(MsgFormatter.format("onPartitionsLost() [{0}]: lost partitions = {1}", state, partitions));
getMessageQueue().clear();
// Do not update the assignedPartitions here. We need this state on partition assignment.
// removeAssignedPartitions (partitions);
ClientState newState = ClientState.CR_RESET_PENDING;
trace.log(DEBUG_LEVEL, MsgFormatter.format("client state transition: {0} -> {1}", state, newState));
state = newState;
sendStopPollingEventAsync();
boolean resetPending;
try {
resetPending = this.crGroupCoordinatorMxBean.getAndSetRebalanceResetPending(true, getOperatorContext().getName());
} catch (IOException e1) {
trace.warn("JCP access failed: " + e1.getMessage());
resetPending = false;
}
if (!resetPending) {
try {
trace.info(MsgFormatter.format("onPartitionsLost() [{0}]: initiating consistent region reset", state));
crMxBean.reset(true);
} catch (Exception e) {
throw new KafkaOperatorRuntimeException("Failed to reset the consistent region: " + e.getMessage(), e);
}
} else {
trace.info(MsgFormatter.format("onPartitionsLost() [{0}]: consistent region reset already initiated", state));
}
}
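
For readers less familiar with the rebalance listener contract, here is a minimal, self-contained sketch using only the plain Apache Kafka client API (not the streamsx.kafka classes) that mirrors the pattern above: onPartitionsLost() discards the locally buffered records and raises a recovery flag, because offsets of lost partitions can no longer be committed. The queue and flag names are illustrative assumptions, not part of the project.

import java.util.Collection;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;

public class LostAwareRebalanceListener implements ConsumerRebalanceListener {

    // hypothetical local buffer of fetched but not yet processed records
    private final BlockingQueue<ConsumerRecord<?, ?>> pendingRecords = new LinkedBlockingQueue<>();
    private volatile boolean resetPending = false;

    @Override
    public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
        // normal hand-over: offsets of already processed records could still be committed here
    }

    @Override
    public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
        // (re-)initialize processing state for the new assignment
    }

    @Override
    public void onPartitionsLost(Collection<TopicPartition> partitions) {
        // ownership is already gone; committing would fail, so discard buffered records
        pendingRecords.clear();
        // signal the processing thread that a reset/recovery is required
        resetPending = true;
    }

    public boolean isResetPending() {
        return resetPending;
    }
}

Such a listener would be registered via consumer.subscribe(topics, listener).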
Use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
Class CrKafkaStaticAssignConsumerClient, method processControlPortActionEvent.
/**
* @see com.ibm.streamsx.kafka.clients.consumer.AbstractKafkaConsumerClient#processControlPortActionEvent(com.ibm.streamsx.kafka.clients.consumer.ControlPortAction)
*/
@Override
protected void processControlPortActionEvent(ControlPortAction update) {
// trace with info: seeing this method call is important, and it does not happen frequently.
logger.info("processControlPortActionEvent(): update = " + update);
// create a map of current topic partitions and their offsets
Map<TopicPartition, Long /* offset */> currentTopicPartitionOffsets = new HashMap<>();
Set<TopicPartition> topicPartitions = getConsumer().assignment();
topicPartitions.forEach(tp -> currentTopicPartitionOffsets.put(tp, getConsumer().position(tp)));
final ControlPortActionType actionType = update.getActionType();
switch(actionType) {
case ADD_ASSIGNMENT:
try {
update.getTopicPartitionOffsetMap().forEach((tp, offset) -> {
// offset can be -2, -1, or a valid offset o >= 0
// -2 means 'seek to beginning', -1 means 'seek to end'
currentTopicPartitionOffsets.put(tp, offset);
});
assignToPartitionsWithOffsets(currentTopicPartitionOffsets);
synchronized (offsetManager) {
// avoid concurrent access with tuple submission thread
// update offset manager: add topics or update their partition lists
offsetManager.updateTopics(currentTopicPartitionOffsets.keySet());
// save the consumer offsets after moving its position
offsetManager.savePositionFromCluster();
createJcpCvFromOffsetManagerl();
}
break;
} catch (Exception e) {
throw new KafkaOperatorRuntimeException(e.getMessage(), e);
}
case REMOVE_ASSIGNMENT:
try {
update.getTopicPartitionOffsetMap().forEach((tp, offset) -> {
currentTopicPartitionOffsets.remove(tp);
});
// remove messages of removed topic partitions from the message queue; we would not be able to commit them later
getMessageQueue().removeIf(record -> belongsToPartition(record, update.getTopicPartitionOffsetMap().keySet()));
// remove removed partitions from offset manager.
synchronized (offsetManager) {
// avoid concurrent access with tuple submission thread
update.getTopicPartitionOffsetMap().forEach((tp, offset) -> {
offsetManager.remove(tp.topic(), tp.partition());
});
assignToPartitionsWithOffsets(currentTopicPartitionOffsets);
// save the consumer offsets after moving its position
offsetManager.savePositionFromCluster();
createJcpCvFromOffsetManagerl();
}
break;
} catch (Exception e) {
throw new KafkaOperatorRuntimeException(e.getMessage(), e);
}
default:
throw new UnsupportedControlPortActionException("processControlPortActionEvent(): action: " + actionType + " not supported by this client: " + getThisClassName());
}
}
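
The ADD_ASSIGNMENT branch relies on the convention that an offset of -2 means 'seek to beginning' and -1 means 'seek to end'. Below is a hedged sketch of how such a map could be applied with the plain consumer API; the class and method names and the sentinel constants are assumptions that simply mirror the comment in the listing.

import java.util.Collections;
import java.util.Map;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public final class ManualAssignment {

    // assumed sentinel values, mirroring the comment in the listing above
    public static final long SEEK_TO_BEGINNING = -2L;
    public static final long SEEK_TO_END = -1L;

    /** Re-assigns the consumer to the given partitions and seeks each one to its requested start offset. */
    public static void assignWithOffsets(KafkaConsumer<?, ?> consumer, Map<TopicPartition, Long> offsets) {
        consumer.assign(offsets.keySet());
        offsets.forEach((tp, offset) -> {
            if (offset == SEEK_TO_BEGINNING) {
                consumer.seekToBeginning(Collections.singleton(tp));
            } else if (offset == SEEK_TO_END) {
                consumer.seekToEnd(Collections.singleton(tp));
            } else if (offset >= 0) {
                consumer.seek(tp, offset);
            }
            // any other value: keep the committed/auto-reset position
        });
    }
}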
Use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
Class NonCrKafkaConsumerGroupClient, method processControlPortActionEvent.
/**
* Changes the subscription of the consumer via control port.
*
* @see com.ibm.streamsx.kafka.clients.consumer.AbstractKafkaConsumerClient#processControlPortActionEvent(com.ibm.streamsx.kafka.clients.consumer.ControlPortAction)
*/
@Override
protected void processControlPortActionEvent(ControlPortAction action) {
try {
final ControlPortActionType actionType = action.getActionType();
if (actionType == ControlPortActionType.ADD_SUBSCRIPTION || actionType == ControlPortActionType.REMOVE_SUBSCRIPTION) {
trace.info("action: " + action);
} else if (trace.isDebugEnabled()) {
trace.debug("action: " + action);
}
final Set<String> oldSubscription = getConsumer().subscription();
final Set<String> newSubscription = new HashSet<>(oldSubscription);
trace.info("current topic subscription: " + newSubscription);
boolean subscriptionChanged = false;
switch(actionType) {
case ADD_SUBSCRIPTION:
action.getTopics().forEach(tpc -> {
newSubscription.add(tpc);
});
break;
case REMOVE_SUBSCRIPTION:
action.getTopics().forEach(tpc -> {
newSubscription.remove(tpc);
});
break;
default:
throw new UnsupportedControlPortActionException("processControlPortActionEvent(): action: " + actionType + " not supported by this client: " + getThisClassName());
}
subscriptionChanged = !newSubscription.equals(oldSubscription);
if (!subscriptionChanged) {
trace.info("Subscriptiopn is unchanged: " + newSubscription);
} else {
if (newSubscription.size() > 0) {
trace.info("Subscription changed. New subscription: " + newSubscription);
} else {
// With Kafka client 2.3, no partition rebalance happened when we only unsubscribed, so the
// onPartitionsRevoked callback, in which we usually commit offsets before giving away
// partitions, was not called.
// With Kafka client 2.5.1 things are different: the onPartitionsRevoked callback is called,
// so we could commit the offsets there and remove the following code block, but it is also
// safe to preserve the old logic and commit now.
trace.info("Unsubscribing all topics. Going to commit offsets.");
// remove the content of the queue. It contains uncommitted messages.
getMessageQueue().clear();
OffsetManager offsetManager = getOffsetManager();
try {
awaitMessageQueueProcessed();
// the post-condition is that all messages from the queue have been submitted as
// tuples and their offsets +1 are stored in the OffsetManager.
final boolean commitSync = true;
final boolean commitPartitionWise = false;
CommitInfo offsets = new CommitInfo(commitSync, commitPartitionWise);
synchronized (offsetManager) {
Set<TopicPartition> partitionsInOffsetManager = offsetManager.getMappedTopicPartitions();
Set<TopicPartition> currentAssignment = getAssignedPartitions();
for (TopicPartition tp : partitionsInOffsetManager) {
if (currentAssignment.contains(tp)) {
offsets.put(tp, offsetManager.getOffset(tp.topic(), tp.partition()));
}
}
}
if (!offsets.isEmpty()) {
commitOffsets(offsets);
}
// reset the counter for periodic commit
resetCommitPeriod(System.currentTimeMillis());
} catch (InterruptedException | RuntimeException e) {
// Ignore InterruptedException; a RuntimeException from commitOffsets has already been traced.
}
// avoid committing offsets in onPartitionsRevoked (if called)
offsetManager.clear();
}
subscribe(newSubscription, this);
// getChkptContext().getKind() is not reported properly. Streams Build 20180710104900 (4.3.0.0) never returns OPERATOR_DRIVEN
if (getCheckpointKind() == Kind.OPERATOR_DRIVEN) {
trace.info("initiating checkpointing with current topic subscription");
// createCheckpoint() throws IOException
boolean result = getChkptContext().createCheckpoint();
trace.info("createCheckpoint() result: " + result);
}
}
} catch (Exception e) {
trace.error(e.getLocalizedMessage(), e);
throw new KafkaOperatorRuntimeException(e.getMessage(), e);
}
}
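
The empty-subscription branch above commits offsets before the subscription is dropped, because onPartitionsRevoked may not fire on older clients. A minimal sketch of that pattern with the plain consumer API follows; the processedOffsets parameter is a hypothetical stand-in for the operator's offset manager, and unlike the listing (which calls its own subscribe() helper even with an empty set) this sketch calls unsubscribe() directly.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

public final class SubscriptionUpdater {

    /**
     * Applies a changed topic subscription. When the new subscription becomes empty,
     * the offsets of processed records are committed synchronously first.
     *
     * @param processedOffsets next offsets to commit (offset of last processed record + 1),
     *                         tracked by the caller; hypothetical parameter
     */
    public static void applySubscription(KafkaConsumer<?, ?> consumer,
            Set<String> requestedTopics,
            Map<TopicPartition, OffsetAndMetadata> processedOffsets,
            ConsumerRebalanceListener listener) {

        Set<String> oldSubscription = consumer.subscription();
        Set<String> newSubscription = new HashSet<>(requestedTopics);
        if (newSubscription.equals(oldSubscription)) {
            return; // nothing to do
        }
        if (newSubscription.isEmpty()) {
            // commit before giving up all partitions, then drop the subscription
            if (!processedOffsets.isEmpty()) {
                consumer.commitSync(processedOffsets);
            }
            consumer.unsubscribe();
        } else {
            consumer.subscribe(newSubscription, listener);
        }
    }
}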
Use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
Class AbstractKafkaConsumerClient, method runPollLoop.
/**
* Runs the loop polling for Kafka messages until an event is received in the event queue.
* @param pollTimeout the timeout in milliseconds used to wait for new Kafka messages if there are fewer than the maximum batch size.
* @param throttleSleepMillis the time in milliseconds the polling thread sleeps after each poll.
*
* @throws InterruptedException
*/
protected void runPollLoop(long pollTimeout, long throttleSleepMillis) throws InterruptedException {
if (throttleSleepMillis > 0l) {
logger.log(DEBUG_LEVEL, MsgFormatter.format("Initiating throttled polling (sleep time = {0} ms); maxPollRecords = {1}", throttleSleepMillis, getMaxPollRecords()));
} else {
logger.log(DEBUG_LEVEL, MsgFormatter.format("Initiating polling; maxPollRecords = {0}", getMaxPollRecords()));
}
synchronized (drainBuffer) {
if (!drainBuffer.isEmpty()) {
final int bufSz = drainBuffer.size();
final int capacity = messageQueue.remainingCapacity();
// restore records that have been put aside to the drain buffer
if (capacity < bufSz) {
String msg = MsgFormatter.format("drain buffer size {0} > capacity of message queue {1}", bufSz, capacity);
logger.error("runPollLoop() - " + msg);
// must restart operator.
throw new RuntimeException(msg);
}
messageQueue.addAll(drainBuffer);
final int qSize = messageQueue.size();
drainBuffer.clear();
logger.log(DEBUG_LEVEL, MsgFormatter.format("runPollLoop(): {0,number,#} consumer records added from drain buffer to the message queue. Message queue size is {1,number,#} now.", bufSz, qSize));
}
}
// continue polling for messages until a new event
// arrives in the event queue
fetchPaused = consumer.paused().size() > 0;
logger.log(DEBUG_LEVEL, "previously paused partitions: " + consumer.paused());
while (eventQueue.isEmpty()) {
boolean doPoll = true;
// can wait for 100 ms; throws InterruptedException:
try {
checkSpaceInMessageQueueAndPauseFetching(false);
} catch (IllegalStateException e) {
logger.warn("runPollLoop(): " + e.getLocalizedMessage());
// no space, could not pause - do not call poll
doPoll = false;
}
if (doPoll) {
try {
final long now = System.currentTimeMillis();
final long timeBetweenPolls = now - lastPollTimestamp;
if (lastPollTimestamp > 0) {
// this is not the first 'poll'
if (timeBetweenPolls >= maxPollIntervalMs) {
logger.warn("Kafka client didn't poll often enough for messages. " //$NON-NLS-1$
+ "Maximum time between two polls is currently " + maxPollIntervalMs //$NON-NLS-1$
+ " milliseconds. Consider setting consumer property '" + ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG //$NON-NLS-1$
+ "' to a value higher than " + timeBetweenPolls); //$NON-NLS-1$
}
}
lastPollTimestamp = System.currentTimeMillis();
EnqueResult r = pollAndEnqueue(pollTimeout, throttleSleepMillis > 0l);
final int nMessages = r.getNumRecords();
if (nMessages > 0) {
pollExcFilter.reset();
}
final long nQueuedBytes = r.getSumTotalSize();
final Level l = Level.DEBUG;
// final Level l = DEBUG_LEVEL;
if (logger.isEnabledFor(l) && nMessages > 0) {
logger.log(l, MsgFormatter.format("{0,number,#} records with total {1,number,#}/{2,number,#}/{3,number,#} bytes (key/value/sum) fetched and enqueued", nMessages, r.getSumKeySize(), r.getSumValueSize(), nQueuedBytes));
}
tryAdjustMinFreeMemory(nQueuedBytes, nMessages);
nPendingMessages.setValue(messageQueue.size());
if (throttleSleepMillis > 0l) {
synchronized (throttledPollWaitMonitor) {
throttledPollWaitMonitor.wait(throttleSleepMillis);
}
}
} catch (RetriableException e) {
logger.warn("Retriable exception (ignored, may succeed if retried): " + e, e);
logger.info("Going to sleep for 100 ms before next poll ...");
Thread.sleep(100l);
} catch (SerializationException e) {
// see https://issues.apache.org/jira/browse/KAFKA-4740
throw e;
} catch (Exception e) {
if (pollExcFilter.filter(e)) {
logger.warn(e);
} else {
logger.error(e);
throw new KafkaOperatorRuntimeException("Consecutive number of exceptions too high.", e);
}
}
}
}
logger.debug("Stop polling. Message in event queue: " + eventQueue.peek().getEventType()); //$NON-NLS-1$
}
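
Stripped of the operator-specific queue management, the core of runPollLoop() is a poll loop that warns when the gap between two polls approaches max.poll.interval.ms and optionally sleeps to throttle throughput. The following sketch shows that skeleton with the plain consumer API; the queue, stop flag, and logging are illustrative assumptions.

import java.time.Duration;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class ThrottledPollLoop<K, V> {

    private final KafkaConsumer<K, V> consumer;
    private final Queue<ConsumerRecord<K, V>> messageQueue = new ConcurrentLinkedQueue<>();
    private final AtomicBoolean stop = new AtomicBoolean(false);
    private final long maxPollIntervalMs;   // value of max.poll.interval.ms, known to the caller
    private long lastPollTimestamp = 0L;

    public ThrottledPollLoop(KafkaConsumer<K, V> consumer, long maxPollIntervalMs) {
        this.consumer = consumer;
        this.maxPollIntervalMs = maxPollIntervalMs;
    }

    /** Polls until stop() is called, optionally sleeping after each poll to throttle throughput. */
    public void run(long pollTimeoutMs, long throttleSleepMillis) throws InterruptedException {
        while (!stop.get()) {
            long now = System.currentTimeMillis();
            if (lastPollTimestamp > 0 && now - lastPollTimestamp >= maxPollIntervalMs) {
                // the group coordinator may already consider this member dead
                System.err.println("Time between polls exceeded max.poll.interval.ms ("
                        + maxPollIntervalMs + " ms); a rebalance is likely.");
            }
            lastPollTimestamp = System.currentTimeMillis();
            ConsumerRecords<K, V> records = consumer.poll(Duration.ofMillis(pollTimeoutMs));
            records.forEach(messageQueue::add);
            if (throttleSleepMillis > 0L) {
                Thread.sleep(throttleSleepMillis);
            }
        }
    }

    public void stop() {
        stop.set(true);
    }
}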
Use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
Class NonCrKafkaConsumerClient, method processControlPortActionEvent.
/**
* @see com.ibm.streamsx.kafka.clients.consumer.AbstractKafkaConsumerClient#processControlPortActionEvent(com.ibm.streamsx.kafka.clients.consumer.ControlPortAction)
*/
@Override
protected void processControlPortActionEvent(ControlPortAction action) {
try {
final ControlPortActionType actionType = action.getActionType();
if (actionType == ControlPortActionType.ADD_ASSIGNMENT || actionType == ControlPortActionType.REMOVE_ASSIGNMENT) {
trace.info("action: " + action);
} else if (trace.isDebugEnabled()) {
trace.debug("action: " + action);
}
// create a map of current topic partitions and their fetch offsets for next record
Map<TopicPartition, Long /* offset */> currentTopicPartitionOffsets = new HashMap<>();
Set<TopicPartition> topicPartitions = getConsumer().assignment();
topicPartitions.forEach(tp -> currentTopicPartitionOffsets.put(tp, getConsumer().position(tp)));
boolean doNewAssign = false;
switch(actionType) {
case ADD_ASSIGNMENT:
action.getTopicPartitionOffsetMap().forEach((tp, offset) -> {
// offset can be -2, -1, or a valid offset o >= 0
// -2 means 'seek to beginning', -1 means 'seek to end'
currentTopicPartitionOffsets.put(tp, offset);
});
doNewAssign = currentTopicPartitionOffsets.size() > 0;
if (!doNewAssign) {
trace.info("topic partition assignment unchanged: " + currentTopicPartitionOffsets);
} else {
assignToPartitionsWithOffsets(currentTopicPartitionOffsets);
trace.info("assigned partitions after ADD: " + currentTopicPartitionOffsets);
// No need to update the offset manager here (e.g. by adding topics);
// missing topics are auto-created in the offset manager.
CommitInfo commits = new CommitInfo(true, false);
// Immediately commit the fetch offsets of _only_the_added_ topic partitions
action.getTopicPartitionOffsetMap().forEach((tp, offset) -> {
// do not put 'offset' into the commits; 'offset' can be -1 or -2 for 'end' or 'begin'
commits.put(tp, getConsumer().position(tp));
});
commitOffsets(commits);
trace.info("committed offsets of the added topic partitions: " + commits);
}
break;
case REMOVE_ASSIGNMENT:
// x 1. remove messages of the removed topic partitions from the queue - they are all uncommitted
// x 2. wait that the queue gets processed - awaitMessageQueueProcessed();
// x 3. commit the offsets of the removed topic partitions
// x 4. remove the unassigned topic partitions from the offsetManager (or simply clear?)
// x 5. update the partition assignment in the consumer
// remove messages of removed topic partitions from the message queue
getMessageQueue().removeIf(record -> belongsToPartition(record, action.getTopicPartitionOffsetMap().keySet()));
awaitMessageQueueProcessed();
// now the offset manager can be cleaned without the chance that the removed partition(s) re-appear after tuple submission
// remove the removed partitions from the offset manager. We cannot commit offsets for partitions we are no longer assigned.
// the post-condition is that all messages from the queue have been submitted as
// tuples and their offsets +1 are stored in the OffsetManager.
final boolean commitSync = true;
final boolean commitPartitionWise = false;
CommitInfo commitOffsets = new CommitInfo(commitSync, commitPartitionWise);
OffsetManager offsetManager = getOffsetManager();
synchronized (offsetManager) {
for (TopicPartition tp : action.getTopicPartitionOffsetMap().keySet()) {
// make sure that we commit only partitions that are assigned
if (currentTopicPartitionOffsets.containsKey(tp)) {
doNewAssign = true;
long offset = offsetManager.getOffset(tp.topic(), tp.partition());
// offset is -1 if there is no mapping from topic partition to offset
if (offset >= 0)
commitOffsets.put(tp, offset);
currentTopicPartitionOffsets.remove(tp);
}
offsetManager.remove(tp.topic(), tp.partition());
}
}
if (!commitOffsets.isEmpty()) {
commitOffsets(commitOffsets);
}
// we can end up here with an empty map after removal of assignments.
if (doNewAssign) {
assignToPartitionsWithOffsets(currentTopicPartitionOffsets);
trace.info("assigned partitions after REMOVE: " + currentTopicPartitionOffsets);
} else {
trace.info("topic partition assignment unchanged: " + currentTopicPartitionOffsets);
}
break;
default:
throw new UnsupportedControlPortActionException("processControlPortActionEvent(): action: " + actionType + " not supported by this client: " + getThisClassName());
}
// getChkptContext().getKind() is not reported properly. Streams Build 20180710104900 (4.3.0.0) never returns OPERATOR_DRIVEN
if (doNewAssign && getCheckpointKind() == Kind.OPERATOR_DRIVEN) {
trace.info("initiating checkpointing with current partition assignment");
// createCheckpoint() throws IOException
boolean result = getChkptContext().createCheckpoint();
trace.info("createCheckpoint() result: " + result);
}
} catch (Exception e) {
trace.error(e.getLocalizedMessage(), e);
throw new KafkaOperatorRuntimeException(e.getMessage(), e);
}
}
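
The REMOVE_ASSIGNMENT branch commits the tracked offsets of the removed partitions before re-assigning the remaining ones. A hedged sketch of that commit-then-reassign step with the plain consumer API is given below; trackedOffsets is a hypothetical stand-in for the offset manager.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

public final class RemoveAssignment {

    /**
     * Removes partitions from a manual assignment: commits the tracked offsets of the
     * removed partitions and then re-assigns the remaining set.
     *
     * @param trackedOffsets next offsets (last processed + 1) maintained by the caller;
     *                       hypothetical structure standing in for the offset manager
     */
    public static void removePartitions(KafkaConsumer<?, ?> consumer,
            Set<TopicPartition> toRemove,
            Map<TopicPartition, Long> trackedOffsets) {

        Set<TopicPartition> remaining = new HashSet<>(consumer.assignment());
        Map<TopicPartition, OffsetAndMetadata> commits = new HashMap<>();
        for (TopicPartition tp : toRemove) {
            if (remaining.remove(tp)) {
                Long next = trackedOffsets.remove(tp);
                if (next != null && next >= 0) {
                    commits.put(tp, new OffsetAndMetadata(next));
                }
            }
        }
        if (!commits.isEmpty()) {
            consumer.commitSync(commits);   // commit only partitions that were actually assigned
        }
        consumer.assign(remaining);         // drop the removed partitions from the assignment
    }
}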