use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
the class CrKafkaConsumerGroupClient method processResetToInitEvent.
/**
 * Brings the consumer client back to its initial state.
 * The reset is carried out in the following order:
 * <ol>
 * <li>the drain buffer and the operator internal message queue are emptied,</li>
 * <li>the 'seekOffsetMap' member variable is rebuilt from 'initialOffsets' for the partitions that are
 * currently regarded as assigned (the initial offsets have been saved into JCP control variables on subscribe),</li>
 * <li>the assignedPartitionsOffsetManager is cleared and re-populated with the assigned partitions,</li>
 * <li>all assigned partitions are seeked to what is stored in 'seekOffsetMap'; partitions for which the seek
 * failed are removed again from the assignedPartitionsOffsetManager,</li>
 * <li>the assignedPartitionsOffsetManager stores the positions obtained from the cluster,</li>
 * <li>the counter of submitted records is set to zero, and the client state changes to RESET_COMPLETE.</li>
 * </ol>
 * This method is run by the event thread. State must be POLLING_STOPPED.
 *
 * @throws KafkaOperatorRuntimeException wrapping any exception that is raised during the reset
 * @see com.ibm.streamsx.kafka.clients.consumer.AbstractKafkaConsumerClient#processResetToInitEvent()
 */
@Override
protected void processResetToInitEvent() {
    trace.info(MsgFormatter.format("processResetToInitEvent() [{0}] - entering", state));
    try {
        clearDrainBuffer();
        getMessageQueue().clear();

        // If none of the KafkaConsumers in this group was restarted before the region reset, the partition
        // assignment will most likely stay as it is, and neither onPartitionsRevoked() nor onPartitionsAssigned()
        // is fired on our ConsumerRebalanceListener. Therefore we seek here for the partitions we believe to be
        // assigned to us (which can also be none).
        // If another consumer of our group - not this operator - was restarted, the assignment may have changed
        // without us being notified yet. Therefore failed seeks must be handled.
        // When this operator itself was restarted and reset, getAssignedPartitions() returns an empty Set.
        this.seekOffsetMap = initialOffsets.createOffsetMap(getAssignedPartitions());
        trace.info(MsgFormatter.format("initial Offsets for assignment {0}: {1}", getAssignedPartitions(), this.seekOffsetMap));

        // The assignedPartitionsOffsetManager goes into the checkpoint, and its offsets are the seek positions
        // for a reset from a checkpoint. A seek position must exist even for a partition for which no tuple
        // has been submitted yet, hence it is re-initialized here for the assigned partitions.
        assignedPartitionsOffsetManager.clear();
        assignedPartitionsOffsetManager.addTopics(getAssignedPartitions());
        for (TopicPartition notSeeked : seekPartitions(getAssignedPartitions(), this.seekOffsetMap)) {
            assignedPartitionsOffsetManager.remove(notSeeked.topic(), notSeeked.partition());
        }
        assignedPartitionsOffsetManager.savePositionFromCluster();

        // tuple counter for operator driven CR starts from zero again
        nSubmittedRecords = 0L;

        final ClientState resetDone = ClientState.RESET_COMPLETE;
        trace.log(DEBUG_LEVEL, MsgFormatter.format("client state transition: {0} -> {1}", state, resetDone));
        state = resetDone;
        trace.log(DEBUG_LEVEL, MsgFormatter.format("processResetToInitEvent() [{0}] - exiting", state));
    } catch (Exception resetFailure) {
        throw new KafkaOperatorRuntimeException(resetFailure.getMessage(), resetFailure);
    }
}
use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
the class CrKafkaConsumerGroupClient method processCheckpointEvent.
/**
 * Creates a checkpoint of the current state when used in consistent region.
 * The following objects are written to the output stream of the checkpoint, in this sequence:
 * <ol>
 * <li>the name of this operator</li>
 * <li>the names of the consumer operators registered in the group MXBean</li>
 * <li>the assignedPartitionsOffsetManager</li>
 * </ol>
 * When ENABLE_CHECK_REGISTERED_ON_CHECKPOINT is enabled and the name of this operator is not contained
 * in the registered consumer operators, the checkpoint fails.
 *
 * @param checkpoint the reference of the checkpoint object
 * @throws KafkaOperatorRuntimeException wrapping any exception that is raised while creating the checkpoint
 * @see com.ibm.streamsx.kafka.clients.consumer.AbstractKafkaConsumerClient#processCheckpointEvent(com.ibm.streams.operator.state.Checkpoint)
 */
@Override
protected void processCheckpointEvent(Checkpoint checkpoint) {
    trace.info(MsgFormatter.format("processCheckpointEvent() [{0}] sequenceId = {1}", state, checkpoint.getSequenceId()));
    try {
        final Set<String> consumersInGroup = this.crGroupCoordinatorMxBean.getRegisteredConsumerOperators();
        final String operatorName = getOperatorContext().getName();
        if (ENABLE_CHECK_REGISTERED_ON_CHECKPOINT && !consumersInGroup.contains(operatorName)) {
            final String notRegistered = MsgFormatter.format("My operator name not registered in group MXBean: {0}", operatorName);
            trace.error(notRegistered);
            throw new KafkaOperatorNotRegisteredException(notRegistered);
        }

        // the sequence of the writes defines the layout of the checkpoint
        final ObjectOutputStream out = checkpoint.getOutputStream();
        out.writeObject(operatorName);
        out.writeObject(consumersInGroup);
        out.writeObject(this.assignedPartitionsOffsetManager);

        if (trace.isEnabledFor(DEBUG_LEVEL)) {
            trace.log(DEBUG_LEVEL, "data written to checkpoint: myOperatorName = " + operatorName);
            trace.log(DEBUG_LEVEL, "data written to checkpoint: contributingOperators = " + consumersInGroup);
            trace.log(DEBUG_LEVEL, "data written to checkpoint: assignedPartitionsOffsetManager = " + this.assignedPartitionsOffsetManager);
        }
    } catch (Exception checkpointFailure) {
        throw new KafkaOperatorRuntimeException(checkpointFailure.getMessage(), checkpointFailure);
    }
    trace.log(DEBUG_LEVEL, "processCheckpointEvent() - exiting.");
}
use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
the class CrKafkaConsumerGroupClient method seekPartitions.
/**
 * Seeks the given partitions to the offsets in the map. If the map does not contain a mapping for a partition,
 * the partition is seeked to what is given as initial start position (End, Beginning, Time); with start position
 * 'Default' no seek is done. For such a partition the resulting fetch position of the consumer is saved
 * into 'initialOffsets'.
 * <p>
 * The partitions are processed in the order of their String representation.
 * When the thread is interrupted while an initial offset is saved, processing stops; the partitions that have
 * not been processed until then are contained in the returned collection.
 *
 * @param partitions the partitions
 * @param offsetMap the map with mappings from partition to offset
 * @return the topic partitions for which the seek failed because they are not assigned, or which have not been
 *         processed due to interruption.
 * @throws KafkaOperatorRuntimeException when the initial start position is not supported, or when saving
 *         an initial offset failed with an IOException (the IOException is the cause)
 */
private Collection<TopicPartition> seekPartitions(Collection<TopicPartition> partitions, Map<TopicPartition, Long> offsetMap) {
    KafkaConsumer<?, ?> consumer = getConsumer();
    // re-used single-element collection and map for seeking one partition to the initial start position
    Collection<TopicPartition> tp1 = new ArrayList<>(1);
    Map<TopicPartition, Long> tpTimestampMap1 = new HashMap<>();
    // a partition counts as failed until it has been processed without failure
    Set<TopicPartition> seekFailedPartitions = new HashSet<>(partitions);
    List<TopicPartition> sortedPartitions = new ArrayList<>(partitions);
    sortedPartitions.sort(Comparator.comparing(TopicPartition::toString));
    for (TopicPartition tp : sortedPartitions) {
        try {
            if (offsetMap.containsKey(tp)) {
                final long seekToOffset = offsetMap.get(tp);
                trace.info(MsgFormatter.format("seekPartitions() seeking {0} to offset {1}", tp, seekToOffset));
                consumer.seek(tp, seekToOffset);
            } else {
                // We have never seen the partition. Seek to startPosition given as operator parameter(s)
                switch (this.initialStartPosition) {
                case Default:
                    trace.info(MsgFormatter.format("seekPartitions() new topic partition {0}; no need to seek to {1}", tp, this.initialStartPosition));
                    // do not seek
                    break;
                case Beginning:
                case End:
                    tp1.clear();
                    tp1.add(tp);
                    trace.info(MsgFormatter.format("seekPartitions() seeking new topic partition {0} to {1}", tp, this.initialStartPosition));
                    seekToPosition(tp1, this.initialStartPosition);
                    break;
                case Time:
                    tpTimestampMap1.clear();
                    tpTimestampMap1.put(tp, this.initialStartTimestamp);
                    trace.info(MsgFormatter.format("seekPartitions() seeking new topic partition {0} to timestamp {1,number,#}", tp, this.initialStartTimestamp));
                    seekToTimestamp(tpTimestampMap1);
                    break;
                default:
                    // unsupported start position, like 'Offset', is already treated by initialization checks
                    final String msg = MsgFormatter.format("seekPartitions(): {0} does not support startPosition {1}.", getThisClassName(), this.initialStartPosition);
                    trace.error(msg);
                    throw new KafkaOperatorRuntimeException(msg);
                }
                // remember the fetch position of the new partition as its initial offset
                long initialFetchOffset = getConsumer().position(tp);
                initialOffsets.saveOffset(tp, initialFetchOffset, false);
            }
            seekFailedPartitions.remove(tp);
        } catch (IllegalStateException topicPartitionNotAssigned) {
            // when this happens the ConsumerRebalanceListener will be called later or we are in the middle of revocation/assignment
            trace.info(MsgFormatter.format("seekPartitions(): seek failed for partition {0}: {1}", tp, topicPartitionNotAssigned.getLocalizedMessage()));
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt status of the thread is not restored here - confirm that callers do not depend on it
            trace.log(DEBUG_LEVEL, "interrupted creating or saving offset to JCP control variable");
            // leave for-loop
            break;
        } catch (IOException e) {
            // keep the IOException as the cause, so that its stack trace is not lost
            throw new KafkaOperatorRuntimeException(e.getMessage(), e);
        }
    }
    trace.log(DEBUG_LEVEL, "partitions failed to seek: " + seekFailedPartitions);
    return seekFailedPartitions;
}
use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
the class CrKafkaConsumerGroupClient method onPartitionsAssigned.
/**
 * Callback function of the ConsumerRebalanceListener, which is called in the context of KafkaConsumer.poll(...)
 * after partitions are re-assigned. For consistent region, we intentionally do not support incremental rebalancing.
 * So we can (still) assume, that always eager rebalancing is happening, where the complete assignment is passed.
 * onPartitionsAssigned performs following:
 * <ul>
 * <li>
 * increment the custom metric for partition rebalances
 * </li>
 * <li>
 * save current assignment for this operator
 * </li>
 * <li>
 * update the offsetManager for the assigned partitions: remove gone partitions, update the topics with the newly assigned partitions
 * </li>
 * <li>
 * When the client is in state SUBSCRIBED or RESET_COMPLETE, i.e. it has been reset before from a checkpoint, or has been
 * reset to initial state with assigned partitions, a 'seekOffsetMap' with content in it has been created before. This map maps
 * topic partitions to offsets. When partitions are assigned that have no mapping in the map, the control variable is accessed
 * for the initial offset, and added to the map. If there is no control variable, the initial offset is determined as given
 * by operator parameter 'startPosition' and optionally 'startTimestamp' and written into a control variable for the topic partition.
 * For every assigned partition, the client seeks to the offset in the map.
 * Afterwards, the offset manager is updated with the new fetch positions. This information goes into the next checkpoint.
 * </li>
 * <li>
 * When the client is in a different state, the client does not seek.
 * </li>
 * <li>
 * Finally, the space in the message queue is checked, which may pause fetching.
 * </li>
 * </ul>
 *
 * @param newAssignedPartitions new partition assignment; with eager rebalancing protocol this is always the complete assignment.
 * @throws KafkaOperatorRuntimeException when creating the offset map from the initial offsets failed with an IOException
 *         (the IOException is the cause)
 * @see org.apache.kafka.clients.consumer.ConsumerRebalanceListener#onPartitionsAssigned(java.util.Collection)
 */
@Override
public void onPartitionsAssigned(Collection<TopicPartition> newAssignedPartitions) {
    trace.info(MsgFormatter.format("onPartitionsAssigned() [{0}]: new partition assignment = {1}", state, newAssignedPartitions));
    getOperatorContext().getMetrics().getCustomMetric(N_PARTITION_REBALANCES).increment();
    // partitions that were assigned before, but are not part of the new assignment
    Set<TopicPartition> gonePartitions = new HashSet<>(getAssignedPartitions());
    gonePartitions.removeAll(newAssignedPartitions);
    clearAssignedPartitions();
    addAssignedPartitions(newAssignedPartitions);
    trace.info("onPartitionsAssigned(): topic partitions that are not assigned anymore: " + gonePartitions);
    synchronized (assignedPartitionsOffsetManager) {
        for (TopicPartition tp : gonePartitions) {
            // remove the topic partition also from the offset manager
            assignedPartitionsOffsetManager.remove(tp.topic(), tp.partition());
        }
        assignedPartitionsOffsetManager.updateTopics(newAssignedPartitions);
    }
    trace.log(DEBUG_LEVEL, "onPartitionsAssigned() assignedPartitionsOffsetManager = " + assignedPartitionsOffsetManager);
    trace.log(DEBUG_LEVEL, "onPartitionsAssigned() assignedPartitions = " + getAssignedPartitions());
    switch (state) {
    case SUBSCRIBED:
    case RESET_COMPLETE:
        assert (seekOffsetMap != null);
        // Add all offsets from the 'initialOffsets' to the seekOffset if not yet present for the topic partition
        try {
            final Map<TopicPartition, Long> initialOffsetsMap = initialOffsets.createOffsetMap(newAssignedPartitions);
            trace.info(MsgFormatter.format("seekOffsetMap created from initial offsets: {0}", initialOffsetsMap));
            initialOffsetsMap.forEach((tp, offs) -> seekOffsetMap.putIfAbsent(tp, offs));
        } catch (InterruptedException e) {
            // seeking continues with the content the seekOffsetMap had before
            trace.log(DEBUG_LEVEL, "interrupted creating a seekOffsetMap from JCP control variables");
        } catch (IOException e) {
            // keep the IOException as the cause, so that its stack trace is not lost
            throw new KafkaOperatorRuntimeException(e.getMessage(), e);
        }
        seekPartitions(newAssignedPartitions, seekOffsetMap);
        // update the fetch positions in the offset manager for all assigned partitions
        assignedPartitionsOffsetManager.savePositionFromCluster();
        break;
    case CR_RESET_PENDING:
        // silently ignore; we have updated assigned partitions and assignedPartitionsOffsetManager before
        break;
    default:
        // ... not observed during tests
        trace.warn(MsgFormatter.format("onPartitionsAssigned() [{0}]: unexpected state for onPartitionsAssigned()", state));
    }
    try {
        checkSpaceInMessageQueueAndPauseFetching(true);
    } catch (IllegalStateException | InterruptedException ignored) {
        // IllegalStateException cannot happen
        // On Interruption, do nothing
    }
}
use of com.ibm.streamsx.kafka.KafkaOperatorRuntimeException in project streamsx.kafka by IBMStreams.
the class CrKafkaConsumerGroupClient method onPartitionsRevoked.
/**
 * Callback function of the ConsumerRebalanceListener, which is called in the context of KafkaConsumer.poll(...)
 * before partitions are re-assigned. For consistent region, we intentionally do not support incremental rebalancing.
 * So we can (still) assume, that always eager rebalancing is happening, where the complete assignment is revoked followed
 * by a new assignment.
 * <p>
 * The message queue is always cleared. Everything else happens only when the client is in state RECORDS_FETCHED:
 * the client state changes to CR_RESET_PENDING, polling for messages is stopped, and a reset of the consistent
 * region is triggered unless the group MXBean reports that a reset is already pending.
 * In every other client state, nothing more is done.
 *
 * @param partitions current partition assignment
 * @throws KafkaOperatorRuntimeException when the reset of the consistent region could not be triggered
 * @see org.apache.kafka.clients.consumer.ConsumerRebalanceListener#onPartitionsRevoked(java.util.Collection)
 */
@Override
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
    trace.info(MsgFormatter.format("onPartitionsRevoked() [{0}]: owned partitions = {1}", state, getAssignedPartitions()));
    trace.info(MsgFormatter.format("onPartitionsRevoked() [{0}]: revoked partitions = {1}", state, partitions));
    // The queue contains uncommitted messages; drop them.
    // They will be fetched again after rebalance.
    getMessageQueue().clear();
    // Note: the assigned partitions of this client are not modified in this callback.
    if (state != ClientState.RECORDS_FETCHED) {
        return;
    }

    // mark the reset as pending within the group and learn whether it was pending before;
    // when JCP cannot be accessed, proceed as if no reset was pending
    boolean resetAlreadyPending = false;
    try {
        resetAlreadyPending = this.crGroupCoordinatorMxBean.getAndSetRebalanceResetPending(true, getOperatorContext().getName());
    } catch (IOException jcpFailure) {
        trace.warn("JCP access failed: " + jcpFailure.getMessage());
    }

    final ClientState pendingState = ClientState.CR_RESET_PENDING;
    trace.log(DEBUG_LEVEL, MsgFormatter.format("client state transition: {0} -> {1}", state, pendingState));
    state = pendingState;
    sendStopPollingEventAsync();

    if (resetAlreadyPending) {
        trace.info(MsgFormatter.format("onPartitionsRevoked() [{0}]: consistent region reset already initiated", state));
        return;
    }
    // this callback is called within the context of a poll() invocation.
    trace.info(MsgFormatter.format("onPartitionsRevoked() [{0}]: initiating consistent region reset", state));
    try {
        crMxBean.reset(true);
    } catch (Exception resetFailure) {
        throw new KafkaOperatorRuntimeException("Failed to reset the consistent region: " + resetFailure.getMessage(), resetFailure);
    }
}
Aggregations