Search in sources :

Example 1 with TP

Use of com.ibm.streamsx.kafka.clients.consumer.CrConsumerGroupCoordinator.TP in project streamsx.kafka by IBMStreams.

The class CrKafkaConsumerGroupClient, method createSeekOffsetMap.

/**
 * Builds the 'seekOffsetMap' member variable from a checkpoint.
 * <p>
 * The seek offsets are created with the following algorithm:
 * <ul>
 * <li>read, in this order, the operator name stored in the checkpoint, the contributing operator
 *     names (operator names of the consumer group), and the offset manager from the checkpoint.
 *     The offset manager holds the offsets of only those partitions the consumer was assigned at
 *     checkpoint time.</li>
 * <li>if this operator was the only contributing operator, copy the offsets of the offset manager
 *     directly into 'seekOffsetMap'; no merge via the MXBean takes place.</li>
 * <li>otherwise, send the offsets of the prior partitions together with the number of operators and
 *     the own operator name to the CrGroupCoordinator MXBean.
 *     The other consumer operators will also send their prior partition-to-offset mappings and
 *     their distinct operator name.</li>
 * <li>wait for the JMX notification that the partition-to-offset map has been merged</li>
 * <li>take the merged map from the received notification data so that the operator has the seek
 *     offsets of all partitions of all topics (the group's view) and store this in the
 *     'seekOffsetMap' member variable.</li>
 * </ul>
 * The wait for the notification is bounded by {@code timeouts.getJmxResetNotificationTimeout()}.
 *
 * @param checkpoint the checkpoint to read the operator name, the contributing operator names,
 *        and the offset manager from
 * @throws InterruptedException declared by the signature; note that an interruption while waiting
 *         for the JMX notification is caught inside this method, logged, and leads to an early
 *         return without (re-)initializing 'seekOffsetMap'
 * @throws KafkaOperatorResetFailedException if this operator's name is not among the contributing
 *         operator names, if the merge notification does not arrive within the timeout, or if
 *         reading from the checkpoint fails
 */
@SuppressWarnings("unchecked")
private void createSeekOffsetMap(Checkpoint checkpoint) throws InterruptedException {
    final String operatorName = getOperatorContext().getName();
    long chkptSeqId = checkpoint.getSequenceId();
    int resetAttempt = getCrContext().getResetAttempt();
    // identifies the merge this reset attempt belongs to; also the key of the notification map
    MergeKey key = new MergeKey(chkptSeqId, resetAttempt);
    trace.info(MsgFormatter.format("createSeekOffsetMap() [{0}] - entering. chkptSeqId = {1,number,#}, resetAttempt = {2}", state, chkptSeqId, resetAttempt));
    try {
        // the read order must match the order in which the objects were written to the checkpoint
        final ObjectInputStream inputStream = checkpoint.getInputStream();
        final String myOperatorNameInCkpt = (String) inputStream.readObject();
        Set<String> contributingOperators = (Set<String>) inputStream.readObject();
        OffsetManager offsMgr = (OffsetManager) inputStream.readObject();
        trace.info(MsgFormatter.format("createSeekOffsetMap() - merging {0} operator checkpoints into a single group checkpoint", contributingOperators.size()));
        if (trace.isEnabledFor(DEBUG_LEVEL)) {
            trace.log(DEBUG_LEVEL, MsgFormatter.format("createSeekOffsetMap(): myOperatorName read from checkpoint: {0}", myOperatorNameInCkpt));
            trace.log(DEBUG_LEVEL, MsgFormatter.format("createSeekOffsetMap(): contributingOperators read from checkpoint: {0}", contributingOperators));
            trace.log(DEBUG_LEVEL, MsgFormatter.format("createSeekOffsetMap(): offset manager read from checkpoint: {0}", offsMgr));
        }
        // a differing name is only worth a warning; a name missing from the group is fatal for the reset
        if (!operatorName.equals(myOperatorNameInCkpt)) {
            trace.warn(MsgFormatter.format("Operator name in checkpoint ({0}) differs from current operator name: {1}", myOperatorNameInCkpt, operatorName));
        }
        if (!contributingOperators.contains(operatorName)) {
            final String msg = MsgFormatter.format("This operator''s name ({0}) not found in contributing operator names: {1}", operatorName, contributingOperators);
            trace.error(msg);
            throw new KafkaOperatorResetFailedException(msg);
        }
        trace.info(MsgFormatter.format("contributing {0} partition => offset mappings to the group''s checkpoint.", offsMgr.size()));
        if (contributingOperators.size() == 1) {
            trace.info("this single operator participated in consumer group at checkpoint time. Checkpoint merge and distribution via MXBean disabled.");
            assert (contributingOperators.contains(operatorName));
            initSeekOffsetMap();
            for (TopicPartition tp : offsMgr.getMappedTopicPartitions()) {
                final String topic = tp.topic();
                final int partition = tp.partition();
                final Long offset = offsMgr.getOffset(topic, partition);
                this.seekOffsetMap.put(tp, offset);
            }
        } else {
            // send checkpoint data to CrGroupCoordinator MXBean and wait for the notification
            // to fetch the group's complete checkpoint. Then, process the group's checkpoint.
            Map<CrConsumerGroupCoordinator.TP, Long> partialOffsetMap = new HashMap<>();
            for (TopicPartition tp : offsMgr.getMappedTopicPartitions()) {
                final String topic = tp.topic();
                final int partition = tp.partition();
                final Long offset = offsMgr.getOffset(topic, partition);
                partialOffsetMap.put(new TP(topic, partition), offset);
            }
            trace.info(MsgFormatter.format("Merging my group''s checkpoint contribution: partialOffsetMap = {0}, myOperatorName = {1}", partialOffsetMap, operatorName));
            this.crGroupCoordinatorMxBean.mergeConsumerCheckpoint(chkptSeqId, resetAttempt, contributingOperators.size(), partialOffsetMap, operatorName);

            // The timeout is fixed; it is not increased with the reset attempt.
            final long timeoutMillis = timeouts.getJmxResetNotificationTimeout();
            long timeElapsed = 0;
            final CrConsumerGroupCoordinator.CheckpointMerge merge;
            // check JMX notification and wait for notification
            jmxNotificationConditionLock.lock();
            try {
                final long waitStartTime = System.currentTimeMillis();
                boolean waitTimeLeft = true;
                int nWaits = 0;
                trace.log(DEBUG_LEVEL, MsgFormatter.format("checking receiption of JMX notification {0} for sequenceId {1}. timeout = {2,number,#} ms.", CrConsumerGroupCoordinatorMXBean.MERGE_COMPLETE_NTF_TYPE, key, timeoutMillis));
                while (!jmxMergeCompletedNotifMap.containsKey(key) && waitTimeLeft) {
                    long remainingTime = timeoutMillis - timeElapsed;
                    waitTimeLeft = remainingTime > 0;
                    if (waitTimeLeft) {
                        // trace only every 50th wait cycle to avoid flooding the log
                        if (nWaits++ % 50 == 0) {
                            trace.log(DEBUG_LEVEL, MsgFormatter.format("waiting for JMX notification {0} for sequenceId {1}. Remaining time = {2,number,#} of {3,number,#} ms", CrConsumerGroupCoordinatorMXBean.MERGE_COMPLETE_NTF_TYPE, key, remainingTime, timeoutMillis));
                        }
                        // wake up at least every 100 ms to re-check the map and the remaining time
                        jmxNotificationCondition.await(100, TimeUnit.MILLISECONDS);
                    }
                    timeElapsed = System.currentTimeMillis() - waitStartTime;
                }
                merge = jmxMergeCompletedNotifMap.get(key);
            } finally {
                // always release the lock, also when await() is interrupted; otherwise the lock
                // would stay held by this thread after the method has returned
                jmxNotificationConditionLock.unlock();
            }
            if (merge == null) {
                final String msg = MsgFormatter.format("timeout receiving {0} JMX notification for {1} from MXBean {2} in JCP. Current timeout is {3,number,#} milliseconds.", CrConsumerGroupCoordinatorMXBean.MERGE_COMPLETE_NTF_TYPE, key, crGroupCoordinatorMXBeanName, timeoutMillis);
                trace.error(msg);
                throw new KafkaOperatorResetFailedException(msg);
            } else {
                trace.info(MsgFormatter.format("waiting for JMX notification for sequenceId {0} took {1} ms", key, timeElapsed));
            }
            Map<TP, Long> mergedOffsetMap = merge.getConsolidatedOffsetMap();
            trace.info("reset offsets (group's checkpoint) received from MXBean: " + mergedOffsetMap);
            initSeekOffsetMap();
            // convert the coordinator's TP keys into Kafka TopicPartition keys
            mergedOffsetMap.forEach((tp, offset) -> {
                this.seekOffsetMap.put(new TopicPartition(tp.getTopic(), tp.getPartition()), offset);
            });
        }
    } catch (InterruptedException e) {
        // NOTE(review): the interruption is swallowed here and the thread's interrupt flag is not
        // restored, although the method declares InterruptedException - confirm this is intended.
        trace.log(DEBUG_LEVEL, "createSeekOffsetMap(): interrupted waiting for the JMX notification");
        return;
    } catch (IOException | ClassNotFoundException e) {
        trace.error("reset failed: " + e.getLocalizedMessage());
        throw new KafkaOperatorResetFailedException(MsgFormatter.format("resetting operator {0} to checkpoint sequence ID {1} failed: {2}", getOperatorContext().getName(), chkptSeqId, e.getLocalizedMessage()), e);
    }
    trace.log(DEBUG_LEVEL, "createSeekOffsetMap(): seekOffsetMap = " + this.seekOffsetMap);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) KafkaOperatorResetFailedException(com.ibm.streamsx.kafka.KafkaOperatorResetFailedException) HashMap(java.util.HashMap) OffsetManager(com.ibm.streamsx.kafka.clients.OffsetManager) IOException(java.io.IOException) Checkpoint(com.ibm.streams.operator.state.Checkpoint) MergeKey(com.ibm.streamsx.kafka.clients.consumer.CrConsumerGroupCoordinator.MergeKey) TopicPartition(org.apache.kafka.common.TopicPartition) TP(com.ibm.streamsx.kafka.clients.consumer.CrConsumerGroupCoordinator.TP) ObjectInputStream(java.io.ObjectInputStream)

Aggregations

Checkpoint (com.ibm.streams.operator.state.Checkpoint)1 KafkaOperatorResetFailedException (com.ibm.streamsx.kafka.KafkaOperatorResetFailedException)1 OffsetManager (com.ibm.streamsx.kafka.clients.OffsetManager)1 MergeKey (com.ibm.streamsx.kafka.clients.consumer.CrConsumerGroupCoordinator.MergeKey)1 TP (com.ibm.streamsx.kafka.clients.consumer.CrConsumerGroupCoordinator.TP)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 TopicPartition (org.apache.kafka.common.TopicPartition)1