Search in sources :

Example 1 with StreamDemarcator

use of org.apache.nifi.stream.io.util.StreamDemarcator in project nifi by apache.

the class PublisherLease method publish.

/**
 * Publishes the content of a FlowFile to the given topic, either as one
 * message (no demarcator) or as one message per demarcated token.
 * Failures are recorded against the FlowFile on the in-flight tracker.
 *
 * @param flowFile        the FlowFile whose content is being published
 * @param flowFileContent stream over the FlowFile's content
 * @param messageKey      key passed along with every message
 * @param demarcatorBytes delimiter separating messages in the content;
 *                        {@code null} or empty means "send content as a single message"
 * @param topic           destination topic
 * @throws IOException rethrown after the failure has been recorded on the
 *                     tracker and {@code poison()} has been called
 */
void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic) throws IOException {
    // Lazily create the tracker on first use.
    if (tracker == null) {
        tracker = new InFlightMessageTracker(logger);
    }

    final boolean hasDemarcator = demarcatorBytes != null && demarcatorBytes.length > 0;

    try {
        if (!hasDemarcator) {
            // Whole-content mode: reject oversized content up front.
            if (flowFile.getSize() > maxMessageSize) {
                tracker.fail(flowFile, new TokenTooLargeException("A message in the stream exceeds the maximum allowed message size of " + maxMessageSize + " bytes."));
                return;
            }

            // The content is sent verbatim so that a 0 byte message is still published.
            final byte[] wholeContent = new byte[(int) flowFile.getSize()];
            StreamUtils.fillBuffer(flowFileContent, wholeContent);
            publish(flowFile, messageKey, wholeContent, topic, tracker);
            return;
        }

        // Demarcated mode: one message per token.
        try (final StreamDemarcator tokens = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) {
            for (byte[] token = tokens.nextToken(); token != null; token = tokens.nextToken()) {
                publish(flowFile, messageKey, token, topic, tracker);

                // Once the FlowFile is marked failed there is no point sending the rest.
                if (tracker.isFailed(flowFile)) {
                    return;
                }
            }
        } catch (final TokenTooLargeException tooLarge) {
            // An oversized token fails the FlowFile but is not propagated to the caller.
            tracker.fail(flowFile, tooLarge);
        }
    } catch (final Exception failure) {
        // Any other failure: record it, call poison(), and propagate.
        tracker.fail(flowFile, failure);
        poison();
        throw failure;
    }
}
Also used : TokenTooLargeException(org.apache.nifi.stream.io.exception.TokenTooLargeException) StreamDemarcator(org.apache.nifi.stream.io.util.StreamDemarcator) TimeoutException(java.util.concurrent.TimeoutException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) TokenTooLargeException(org.apache.nifi.stream.io.exception.TokenTooLargeException) IOException(java.io.IOException)

Example 2 with StreamDemarcator

use of org.apache.nifi.stream.io.util.StreamDemarcator in project nifi by apache.

the class KafkaPublisher method publish.

/**
 * Publishes messages to Kafka topic. It uses {@link StreamDemarcator} to
 * determine how many messages to Kafka will be sent from a provided
 * {@link InputStream} (see {@link PublishingContext#getContentStream()}).
 * It supports two publishing modes:
 * <ul>
 * <li>Sending all messages constructed from
 * {@link StreamDemarcator#nextToken()} operation.</li>
 * <li>Sending only unacknowledged messages constructed from
 * {@link StreamDemarcator#nextToken()} operation.</li>
 * </ul>
 * The unacknowledged messages are determined from the value of
 * {@link PublishingContext#getLastAckedMessageIndex()}.
 * <br>
 * This method assumes content stream affinity where it is expected that the
 * content stream that represents the same Kafka message(s) will remain the
 * same across possible retries. This is required specifically for cases
 * where delimiter is used and a single content stream may represent
 * multiple Kafka messages. The
 * {@link PublishingContext#getLastAckedMessageIndex()} will provide the
 * index of the last ACKed message, so upon retry only messages with the
 * higher index are sent.
 * <br>
 * The target partition of each message is the one supplied by
 * {@link PublishingContext#getPartitionId()}; when none is supplied and a
 * key is present, the partition is derived from the key and topic via
 * {@code getPartition(...)}.
 *
 * @param publishingContext
 *            instance of {@link PublishingContext} which hold context
 *            information about the message(s) to be sent.
 * @return a {@link KafkaPublisherResult} constructed from the number of
 *         tokens consumed from the content stream and the index of the
 *         last acknowledged message.
 * @throws IOException if unable to read from the Input Stream
 */
KafkaPublisherResult publish(PublishingContext publishingContext) throws IOException {
    // NOTE(review): the demarcator is never closed in this method; presumably the
    // caller owns the content stream and closes it - confirm before adding a close here.
    StreamDemarcator streamTokenizer = new StreamDemarcator(publishingContext.getContentStream(), publishingContext.getDelimiterBytes(), publishingContext.getMaxRequestSize());
    int prevLastAckedMessageIndex = publishingContext.getLastAckedMessageIndex();
    List<Future<RecordMetadata>> resultFutures = new ArrayList<>();
    byte[] messageBytes;
    int tokenCounter = 0;
    boolean continueSending = true;
    KafkaPublisherResult result = null;
    for (; continueSending && (messageBytes = streamTokenizer.nextToken()) != null; tokenCounter++) {
        // Tokens at or below the last acked index were already delivered; skip them.
        if (prevLastAckedMessageIndex < tokenCounter) {
            Integer partitionId = publishingContext.getPartitionId();
            if (partitionId == null && publishingContext.getKeyBytes() != null) {
                partitionId = this.getPartition(publishingContext.getKeyBytes(), publishingContext.getTopic());
            }
            // Use the resolved partitionId: previously the raw context value was passed,
            // which discarded the key-derived partition computed just above.
            ProducerRecord<byte[], byte[]> message = new ProducerRecord<>(publishingContext.getTopic(), partitionId, publishingContext.getKeyBytes(), messageBytes);
            resultFutures.add(this.kafkaProducer.send(message));
            // Periodically process the pending acks for the batch sent so far.
            if (tokenCounter % this.ackCheckSize == 0) {
                int lastAckedMessageIndex = this.processAcks(resultFutures, prevLastAckedMessageIndex);
                resultFutures.clear();
                // Stop sending and report the current position when the acked index
                // is not a multiple of ackCheckSize.
                if (lastAckedMessageIndex % this.ackCheckSize != 0) {
                    continueSending = false;
                    result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex);
                }
                prevLastAckedMessageIndex = lastAckedMessageIndex;
            }
        }
    }
    // No early stop: process acks for whatever is still outstanding and build the result.
    if (result == null) {
        int lastAckedMessageIndex = this.processAcks(resultFutures, prevLastAckedMessageIndex);
        resultFutures.clear();
        result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex);
    }
    return result;
}
Also used : ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) StreamDemarcator(org.apache.nifi.stream.io.util.StreamDemarcator) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future)

Aggregations

StreamDemarcator (org.apache.nifi.stream.io.util.StreamDemarcator)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Future (java.util.concurrent.Future)1 TimeoutException (java.util.concurrent.TimeoutException)1 ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord)1 SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException)1 TokenTooLargeException (org.apache.nifi.stream.io.exception.TokenTooLargeException)1