Example 11 with ConsumedEvent

Use of org.zalando.nakadi.domain.ConsumedEvent in project nakadi by zalando.

Class PartitionDataTest, method eventsShouldBeStreamedOnBatchSize.

@Test
public void eventsShouldBeStreamedOnBatchSize() {
    final long timeout = TimeUnit.SECONDS.toMillis(1);
    final PartitionData pd = new PartitionData(COMP, null, createCursor(100L), System.currentTimeMillis());
    for (int i = 0; i < 100; ++i) {
        pd.addEvent(new ConsumedEvent("test".getBytes(), createCursor(i + 100L + 1)));
    }
    // Only 100 events are buffered, so a batch of 1000 is not ready yet: nothing is streamed.
    assertNull(pd.takeEventsToStream(currentTimeMillis(), 1000, timeout));
    // A batch limit of 99 is satisfied by the buffered events.
    final List<ConsumedEvent> eventsToStream = pd.takeEventsToStream(currentTimeMillis(), 99, timeout);
    assertNotNull(eventsToStream);
    assertEquals(99, eventsToStream.size());
}
Also used : ConsumedEvent(org.zalando.nakadi.domain.ConsumedEvent) Test(org.junit.Test)
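
The createCursor helper used by this test is not part of the snippet. A minimal sketch of what it could look like, reusing the KafkaCursor-to-NakadiCursor conversion shown in Example 13 below; the topic name, partition number, and timeline fixture are assumptions for illustration:

// Hypothetical test helper (not shown in the source): builds a NakadiCursor
// for partition 0 at the given offset. "test-topic" and the timeline fixture
// are illustrative assumptions.
private NakadiCursor createCursor(final long offset) {
    return new KafkaCursor("test-topic", 0, offset).toNakadiCursor(timeline);
}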

Example 12 with ConsumedEvent

Use of org.zalando.nakadi.domain.ConsumedEvent in project nakadi by zalando.

Class EventStream, method streamEvents.

public void streamEvents(final AtomicBoolean connectionReady, final Runnable checkAuthorization) {
    try {
        int messagesRead = 0;
        final Map<String, Integer> keepAliveInARow = createMapWithPartitionKeys(partition -> 0);
        final Map<String, List<byte[]>> currentBatches = createMapWithPartitionKeys(partition -> Lists.newArrayList());
        // Partition to NakadiCursor.
        final Map<String, NakadiCursor> latestOffsets = config.getCursors().stream().collect(Collectors.toMap(NakadiCursor::getPartition, c -> c));
        final long start = currentTimeMillis();
        final Map<String, Long> batchStartTimes = createMapWithPartitionKeys(partition -> start);
        final List<ConsumedEvent> consumedEvents = new LinkedList<>();
        long lastKpiEventSent = System.currentTimeMillis();
        long bytesInMemory = 0;
        while (connectionReady.get() && !blacklistService.isConsumptionBlocked(config.getEtName(), config.getConsumingClient().getClientId())) {
            checkAuthorization.run();
            if (consumedEvents.isEmpty()) {
                // TODO: there is room for optimization here: processing all events at once,
                // instead of one by one, would significantly improve this code.
                consumedEvents.addAll(eventConsumer.readEvents());
            }
            final Optional<ConsumedEvent> eventOrEmpty = consumedEvents.isEmpty() ? Optional.empty() : Optional.of(consumedEvents.remove(0));
            if (eventOrEmpty.isPresent()) {
                final ConsumedEvent event = eventOrEmpty.get();
                // update offset for the partition of event that was read
                latestOffsets.put(event.getPosition().getPartition(), event.getPosition());
                // put message to batch
                currentBatches.get(event.getPosition().getPartition()).add(event.getEvent());
                messagesRead++;
                bytesInMemory += event.getEvent().length;
                // if we read the message - reset keep alive counter for this partition
                keepAliveInARow.put(event.getPosition().getPartition(), 0);
            }
            // for each partition check if it's time to send the batch
            for (final String partition : latestOffsets.keySet()) {
                final long timeSinceBatchStart = currentTimeMillis() - batchStartTimes.get(partition);
                if (config.getBatchTimeout() * 1000 <= timeSinceBatchStart || currentBatches.get(partition).size() >= config.getBatchLimit()) {
                    final List<byte[]> eventsToSend = currentBatches.get(partition);
                    sendBatch(latestOffsets.get(partition), eventsToSend);
                    if (!eventsToSend.isEmpty()) {
                        bytesInMemory -= eventsToSend.stream().mapToLong(v -> v.length).sum();
                        eventsToSend.clear();
                    } else {
                        // an empty batch was sent as a keep-alive; count it towards this partition's keep-alive limit
                        keepAliveInARow.put(partition, keepAliveInARow.get(partition) + 1);
                    }
                    batchStartTimes.put(partition, currentTimeMillis());
                }
            }
            // Dump some data that is exceeding memory limits
            while (isMemoryLimitReached(bytesInMemory)) {
                final Map.Entry<String, List<byte[]>> heaviestPartition = currentBatches.entrySet().stream().max(Comparator.comparing(entry -> entry.getValue().stream().mapToLong(event -> event.length).sum())).get();
                sendBatch(latestOffsets.get(heaviestPartition.getKey()), heaviestPartition.getValue());
                final long freed = heaviestPartition.getValue().stream().mapToLong(v -> v.length).sum();
                LOG.warn("Memory limit reached for event type {}: {} bytes. Freed: {} bytes, {} messages", config.getEtName(), bytesInMemory, freed, heaviestPartition.getValue().size());
                bytesInMemory -= freed;
                // Init new batch for subscription
                heaviestPartition.getValue().clear();
                batchStartTimes.put(heaviestPartition.getKey(), currentTimeMillis());
            }
            if (lastKpiEventSent + kpiFrequencyMs < System.currentTimeMillis()) {
                final long count = kpiData.getAndResetNumberOfEventsSent();
                final long bytes = kpiData.getAndResetBytesSent();
                publishKpi(config.getConsumingClient(), count, bytes);
                lastKpiEventSent = System.currentTimeMillis();
            }
            // check whether the keep-alive limit was reached for all partitions; if so, close the stream
            if (config.getStreamKeepAliveLimit() != 0) {
                final boolean keepAliveLimitReachedForAllPartitions = keepAliveInARow.values().stream().allMatch(keepAlives -> keepAlives >= config.getStreamKeepAliveLimit());
                if (keepAliveLimitReachedForAllPartitions) {
                    break;
                }
            }
            // check if we reached the stream timeout or message count limit
            final long timeSinceStart = currentTimeMillis() - start;
            if ((config.getStreamTimeout() != 0 && timeSinceStart >= config.getStreamTimeout() * 1000)
                    || (config.getStreamLimit() != 0 && messagesRead >= config.getStreamLimit())) {
                for (final String partition : latestOffsets.keySet()) {
                    if (currentBatches.get(partition).size() > 0) {
                        sendBatch(latestOffsets.get(partition), currentBatches.get(partition));
                    }
                }
                break;
            }
        }
    } catch (final IOException e) {
        LOG.info("I/O error occurred when streaming events (possibly client closed connection)", e);
    } catch (final IllegalStateException e) {
        LOG.info("Error occurred when streaming events (possibly server closed connection)", e);
    } catch (final KafkaException e) {
        LOG.error("Error occurred when polling events from kafka; consumer: {}, event-type: {}", config.getConsumingClient().getClientId(), config.getEtName(), e);
    } finally {
        publishKpi(config.getConsumingClient(), kpiData.getAndResetNumberOfEventsSent(), kpiData.getAndResetBytesSent());
    }
}
Also used : OutputStream(java.io.OutputStream) ConsumedEvent(org.zalando.nakadi.domain.ConsumedEvent) Logger(org.slf4j.Logger) System.currentTimeMillis(java.lang.System.currentTimeMillis) NakadiCursor(org.zalando.nakadi.domain.NakadiCursor) LoggerFactory(org.slf4j.LoggerFactory) KafkaException(org.apache.kafka.common.KafkaException) StreamKpiData(org.zalando.nakadi.metrics.StreamKpiData) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IOException(java.io.IOException) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Meter(com.codahale.metrics.Meter) EventConsumer(org.zalando.nakadi.repository.EventConsumer) List(java.util.List) Lists(com.google.common.collect.Lists) JSONObject(org.json.JSONObject) Map(java.util.Map) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) Comparator(java.util.Comparator) LinkedList(java.util.LinkedList) Client(org.zalando.nakadi.security.Client)
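
The memory-pressure loop in streamEvents always evicts the partition whose pending batch holds the most bytes. That selection can be exercised in isolation; a self-contained sketch (class and variable names are illustrative, not from the source):

import java.util.Comparator;
import java.util.List;
import java.util.Map;

public class HeaviestPartitionDemo {
    public static void main(final String[] args) {
        // Two partitions with pending batches of different total byte size.
        final Map<String, List<byte[]>> batches = Map.of(
                "0", List.of(new byte[10], new byte[20]),
                "1", List.of(new byte[100]));
        // Same selection as in streamEvents: the entry whose byte arrays sum to the most bytes.
        final Map.Entry<String, List<byte[]>> heaviest = batches.entrySet().stream()
                .max(Comparator.comparingLong(
                        entry -> entry.getValue().stream().mapToLong(event -> event.length).sum()))
                .get();
        System.out.println("heaviest partition: " + heaviest.getKey()); // prints: heaviest partition: 1
    }
}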

Example 13 with ConsumedEvent

Use of org.zalando.nakadi.domain.ConsumedEvent in project nakadi by zalando.

Class NakadiKafkaConsumer, method readEvents.

@Override
public List<ConsumedEvent> readEvents() {
    final ConsumerRecords<byte[], byte[]> records = kafkaConsumer.poll(pollTimeout);
    if (records.isEmpty()) {
        return Collections.emptyList();
    }
    final ArrayList<ConsumedEvent> result = new ArrayList<>(records.count());
    for (final ConsumerRecord<byte[], byte[]> record : records) {
        final KafkaCursor cursor = new KafkaCursor(record.topic(), record.partition(), record.offset());
        final Timeline timeline = timelineMap.get(new TopicPartition(record.topic(), record.partition()));
        result.add(new ConsumedEvent(record.value(), cursor.toNakadiCursor(timeline)));
    }
    return result;
}
Also used : Timeline(org.zalando.nakadi.domain.Timeline) TopicPartition(org.apache.kafka.common.TopicPartition) ArrayList(java.util.ArrayList) ConsumedEvent(org.zalando.nakadi.domain.ConsumedEvent)
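
A consumer built this way is typically drained in a loop. A minimal usage sketch, assuming an already-wired NakadiKafkaConsumer instance; the running flag and the process callback are hypothetical:

// Hypothetical drain loop: poll until stopped and hand each event's payload
// and position to downstream processing. `running` and `process` are not
// part of the source.
while (running.get()) {
    for (final ConsumedEvent event : consumer.readEvents()) {
        process(event.getEvent(), event.getPosition()); // byte[] payload plus NakadiCursor
    }
}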

Example 14 with ConsumedEvent

Use of org.zalando.nakadi.domain.ConsumedEvent in project nakadi by zalando.

Class PartitionData, method onCommitOffset.

CommitResult onCommitOffset(final NakadiCursor offset) {
    boolean seekKafka = false;
    if (comparator.compare(offset, sentOffset) > 0) {
        log.error("Commit in future: current: {}, committed {} will skip sending obsolete data", sentOffset, commitOffset);
        seekKafka = true;
        sentOffset = offset;
    }
    final long committed;
    if (comparator.compare(offset, commitOffset) >= 0) {
        final Set<NakadiCursor> committedCursors = allCursorsOrdered.headSet(offset, true);
        committed = committedCursors.size();
        commitOffset = offset;
        // Operation is cascaded to allCursorsOrdered set.
        committedCursors.clear();
    } else {
        log.error("Commits in past are evil!: Committing in {} while current commit is {}", offset, commitOffset);
        // Commit in past occurred. One should move storage pointer to sentOffset.
        seekKafka = true;
        commitOffset = offset;
        sentOffset = commitOffset;
        allCursorsOrdered.clear();
        nakadiEvents.clear();
        bytesInMemory = 0L;
        committed = 0;
    }
    while (!nakadiEvents.isEmpty() && comparator.compare(nakadiEvents.get(0).getPosition(), commitOffset) <= 0) {
        final ConsumedEvent evt = nakadiEvents.remove(0);
        bytesInMemory -= evt.getEvent().length;
    }
    return new CommitResult(seekKafka, committed);
}
Also used : NakadiCursor(org.zalando.nakadi.domain.NakadiCursor) ConsumedEvent(org.zalando.nakadi.domain.ConsumedEvent)
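
The cascading clear in onCommitOffset relies on NavigableSet view semantics: headSet returns a live view backed by the original set, so clearing the view also removes those elements from allCursorsOrdered. A self-contained demonstration with plain integers:

import java.util.NavigableSet;
import java.util.TreeSet;

public class HeadSetClearDemo {
    public static void main(final String[] args) {
        final NavigableSet<Integer> all = new TreeSet<>(java.util.List.of(1, 2, 3, 4, 5));
        // headSet(3, true) is a live view of {1, 2, 3}, backed by `all`.
        final NavigableSet<Integer> committed = all.headSet(3, true);
        System.out.println(committed.size()); // 3
        committed.clear(); // the clear cascades to the backing set
        System.out.println(all); // [4, 5]
    }
}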

Example 15 with ConsumedEvent

Use of org.zalando.nakadi.domain.ConsumedEvent in project nakadi by zalando.

Class PartitionData, method extract.

private List<ConsumedEvent> extract(final int count) {
    final List<ConsumedEvent> result = new ArrayList<>(count);
    for (int i = 0; i < count && !nakadiEvents.isEmpty(); ++i) {
        final ConsumedEvent event = nakadiEvents.remove(0);
        bytesInMemory -= event.getEvent().length;
        result.add(event);
    }
    if (!result.isEmpty()) {
        this.sentOffset = result.get(result.size() - 1).getPosition();
        this.keepAliveInARow = 0;
    } else {
        this.keepAliveInARow += 1;
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) ConsumedEvent(org.zalando.nakadi.domain.ConsumedEvent)
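
extract removes events from the head of nakadiEvents, so the cost per call depends on the backing list: remove(0) shifts every remaining element in an ArrayList but unlinks a single node in a LinkedList, which is presumably why the streaming loop in Example 12 buffers consumed events in a LinkedList. A tiny comparison sketch:

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

public class HeadRemovalDemo {
    public static void main(final String[] args) {
        final List<Integer> linked = new LinkedList<>(List.of(1, 2, 3));
        final List<Integer> array = new ArrayList<>(List.of(1, 2, 3));
        linked.remove(0); // O(1): unlink the head node
        array.remove(0);  // O(n): shift the remaining elements left
        System.out.println(linked + " " + array); // [2, 3] [2, 3]
    }
}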

Aggregations

ConsumedEvent (org.zalando.nakadi.domain.ConsumedEvent): 15
Test (org.junit.Test): 8
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3
LinkedList (java.util.LinkedList): 3
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3
IOException (java.io.IOException): 2
ArrayList (java.util.ArrayList): 2
TopicPartition (org.apache.kafka.common.TopicPartition): 2
JSONObject (org.json.JSONObject): 2
NakadiCursor (org.zalando.nakadi.domain.NakadiCursor): 2
Timeline (org.zalando.nakadi.domain.Timeline): 2
NakadiKafkaConsumer (org.zalando.nakadi.repository.kafka.NakadiKafkaConsumer): 2
Meter (com.codahale.metrics.Meter): 1
Lists (com.google.common.collect.Lists): 1
OutputStream (java.io.OutputStream): 1
System.currentTimeMillis (java.lang.System.currentTimeMillis): 1
Comparator (java.util.Comparator): 1
List (java.util.List): 1
Map (java.util.Map): 1
Optional (java.util.Optional): 1