Search in sources:

Example 1 with WatermarkEstimator

use of cz.o2.proxima.time.WatermarkEstimator in project proxima-platform by O2-Czech-Republic.

Class KafkaLogReader, method listener.

// create rebalance listener from consumer
private ConsumerRebalanceListener listener(String name, AtomicReference<KafkaConsumer<Object, Object>> kafka, ElementConsumer<Object, Object> consumer, Map<TopicPartition, Integer> emptyPollCount, Map<TopicPartition, Integer> topicPartitionToId, AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator) {
    return new ConsumerRebalanceListener() {

        private final Set<TopicPartition> currentlyAssigned = new HashSet<>();

        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> parts) {
            currentlyAssigned.removeAll(parts);
        }

        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> parts) {
            currentlyAssigned.addAll(parts);
            log.info("Consumer {} has assigned partitions {}", name, currentlyAssigned);
            emptyPollCount.clear();
            topicPartitionToId.clear();
            AtomicInteger id = new AtomicInteger();
            currentlyAssigned.forEach(p -> {
                topicPartitionToId.put(p, id.getAndIncrement());
                emptyPollCount.put(p, 0);
            });
            if (currentlyAssigned.isEmpty()) {
                watermarkEstimator.set(createWatermarkEstimatorForEmptyParts());
            } else {
                watermarkEstimator.set(new MinimalPartitionWatermarkEstimator(currentlyAssigned.stream().collect(toMap(topicPartitionToId::get, item -> createWatermarkEstimator()))));
            }
            Optional.ofNullable(kafka.get()).ifPresent(c -> consumer.onAssign(c, name != null ? getCommittedTopicOffsets(currentlyAssigned, c) : getCurrentTopicOffsets(currentlyAssigned, c)));
        }

        List<TopicOffset> getCurrentTopicOffsets(Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
            return parts.stream().map(tp -> new TopicOffset(new PartitionWithTopic(tp.topic(), tp.partition()), c.position(tp), watermarkEstimator.get().getWatermark())).collect(Collectors.toList());
        }

        List<TopicOffset> getCommittedTopicOffsets(Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
            Map<TopicPartition, OffsetAndMetadata> committed = new HashMap<>(c.committed(new HashSet<>(parts)));
            for (TopicPartition tp : parts) {
                committed.putIfAbsent(tp, null);
            }
            return committed.entrySet().stream().map(entry -> {
                final long offset = entry.getValue() == null ? 0L : entry.getValue().offset();
                return new TopicOffset(new PartitionWithTopic(entry.getKey().topic(), entry.getKey().partition()), offset, watermarkEstimator.get().getWatermark());
            }).collect(Collectors.toList());
        }

        private WatermarkEstimator createWatermarkEstimator() {
            final WatermarkIdlePolicyFactory idlePolicyFactory = accessor.getWatermarkConfiguration().getWatermarkIdlePolicyFactory();
            final WatermarkEstimatorFactory estimatorFactory = accessor.getWatermarkConfiguration().getWatermarkEstimatorFactory();
            return estimatorFactory.create(cfg, idlePolicyFactory);
        }
    };
}
Also used : Partition(cz.o2.proxima.storage.Partition) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) ExceptionUtils(cz.o2.proxima.util.ExceptionUtils) Collectors.toMap(java.util.stream.Collectors.toMap) StreamElement(cz.o2.proxima.storage.StreamElement) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PartitionedWatermarkEstimator(cz.o2.proxima.time.PartitionedWatermarkEstimator) OnlineConsumer(cz.o2.proxima.direct.kafka.ElementConsumers.OnlineConsumer) Duration(java.time.Duration) Map(java.util.Map) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetExternalizer(cz.o2.proxima.direct.commitlog.OffsetExternalizer) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) UUID(java.util.UUID) PartitionInfo(org.apache.kafka.common.PartitionInfo) Collectors(java.util.stream.Collectors) RebalanceInProgressException(org.apache.kafka.common.errors.RebalanceInProgressException) BiConsumer(cz.o2.proxima.functional.BiConsumer) Objects(java.util.Objects) BulkConsumer(cz.o2.proxima.direct.kafka.ElementConsumers.BulkConsumer) CountDownLatch(java.util.concurrent.CountDownLatch) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Optional(java.util.Optional) MinimalPartitionWatermarkEstimator(cz.o2.proxima.direct.time.MinimalPartitionWatermarkEstimator) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Context(cz.o2.proxima.direct.core.Context) Getter(lombok.Getter) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Watermarks(cz.o2.proxima.time.Watermarks) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) AbstractStorage(cz.o2.proxima.storage.AbstractStorage) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Offset(cz.o2.proxima.direct.commitlog.Offset) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) Position(cz.o2.proxima.storage.commitlog.Position) WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) Set(java.util.Set) HashSet(java.util.HashSet) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) MinimalPartitionWatermarkEstimator(cz.o2.proxima.direct.time.MinimalPartitionWatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Collection(java.util.Collection) HashSet(java.util.HashSet)

Example 2 with WatermarkEstimator

use of cz.o2.proxima.time.WatermarkEstimator in project proxima-platform by O2-Czech-Republic.

Class PubSubReader, method createWatermarkEstimator.

WatermarkEstimator createWatermarkEstimator(long minWatermark) {
    final WatermarkIdlePolicyFactory idlePolicyFactory = watermarkConfiguration.getWatermarkIdlePolicyFactory();
    final WatermarkEstimatorFactory estimatorFactory = watermarkConfiguration.getWatermarkEstimatorFactory();
    final WatermarkEstimator estimator = estimatorFactory.create(cfg, idlePolicyFactory);
    estimator.setMinWatermark(minWatermark);
    return estimator;
}
Also used : WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator)

Example 3 with WatermarkEstimator

use of cz.o2.proxima.time.WatermarkEstimator in project proxima-platform by O2-Czech-Republic.

Class CommitLogReadTest, method testWatermarkEstimator.

@Test(timeout = 60000)
public void testWatermarkEstimator() {
    int numElements = 1000;
    WatermarkEstimator estimator = new TestWatermarkEstimator(numElements);
    List<StreamElement> input = createInput(numElements);
    ListCommitLog commitLog = ListCommitLog.of(input, estimator, direct.getContext());
    testReadingFromCommitLogMany(numElements, commitLog);
}
Also used : WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) StreamElement(cz.o2.proxima.storage.StreamElement) ListCommitLog(cz.o2.proxima.direct.storage.ListCommitLog) Test(org.junit.Test)

Example 4 with WatermarkEstimator

use of cz.o2.proxima.time.WatermarkEstimator in project proxima-platform by O2-Czech-Republic.

Class InMemStorageTest, method testObserveWithEndOfTime.

@Test
public void testObserveWithEndOfTime() throws InterruptedException {
    URI uri = URI.create("inmem:///inmemstoragetest");
    InMemStorage storage = new InMemStorage();
    InMemStorage.setWatermarkEstimatorFactory(uri, (stamp, name, offset) -> new WatermarkEstimator() {

        {
            Preconditions.checkArgument(offset != null);
        }

        @Override
        public long getWatermark() {
            return Watermarks.MAX_WATERMARK - InMemStorage.getBoundedOutOfOrderness();
        }

        @Override
        public void update(StreamElement element) {
        }

        @Override
        public void setMinWatermark(long minWatermark) {
        }
    });
    DataAccessor accessor = storage.createAccessor(direct, createFamilyDescriptor(uri));
    CommitLogReader reader = accessor.getCommitLogReader(direct.getContext()).orElseThrow(() -> new IllegalStateException("Missing commit log reader"));
    CountDownLatch completed = new CountDownLatch(1);
    reader.observe("observer", new CommitLogObserver() {

        @Override
        public void onCompleted() {
            completed.countDown();
        }

        @Override
        public boolean onError(Throwable error) {
            return false;
        }

        @Override
        public boolean onNext(StreamElement ingest, OnNextContext context) {
            return false;
        }
    });
    assertTrue(completed.await(1, TimeUnit.SECONDS));
}
Also used : DataAccessor(cz.o2.proxima.direct.core.DataAccessor) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) StreamElement(cz.o2.proxima.storage.StreamElement) CountDownLatch(java.util.concurrent.CountDownLatch) URI(java.net.URI) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) Test(org.junit.Test)

Example 5 with WatermarkEstimator

use of cz.o2.proxima.time.WatermarkEstimator in project proxima-platform by O2-Czech-Republic.

Class ListCommitLog, method observe.

@Override
public ObserveHandle observe(@Nullable String name, Position position, CommitLogObserver observer) {
    String consumerName = name == null ? UUID.randomUUID().toString() : name;
    Consumer consumer = CONSUMERS.get(uuid).computeIfAbsent(consumerName, k -> new Consumer(uuid, consumerName, watermarkEstimator));
    ListObserveHandle handle = new ListObserveHandle(uuid, consumerName);
    pushTo((element, offset) -> {
        if (handle.isClosed()) {
            return false;
        }
        final CommitLogObserver.OffsetCommitter committer = (succ, exc) -> {
            if (exc != null) {
                observer.onError(exc);
            }
        };
        final boolean acceptable;
        OnNextContext context = null;
        synchronized (consumer) {
            acceptable = (externalizableOffsets || !consumer.getAckedOffsets().contains(offset) && !consumer.getInflightOffsets().contains(offset));
            if (acceptable) {
                context = consumer.asOnNextContext(committer, offset);
            }
        }
        if (acceptable) {
            return observer.onNext(element, context);
        }
        return true;
    }, externalizableOffsets ? () -> true : allMatchOffset(consumer::isAcked), observer::onCompleted, observer::onCancelled);
    return handle;
}
Also used : CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Context(cz.o2.proxima.direct.core.Context) IntStream(java.util.stream.IntStream) Iterables(com.google.common.collect.Iterables) Getter(lombok.Getter) Partition(cz.o2.proxima.storage.Partition) OffsetCommitter(cz.o2.proxima.direct.commitlog.CommitLogObserver.OffsetCommitter) URISyntaxException(java.net.URISyntaxException) HashMap(java.util.HashMap) Function(java.util.function.Function) ObserverUtils(cz.o2.proxima.direct.commitlog.ObserverUtils) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamElement(cz.o2.proxima.storage.StreamElement) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Watermarks(cz.o2.proxima.time.Watermarks) UnaryPredicate(cz.o2.proxima.functional.UnaryPredicate) SerializationException(cz.o2.proxima.scheme.SerializationException) URI(java.net.URI) TypeReference(com.fasterxml.jackson.core.type.TypeReference) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) Nonnull(javax.annotation.Nonnull) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) OffsetExternalizer(cz.o2.proxima.direct.commitlog.OffsetExternalizer) BiFunction(cz.o2.proxima.functional.BiFunction) Collection(java.util.Collection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MoreObjects(com.google.common.base.MoreObjects) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Set(java.util.Set) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Offset(cz.o2.proxima.direct.commitlog.Offset) Objects(java.util.Objects) List(java.util.List) OnNextContext(cz.o2.proxima.direct.commitlog.CommitLogObserver.OnNextContext) 
Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) Position(cz.o2.proxima.storage.commitlog.Position) ObserverUtils.asRepartitionContext(cz.o2.proxima.direct.commitlog.ObserverUtils.asRepartitionContext) OnNextContext(cz.o2.proxima.direct.commitlog.CommitLogObserver.OnNextContext) OffsetCommitter(cz.o2.proxima.direct.commitlog.CommitLogObserver.OffsetCommitter)

Aggregations

WatermarkEstimator (cz.o2.proxima.time.WatermarkEstimator): 9, StreamElement (cz.o2.proxima.storage.StreamElement): 8, HashMap (java.util.HashMap): 7, CommitLogObserver (cz.o2.proxima.direct.commitlog.CommitLogObserver): 5, CommitLogReader (cz.o2.proxima.direct.commitlog.CommitLogReader): 5, ObserveHandle (cz.o2.proxima.direct.commitlog.ObserveHandle): 5, Offset (cz.o2.proxima.direct.commitlog.Offset): 5, ArrayList (java.util.ArrayList): 5, List (java.util.List): 5, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 5, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 5, Test (org.junit.Test): 5, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 4, Preconditions (com.google.common.base.Preconditions): 4, OffsetExternalizer (cz.o2.proxima.direct.commitlog.OffsetExternalizer): 4, Context (cz.o2.proxima.direct.core.Context): 4, Partition (cz.o2.proxima.storage.Partition): 4, Position (cz.o2.proxima.storage.commitlog.Position): 4, WatermarkEstimatorFactory (cz.o2.proxima.time.WatermarkEstimatorFactory): 4, WatermarkIdlePolicyFactory (cz.o2.proxima.time.WatermarkIdlePolicyFactory): 4