Search in sources:

Example 1 with Offset

use of cz.o2.proxima.direct.batch.Offset in project proxima-platform by O2-Czech-Republic.

the class RandomHBaseReader method listEntities.

/**
 * Scans row keys of the configured column family and passes each one to {@code consumer}.
 *
 * <p>Every key is delivered as a pair of a {@link RawOffset} wrapping the key and the key itself.
 *
 * @param offset offset to continue from, or {@code null} to scan from the beginning
 * @param limit maximal number of keys to deliver; a value {@code <= 0} means no limit
 * @param consumer callback receiving each (offset, key) pair
 * @throws RuntimeException wrapping any {@link IOException} raised by the scanner
 */
@Override
public void listEntities(RandomOffset offset, int limit, Consumer<Pair<RandomOffset, String>> consumer) {
    ensureClient();
    // The NUL character appended to the offset key presumably makes the scan start just after
    // the row the offset points at, instead of returning that row again -- confirm against the
    // Scan start-row semantics.
    Scan s = offset == null ? new Scan() : new Scan((((RawOffset) offset).getOffset() + '\00').getBytes(StandardCharsets.UTF_8));
    s.addFamily(family);
    // only row keys are consumed below
    s.setFilter(new KeyOnlyFilter());
    s.setCaching(keyCaching);
    try (ResultScanner scanner = client.getScanner(s)) {
        int taken = 0;
        while (limit <= 0 || taken++ < limit) {
            Result res = scanner.next();
            if (res != null) {
                // Decode with the same charset that is used to encode the scan start key above;
                // the platform default charset could corrupt non-ASCII keys.
                String key = new String(res.getRow(), StandardCharsets.UTF_8);
                consumer.accept(Pair.of(new RawOffset(key), key));
            } else {
                // scanner exhausted
                break;
            }
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : KeyOnlyFilter(org.apache.hadoop.hbase.filter.KeyOnlyFilter) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RawOffset(cz.o2.proxima.direct.randomaccess.RawOffset) Scan(org.apache.hadoop.hbase.client.Scan) IOException(java.io.IOException) Result(org.apache.hadoop.hbase.client.Result)

Example 2 with Offset

use of cz.o2.proxima.direct.batch.Offset in project proxima-platform by O2-Czech-Republic.

the class HadoopBatchLogReader method processPath.

/**
 * Reads all elements stored under {@code path} and hands them one by one to {@code observer}.
 *
 * <p>Each element is accompanied by an {@link Offset} built from the partition, the zero-based
 * index of the element within the file and a flag telling whether the iterator has no further
 * element.
 *
 * @return {@code false} when processing stopped early (termination was requested or the
 *     observer returned {@code false}), {@code true} when the whole file was processed
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
private boolean processPath(BatchLogObserver observer, long watermark, HadoopPartition partition, HadoopPath path, TerminationContext terminationContext) {
    try (Reader source = accessor.getFormat().openReader(path, accessor.getEntityDesc())) {
        final Iterator<StreamElement> elements = source.iterator();
        for (long index = 0; elements.hasNext(); index++) {
            final StreamElement current = elements.next();
            // must be evaluated after next() so that it describes the element just read
            final boolean isLast = !elements.hasNext();
            final Offset position = Offset.of(partition, index, isLast);
            if (terminationContext.isCancelled()) {
                return false;
            }
            if (!observer.onNext(current, BatchLogObservers.withWatermark(partition, position, watermark))) {
                return false;
            }
        }
        return true;
    } catch (IOException ex) {
        throw new RuntimeException("Failed to read file " + partition, ex);
    }
}
Also used : BatchLogReader(cz.o2.proxima.direct.batch.BatchLogReader) Reader(cz.o2.proxima.direct.bulk.Reader) StreamElement(cz.o2.proxima.storage.StreamElement) IOException(java.io.IOException) Offset(cz.o2.proxima.direct.batch.Offset)

Example 3 with Offset

use of cz.o2.proxima.direct.batch.Offset in project proxima-platform by O2-Czech-Republic.

the class KafkaLogReader method listener.

// Builds the rebalance listener: on every assignment it rebuilds the per-partition bookkeeping
// (ids, empty poll counters, watermark estimator) and reports starting offsets to the consumer.
private ConsumerRebalanceListener listener(String name, AtomicReference<KafkaConsumer<Object, Object>> kafka, ElementConsumer<Object, Object> consumer, Map<TopicPartition, Integer> emptyPollCount, Map<TopicPartition, Integer> topicPartitionToId, AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator) {
    return new ConsumerRebalanceListener() {

        // partitions held by this consumer across successive rebalance callbacks
        private final Set<TopicPartition> assigned = new HashSet<>();

        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> parts) {
            assigned.removeAll(parts);
        }

        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> parts) {
            assigned.addAll(parts);
            log.info("Consumer {} has assigned partitions {}", name, assigned);

            // renumber all currently assigned partitions from zero and reset their poll counters
            emptyPollCount.clear();
            topicPartitionToId.clear();
            int nextId = 0;
            for (TopicPartition tp : assigned) {
                topicPartitionToId.put(tp, nextId++);
                emptyPollCount.put(tp, 0);
            }

            final PartitionedWatermarkEstimator estimator;
            if (assigned.isEmpty()) {
                estimator = createWatermarkEstimatorForEmptyParts();
            } else {
                // one fresh estimator per assigned partition, keyed by the id given above
                estimator = new MinimalPartitionWatermarkEstimator(assigned.stream().collect(toMap(topicPartitionToId::get, item -> createWatermarkEstimator())));
            }
            watermarkEstimator.set(estimator);

            final KafkaConsumer<Object, Object> current = kafka.get();
            if (current != null) {
                final List<TopicOffset> startOffsets;
                if (name != null) {
                    startOffsets = getCommittedTopicOffsets(assigned, current);
                } else {
                    startOffsets = getCurrentTopicOffsets(assigned, current);
                }
                consumer.onAssign(current, startOffsets);
            }
        }

        // Offsets taken from the consumer's current position in each partition.
        List<TopicOffset> getCurrentTopicOffsets(Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
            return parts.stream().map(tp -> {
                final PartitionWithTopic partition = new PartitionWithTopic(tp.topic(), tp.partition());
                return new TopicOffset(partition, c.position(tp), watermarkEstimator.get().getWatermark());
            }).collect(Collectors.toList());
        }

        // Offsets taken from the committed positions; partitions without a commit start at 0.
        List<TopicOffset> getCommittedTopicOffsets(Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
            final Map<TopicPartition, OffsetAndMetadata> committed = new HashMap<>(c.committed(new HashSet<>(parts)));
            // make sure every requested partition has an entry, even when nothing was committed
            parts.forEach(tp -> committed.putIfAbsent(tp, null));
            return committed.entrySet().stream().map(entry -> {
                final TopicPartition tp = entry.getKey();
                final OffsetAndMetadata metadata = entry.getValue();
                final long offset;
                if (metadata == null) {
                    offset = 0L;
                } else {
                    offset = metadata.offset();
                }
                return new TopicOffset(new PartitionWithTopic(tp.topic(), tp.partition()), offset, watermarkEstimator.get().getWatermark());
            }).collect(Collectors.toList());
        }

        // Creates an estimator from the factories configured on the accessor.
        private WatermarkEstimator createWatermarkEstimator() {
            final WatermarkEstimatorFactory estimatorFactory = accessor.getWatermarkConfiguration().getWatermarkEstimatorFactory();
            final WatermarkIdlePolicyFactory idlePolicyFactory = accessor.getWatermarkConfiguration().getWatermarkIdlePolicyFactory();
            return estimatorFactory.create(cfg, idlePolicyFactory);
        }
    };
}
Also used : Partition(cz.o2.proxima.storage.Partition) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) ExceptionUtils(cz.o2.proxima.util.ExceptionUtils) Collectors.toMap(java.util.stream.Collectors.toMap) StreamElement(cz.o2.proxima.storage.StreamElement) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PartitionedWatermarkEstimator(cz.o2.proxima.time.PartitionedWatermarkEstimator) OnlineConsumer(cz.o2.proxima.direct.kafka.ElementConsumers.OnlineConsumer) Duration(java.time.Duration) Map(java.util.Map) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetExternalizer(cz.o2.proxima.direct.commitlog.OffsetExternalizer) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) UUID(java.util.UUID) PartitionInfo(org.apache.kafka.common.PartitionInfo) Collectors(java.util.stream.Collectors) RebalanceInProgressException(org.apache.kafka.common.errors.RebalanceInProgressException) BiConsumer(cz.o2.proxima.functional.BiConsumer) Objects(java.util.Objects) BulkConsumer(cz.o2.proxima.direct.kafka.ElementConsumers.BulkConsumer) CountDownLatch(java.util.concurrent.CountDownLatch) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Optional(java.util.Optional) MinimalPartitionWatermarkEstimator(cz.o2.proxima.direct.time.MinimalPartitionWatermarkEstimator) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Context(cz.o2.proxima.direct.core.Context) Getter(lombok.Getter) 
AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Watermarks(cz.o2.proxima.time.Watermarks) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) AbstractStorage(cz.o2.proxima.storage.AbstractStorage) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Offset(cz.o2.proxima.direct.commitlog.Offset) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) Position(cz.o2.proxima.storage.commitlog.Position) WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) Set(java.util.Set) HashSet(java.util.HashSet) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) MinimalPartitionWatermarkEstimator(cz.o2.proxima.direct.time.MinimalPartitionWatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Collection(java.util.Collection) HashSet(java.util.HashSet)

Example 4 with Offset

use of cz.o2.proxima.direct.commitlog.Offset in project proxima-platform by O2-Czech-Republic.

the class LocalKafkaCommitLogDescriptorTest method testBulkObserveOffsets2.

/**
 * Observes all partitions in bulk from the newest position, writes two elements, captures the
 * handle's current offsets and then restarts observation from those offsets.
 *
 * <p>The observer stops after every element it receives, so each of the two observations
 * contributes one element; both are asserted to carry offset 0.
 */
@Test(timeout = 10000)
public void testBulkObserveOffsets2() throws InterruptedException {
    final Accessor accessor = kafka.createAccessor(direct, createTestFamily(entity, storageUri, partitionsCfg(3)));
    final LocalKafkaWriter writer = accessor.newWriter();
    final CommitLogReader reader = Optionals.get(accessor.getCommitLogReader(context()));
    // elements seen by the observer, in arrival order
    final List<KafkaStreamElement> input = new ArrayList<>();
    // 3 = two write callbacks + one onNext call; the latch is replaced before the second observation
    final AtomicReference<CountDownLatch> latch = new AtomicReference<>(new CountDownLatch(3));
    final StreamElement update = StreamElement.upsert(entity, attr, UUID.randomUUID().toString(), "key", attr.getName(), System.currentTimeMillis(), new byte[] { 1, 2 });
    final CommitLogObserver observer = new CommitLogObserver() {

        @Override
        public boolean onNext(StreamElement ingest, OnNextContext context) {
            input.add((KafkaStreamElement) ingest);
            latch.get().countDown();
            // terminate after reading first record
            return false;
        }

        @Override
        public boolean onError(Throwable error) {
            throw new RuntimeException(error);
        }
    };
    final List<Offset> offsets;
    try (final ObserveHandle handle = reader.observeBulkPartitions(reader.getPartitions(), Position.NEWEST, observer)) {
        // write two elements (the same update twice)
        for (int i = 0; i < 2; i++) {
            writer.write(update, (succ, e) -> {
                assertTrue(succ);
                latch.get().countDown();
            });
        }
        // wait for both write confirmations and the first observed element
        latch.get().await();
        // swap in a fresh latch before the second observation is started
        latch.set(new CountDownLatch(1));
        offsets = handle.getCurrentOffsets();
    }
    // restart from old offset
    reader.observeBulkOffsets(Lists.newArrayList(offsets), observer);
    latch.get().await();
    // one element from each observation, both expected at offset 0
    assertEquals(2, input.size());
    assertEquals(0, input.get(0).getOffset());
    assertEquals(0, input.get(1).getOffset());
}
Also used : LocalKafkaWriter(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.LocalKafkaWriter) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) OnNextContext(cz.o2.proxima.direct.commitlog.CommitLogObserver.OnNextContext) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) ArrayList(java.util.ArrayList) StreamElement(cz.o2.proxima.storage.StreamElement) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) Accessor(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.Accessor) Offset(cz.o2.proxima.direct.commitlog.Offset) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Test(org.junit.Test)

Example 5 with Offset

use of cz.o2.proxima.direct.batch.Offset in project proxima-platform by O2-Czech-Republic.

the class LocalKafkaCommitLogDescriptorTest method testObserveBulkCommitsCorrectly.

/**
 * Writes 100 elements, consumes them with a bulk observer that confirms only on the 100th
 * element, and verifies that the committed offsets (both as reported by the handle and as seen
 * by the underlying consumer) sum to 100 across all partitions.
 */
@Test(timeout = 10000)
public void testObserveBulkCommitsCorrectly() throws InterruptedException {
    final Accessor accessor = kafka.createAccessor(direct, createTestFamily(entity, storageUri, cfg(Pair.of(KafkaAccessor.ASSIGNMENT_TIMEOUT_MS, 1L), Pair.of(LocalKafkaCommitLogDescriptor.CFG_NUM_PARTITIONS, 3))));
    final LocalKafkaWriter writer = accessor.newWriter();
    final CommitLogReader reader = Optionals.get(accessor.getCommitLogReader(context()));
    final long now = System.currentTimeMillis();

    // populate the log with 100 elements under distinct keys, stamped 2 seconds after "now"
    for (int idx = 0; idx < 100; idx++) {
        final StreamElement element = StreamElement.upsert(entity, attr, UUID.randomUUID().toString(), "key-" + idx, attr.getName(), now + 2000, new byte[] { 1, 2 });
        writer.write(element, (succ, e) -> {
        });
    }

    final CountDownLatch completed = new CountDownLatch(1);
    final CommitLogObserver observer = new CommitLogObserver() {

        int processed = 0;

        @Override
        public boolean onNext(StreamElement ingest, OnNextContext context) {
            processed++;
            // confirm exactly once, when the last written element arrives
            if (processed == 100) {
                context.confirm();
            }
            return true;
        }

        @Override
        public void onCompleted() {
            completed.countDown();
        }

        @Override
        public boolean onError(Throwable error) {
            throw new RuntimeException(error);
        }
    };
    final ObserveHandle handle = reader.observeBulk("test", Position.OLDEST, true, observer);
    completed.await();

    // offsets reported by the handle
    final long offsetSum = handle.getCommittedOffsets().stream().mapToLong(o -> ((TopicOffset) o).getOffset()).sum();
    assertEquals(100, offsetSum);

    // offsets as seen by the underlying consumer
    final KafkaConsumer<Object, Object> consumer = ((LocalKafkaCommitLogDescriptor.LocalKafkaLogReader) reader).getConsumer();
    final String topic = accessor.getTopic();
    final Set<TopicPartition> committedPartitions = handle.getCommittedOffsets().stream().map(o -> new TopicPartition(topic, o.getPartition().getId())).collect(Collectors.toSet());
    final Map<TopicPartition, OffsetAndMetadata> committed = consumer.committed(committedPartitions);
    assertEquals(100, committed.values().stream().mapToLong(OffsetAndMetadata::offset).sum());
}
Also used : Arrays(java.util.Arrays) LocalKafkaLogReader(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.LocalKafkaLogReader) LocalKafkaWriter(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.LocalKafkaWriter) Partition(cz.o2.proxima.storage.Partition) EntityDescriptor(cz.o2.proxima.repository.EntityDescriptor) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) CachedView(cz.o2.proxima.direct.view.CachedView) StreamElement(cz.o2.proxima.storage.StreamElement) WatermarkEstimator(cz.o2.proxima.time.WatermarkEstimator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Pair(cz.o2.proxima.util.Pair) Serde(org.apache.kafka.common.serialization.Serde) UnaryFunction(cz.o2.proxima.functional.UnaryFunction) Duration(java.time.Duration) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) URI(java.net.URI) WatermarkEstimatorFactory(cz.o2.proxima.time.WatermarkEstimatorFactory) Optionals(cz.o2.proxima.util.Optionals) WatermarkIdlePolicyFactory(cz.o2.proxima.time.WatermarkIdlePolicyFactory) TopicPartition(org.apache.kafka.common.TopicPartition) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) UUID(java.util.UUID) Accessor(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.Accessor) Collectors(java.util.stream.Collectors) RebalanceInProgressException(org.apache.kafka.common.errors.RebalanceInProgressException) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) CommitLogObservers(cz.o2.proxima.direct.commitlog.CommitLogObservers) Objects(java.util.Objects) CountDownLatch(java.util.concurrent.CountDownLatch) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConfigRepository(cz.o2.proxima.repository.ConfigRepository) 
Stream(java.util.stream.Stream) KeyValue(cz.o2.proxima.direct.randomaccess.KeyValue) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) OnNextContext(cz.o2.proxima.direct.commitlog.CommitLogObserver.OnNextContext) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) DirectDataOperator(cz.o2.proxima.direct.core.DirectDataOperator) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Context(cz.o2.proxima.direct.core.Context) IntStream(java.util.stream.IntStream) TestUtils.createTestFamily(cz.o2.proxima.util.TestUtils.createTestFamily) KeyPartitioner(cz.o2.proxima.storage.commitlog.KeyPartitioner) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) ArgumentMatchers.anyMap(org.mockito.ArgumentMatchers.anyMap) HashMap(java.util.HashMap) OnlineAttributeWriter(cz.o2.proxima.direct.core.OnlineAttributeWriter) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) ConfigFactory(com.typesafe.config.ConfigFactory) Watermarks(cz.o2.proxima.time.Watermarks) Factory(cz.o2.proxima.functional.Factory) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) WatermarkIdlePolicy(cz.o2.proxima.time.WatermarkIdlePolicy) Before(org.junit.Before) Properties(java.util.Properties) Repository(cz.o2.proxima.repository.Repository) Iterator(java.util.Iterator) AttributeDescriptor(cz.o2.proxima.repository.AttributeDescriptor) MoreObjects(com.google.common.base.MoreObjects) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) Test(org.junit.Test) AttributeFamilyDescriptor(cz.o2.proxima.repository.AttributeFamilyDescriptor) Offset(cz.o2.proxima.direct.commitlog.Offset) 
TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) AtomicLong(java.util.concurrent.atomic.AtomicLong) AttributeDescriptorBase(cz.o2.proxima.repository.AttributeDescriptorBase) Partitioner(cz.o2.proxima.storage.commitlog.Partitioner) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections) Position(cz.o2.proxima.storage.commitlog.Position) LocalKafkaLogReader(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.LocalKafkaLogReader) LocalKafkaWriter(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.LocalKafkaWriter) ObserveHandle(cz.o2.proxima.direct.commitlog.ObserveHandle) OnNextContext(cz.o2.proxima.direct.commitlog.CommitLogObserver.OnNextContext) CommitLogReader(cz.o2.proxima.direct.commitlog.CommitLogReader) StreamElement(cz.o2.proxima.storage.StreamElement) CountDownLatch(java.util.concurrent.CountDownLatch) Accessor(cz.o2.proxima.direct.kafka.LocalKafkaCommitLogDescriptor.Accessor) CommitLogObserver(cz.o2.proxima.direct.commitlog.CommitLogObserver) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Test(org.junit.Test)

Aggregations

StreamElement (cz.o2.proxima.storage.StreamElement)25 Offset (cz.o2.proxima.direct.commitlog.Offset)20 ArrayList (java.util.ArrayList)20 ObserveHandle (cz.o2.proxima.direct.commitlog.ObserveHandle)18 Test (org.junit.Test)18 CommitLogObserver (cz.o2.proxima.direct.commitlog.CommitLogObserver)17 CommitLogReader (cz.o2.proxima.direct.commitlog.CommitLogReader)15 Partition (cz.o2.proxima.storage.Partition)15 HashMap (java.util.HashMap)14 List (java.util.List)14 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)13 AtomicReference (java.util.concurrent.atomic.AtomicReference)13 WatermarkEstimator (cz.o2.proxima.time.WatermarkEstimator)12 CountDownLatch (java.util.concurrent.CountDownLatch)12 UUID (java.util.UUID)11 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)11 Collectors (java.util.stream.Collectors)11 HashSet (java.util.HashSet)10 Set (java.util.Set)10 AttributeDescriptor (cz.o2.proxima.repository.AttributeDescriptor)9