
Example 1 with Partitioner

Use of cz.o2.proxima.storage.commitlog.Partitioner in project proxima-platform by O2-Czech-Republic.

From the class KafkaLogReaderIT, method writePoisonedPills.

/**
 * Write poisoned pills (elements with timestamp = {@link Watermarks#MAX_WATERMARK}) to all
 * partitions.
 *
 * @param numPartitions Number of partitions in topic.
 * @return Completion latch.
 */
private CountDownLatch writePoisonedPills(int numPartitions) {
    final OnlineAttributeWriter writer = Optionals.get(operator.getWriter(fooDescriptor));
    // We assume the test uses the default partitioner.
    final KeyPartitioner keyPartitioner = new KeyPartitioner();
    final Set<Integer> poisonedPartitions = new HashSet<>();
    final CountDownLatch done = new CountDownLatch(numPartitions);
    for (int i = 0; poisonedPartitions.size() < numPartitions; i++) {
        final StreamElement poisonedPill =
            StreamElement.upsert(
                entity,
                fooDescriptor,
                UUID.randomUUID().toString(),
                String.format("poisoned-pill-%d", i),
                fooDescriptor.getName(),
                Watermarks.MAX_WATERMARK,
                "value".getBytes(StandardCharsets.UTF_8));
        // Clear the sign bit so that a negative partition id still maps into [0, numPartitions).
        final int partition =
            (keyPartitioner.getPartitionId(poisonedPill) & Integer.MAX_VALUE) % numPartitions;
        if (poisonedPartitions.add(partition)) {
            writer.write(
                poisonedPill,
                (success, error) -> {
                    if (success) {
                        done.countDown();
                    }
                });
        }
    }
    return done;
}
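
The bit trick above, masking with Integer.MAX_VALUE before the modulo, is what the later examples delegate to Partitioners.getTruncatedPartitionId: it folds an arbitrary, possibly negative partition id into a valid index for the topic. As a minimal sketch, assuming the helper does nothing beyond the non-negative modulo shown in this test and that Partitioner exposes the same getPartitionId(StreamElement) method that KeyPartitioner is called with here (the actual cz.o2.proxima.storage.commitlog.Partitioners utility may differ):

static int getTruncatedPartitionId(Partitioner partitioner, StreamElement element, int numPartitions) {
    // Clear the sign bit so hash-based ids (which may be negative) land in [0, numPartitions).
    return (partitioner.getPartitionId(element) & Integer.MAX_VALUE) % numPartitions;
}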

Example 2 with Partitioner

Use of cz.o2.proxima.storage.commitlog.Partitioner in project proxima-platform by O2-Czech-Republic.

From the class BatchLogSourceFunctionTest, method testSnapshotAndRestore.

private void testSnapshotAndRestore(int numSubtasks, int numRestoredSubtasks) throws Exception {
    final Repository repository = Repository.ofTest(ConfigFactory.parseString(MODEL));
    final AttributeDescriptor<?> attributeDescriptor = repository.getEntity("test").getAttribute("data");
    final Instant now = Instant.now();
    final int numCommitLogPartitions = 30;
    final int numElements = 10_000;
    final Partitioner partitioner = new KeyAttributePartitioner();
    final Map<Integer, Integer> expectedElements = new HashMap<>();
    final Map<Integer, List<StreamElement>> partitionElements = new HashMap<>();
    final List<StreamElement> emittedElements = new ArrayList<>();
    for (int i = 0; i < numElements; i++) {
        final StreamElement element = newData(repository, "key_" + i, now, "value_" + i);
        emittedElements.add(element);
        final int partitionId = Partitioners.getTruncatedPartitionId(partitioner, element, numCommitLogPartitions);
        final int subtaskId = partitionId % numSubtasks;
        partitionElements.computeIfAbsent(partitionId, ArrayList::new).add(element);
        expectedElements.merge(subtaskId, 1, Integer::sum);
    }
    final List<StreamElement> result = Collections.synchronizedList(new ArrayList<>());
    final List<OperatorSubtaskState> snapshots = new ArrayList<>();
    // Run the first iteration - clean state. We subtract a random number of elements from each
    // subtask's expected count; those elements will be processed in the second iteration.
    int subtractTotal = 0;
    for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
        int numExpectedElements = expectedElements.getOrDefault(subtaskIndex, 0);
        if (numExpectedElements > 0) {
            final int subtractCurrent = RANDOM.nextInt(numExpectedElements);
            numExpectedElements -= subtractCurrent;
            subtractTotal += subtractCurrent;
        }
        snapshots.add(
            runSubtask(
                repository,
                attributeDescriptor,
                null,
                result::add,
                numSubtasks,
                subtaskIndex,
                numExpectedElements,
                partitionElements.entrySet().stream()
                    .sorted(Comparator.comparingInt(Map.Entry::getKey))
                    .map(Map.Entry::getValue)
                    .collect(Collectors.toList())));
    }
    Assertions.assertEquals(numElements - subtractTotal, result.size());
    final OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(snapshots.toArray(new OperatorSubtaskState[0]));
    // Run the second iteration - restored from snapshot.
    for (int subtaskIndex = 0; subtaskIndex < numRestoredSubtasks; subtaskIndex++) {
        runSubtask(
            repository,
            attributeDescriptor,
            mergedState,
            result::add,
            numRestoredSubtasks,
            subtaskIndex,
            -1,
            partitionElements.entrySet().stream()
                .sorted(Comparator.comparingInt(Map.Entry::getKey))
                .map(Map.Entry::getValue)
                .collect(Collectors.toList()));
    }
    final List<String> expectedKeys = emittedElements.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
    final List<String> receivedKeys = result.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
    Assertions.assertEquals(expectedKeys.size(), receivedKeys.size());
    Assertions.assertEquals(expectedKeys, receivedKeys);
}
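
Examples 2 through 5 rely on KeyAttributePartitioner to spread elements over partitions. Purely as an illustration of the contract such a partitioner has to fulfil, and not the library's actual implementation, a partitioner deriving the id from the element's key and attribute name might be sketched as follows (assuming StreamElement exposes getKey() and getAttribute(), and using java.util.Objects):

public class KeyAndAttributeIllustrationPartitioner implements Partitioner {

    @Override
    public int getPartitionId(StreamElement element) {
        // Combine key and attribute name into a stable hash; callers truncate it to the partition count.
        return Objects.hash(element.getKey(), element.getAttribute());
    }
}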

Example 3 with Partitioner

Use of cz.o2.proxima.storage.commitlog.Partitioner in project proxima-platform by O2-Czech-Republic.

From the class InMemStorageTest, method testObserveSinglePartitionOutOfMultiplePartitions.

@Test
public void testObserveSinglePartitionOutOfMultiplePartitions() throws InterruptedException {
    final int numPartitions = 3;
    final InMemStorage storage = new InMemStorage();
    final DataAccessor accessor = storage.createAccessor(direct, createFamilyDescriptor(URI.create("inmem:///test"), numPartitions));
    final CommitLogReader reader = Optionals.get(accessor.getCommitLogReader(direct.getContext()));
    final AttributeWriterBase writer = Optionals.get(accessor.getWriter(direct.getContext()));
    final int numElements = 999;
    final ConcurrentMap<Partition, Long> partitionHistogram = new ConcurrentHashMap<>();
    // Elements are distributed uniformly across partitions.
    final CountDownLatch elementsReceived = new CountDownLatch(numElements / numPartitions);
    // Start observer.
    final List<Partition> consumedPartitions = reader.getPartitions().subList(0, 1);
    final ObserveHandle observeHandle = reader.observePartitions(consumedPartitions, new CommitLogObserver() {

        @Override
        public void onRepartition(OnRepartitionContext context) {
            assertEquals(numPartitions, context.partitions().size());
        }

        @Override
        public boolean onNext(StreamElement ingest, OnNextContext context) {
            partitionHistogram.merge(context.getPartition(), 1L, Long::sum);
            context.confirm();
            elementsReceived.countDown();
            return elementsReceived.getCount() > 0;
        }

        @Override
        public boolean onError(Throwable error) {
            throw new RuntimeException(error);
        }
    });
    // Write data.
    final Partitioner partitioner = new KeyAttributePartitioner();
    final Map<Partition, Long> expectedPartitionHistogram = new HashMap<>();
    for (int i = 0; i < numElements; i++) {
        final StreamElement element =
            StreamElement.upsert(
                entity,
                data,
                UUID.randomUUID().toString(),
                "key_" + i,
                data.getName(),
                System.currentTimeMillis(),
                new byte[] { 1, 2, 3 });
        expectedPartitionHistogram.merge(
            Partition.of(Partitioners.getTruncatedPartitionId(partitioner, element, numPartitions)),
            1L,
            Long::sum);
        writer.online().write(element, CommitCallback.noop());
    }
    assertEquals(3, expectedPartitionHistogram.size());
    // Wait for all elements to be received.
    elementsReceived.await();
    assertEquals(1, partitionHistogram.size());
    assertEquals(1, observeHandle.getCurrentOffsets().size());
    assertEquals(expectedPartitionHistogram.get(Iterables.getOnlyElement(consumedPartitions)), partitionHistogram.get(Iterables.getOnlyElement(consumedPartitions)));
}

Example 4 with Partitioner

Use of cz.o2.proxima.storage.commitlog.Partitioner in project proxima-platform by O2-Czech-Republic.

From the class InMemStorage, method createAccessor.

@Override
public DataAccessor createAccessor(DirectDataOperator op, AttributeFamilyDescriptor familyDescriptor) {
    final EntityDescriptor entity = familyDescriptor.getEntity();
    final URI uri = familyDescriptor.getStorageUri();
    final Map<String, Object> cfg = familyDescriptor.getCfg();
    log.info("Creating accessor {} for URI {}", getClass(), uri);
    holder().observers.computeIfAbsent(uri, k -> Collections.synchronizedNavigableMap(new TreeMap<>()));
    final int numPartitions =
        Optional.ofNullable(cfg.get(NUM_PARTITIONS))
            .map(v -> Integer.parseInt(v.toString()))
            .orElse(1);
    final Partitioner partitioner =
        Optional.ofNullable(cfg.get(ConfigConstants.PARTITIONER))
            .map(name -> Classpath.newInstance(name.toString(), Partitioner.class))
            .orElseGet(KeyAttributePartitioner::new);
    final Repository opRepo = op.getRepository();
    final RepositoryFactory repositoryFactory = opRepo.asFactory();
    final OnlineAttributeWriter.Factory<?> writerFactory =
        new Writer(entity, uri, numPartitions, partitioner).asFactory();
    final CommitLogReader.Factory<?> commitLogReaderFactory =
        new InMemCommitLogReader(entity, uri, op.getContext().getExecutorFactory(), partitioner, numPartitions)
            .asFactory();
    final RandomAccessReader.Factory<Reader> randomAccessReaderFactory;
    final BatchLogReader.Factory<Reader> batchLogReaderFactory;
    final CachedView.Factory cachedViewFactory;
    if (numPartitions > 1) {
        randomAccessReaderFactory = null;
        batchLogReaderFactory = null;
        cachedViewFactory = null;
    } else {
        final ReaderFactory readerFactory = new Reader(entity, uri, op.getContext().getExecutorFactory()).asFactory();
        randomAccessReaderFactory = readerFactory;
        batchLogReaderFactory = readerFactory;
        cachedViewFactory = new LocalCachedPartitionedView(entity, commitLogReaderFactory.apply(opRepo), writerFactory.apply(opRepo)).asFactory();
    }
    return new DataAccessor() {

        private static final long serialVersionUID = 1L;

        @Nullable
        private transient Repository repo = opRepo;

        @Override
        public URI getUri() {
            return uri;
        }

        @Override
        public Optional<AttributeWriterBase> getWriter(Context context) {
            Objects.requireNonNull(context);
            return Optional.of(writerFactory.apply(repo()));
        }

        @Override
        public Optional<CommitLogReader> getCommitLogReader(Context context) {
            Objects.requireNonNull(context);
            return Optional.of(commitLogReaderFactory.apply(repo()));
        }

        @Override
        public Optional<RandomAccessReader> getRandomAccessReader(Context context) {
            Objects.requireNonNull(context);
            return Optional.ofNullable(randomAccessReaderFactory).map(item -> item.apply(repo()));
        }

        @Override
        public Optional<CachedView> getCachedView(Context context) {
            Objects.requireNonNull(context);
            return Optional.ofNullable(cachedViewFactory).map(item -> item.apply(repo()));
        }

        @Override
        public Optional<BatchLogReader> getBatchLogReader(Context context) {
            Objects.requireNonNull(context);
            return Optional.ofNullable(batchLogReaderFactory).map(item -> item.apply(repo()));
        }

        private Repository repo() {
            if (this.repo == null) {
                this.repo = repositoryFactory.apply();
            }
            return this.repo;
        }
    };
}
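
The accessor above resolves the partitioner reflectively: when the family configuration contains a value under ConfigConstants.PARTITIONER, Classpath.newInstance loads that class, otherwise KeyAttributePartitioner is used, and the partition count comes from a separate NUM_PARTITIONS entry. The following sketch shows a family configuration map that would exercise this path; the literal key strings and the partitioner class name are assumptions for illustration only, since the excerpt shows the constants but not their values:

// Hypothetical configuration for an inmem:// attribute family with a custom partitioner.
final Map<String, Object> cfg = new HashMap<>();
cfg.put("num-partitions", 4);                               // assumed literal value of NUM_PARTITIONS
cfg.put("partitioner", "com.example.MyCustomPartitioner");  // assumed literal value of ConfigConstants.PARTITIONER

The referenced class presumably needs a public no-argument constructor for Classpath.newInstance; beyond that it only has to implement the single getPartitionId method.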

Example 5 with Partitioner

Use of cz.o2.proxima.storage.commitlog.Partitioner in project proxima-platform by O2-Czech-Republic.

From the class CommitLogSourceFunctionTest, method testSnapshotAndRestore.

private void testSnapshotAndRestore(int numSubtasks, int numRestoredSubtasks) throws Exception {
    final Repository repository = Repository.ofTest(ConfigFactory.parseString(MODEL));
    final DirectDataOperator direct = repository.getOrCreateOperator(DirectDataOperator.class);
    final AttributeDescriptor<?> attributeDescriptor = repository.getEntity("test").getAttribute("data");
    final Instant now = Instant.now();
    final OnlineAttributeWriter writer = Optionals.get(direct.getWriter(attributeDescriptor));
    final int numCommitLogPartitions = 3;
    final int numElements = 1000;
    final Partitioner partitioner = new KeyAttributePartitioner();
    final Map<Integer, Integer> partitionElements = new HashMap<>();
    final List<StreamElement> emittedElements = new ArrayList<>();
    for (int i = 0; i < numElements; i++) {
        final StreamElement element = newData(repository, "key_" + i, now, "value_" + i);
        emittedElements.add(element);
        partitionElements.merge(
            Partitioners.getTruncatedPartitionId(
                partitioner, element, Math.min(numCommitLogPartitions, numSubtasks)),
            1,
            Integer::sum);
        writer.write(element, CommitCallback.noop());
    }
    final List<StreamElement> result = Collections.synchronizedList(new ArrayList<>());
    final List<OperatorSubtaskState> snapshots = new ArrayList<>();
    // Run first iteration - clean state.
    for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
        final int expectedElements = partitionElements.getOrDefault(subtaskIndex, 0);
        snapshots.add(runSubtask(repository, attributeDescriptor, null, result::add, numSubtasks, subtaskIndex, expectedElements));
    }
    final OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(snapshots.toArray(new OperatorSubtaskState[0]));
    // Run second iteration - restored from snapshot.
    partitionElements.clear();
    for (int i = 0; i < numElements; i++) {
        final StreamElement element = newData(repository, "second_key_" + i, now, "value_" + i);
        emittedElements.add(element);
        partitionElements.merge(
            Partitioners.getTruncatedPartitionId(
                partitioner, element, Math.min(numCommitLogPartitions, numRestoredSubtasks)),
            1,
            Integer::sum);
        writer.write(element, CommitCallback.noop());
    }
    Assertions.assertEquals(1000, result.size());
    for (int subtaskIndex = 0; subtaskIndex < numRestoredSubtasks; subtaskIndex++) {
        final int expectedElements = partitionElements.getOrDefault(subtaskIndex, 0);
        runSubtask(repository, attributeDescriptor, mergedState, result::add, numRestoredSubtasks, subtaskIndex, expectedElements);
    }
    final List<String> expectedKeys = emittedElements.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
    final List<String> receivedKeys = result.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
    Assertions.assertEquals(expectedKeys, receivedKeys);
}
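
The Math.min(numCommitLogPartitions, numSubtasks) in the counting loops captures the effective parallelism: with fewer subtasks than partitions, several partitions collapse onto the same subtask, and with fewer partitions than subtasks, the extra subtasks receive nothing. A small sketch of the same counting the test performs, keyed by the truncated id that the test treats directly as the subtask index:

static Map<Integer, Integer> expectedElementsPerSubtask(
        List<StreamElement> elements, Partitioner partitioner, int numPartitions, int numSubtasks) {
    final int effectiveParallelism = Math.min(numPartitions, numSubtasks);
    final Map<Integer, Integer> counts = new HashMap<>();
    for (StreamElement element : elements) {
        // Same helper the test uses; the resulting id doubles as the subtask index.
        counts.merge(
            Partitioners.getTruncatedPartitionId(partitioner, element, effectiveParallelism), 1, Integer::sum);
    }
    return counts;
}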

Aggregations

StreamElement (cz.o2.proxima.storage.StreamElement): 6
HashMap (java.util.HashMap): 6
Repository (cz.o2.proxima.repository.Repository): 4
KeyAttributePartitioner (cz.o2.proxima.storage.commitlog.KeyAttributePartitioner): 4
Partitioner (cz.o2.proxima.storage.commitlog.Partitioner): 4
ArrayList (java.util.ArrayList): 4
CommitLogObserver (cz.o2.proxima.direct.commitlog.CommitLogObserver): 3
CommitLogReader (cz.o2.proxima.direct.commitlog.CommitLogReader): 3
ObserveHandle (cz.o2.proxima.direct.commitlog.ObserveHandle): 3
DirectDataOperator (cz.o2.proxima.direct.core.DirectDataOperator): 3
OnlineAttributeWriter (cz.o2.proxima.direct.core.OnlineAttributeWriter): 3
Partition (cz.o2.proxima.storage.Partition): 3
List (java.util.List): 3
Map (java.util.Map): 3
CountDownLatch (java.util.concurrent.CountDownLatch): 3
Test (org.junit.Test): 3
Lists (com.google.common.collect.Lists): 2
AttributeWriterBase (cz.o2.proxima.direct.core.AttributeWriterBase): 2
DataAccessor (cz.o2.proxima.direct.core.DataAccessor): 2
AttributeDescriptor (cz.o2.proxima.repository.AttributeDescriptor): 2