Use of cz.o2.proxima.storage.commitlog.KeyAttributePartitioner in project proxima-platform by O2-Czech-Republic.
The class BatchLogSourceFunctionTest, method testSnapshotAndRestore.
private void testSnapshotAndRestore(int numSubtasks, int numRestoredSubtasks) throws Exception {
  final Repository repository = Repository.ofTest(ConfigFactory.parseString(MODEL));
  final AttributeDescriptor<?> attributeDescriptor =
      repository.getEntity("test").getAttribute("data");
  final Instant now = Instant.now();
  final int numCommitLogPartitions = 30;
  final int numElements = 10_000;
  final Partitioner partitioner = new KeyAttributePartitioner();
  final Map<Integer, Integer> expectedElements = new HashMap<>();
  final Map<Integer, List<StreamElement>> partitionElements = new HashMap<>();
  final List<StreamElement> emittedElements = new ArrayList<>();
  for (int i = 0; i < numElements; i++) {
    final StreamElement element = newData(repository, "key_" + i, now, "value_" + i);
    emittedElements.add(element);
    final int partitionId =
        Partitioners.getTruncatedPartitionId(partitioner, element, numCommitLogPartitions);
    final int subtaskId = partitionId % numSubtasks;
    partitionElements.computeIfAbsent(partitionId, k -> new ArrayList<>()).add(element);
    expectedElements.merge(subtaskId, 1, Integer::sum);
  }
  final List<StreamElement> result = Collections.synchronizedList(new ArrayList<>());
  final List<OperatorSubtaskState> snapshots = new ArrayList<>();
  // Run the first iteration - clean state. We subtract a random number of elements from each
  // subtask; the remainder is processed in the second iteration.
  int subtractTotal = 0;
  for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
    int numExpectedElements = expectedElements.getOrDefault(subtaskIndex, 0);
    if (numExpectedElements > 0) {
      final int subtractCurrent = RANDOM.nextInt(numExpectedElements);
      numExpectedElements -= subtractCurrent;
      subtractTotal += subtractCurrent;
    }
    snapshots.add(
        runSubtask(
            repository, attributeDescriptor, null, result::add, numSubtasks, subtaskIndex,
            numExpectedElements,
            partitionElements.entrySet().stream()
                .sorted(Comparator.comparingInt(Map.Entry::getKey))
                .map(Map.Entry::getValue)
                .collect(Collectors.toList())));
  }
  Assertions.assertEquals(numElements - subtractTotal, result.size());
  final OperatorSubtaskState mergedState =
      AbstractStreamOperatorTestHarness.repackageState(snapshots.toArray(new OperatorSubtaskState[0]));
  // Run the second iteration - restored from snapshot.
  for (int subtaskIndex = 0; subtaskIndex < numRestoredSubtasks; subtaskIndex++) {
    runSubtask(
        repository, attributeDescriptor, mergedState, result::add, numRestoredSubtasks,
        subtaskIndex, -1,
        partitionElements.entrySet().stream()
            .sorted(Comparator.comparingInt(Map.Entry::getKey))
            .map(Map.Entry::getValue)
            .collect(Collectors.toList()));
  }
  final List<String> expectedKeys =
      emittedElements.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
  final List<String> receivedKeys =
      result.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
  Assertions.assertEquals(expectedKeys.size(), receivedKeys.size());
  Assertions.assertEquals(expectedKeys, receivedKeys);
}
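The test above relies on two mappings: KeyAttributePartitioner assigns each element a partition id from its key and attribute, and the partition id modulo the parallelism selects the owning subtask. A minimal sketch of that arithmetic, assuming the partitioner derives its id from a hash of key and attribute and that Partitioners.getTruncatedPartitionId reduces it into the range [0, numPartitions); the helper below is hypothetical, not proxima API:

static int truncatedPartitionId(String key, String attribute, int numPartitions) {
  // Hash-based partition id in the spirit of KeyAttributePartitioner (assumed behavior).
  final int rawId = java.util.Objects.hash(key, attribute);
  // floorMod keeps the result in [0, numPartitions) even for negative hashes.
  return Math.floorMod(rawId, numPartitions);
}

// Mirrors the bookkeeping above, with numSubtasks = 4 as an illustrative value:
// an element landing on partition 17 of 30 is expected on subtask 17 % 4 = 1.
final int partitionId = truncatedPartitionId("key_42", "data", 30);
final int subtaskId = partitionId % 4;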
Use of cz.o2.proxima.storage.commitlog.KeyAttributePartitioner in project proxima-platform by O2-Czech-Republic.
The class InMemStorageTest, method testObserveSinglePartitionOutOfMultiplePartitions.
@Test
public void testObserveSinglePartitionOutOfMultiplePartitions() throws InterruptedException {
  final int numPartitions = 3;
  final InMemStorage storage = new InMemStorage();
  final DataAccessor accessor =
      storage.createAccessor(direct, createFamilyDescriptor(URI.create("inmem:///test"), numPartitions));
  final CommitLogReader reader = Optionals.get(accessor.getCommitLogReader(direct.getContext()));
  final AttributeWriterBase writer = Optionals.get(accessor.getWriter(direct.getContext()));
  final int numElements = 999;
  final ConcurrentMap<Partition, Long> partitionHistogram = new ConcurrentHashMap<>();
  // Elements are uniformly distributed between partitions.
  final CountDownLatch elementsReceived = new CountDownLatch(numElements / numPartitions);
  // Start an observer over the first partition only.
  final List<Partition> consumedPartitions = reader.getPartitions().subList(0, 1);
  final ObserveHandle observeHandle =
      reader.observePartitions(
          consumedPartitions,
          new CommitLogObserver() {
            @Override
            public void onRepartition(OnRepartitionContext context) {
              assertEquals(numPartitions, context.partitions().size());
            }

            @Override
            public boolean onNext(StreamElement ingest, OnNextContext context) {
              partitionHistogram.merge(context.getPartition(), 1L, Long::sum);
              context.confirm();
              elementsReceived.countDown();
              return elementsReceived.getCount() > 0;
            }

            @Override
            public boolean onError(Throwable error) {
              throw new RuntimeException(error);
            }
          });
  // Write data.
  final Partitioner partitioner = new KeyAttributePartitioner();
  final Map<Partition, Long> expectedPartitionHistogram = new HashMap<>();
  for (int i = 0; i < numElements; i++) {
    final StreamElement element =
        StreamElement.upsert(
            entity, data, UUID.randomUUID().toString(), "key_" + i, data.getName(),
            System.currentTimeMillis(), new byte[] {1, 2, 3});
    expectedPartitionHistogram.merge(
        Partition.of(Partitioners.getTruncatedPartitionId(partitioner, element, numPartitions)),
        1L, Long::sum);
    writer.online().write(element, CommitCallback.noop());
  }
  assertEquals(3, expectedPartitionHistogram.size());
  // Wait for all elements to be received.
  elementsReceived.await();
  assertEquals(1, partitionHistogram.size());
  assertEquals(1, observeHandle.getCurrentOffsets().size());
  assertEquals(
      expectedPartitionHistogram.get(Iterables.getOnlyElement(consumedPartitions)),
      partitionHistogram.get(Iterables.getOnlyElement(consumedPartitions)));
}
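The observer above terminates itself through its boolean return value rather than through the handle. A minimal sketch of that stop-after-N pattern in isolation, assuming (as the test relies on) that returning false from onNext ends the observation; the latch size is the per-partition expectation 999 / 3 = 333 under the uniform-distribution assumption stated in the comment:

final CountDownLatch remaining = new CountDownLatch(333);
final CommitLogObserver stopAfterN =
    new CommitLogObserver() {

      @Override
      public boolean onNext(StreamElement element, OnNextContext context) {
        context.confirm(); // acknowledge the element
        remaining.countDown();
        // Returning false after the last expected element stops consumption.
        return remaining.getCount() > 0;
      }

      @Override
      public boolean onError(Throwable error) {
        throw new RuntimeException(error);
      }
    };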
Use of cz.o2.proxima.storage.commitlog.KeyAttributePartitioner in project proxima-platform by O2-Czech-Republic.
The class InMemStorage, method createAccessor.
@Override
public DataAccessor createAccessor(DirectDataOperator op, AttributeFamilyDescriptor familyDescriptor) {
  final EntityDescriptor entity = familyDescriptor.getEntity();
  final URI uri = familyDescriptor.getStorageUri();
  final Map<String, Object> cfg = familyDescriptor.getCfg();
  log.info("Creating accessor {} for URI {}", getClass(), uri);
  holder().observers.computeIfAbsent(uri, k -> Collections.synchronizedNavigableMap(new TreeMap<>()));
  final int numPartitions =
      Optional.ofNullable(cfg.get(NUM_PARTITIONS))
          .map(v -> Integer.parseInt(v.toString()))
          .orElse(1);
  final Partitioner partitioner =
      Optional.ofNullable(cfg.get(ConfigConstants.PARTITIONER))
          .map(name -> Classpath.newInstance(name.toString(), Partitioner.class))
          .orElseGet(KeyAttributePartitioner::new);
  final Repository opRepo = op.getRepository();
  final RepositoryFactory repositoryFactory = opRepo.asFactory();
  final OnlineAttributeWriter.Factory<?> writerFactory =
      new Writer(entity, uri, numPartitions, partitioner).asFactory();
  final CommitLogReader.Factory<?> commitLogReaderFactory =
      new InMemCommitLogReader(entity, uri, op.getContext().getExecutorFactory(), partitioner, numPartitions)
          .asFactory();
  final RandomAccessReader.Factory<Reader> randomAccessReaderFactory;
  final BatchLogReader.Factory<Reader> batchLogReaderFactory;
  final CachedView.Factory cachedViewFactory;
  if (numPartitions > 1) {
    // Random access, batch reads and cached views are only supported for single-partition families.
    randomAccessReaderFactory = null;
    batchLogReaderFactory = null;
    cachedViewFactory = null;
  } else {
    final ReaderFactory readerFactory =
        new Reader(entity, uri, op.getContext().getExecutorFactory()).asFactory();
    randomAccessReaderFactory = readerFactory;
    batchLogReaderFactory = readerFactory;
    cachedViewFactory =
        new LocalCachedPartitionedView(entity, commitLogReaderFactory.apply(opRepo), writerFactory.apply(opRepo))
            .asFactory();
  }
  return new DataAccessor() {

    private static final long serialVersionUID = 1L;

    @Nullable private transient Repository repo = opRepo;

    @Override
    public URI getUri() {
      return uri;
    }

    @Override
    public Optional<AttributeWriterBase> getWriter(Context context) {
      Objects.requireNonNull(context);
      return Optional.of(writerFactory.apply(repo()));
    }

    @Override
    public Optional<CommitLogReader> getCommitLogReader(Context context) {
      Objects.requireNonNull(context);
      return Optional.of(commitLogReaderFactory.apply(repo()));
    }

    @Override
    public Optional<RandomAccessReader> getRandomAccessReader(Context context) {
      Objects.requireNonNull(context);
      return Optional.ofNullable(randomAccessReaderFactory).map(item -> item.apply(repo()));
    }

    @Override
    public Optional<CachedView> getCachedView(Context context) {
      Objects.requireNonNull(context);
      return Optional.ofNullable(cachedViewFactory).map(item -> item.apply(repo()));
    }

    @Override
    public Optional<BatchLogReader> getBatchLogReader(Context context) {
      Objects.requireNonNull(context);
      return Optional.ofNullable(batchLogReaderFactory).map(item -> item.apply(repo()));
    }
    private Repository repo() {
      // Lazily re-create the repository from its serializable factory after deserialization.
      if (this.repo == null) {
        this.repo = repositoryFactory.apply();
      }
      return this.repo;
    }
  };
}
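Note the branch on numPartitions: random access, batch reads and cached views are only wired up for single-partition families, so a family configured with more partitions exposes just the commit-log reader and writer. A hypothetical configuration sketch, assuming the NUM_PARTITIONS and ConfigConstants.PARTITIONER constants resolve to the string keys "numPartitions" and "partitioner" (both key names are assumptions; check the actual constants in the codebase):

final Map<String, Object> cfg = new HashMap<>();
// More than one partition: getRandomAccessReader, getBatchLogReader and getCachedView return empty.
cfg.put("numPartitions", 3);
// Optional; omitting this key falls back to KeyAttributePartitioner.
cfg.put("partitioner", "cz.o2.proxima.storage.commitlog.KeyAttributePartitioner");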
Use of cz.o2.proxima.storage.commitlog.KeyAttributePartitioner in project proxima-platform by O2-Czech-Republic.
The class CommitLogSourceFunctionTest, method testSnapshotAndRestore.
private void testSnapshotAndRestore(int numSubtasks, int numRestoredSubtasks) throws Exception {
  final Repository repository = Repository.ofTest(ConfigFactory.parseString(MODEL));
  final DirectDataOperator direct = repository.getOrCreateOperator(DirectDataOperator.class);
  final AttributeDescriptor<?> attributeDescriptor =
      repository.getEntity("test").getAttribute("data");
  final Instant now = Instant.now();
  final OnlineAttributeWriter writer = Optionals.get(direct.getWriter(attributeDescriptor));
  final int numCommitLogPartitions = 3;
  final int numElements = 1000;
  final Partitioner partitioner = new KeyAttributePartitioner();
  final Map<Integer, Integer> partitionElements = new HashMap<>();
  final List<StreamElement> emittedElements = new ArrayList<>();
  for (int i = 0; i < numElements; i++) {
    final StreamElement element = newData(repository, "key_" + i, now, "value_" + i);
    emittedElements.add(element);
    partitionElements.merge(
        Partitioners.getTruncatedPartitionId(partitioner, element, Math.min(numCommitLogPartitions, numSubtasks)),
        1, Integer::sum);
    writer.write(element, CommitCallback.noop());
  }
  final List<StreamElement> result = Collections.synchronizedList(new ArrayList<>());
  final List<OperatorSubtaskState> snapshots = new ArrayList<>();
  // Run the first iteration - clean state.
  for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
    final int expectedElements = partitionElements.getOrDefault(subtaskIndex, 0);
    snapshots.add(
        runSubtask(
            repository, attributeDescriptor, null, result::add, numSubtasks, subtaskIndex, expectedElements));
  }
  final OperatorSubtaskState mergedState =
      AbstractStreamOperatorTestHarness.repackageState(snapshots.toArray(new OperatorSubtaskState[0]));
  // Write a second batch of elements for the restored run.
  partitionElements.clear();
  for (int i = 0; i < numElements; i++) {
    final StreamElement element = newData(repository, "second_key_" + i, now, "value_" + i);
    emittedElements.add(element);
    partitionElements.merge(
        Partitioners.getTruncatedPartitionId(partitioner, element, Math.min(numCommitLogPartitions, numRestoredSubtasks)),
        1, Integer::sum);
    writer.write(element, CommitCallback.noop());
  }
  // Only the first batch has been processed at this point.
  Assertions.assertEquals(numElements, result.size());
  // Run the second iteration - restored from snapshot.
  for (int subtaskIndex = 0; subtaskIndex < numRestoredSubtasks; subtaskIndex++) {
    final int expectedElements = partitionElements.getOrDefault(subtaskIndex, 0);
    runSubtask(
        repository, attributeDescriptor, mergedState, result::add, numRestoredSubtasks,
        subtaskIndex, expectedElements);
  }
  final List<String> expectedKeys =
      emittedElements.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
  final List<String> receivedKeys =
      result.stream().map(StreamElement::getKey).sorted().collect(Collectors.toList());
  Assertions.assertEquals(expectedKeys, receivedKeys);
}
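Unlike the batch test, this test truncates partition ids to Math.min(numCommitLogPartitions, numSubtasks) and then indexes the expectation map directly by subtask index, which implies partition i is consumed by subtask i. A small sketch of the consequence, under that assumption (the concrete parallelism values are illustrative):

final int numCommitLogPartitions = 3;
final int numSubtasks = 4;
// Truncated ids fall in [0, 3), so a fourth subtask can never own a partition.
final int effectivePartitions = Math.min(numCommitLogPartitions, numSubtasks); // = 3
for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
  final boolean receivesData = subtaskIndex < effectivePartitions;
  System.out.printf("subtask %d expects data: %b%n", subtaskIndex, receivesData);
}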