Example 1 with BatchLogReader

Use of cz.o2.proxima.direct.batch.BatchLogReader in project proxima-platform by O2-Czech-Republic.

From class HadoopStorageTest, method testObserveCancel.

@Test(timeout = 5000L)
public void testObserveCancel() throws InterruptedException {
    Map<String, Object> cfg = cfg(HadoopDataAccessor.HADOOP_ROLL_INTERVAL, -1);
    HadoopDataAccessor accessor = new HadoopDataAccessor(TestUtils.createTestFamily(entity, uri, cfg));
    CountDownLatch latch = new CountDownLatch(1);
    writeOneElement(accessor, (success, error) -> {
        assertTrue(success);
        assertNull(error);
        latch.countDown();
    }).updateWatermark(Long.MAX_VALUE);
    latch.await();
    BatchLogReader reader = accessor.getBatchLogReader(direct.getContext()).orElse(null);
    assertNotNull(reader);
    List<Partition> partitions = reader.getPartitions();
    assertEquals(1, partitions.size());
    CountDownLatch cancelledLatch = new CountDownLatch(1);
    AtomicReference<ObserveHandle> handle = new AtomicReference<>();
    handle.set(reader.observe(partitions, Collections.singletonList(attribute), new BatchLogObserver() {

        @Override
        public boolean onNext(StreamElement element) {
            // closing the handle from inside onNext cancels the observation,
            // so the test expects onCancelled() rather than onCompleted()
            handle.get().close();
            return true;
        }

        @Override
        public void onCompleted() {
            fail("onCompleted should not have been called");
        }

        @Override
        public void onCancelled() {
            cancelledLatch.countDown();
        }

        @Override
        public boolean onError(Throwable error) {
            onCancelled();
            return true;
        }
    }));
    cancelledLatch.await();
}
Also used : Iterables(com.google.common.collect.Iterables) AttributeWriterBase(cz.o2.proxima.direct.core.AttributeWriterBase) BatchLogReader(cz.o2.proxima.direct.batch.BatchLogReader) Partition(cz.o2.proxima.storage.Partition) EntityDescriptor(cz.o2.proxima.repository.EntityDescriptor) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) ExceptionUtils(cz.o2.proxima.util.ExceptionUtils) StreamElement(cz.o2.proxima.storage.StreamElement) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) ConfigFactory(com.typesafe.config.ConfigFactory) URI(java.net.URI) ObserveHandle(cz.o2.proxima.direct.batch.ObserveHandle) Before(org.junit.Before) BulkAttributeWriter(cz.o2.proxima.direct.core.BulkAttributeWriter) Repository(cz.o2.proxima.repository.Repository) TestUtils(cz.o2.proxima.util.TestUtils) SynchronousQueue(java.util.concurrent.SynchronousQueue) AttributeDescriptor(cz.o2.proxima.repository.AttributeDescriptor) BatchLogObserver(cz.o2.proxima.direct.batch.BatchLogObserver) IOException(java.io.IOException) BlockingQueue(java.util.concurrent.BlockingQueue) Test(org.junit.Test) AttributeFamilyDescriptor(cz.o2.proxima.repository.AttributeFamilyDescriptor) UUID(java.util.UUID) CommitCallback(cz.o2.proxima.direct.core.CommitCallback) File(java.io.File) Objects(java.util.Objects) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConfigRepository(cz.o2.proxima.repository.ConfigRepository) Rule(org.junit.Rule) Accept(cz.o2.proxima.storage.internal.AbstractDataAccessorFactory.Accept) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) DirectDataOperator(cz.o2.proxima.direct.core.DirectDataOperator) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
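
The pattern in this test can be distilled into a small synchronous helper. Below is a minimal sketch, not taken from the project: the readAllElements name and the latch-based draining are illustrative assumptions, while the observe/onNext/onCompleted/onError calls follow the contract visible in the examples on this page.

private static List<StreamElement> readAllElements(
        BatchLogReader reader, AttributeDescriptor<?> attribute) throws InterruptedException {
    List<StreamElement> collected = Collections.synchronizedList(new ArrayList<>());
    CountDownLatch done = new CountDownLatch(1);
    reader.observe(reader.getPartitions(), Collections.singletonList(attribute), new BatchLogObserver() {

        @Override
        public boolean onNext(StreamElement element) {
            collected.add(element);
            // returning true asks the reader to deliver the next element
            return true;
        }

        @Override
        public void onCompleted() {
            done.countDown();
        }

        @Override
        public boolean onError(Throwable error) {
            done.countDown();
            return false;
        }
    });
    done.await();
    return collected;
}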

Example 2 with BatchLogReader

Use of cz.o2.proxima.direct.batch.BatchLogReader in project proxima-platform by O2-Czech-Republic.

From class HadoopStorageTest, method testWriteElementNotYetFlushed.

@Test(timeout = 5_000L)
public void testWriteElementNotYetFlushed() throws InterruptedException {
    Map<String, Object> cfg = cfg(HadoopDataAccessor.HADOOP_ROLL_INTERVAL, 1000);
    HadoopDataAccessor accessor = new HadoopDataAccessor(TestUtils.createTestFamily(entity, uri, cfg));
    CountDownLatch latch = new CountDownLatch(1);
    BulkAttributeWriter writer = writeOneElement(accessor, ((success, error) -> {
        if (error != null) {
            log.error("Failed to flush write", error);
        }
        assertTrue("Error in flush " + error, success);
        assertNull(error);
        latch.countDown();
    }));
    assertTrue(root.exists());
    List<File> files = listRecursively(root);
    assertEquals("Expected single file in " + files, 1, files.size());
    assertTrue(Iterables.getOnlyElement(files).getAbsolutePath().contains("_tmp"));
    BatchLogReader reader = accessor.getBatchLogReader(direct.getContext()).orElse(null);
    assertNotNull(reader);
    List<Partition> partitions = reader.getPartitions();
    assertTrue("Expected empty partitions, got " + partitions, partitions.isEmpty());
    // advance watermark to flush
    writer.updateWatermark(Long.MAX_VALUE);
    latch.await();
    partitions = reader.getPartitions();
    assertEquals(1, partitions.size());
    BlockingQueue<StreamElement> queue = new SynchronousQueue<>();
    reader.observe(partitions, Collections.singletonList(attribute), new BatchLogObserver() {

        @Override
        public boolean onNext(StreamElement element) {
            ExceptionUtils.unchecked(() -> queue.put(element));
            return true;
        }
    });
    StreamElement element = queue.take();
    assertNotNull(element);
}
Also used : Iterables(com.google.common.collect.Iterables) AttributeWriterBase(cz.o2.proxima.direct.core.AttributeWriterBase) BatchLogReader(cz.o2.proxima.direct.batch.BatchLogReader) Partition(cz.o2.proxima.storage.Partition) EntityDescriptor(cz.o2.proxima.repository.EntityDescriptor) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) ExceptionUtils(cz.o2.proxima.util.ExceptionUtils) StreamElement(cz.o2.proxima.storage.StreamElement) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) ConfigFactory(com.typesafe.config.ConfigFactory) URI(java.net.URI) ObserveHandle(cz.o2.proxima.direct.batch.ObserveHandle) Before(org.junit.Before) BulkAttributeWriter(cz.o2.proxima.direct.core.BulkAttributeWriter) Repository(cz.o2.proxima.repository.Repository) TestUtils(cz.o2.proxima.util.TestUtils) SynchronousQueue(java.util.concurrent.SynchronousQueue) AttributeDescriptor(cz.o2.proxima.repository.AttributeDescriptor) BatchLogObserver(cz.o2.proxima.direct.batch.BatchLogObserver) IOException(java.io.IOException) BlockingQueue(java.util.concurrent.BlockingQueue) Test(org.junit.Test) AttributeFamilyDescriptor(cz.o2.proxima.repository.AttributeFamilyDescriptor) UUID(java.util.UUID) CommitCallback(cz.o2.proxima.direct.core.CommitCallback) File(java.io.File) Objects(java.util.Objects) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConfigRepository(cz.o2.proxima.repository.ConfigRepository) Rule(org.junit.Rule) Accept(cz.o2.proxima.storage.internal.AbstractDataAccessorFactory.Accept) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) DirectDataOperator(cz.o2.proxima.direct.core.DirectDataOperator) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
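
For comparison with the handle-based cancellation in Example 1, an observer can also stop cooperatively. The following is a minimal sketch, reusing the reader and attribute from the test above and assuming the usual contract that the boolean returned from onNext means "continue processing"; whether onCompleted or onCancelled fires afterwards depends on the reader implementation.

BlockingQueue<StreamElement> firstElement = new SynchronousQueue<>();
reader.observe(reader.getPartitions(), Collections.singletonList(attribute), new BatchLogObserver() {

    @Override
    public boolean onNext(StreamElement element) {
        ExceptionUtils.unchecked(() -> firstElement.put(element));
        // returning false signals that no further elements are wanted
        return false;
    }
});
StreamElement head = firstElement.take();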

Example 3 with BatchLogReader

Use of cz.o2.proxima.direct.batch.BatchLogReader in project proxima-platform by O2-Czech-Republic.

From class HadoopStorageTest, method testWriteElementJson.

@Test(timeout = 5000L)
public void testWriteElementJson() throws InterruptedException {
    Map<String, Object> cfg = cfg(HadoopDataAccessor.HADOOP_ROLL_INTERVAL, -1, "hadoop.format", "json");
    HadoopDataAccessor accessor = new HadoopDataAccessor(TestUtils.createTestFamily(entity, uri, cfg));
    CountDownLatch latch = new CountDownLatch(1);
    BulkAttributeWriter writer = writeOneElement(accessor, ((success, error) -> {
        assertTrue(success);
        assertNull(error);
        latch.countDown();
    }));
    writer.updateWatermark(Long.MAX_VALUE);
    latch.await();
    assertTrue(root.exists());
    List<File> files = listRecursively(root);
    assertEquals("Expected single file in " + files, 1, files.size());
    assertFalse(Iterables.getOnlyElement(files).getAbsolutePath().contains("_tmp"));
    BatchLogReader reader = accessor.getBatchLogReader(direct.getContext()).orElse(null);
    assertNotNull(reader);
    List<Partition> partitions = reader.getPartitions();
    assertEquals(1, partitions.size());
    BlockingQueue<StreamElement> queue = new SynchronousQueue<>();
    reader.observe(partitions, Collections.singletonList(attribute), new BatchLogObserver() {

        @Override
        public boolean onNext(StreamElement element) {
            ExceptionUtils.unchecked(() -> queue.put(element));
            return true;
        }
    });
    StreamElement element = queue.take();
    assertNotNull(element);
}
Also used : Iterables(com.google.common.collect.Iterables) AttributeWriterBase(cz.o2.proxima.direct.core.AttributeWriterBase) BatchLogReader(cz.o2.proxima.direct.batch.BatchLogReader) Partition(cz.o2.proxima.storage.Partition) EntityDescriptor(cz.o2.proxima.repository.EntityDescriptor) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) ExceptionUtils(cz.o2.proxima.util.ExceptionUtils) StreamElement(cz.o2.proxima.storage.StreamElement) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) ConfigFactory(com.typesafe.config.ConfigFactory) URI(java.net.URI) ObserveHandle(cz.o2.proxima.direct.batch.ObserveHandle) Before(org.junit.Before) BulkAttributeWriter(cz.o2.proxima.direct.core.BulkAttributeWriter) Repository(cz.o2.proxima.repository.Repository) TestUtils(cz.o2.proxima.util.TestUtils) SynchronousQueue(java.util.concurrent.SynchronousQueue) AttributeDescriptor(cz.o2.proxima.repository.AttributeDescriptor) BatchLogObserver(cz.o2.proxima.direct.batch.BatchLogObserver) IOException(java.io.IOException) BlockingQueue(java.util.concurrent.BlockingQueue) Test(org.junit.Test) AttributeFamilyDescriptor(cz.o2.proxima.repository.AttributeFamilyDescriptor) UUID(java.util.UUID) CommitCallback(cz.o2.proxima.direct.core.CommitCallback) File(java.io.File) Objects(java.util.Objects) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) ConfigRepository(cz.o2.proxima.repository.ConfigRepository) Rule(org.junit.Rule) Accept(cz.o2.proxima.storage.internal.AbstractDataAccessorFactory.Accept) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) DirectDataOperator(cz.o2.proxima.direct.core.DirectDataOperator) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
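
The cfg(...) calls in these tests are a local helper producing the configuration Map; the helper itself is not shown on this page, so the following is an assumption about an equivalent hand-built configuration for the JSON variant.

Map<String, Object> cfg = new HashMap<>();
// roll interval, as passed by the test above
cfg.put(HadoopDataAccessor.HADOOP_ROLL_INTERVAL, -1);
// select the JSON output format instead of the default
cfg.put("hadoop.format", "json");
HadoopDataAccessor accessor = new HadoopDataAccessor(TestUtils.createTestFamily(entity, uri, cfg));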

Example 4 with BatchLogReader

Use of cz.o2.proxima.direct.batch.BatchLogReader in project proxima-platform by O2-Czech-Republic.

From class DirectDataAccessorWrapper, method createStreamFromUpdates.

@Override
public PCollection<StreamElement> createStreamFromUpdates(Pipeline pipeline, List<AttributeDescriptor<?>> attrs, long startStamp, long endStamp, long limit) {
    BatchLogReader reader = direct.getBatchLogReader(context).orElseThrow(() -> new IllegalArgumentException("Cannot create batch reader from " + direct));
    final PCollection<StreamElement> ret;
    ret = pipeline.apply("ReadBatchUnbounded:" + uri, BatchLogRead.of(attrs, Long.MAX_VALUE, factory, reader, startStamp, endStamp));
    return ret.setCoder(StreamElementCoder.of(factory)).setTypeDescriptor(TypeDescriptor.of(StreamElement.class));
}
Also used : BatchLogReader(cz.o2.proxima.direct.batch.BatchLogReader) StreamElement(cz.o2.proxima.storage.StreamElement)
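
A rough sketch of how such a wrapper might be driven from a Beam pipeline; the wrapper variable and the attribute come from outside this snippet and are assumptions, only the createStreamFromUpdates signature is taken from the code above.

Pipeline pipeline = Pipeline.create();
PCollection<StreamElement> updates = wrapper.createStreamFromUpdates(
    pipeline,
    Collections.singletonList(attribute),
    // startStamp: no lower bound
    Long.MIN_VALUE,
    // endStamp: no upper bound
    Long.MAX_VALUE,
    // limit: read everything
    Long.MAX_VALUE);
// downstream Beam transforms can consume the updates as a regular PCollection
pipeline.run();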

Example 5 with BatchLogReader

Use of cz.o2.proxima.direct.batch.BatchLogReader in project proxima-platform by O2-Czech-Republic.

From class BatchLogSourceFunctionTest, method testRunAndClose.

@Test
void testRunAndClose() throws Exception {
    final Repository repository = Repository.ofTest(ConfigFactory.parseString(MODEL));
    final AttributeDescriptor<?> attribute = repository.getEntity("test").getAttribute("data");
    final BatchLogSourceFunction<StreamElement> sourceFunction = new BatchLogSourceFunction<StreamElement>(repository.asFactory(), Collections.singletonList(attribute), ResultExtractor.identity()) {

        @Override
        BatchLogReader createLogReader(List<AttributeDescriptor<?>> attributeDescriptors) {
            final DirectDataOperator direct = repository.getOrCreateOperator(DirectDataOperator.class);
            final ListBatchReader reader = ListBatchReader.ofPartitioned(direct.getContext());
            return OffsetTrackingBatchLogReader.of(reader);
        }
    };
    final AbstractStreamOperatorTestHarness<StreamElement> testHarness = createTestHarness(sourceFunction, 1, 0);
    testHarness.initializeEmptyState();
    testHarness.open();
    final CheckedThread runThread = new CheckedThread("run") {

        @Override
        public void go() throws Exception {
            sourceFunction.run(new TestSourceContext<StreamElement>() {

                @Override
                public void collect(StreamElement element) {
                // No-op.
                }
            });
        }
    };
    runThread.start();
    sourceFunction.awaitRunning();
    sourceFunction.cancel();
    testHarness.close();
    // Make sure run thread finishes normally.
    runThread.sync();
}
Also used : DirectDataOperator(cz.o2.proxima.direct.core.DirectDataOperator) Repository(cz.o2.proxima.repository.Repository) ListBatchReader(cz.o2.proxima.direct.storage.ListBatchReader) StreamElement(cz.o2.proxima.storage.StreamElement) ArrayList(java.util.ArrayList) List(java.util.List) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Test(org.junit.jupiter.api.Test)

Aggregations

StreamElement (cz.o2.proxima.storage.StreamElement): 19
BatchLogReader (cz.o2.proxima.direct.batch.BatchLogReader): 16
Test (org.junit.Test): 14
BatchLogObserver (cz.o2.proxima.direct.batch.BatchLogObserver): 11
CountDownLatch (java.util.concurrent.CountDownLatch): 10
DirectDataOperator (cz.o2.proxima.direct.core.DirectDataOperator): 9
ArrayList (java.util.ArrayList): 9
SynchronousQueue (java.util.concurrent.SynchronousQueue): 9
List (java.util.List): 8
AttributeWriterBase (cz.o2.proxima.direct.core.AttributeWriterBase): 7
Repository (cz.o2.proxima.repository.Repository): 7
Partition (cz.o2.proxima.storage.Partition): 7
ObserveHandle (cz.o2.proxima.direct.batch.ObserveHandle): 6
AttributeDescriptor (cz.o2.proxima.repository.AttributeDescriptor): 6
Slf4j (lombok.extern.slf4j.Slf4j): 6
Preconditions (com.google.common.base.Preconditions): 5
BulkAttributeWriter (cz.o2.proxima.direct.core.BulkAttributeWriter): 5
CommitCallback (cz.o2.proxima.direct.core.CommitCallback): 5
AttributeFamilyDescriptor (cz.o2.proxima.repository.AttributeFamilyDescriptor): 5
EntityDescriptor (cz.o2.proxima.repository.EntityDescriptor): 5