Search in sources :

Example 11 with GenericInputDescriptor

Use of org.apache.samza.system.descriptors.GenericInputDescriptor in the Apache Samza project.

In class TestCouchbaseRemoteTableEndToEnd, method testEndToEnd:

/**
 * End-to-end run of the Couchbase-backed remote tables.
 *
 * <p>Seeds the input bucket with four user documents, runs an application that maps each
 * incoming user name to a key/value pair, joins it against the input table, and sends the
 * join result to the output table. Finally asserts that the output bucket holds the expected
 * name/age JSON document for every user.
 */
@Test
public void testEndToEnd() {
    // Encode explicitly: the no-arg getBytes() depends on the platform default charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    Bucket inputBucket = cluster.openBucket(inputBucketName);
    try {
        inputBucket.upsert(ByteArrayDocument.create("Alice", "20".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("Bob", "30".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("Chris", "40".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("David", "50".getBytes(utf8)));
    } finally {
        // Release the bucket even when seeding fails part-way through.
        inputBucket.close();
    }
    List<String> users = Arrays.asList("Alice", "Bob", "Chris", "David");
    final StreamApplication app = appDesc -> {
        DelegatingSystemDescriptor inputSystemDescriptor = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<String> inputDescriptor = inputSystemDescriptor.getInputDescriptor("User", new NoOpSerde<>());
        CouchbaseTableReadFunction<String> readFunction = new CouchbaseTableReadFunction<>(inputBucketName, String.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(inputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort()).withSerde(new StringSerde());
        CouchbaseTableWriteFunction<JsonObject> writeFunction = new CouchbaseTableWriteFunction<>(outputBucketName, JsonObject.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(outputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort());
        RemoteTableDescriptor inputTableDesc = new RemoteTableDescriptor<String, String, Void>("input-table").withReadFunction(readFunction).withRateLimiterDisabled();
        Table<KV<String, String>> inputTable = appDesc.getTable(inputTableDesc);
        RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<String, JsonObject, Object>("output-table").withReadFunction(new NoOpTableReadFunction<>()).withWriteFunction(writeFunction).withRateLimiterDisabled();
        Table<KV<String, JsonObject>> outputTable = appDesc.getTable(outputTableDesc);
        // Each user name is used as both key and value before the join against the input table.
        appDesc.getInputStream(inputDescriptor).map(k -> KV.of(k, k)).join(inputTable, new JoinFunction()).sendTo(outputTable);
    };
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    // The stream carries the user-name strings listed above, so the descriptor is typed as String.
    InMemoryInputDescriptor<String> inputDescriptor = isd.getInputDescriptor("User", new NoOpSerde<>());
    TestRunner.of(app).addInputStream(inputDescriptor, users).run(Duration.ofSeconds(10));
    Bucket outputBucket = cluster.openBucket(outputBucketName);
    try {
        Assert.assertEquals("{\"name\":\"Alice\",\"age\":\"20\"}", outputBucket.get("Alice").content().toString());
        Assert.assertEquals("{\"name\":\"Bob\",\"age\":\"30\"}", outputBucket.get("Bob").content().toString());
        Assert.assertEquals("{\"name\":\"Chris\",\"age\":\"40\"}", outputBucket.get("Chris").content().toString());
        Assert.assertEquals("{\"name\":\"David\",\"age\":\"50\"}", outputBucket.get("David").content().toString());
    } finally {
        // Close the bucket even when an assertion fails, so later tests are not affected.
        outputBucket.close();
    }
}
Also used : CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) CouchbaseEnvironment(com.couchbase.client.java.env.CouchbaseEnvironment) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) JsonObject(com.couchbase.client.java.document.json.JsonObject) BucketConfiguration(com.couchbase.mock.BucketConfiguration) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) DefaultCouchbaseEnvironment(com.couchbase.client.java.env.DefaultCouchbaseEnvironment) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) CouchbaseMock(com.couchbase.mock.CouchbaseMock) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) After(org.junit.After) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Before(org.junit.Before) Table(org.apache.samza.table.Table) ByteArrayDocument(com.couchbase.client.java.document.ByteArrayDocument) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Test(org.junit.Test) TestRunner(org.apache.samza.test.framework.TestRunner) Bucket(com.couchbase.client.java.Bucket) List(java.util.List) CouchbaseCluster(com.couchbase.client.java.CouchbaseCluster) Cluster(com.couchbase.client.java.Cluster) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) 
StreamApplication(org.apache.samza.application.StreamApplication) JsonObject(com.couchbase.client.java.document.json.JsonObject) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Bucket(com.couchbase.client.java.Bucket) CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) Test(org.junit.Test)

Example 12 with GenericInputDescriptor

Use of org.apache.samza.system.descriptors.GenericInputDescriptor in the Apache Samza project.

In class TestRemoteTableWithBatchEndToEnd, method doTestStreamTableJoinRemoteTable:

/**
 * Drives a stream-table join between page views and a remote profile table and writes the
 * joined records to a second remote table, optionally with batching enabled on either side.
 *
 * @param testName   key under which this run's counters and written records are tracked
 * @param batchRead  when true, a compact batch provider is attached to the profile table
 * @param batchWrite when true, a compact batch provider is attached to the output table and
 *                   the number of batch writes is verified
 * @throws Exception if profile serialization or the test run fails
 */
private void doTestStreamTableJoinRemoteTable(String testName, boolean batchRead, boolean batchWrite) throws Exception {
    final int recordCount = 16;
    final int maxBatchSize = 4;

    // Per-test bookkeeping shared with the in-memory table functions.
    final InMemoryWriteFunction writeFn = new InMemoryWriteFunction(testName);
    BATCH_READS.put(testName, new AtomicInteger());
    BATCH_WRITES.put(testName, new AtomicInteger());
    WRITTEN_RECORDS.put(testName, new HashMap<>());

    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(recordCount));
    final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
    final RateLimiter writeLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction oneCredit = (key, value, extra) -> 1;

    final StreamApplication application = descriptor -> {
        RemoteTableDescriptor<Integer, Profile, Void> profileTableDesc = new RemoteTableDescriptor<>("profile-table-1");
        profileTableDesc
            .withReadFunction(InMemoryReadFunction.getInMemoryReadFunction(testName, serializedProfiles))
            .withRateLimiter(readLimiter, oneCredit, null);
        if (batchRead) {
            profileTableDesc.withBatchProvider(
                new CompactBatchProvider().withMaxBatchSize(maxBatchSize).withMaxBatchDelay(Duration.ofHours(1)));
        }

        // dummy reader
        TableReadFunction placeholderReader = new MyReadFunction();
        RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView> enrichedTableDesc = new RemoteTableDescriptor<>("enriched-page-view-table-1");
        enrichedTableDesc
            .withReadFunction(placeholderReader)
            .withWriteFunction(writeFn)
            .withRateLimiter(writeLimiter, oneCredit, oneCredit);
        if (batchWrite) {
            enrichedTableDesc.withBatchProvider(
                new CompactBatchProvider().withMaxBatchSize(maxBatchSize).withMaxBatchDelay(Duration.ofHours(1)));
        }

        Table enrichedTable = descriptor.getTable(enrichedTableDesc);
        Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

        DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<PageView> pageViewInput = testSystem.getInputDescriptor("PageView", new NoOpSerde<>());
        descriptor.getInputStream(pageViewInput)
            .map(view -> new KV<>(view.getMemberId(), view))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(enriched -> new KV<>(enriched.getMemberId(), enriched))
            .sendTo(enrichedTable);
    };

    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewStream = inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(application)
        .addInputStream(pageViewStream, Arrays.asList(generatePageViewsWithDistinctKeys(recordCount)))
        .addConfig("task.max.concurrency", String.valueOf(recordCount))
        .addConfig("task.async.commit", String.valueOf(true))
        .run(Duration.ofSeconds(10));

    // Every generated page view must have been written, including the one keyed by 0.
    Assert.assertEquals(recordCount, WRITTEN_RECORDS.get(testName).size());
    Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
    if (batchWrite) {
        final int expectedBatchWrites = recordCount / maxBatchSize;
        Assert.assertEquals(expectedBatchWrites, BATCH_WRITES.get(testName).get());
    }
}
Also used : Arrays(java.util.Arrays) UpdateMessage(org.apache.samza.operators.UpdateMessage) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) ObjectInputStream(java.io.ObjectInputStream) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Base64Serializer(org.apache.samza.test.util.Base64Serializer) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestTableData(org.apache.samza.test.table.TestTableData) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) Collection(java.util.Collection) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) TestRunner(org.apache.samza.test.framework.TestRunner) Mockito(org.mockito.Mockito) Entry(org.apache.samza.storage.kv.Entry) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) CompleteBatchProvider(org.apache.samza.table.batching.CompleteBatchProvider) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) 
TableReadFunction(org.apache.samza.table.remote.TableReadFunction) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) RateLimiter(org.apache.samza.util.RateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider)

Example 13 with GenericInputDescriptor

Use of org.apache.samza.system.descriptors.GenericInputDescriptor in the Apache Samza project.

In class WatermarkIntegrationTest, method testWatermark:

/**
 * Runs a map / partitionBy / sink application over the "PageView" stream of the "test" system
 * and checks the input and output watermarks recorded on the PARTITION_BY and SINK operators
 * of the tasks named "Partition 0" and "Partition 1".
 *
 * @throws Exception if the application run or the operator-graph lookup fails
 */
@Test
public void testWatermark() throws Exception {
    Map<String, String> configs = new HashMap<>();
    configs.put(ApplicationConfig.APP_RUNNER_CLASS, MockLocalApplicationRunner.class.getName());
    configs.put("systems.test.samza.factory", TestSystemFactory.class.getName());
    configs.put("streams.PageView.samza.system", "test");
    configs.put("streams.PageView.partitionCount", String.valueOf(PARTITION_COUNT));
    configs.put(JobConfig.JOB_NAME, "test-watermark-job");
    configs.put(JobConfig.PROCESSOR_ID, "1");
    configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, PassthroughJobCoordinatorFactory.class.getName());
    configs.put(TaskConfig.GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName());
    configs.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
    configs.put("systems.kafka.producer.bootstrap.servers", bootstrapUrl());
    configs.put("systems.kafka.consumer.zookeeper.connect", zkConnect());
    configs.put("systems.kafka.samza.key.serde", "int");
    configs.put("systems.kafka.samza.msg.serde", "json");
    configs.put("systems.kafka.default.stream.replication.factor", "1");
    configs.put("job.default.system", "kafka");
    configs.put("serializers.registry.int.class", IntegerSerdeFactory.class.getName());
    configs.put("serializers.registry.string.class", StringSerdeFactory.class.getName());
    configs.put("serializers.registry.json.class", PageViewJsonSerdeFactory.class.getName());
    // NOTE(review): filled by the sink below but never asserted on in this test.
    List<PageView> received = new ArrayList<>();
    class TestStreamApp implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd = sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            appDescriptor.getInputStream(isd).map(KV::getValue).partitionBy(pv -> pv.getMemberId(), pv -> pv, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1").sink((m, collector, coordinator) -> {
                received.add(m.getValue());
            });
        }
    }
    Config config = new MapConfig(configs);
    final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new TestStreamApp(), config);
    executeRun(runner, config);
    // processors are only available when the app is running
    Map<String, StreamOperatorTask> tasks = getTaskOperationGraphs((MockLocalApplicationRunner) runner);
    // wait for the completion to ensure that all tasks are actually initialized and the OperatorImplGraph is initialized
    runner.waitForFinish();
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
    StreamOperatorTask task0 = tasks.get("Partition 0");
    OperatorImplGraph graph = TestStreamOperatorTask.getOperatorImplGraph(task0);
    OperatorImpl pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(4, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(4, TestOperatorImpl.getOutputWatermark(pb));
    OperatorImpl sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
    StreamOperatorTask task1 = tasks.get("Partition 1");
    graph = TestStreamOperatorTask.getOperatorImplGraph(task1);
    pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(3, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(pb));
    sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
}
Also used : StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) SamzaContainer(org.apache.samza.container.SamzaContainer) StreamProcessor(org.apache.samza.processor.StreamProcessor) TestStreamConsumer(org.apache.samza.test.util.TestStreamConsumer) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) SystemConsumer(org.apache.samza.system.SystemConsumer) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) TaskInstance(org.apache.samza.container.TaskInstance) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Set(java.util.Set) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) List(java.util.List) SystemProducer(org.apache.samza.system.SystemProducer) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) IntegrationTestHarness(org.apache.samza.test.harness.IntegrationTestHarness) SimpleSystemAdmin(org.apache.samza.test.util.SimpleSystemAdmin) JavaConverters(scala.collection.JavaConverters) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) 
StreamApplication(org.apache.samza.application.StreamApplication) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) ArrayList(java.util.ArrayList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) ApplicationConfig(org.apache.samza.config.ApplicationConfig) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) TaskConfig(org.apache.samza.config.TaskConfig) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) Partition(org.apache.samza.Partition) SystemFactory(org.apache.samza.system.SystemFactory) Test(org.junit.Test) SystemAdmin(org.apache.samza.system.SystemAdmin) TestStreamProcessorUtil(org.apache.samza.processor.TestStreamProcessorUtil) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) ArrayList(java.util.ArrayList) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) 
SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) MapConfig(org.apache.samza.config.MapConfig) PageView(org.apache.samza.test.controlmessages.TestData.PageView) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) StreamApplication(org.apache.samza.application.StreamApplication) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) KV(org.apache.samza.operators.KV) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Test(org.junit.Test)

Example 14 with GenericInputDescriptor

Use of org.apache.samza.system.descriptors.GenericInputDescriptor in the Apache Samza project.

In class TestRemoteTableEndToEnd, method testSendToUpdatesWithoutUpdateOptions:

// Expected to end in a SamzaException: a stream of KV<K, UpdateMessage> is handed to
// sendTo without any UpdateOptions.
@Test(expected = SamzaException.class)
public void testSendToUpdatesWithoutUpdateOptions() throws Exception {
    final int pageViewCount = 40;
    // max member id for page views is 10
    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(10));
    final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction oneCredit = (key, value, extra) -> 1;

    final StreamApplication application = descriptor -> {
        // Read-only profile table used as the join side.
        final RemoteTableDescriptor profileTableDesc =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
                .withRateLimiter(readLimiter, oneCredit, null);
        // Write target for the enriched page views.
        final RemoteTableDescriptor enrichedTableDesc =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2("testUpdateWithoutUpdateOptions", false))
                .withWriteRateLimit(1000);

        final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
        final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

        final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput = testSystem.getInputDescriptor("PageView", new NoOpSerde<>());
        descriptor.getInputStream(pageViewInput)
            .map(view -> new KV<>(view.getMemberId(), view))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
            .sendTo(enrichedTable);
    };

    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewStream = inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(application)
        .addInputStream(pageViewStream, TestTableData.generatePartitionedPageViews(pageViewCount, 4))
        .run(Duration.ofSeconds(10));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test)

Example 15 with GenericInputDescriptor

Use of org.apache.samza.system.descriptors.GenericInputDescriptor in the Apache Samza project.

In class TestRemoteTableEndToEnd, method testSendToUpdatesFailureAfterPutDefault:

// The run is expected to abort with:
// org.apache.samza.SamzaException: Update after Put default failed with exception.
@Test(expected = SamzaException.class)
public void testSendToUpdatesFailureAfterPutDefault() throws Exception {
    // the test checks for failure when update after put default fails
    final String testName = "testSendToUpdatesFailureAfterPutDefault";
    final int pageViewCount = 15;
    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
    final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction oneCredit = (key, value, extra) -> 1;

    final StreamApplication application = descriptor -> {
        // Read-only profile table used as the join side.
        final RemoteTableDescriptor profileTableDesc =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
                .withRateLimiter(readLimiter, oneCredit, null);
        // Write target for the enriched page views.
        final RemoteTableDescriptor enrichedTableDesc =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false, true))
                .withWriteRateLimit(1000);

        final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
        final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

        final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput = testSystem.getInputDescriptor("PageView", new NoOpSerde<>());
        descriptor.getInputStream(pageViewInput)
            .map(view -> new KV<>(view.getMemberId(), view))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
            .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
    };

    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewStream = inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    // All page views are generated into a single partition.
    Map<Integer, List<PageView>> partitionedPageViews = TestTableData.generatePartitionedPageViews(pageViewCount, 1);
    TestRunner.of(application)
        .addInputStream(pageViewStream, partitionedPageViews)
        .run(Duration.ofSeconds(10));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) List(java.util.List) ArrayList(java.util.ArrayList) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test)

Aggregations

GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor): 31; Test (org.junit.Test): 29; NoOpSerde (org.apache.samza.serializers.NoOpSerde): 23; KV (org.apache.samza.operators.KV): 20; KVSerde (org.apache.samza.serializers.KVSerde): 20; GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor): 20; HashMap (java.util.HashMap): 19; Duration (java.time.Duration): 18; Map (java.util.Map): 17; MapConfig (org.apache.samza.config.MapConfig): 17; Serde (org.apache.samza.serializers.Serde): 17; List (java.util.List): 16; IntegerSerde (org.apache.samza.serializers.IntegerSerde): 16; ArrayList (java.util.ArrayList): 14; Function (java.util.function.Function): 12; StreamApplication (org.apache.samza.application.StreamApplication): 12; JobConfig (org.apache.samza.config.JobConfig): 12; Collectors (java.util.stream.Collectors): 11; Config (org.apache.samza.config.Config): 11; StringSerde (org.apache.samza.serializers.StringSerde): 11