Search in sources :

Example 1 with PageView

Use of org.apache.samza.test.controlmessages.TestData.PageView in project samza by apache.

In class WatermarkIntegrationTest, method testWatermark.

/**
 * Runs a small stream application (input "PageView" stream, re-partitioned by member id,
 * then a sink that collects the values) and verifies the input and output watermarks recorded
 * on the PARTITION_BY and SINK operators of the tasks named "Partition 0" and "Partition 1".
 *
 * @throws Exception if setting up, running or inspecting the application fails
 */
@Test
public void testWatermark() throws Exception {
    Map<String, String> configs = new HashMap<>();
    configs.put(ApplicationConfig.APP_RUNNER_CLASS, MockLocalApplicationRunner.class.getName());
    configs.put("systems.test.samza.factory", TestSystemFactory.class.getName());
    configs.put("streams.PageView.samza.system", "test");
    configs.put("streams.PageView.partitionCount", String.valueOf(PARTITION_COUNT));
    configs.put(JobConfig.JOB_NAME, "test-watermark-job");
    configs.put(JobConfig.PROCESSOR_ID, "1");
    configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, PassthroughJobCoordinatorFactory.class.getName());
    configs.put(TaskConfig.GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName());
    configs.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
    configs.put("systems.kafka.producer.bootstrap.servers", bootstrapUrl());
    configs.put("systems.kafka.consumer.zookeeper.connect", zkConnect());
    configs.put("systems.kafka.samza.key.serde", "int");
    configs.put("systems.kafka.samza.msg.serde", "json");
    configs.put("systems.kafka.default.stream.replication.factor", "1");
    configs.put("job.default.system", "kafka");
    configs.put("serializers.registry.int.class", IntegerSerdeFactory.class.getName());
    configs.put("serializers.registry.string.class", StringSerdeFactory.class.getName());
    configs.put("serializers.registry.json.class", PageViewJsonSerdeFactory.class.getName());
    // Collected by the sink below; this test does not assert on its contents.
    List<PageView> received = new ArrayList<>();
    class TestStreamApp implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd = sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            appDescriptor.getInputStream(isd).map(KV::getValue).partitionBy(pv -> pv.getMemberId(), pv -> pv, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1").sink((m, collector, coordinator) -> {
                received.add(m.getValue());
            });
        }
    }
    Config config = new MapConfig(configs);
    final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new TestStreamApp(), config);
    executeRun(runner, config);
    // processors are only available when the app is running
    Map<String, StreamOperatorTask> tasks = getTaskOperationGraphs((MockLocalApplicationRunner) runner);
    // wait for the completion to ensure that all tasks are actually initialized and the OperatorImplGraph is initialized
    runner.waitForFinish();

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    StreamOperatorTask task0 = tasks.get("Partition 0");
    OperatorImplGraph graph = TestStreamOperatorTask.getOperatorImplGraph(task0);
    OperatorImpl pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(4, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(4, TestOperatorImpl.getOutputWatermark(pb));
    OperatorImpl sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));

    StreamOperatorTask task1 = tasks.get("Partition 1");
    graph = TestStreamOperatorTask.getOperatorImplGraph(task1);
    pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(3, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(pb));
    sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
}
Also used : StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) SamzaContainer(org.apache.samza.container.SamzaContainer) StreamProcessor(org.apache.samza.processor.StreamProcessor) TestStreamConsumer(org.apache.samza.test.util.TestStreamConsumer) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) SystemConsumer(org.apache.samza.system.SystemConsumer) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) TaskInstance(org.apache.samza.container.TaskInstance) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Set(java.util.Set) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) List(java.util.List) SystemProducer(org.apache.samza.system.SystemProducer) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) IntegrationTestHarness(org.apache.samza.test.harness.IntegrationTestHarness) SimpleSystemAdmin(org.apache.samza.test.util.SimpleSystemAdmin) JavaConverters(scala.collection.JavaConverters) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) 
StreamApplication(org.apache.samza.application.StreamApplication) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) ArrayList(java.util.ArrayList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) ApplicationConfig(org.apache.samza.config.ApplicationConfig) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) TaskConfig(org.apache.samza.config.TaskConfig) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) Partition(org.apache.samza.Partition) SystemFactory(org.apache.samza.system.SystemFactory) Test(org.junit.Test) SystemAdmin(org.apache.samza.system.SystemAdmin) TestStreamProcessorUtil(org.apache.samza.processor.TestStreamProcessorUtil) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) ArrayList(java.util.ArrayList) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) 
SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) MapConfig(org.apache.samza.config.MapConfig) PageView(org.apache.samza.test.controlmessages.TestData.PageView) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) StreamApplication(org.apache.samza.application.StreamApplication) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) KV(org.apache.samza.operators.KV) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Test(org.junit.Test)

Example 2 with PageView

Use of org.apache.samza.test.controlmessages.TestData.PageView in project samza by apache.

In class StreamApplicationIntegrationTest, method testSamzaJobFailureForStreamApplication.

/**
 * Feeds the filter application page views whose page key is {@code null}; the filter logic is
 * expected to fail on them, so the run must end with a {@link SamzaException}.
 */
@Test(expected = SamzaException.class)
public void testSamzaJobFailureForStreamApplication() {
    final int messageCount = 10;

    InMemorySystemDescriptor system = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> input = system.getInputDescriptor("PageView", new NoOpSerde<PageView>());
    InMemoryOutputDescriptor<PageView> output = system.getOutputDescriptor("Output", new NoOpSerde<PageView>());

    // Every message carries a null page key; only the member id varies.
    List<PageView> nullKeyPageViews = new ArrayList<>();
    int memberId = 0;
    while (memberId < messageCount) {
        nullKeyPageViews.add(new PageView(null, memberId));
        memberId++;
    }

    TestRunner
        .of(new PageViewFilterApplication())
        .addInputStream(input, nullKeyPageViews)
        .addOutputStream(output, 10)
        .run(Duration.ofMillis(1000));
}
Also used : PageView(org.apache.samza.test.controlmessages.TestData.PageView) TestData(org.apache.samza.test.controlmessages.TestData) ArrayList(java.util.ArrayList) PageView(org.apache.samza.test.controlmessages.TestData.PageView) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Example 3 with PageView

Use of org.apache.samza.test.controlmessages.TestData.PageView in project samza by apache.

In class StreamApplicationIntegrationTest, method testHighLevelApi.

/**
 * Runs {@code PageViewRepartitionApplication} over ten in-memory page views, each with a
 * randomly chosen page key and a distinct member id, and checks that a randomly selected entry
 * of the consumed output stream holds exactly one message.
 *
 * @throws Exception if running the application or consuming its output fails
 */
@Test
public void testHighLevelApi() throws Exception {
    Random random = new Random();
    int count = 10;
    List<PageView> pageViews = new ArrayList<>();
    for (int memberId = 0; memberId < count; memberId++) {
        // Random.nextInt(bound) excludes the bound itself, so PAGEKEYS.length (not length - 1)
        // is what makes every page key, including the last one, selectable.
        String pagekey = PAGEKEYS[random.nextInt(PAGEKEYS.length)];
        PageView pv = new PageView(pagekey, memberId);
        pageViews.add(pv);
    }
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> imid = isd.getInputDescriptor("PageView", new NoOpSerde<PageView>());
    InMemoryOutputDescriptor<PageView> imod = isd.getOutputDescriptor("Output", new NoOpSerde<PageView>());
    TestRunner.of(new PageViewRepartitionApplication()).addInputStream(imid, pageViews).addOutputStream(imod, 10).run(Duration.ofMillis(1500));
    // Expected value goes first so a failure reports "expected 1 but was N" correctly.
    // NOTE(review): the index passed to get(...) presumably selects an output partition — confirm.
    Assert.assertEquals(1, TestRunner.consumeStream(imod, Duration.ofMillis(1000)).get(random.nextInt(count)).size());
}
Also used : PageView(org.apache.samza.test.controlmessages.TestData.PageView) Random(java.util.Random) ArrayList(java.util.ArrayList) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Example 4 with PageView

Use of org.apache.samza.test.controlmessages.TestData.PageView in project samza by apache.

In class StreamApplicationIntegrationTest, method testStatefulJoinWithLocalTable.

/**
 * Runs {@code PageViewProfileViewJoinApplication} with ten generated page views and ten
 * generated profiles (the profile stream is marked as bootstrap), then expects entry 0 of both
 * consumed output streams to contain ten records.
 */
@Test
public void testStatefulJoinWithLocalTable() {
    InMemorySystemDescriptor system = new InMemorySystemDescriptor("test");

    // Input and output stream descriptors, all on the in-memory "test" system.
    InMemoryInputDescriptor<KV<String, TestTableData.PageView>> pageViewInput =
        system.getInputDescriptor("PageView", new NoOpSerde<KV<String, TestTableData.PageView>>());
    InMemoryInputDescriptor<KV<String, TestTableData.Profile>> profileInput =
        system.getInputDescriptor("Profile", new NoOpSerde<KV<String, TestTableData.Profile>>()).shouldBootstrap();
    InMemoryOutputDescriptor<TestTableData.EnrichedPageView> enrichedOutput =
        system.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());
    InMemoryOutputDescriptor<String> joinKeysOutput =
        system.getOutputDescriptor("JoinPageKeys", new NoOpSerde<>());

    // Each generated record is keyed with a randomly picked page key.
    Random random = new Random();
    List<KV<String, TestTableData.PageView>> keyedPageViews = Arrays.stream(TestTableData.generatePageViews(10))
        .map(pageView -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], pageView))
        .collect(Collectors.toList());
    List<KV<String, TestTableData.Profile>> keyedProfiles = Arrays.stream(TestTableData.generateProfiles(10))
        .map(profile -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], profile))
        .collect(Collectors.toList());

    TestRunner
        .of(new PageViewProfileViewJoinApplication())
        .addInputStream(pageViewInput, keyedPageViews)
        .addInputStream(profileInput, keyedProfiles)
        .addOutputStream(enrichedOutput, 1)
        .addOutputStream(joinKeysOutput, 1)
        .run(Duration.ofSeconds(2));

    Assert.assertEquals(10, TestRunner.consumeStream(enrichedOutput, Duration.ofSeconds(1)).get(0).size());
    Assert.assertEquals(10, TestRunner.consumeStream(joinKeysOutput, Duration.ofSeconds(1)).get(0).size());
}
Also used : RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) TestTableData(org.apache.samza.test.table.TestTableData) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) Logger(org.slf4j.Logger) TestData(org.apache.samza.test.controlmessages.TestData) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PageViewToProfileJoinFunction(org.apache.samza.test.table.PageViewToProfileJoinFunction) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) OutputStream(org.apache.samza.operators.OutputStream) 
KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Random(java.util.Random) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)4 PageView (org.apache.samza.test.controlmessages.TestData.PageView)4 Test (org.junit.Test)4 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)3 List (java.util.List)2 Random (java.util.Random)2 StreamApplication (org.apache.samza.application.StreamApplication)2 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)2 KV (org.apache.samza.operators.KV)2 KVSerde (org.apache.samza.serializers.KVSerde)2 NoOpSerde (org.apache.samza.serializers.NoOpSerde)2 Duration (java.time.Duration)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 Partition (org.apache.samza.Partition)1 SamzaException (org.apache.samza.SamzaException)1 SamzaApplication (org.apache.samza.application.SamzaApplication)1