Search in sources :

Example 1 with OperatorImpl

Use of org.apache.samza.operators.impl.OperatorImpl in the Apache Samza project.

From the class WatermarkIntegrationTest, method testWatermark.

/**
 * Runs a stream application that reads the "PageView" input of the "test" system,
 * repartitions it by member id through the "p1" stage and sinks the result, then
 * verifies the input and output watermarks recorded by the PARTITION_BY and SINK
 * operators of the tasks named "Partition 0" and "Partition 1".
 *
 * @throws Exception if building, running or inspecting the application fails
 */
@Test
public void testWatermark() throws Exception {
    Map<String, String> configs = new HashMap<>();
    configs.put(ApplicationConfig.APP_RUNNER_CLASS, MockLocalApplicationRunner.class.getName());
    configs.put("systems.test.samza.factory", TestSystemFactory.class.getName());
    configs.put("streams.PageView.samza.system", "test");
    configs.put("streams.PageView.partitionCount", String.valueOf(PARTITION_COUNT));
    configs.put(JobConfig.JOB_NAME, "test-watermark-job");
    configs.put(JobConfig.PROCESSOR_ID, "1");
    configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, PassthroughJobCoordinatorFactory.class.getName());
    configs.put(TaskConfig.GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName());
    configs.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
    configs.put("systems.kafka.producer.bootstrap.servers", bootstrapUrl());
    configs.put("systems.kafka.consumer.zookeeper.connect", zkConnect());
    configs.put("systems.kafka.samza.key.serde", "int");
    configs.put("systems.kafka.samza.msg.serde", "json");
    configs.put("systems.kafka.default.stream.replication.factor", "1");
    configs.put("job.default.system", "kafka");
    configs.put("serializers.registry.int.class", IntegerSerdeFactory.class.getName());
    configs.put("serializers.registry.string.class", StringSerdeFactory.class.getName());
    configs.put("serializers.registry.json.class", PageViewJsonSerdeFactory.class.getName());
    // Collects every page view that reaches the sink; this test never asserts on it.
    // NOTE(review): plain ArrayList written from the sink callback - confirm the sink
    // is not invoked concurrently by several task threads.
    List<PageView> received = new ArrayList<>();
    class TestStreamApp implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd = sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            // input -> drop the key -> repartition by member id ("p1") -> sink
            appDescriptor.getInputStream(isd).map(KV::getValue).partitionBy(pv -> pv.getMemberId(), pv -> pv, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1").sink((m, collector, coordinator) -> {
                received.add(m.getValue());
            });
        }
    }
    Config config = new MapConfig(configs);
    final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new TestStreamApp(), config);
    executeRun(runner, config);
    // processors are only available when the app is running
    Map<String, StreamOperatorTask> tasks = getTaskOperationGraphs((MockLocalApplicationRunner) runner);
    // wait for the completion to ensure that all tasks are actually initialized and the OperatorImplGraph is initialized
    runner.waitForFinish();

    // NOTE(review): the expected watermark values below presumably follow from the
    // data produced by TestSystemFactory - confirm against that factory.
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    StreamOperatorTask task0 = tasks.get("Partition 0");
    OperatorImplGraph graph = TestStreamOperatorTask.getOperatorImplGraph(task0);
    OperatorImpl pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(4, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(4, TestOperatorImpl.getOutputWatermark(pb));
    OperatorImpl sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));

    StreamOperatorTask task1 = tasks.get("Partition 1");
    graph = TestStreamOperatorTask.getOperatorImplGraph(task1);
    pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(3, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(pb));
    sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
}
Also used : StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) SamzaContainer(org.apache.samza.container.SamzaContainer) StreamProcessor(org.apache.samza.processor.StreamProcessor) TestStreamConsumer(org.apache.samza.test.util.TestStreamConsumer) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) SystemConsumer(org.apache.samza.system.SystemConsumer) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) TaskInstance(org.apache.samza.container.TaskInstance) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Set(java.util.Set) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) List(java.util.List) SystemProducer(org.apache.samza.system.SystemProducer) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) IntegrationTestHarness(org.apache.samza.test.harness.IntegrationTestHarness) SimpleSystemAdmin(org.apache.samza.test.util.SimpleSystemAdmin) JavaConverters(scala.collection.JavaConverters) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) 
StreamApplication(org.apache.samza.application.StreamApplication) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) ArrayList(java.util.ArrayList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) ApplicationConfig(org.apache.samza.config.ApplicationConfig) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) TaskConfig(org.apache.samza.config.TaskConfig) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) Partition(org.apache.samza.Partition) SystemFactory(org.apache.samza.system.SystemFactory) Test(org.junit.Test) SystemAdmin(org.apache.samza.system.SystemAdmin) TestStreamProcessorUtil(org.apache.samza.processor.TestStreamProcessorUtil) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) ArrayList(java.util.ArrayList) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) 
SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) MapConfig(org.apache.samza.config.MapConfig) PageView(org.apache.samza.test.controlmessages.TestData.PageView) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) StreamApplication(org.apache.samza.application.StreamApplication) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) KV(org.apache.samza.operators.KV) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Partition (org.apache.samza.Partition)1 SamzaApplication (org.apache.samza.application.SamzaApplication)1 StreamApplication (org.apache.samza.application.StreamApplication)1 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)1 ApplicationConfig (org.apache.samza.config.ApplicationConfig)1 Config (org.apache.samza.config.Config)1 JobConfig (org.apache.samza.config.JobConfig)1 JobCoordinatorConfig (org.apache.samza.config.JobCoordinatorConfig)1 MapConfig (org.apache.samza.config.MapConfig)1 TaskConfig (org.apache.samza.config.TaskConfig)1 SamzaContainer (org.apache.samza.container.SamzaContainer)1 TaskInstance (org.apache.samza.container.TaskInstance)1 TaskName (org.apache.samza.container.TaskName)1 SingleContainerGrouperFactory (org.apache.samza.container.grouper.task.SingleContainerGrouperFactory)1 MetricsRegistry (org.apache.samza.metrics.MetricsRegistry)1