Search in sources :

Example 1 with OperatorImplGraph

use of org.apache.samza.operators.impl.OperatorImplGraph in project samza by apache.

the class StreamOperatorTask method init.

/**
 * Sets up this task at startup.
 * <p>
 * A fresh {@link StreamGraphImpl} is created and handed to the user-implemented {@link StreamApplication} for
 * initialization. If the stream graph then exposes a context manager, that manager is initialized with the
 * supplied config and task context.
 * <p>
 * Afterwards an {@link OperatorImplGraph} is created and initialized from the stream graph, and every input
 * stream of the graph is registered under the {@link SystemStream} built from its stream spec's system name
 * and physical name.
 *
 * @param config the configuration passed to the stream graph, the application, the context manager and the
 *               operator impl graph
 * @param context the task context passed to the context manager and the operator impl graph
 * @throws Exception in case of initialization errors
 */
@Override
public final void init(Config config, TaskContext context) throws Exception {
    // let the user-implemented stream application populate a fresh stream graph
    StreamGraphImpl graph = new StreamGraphImpl(runner, config);
    this.streamApplication.init(graph, config);

    // the context manager is optional: initialize it only if the stream graph provides one
    this.contextManager = graph.getContextManager();
    if (this.contextManager != null) {
        this.contextManager.init(config, context);
    }

    // build the operator impl DAG for the logical operator spec DAG; the field is assigned
    // only after init() has returned normally
    OperatorImplGraph implGraph = new OperatorImplGraph(clock);
    implGraph.init(graph, config, context);
    this.operatorImplGraph = implGraph;

    // TODO: SAMZA-1118 - Remove mapping after SystemConsumer starts returning logical streamId with incoming messages
    inputSystemStreamToInputStream = new HashMap<>();
    graph.getInputStreams().forEach((spec, input) ->
        inputSystemStreamToInputStream.put(
            new SystemStream(spec.getSystemName(), spec.getPhysicalName()), input));
}
Also used : OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) SystemStream(org.apache.samza.system.SystemStream) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl)

Example 2 with OperatorImplGraph

use of org.apache.samza.operators.impl.OperatorImplGraph in project samza by apache.

the class WatermarkIntegrationTest method testWatermark.

/**
 * Runs a small streaming application and checks the watermarks recorded by its operators.
 * <p>
 * The application reads the "PageView" input of the "test" system, maps each message to its value,
 * repartitions by member id and sinks the values into a local list. Once the runner has finished, the
 * input and output watermarks of the PARTITION_BY and SINK operators are verified for the tasks
 * "Partition 0" and "Partition 1".
 *
 * @throws Exception if running the application or inspecting the tasks fails
 */
@Test
public void testWatermark() throws Exception {
    Map<String, String> configs = new HashMap<>();
    configs.put(ApplicationConfig.APP_RUNNER_CLASS, MockLocalApplicationRunner.class.getName());
    configs.put("systems.test.samza.factory", TestSystemFactory.class.getName());
    configs.put("streams.PageView.samza.system", "test");
    configs.put("streams.PageView.partitionCount", String.valueOf(PARTITION_COUNT));
    configs.put(JobConfig.JOB_NAME, "test-watermark-job");
    configs.put(JobConfig.PROCESSOR_ID, "1");
    configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, PassthroughJobCoordinatorFactory.class.getName());
    configs.put(TaskConfig.GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName());
    configs.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
    configs.put("systems.kafka.producer.bootstrap.servers", bootstrapUrl());
    configs.put("systems.kafka.consumer.zookeeper.connect", zkConnect());
    configs.put("systems.kafka.samza.key.serde", "int");
    configs.put("systems.kafka.samza.msg.serde", "json");
    configs.put("systems.kafka.default.stream.replication.factor", "1");
    configs.put("job.default.system", "kafka");
    configs.put("serializers.registry.int.class", IntegerSerdeFactory.class.getName());
    configs.put("serializers.registry.string.class", StringSerdeFactory.class.getName());
    configs.put("serializers.registry.json.class", PageViewJsonSerdeFactory.class.getName());
    // filled by the sink below; this test does not assert on its contents
    List<PageView> received = new ArrayList<>();
    class TestStreamApp implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd = sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            appDescriptor.getInputStream(isd).map(KV::getValue).partitionBy(pv -> pv.getMemberId(), pv -> pv, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1").sink((m, collector, coordinator) -> {
                received.add(m.getValue());
            });
        }
    }
    Config config = new MapConfig(configs);
    final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new TestStreamApp(), config);
    executeRun(runner, config);
    // processors are only available when the app is running
    Map<String, StreamOperatorTask> tasks = getTaskOperationGraphs((MockLocalApplicationRunner) runner);
    // wait for the completion to ensure that all tasks are actually initialized and the OperatorImplGraph is initialized
    runner.waitForFinish();

    // assertEquals takes (expected, actual); keeping that order makes failure messages report the values correctly
    StreamOperatorTask task0 = tasks.get("Partition 0");
    OperatorImplGraph graph = TestStreamOperatorTask.getOperatorImplGraph(task0);
    OperatorImpl pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(4, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(4, TestOperatorImpl.getOutputWatermark(pb));
    OperatorImpl sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));

    StreamOperatorTask task1 = tasks.get("Partition 1");
    graph = TestStreamOperatorTask.getOperatorImplGraph(task1);
    pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
    assertEquals(3, TestOperatorImpl.getInputWatermark(pb));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(pb));
    sink = getOperator(graph, OperatorSpec.OpCode.SINK);
    assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
    assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
}
Also used : StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) SamzaContainer(org.apache.samza.container.SamzaContainer) StreamProcessor(org.apache.samza.processor.StreamProcessor) TestStreamConsumer(org.apache.samza.test.util.TestStreamConsumer) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) SystemConsumer(org.apache.samza.system.SystemConsumer) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) TaskInstance(org.apache.samza.container.TaskInstance) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Set(java.util.Set) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) List(java.util.List) SystemProducer(org.apache.samza.system.SystemProducer) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) IntegrationTestHarness(org.apache.samza.test.harness.IntegrationTestHarness) SimpleSystemAdmin(org.apache.samza.test.util.SimpleSystemAdmin) JavaConverters(scala.collection.JavaConverters) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) 
StreamApplication(org.apache.samza.application.StreamApplication) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) ArrayList(java.util.ArrayList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) ApplicationConfig(org.apache.samza.config.ApplicationConfig) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) TaskConfig(org.apache.samza.config.TaskConfig) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) Partition(org.apache.samza.Partition) SystemFactory(org.apache.samza.system.SystemFactory) Test(org.junit.Test) SystemAdmin(org.apache.samza.system.SystemAdmin) TestStreamProcessorUtil(org.apache.samza.processor.TestStreamProcessorUtil) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) ArrayList(java.util.ArrayList) IntegerSerdeFactory(org.apache.samza.serializers.IntegerSerdeFactory) OperatorImpl(org.apache.samza.operators.impl.OperatorImpl) InputOperatorImpl(org.apache.samza.operators.impl.InputOperatorImpl) TestOperatorImpl(org.apache.samza.operators.impl.TestOperatorImpl) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) 
SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) MapConfig(org.apache.samza.config.MapConfig) PageView(org.apache.samza.test.controlmessages.TestData.PageView) PageViewJsonSerdeFactory(org.apache.samza.test.controlmessages.TestData.PageViewJsonSerdeFactory) StreamApplication(org.apache.samza.application.StreamApplication) TestStreamOperatorTask(org.apache.samza.task.TestStreamOperatorTask) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) KV(org.apache.samza.operators.KV) OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Test(org.junit.Test)

Example 3 with OperatorImplGraph

use of org.apache.samza.operators.impl.OperatorImplGraph in project samza by apache.

the class TestStreamOperatorTask method testExceptionIfInputOperatorMissing.

/**
 * Verifies that a {@code SamzaException} with a descriptive message is reported through the
 * {@code TaskCallback} when the operator graph returns no input operator for the envelope's SystemStream.
 * <p>
 * The operator graph is a mock whose {@code getInputOperator} always returns {@code null}; the test then
 * checks that {@code failure} is the only interaction with the callback and inspects the captured throwable.
 */
@Test
public void testExceptionIfInputOperatorMissing() throws NoSuchFieldException, IllegalAccessException {
    // deep stubs allow stubbing the chained getSystemStreamPartition().getSystemStream() call
    IncomingMessageEnvelope mockIme = mock(IncomingMessageEnvelope.class, RETURNS_DEEP_STUBS);
    SystemStream testSystemStream = new SystemStream("foo", "bar");
    when(mockIme.getSystemStreamPartition().getSystemStream()).thenReturn(testSystemStream);
    // simulate a graph that has no input operator for any stream
    OperatorImplGraph mockOperatorImplGraph = mock(OperatorImplGraph.class);
    when(mockOperatorImplGraph.getInputOperator(anyObject())).thenReturn(null);
    StreamOperatorTask operatorTask = new StreamOperatorTask(mock(OperatorSpecGraph.class));
    operatorTask.setOperatorImplGraph(mockOperatorImplGraph);
    TaskCallback mockTaskCallback = mock(TaskCallback.class);
    operatorTask.processAsync(mockIme, mock(MessageCollector.class), mock(TaskCoordinator.class), mockTaskCallback);
    // failure(...) must be the only interaction with the callback
    ArgumentCaptor<Throwable> throwableCaptor = ArgumentCaptor.forClass(Throwable.class);
    verify(mockTaskCallback, only()).failure(throwableCaptor.capture());
    // assertEquals takes (expected, actual); keeping that order makes failure messages report the values correctly
    assertEquals(SamzaException.class, throwableCaptor.getValue().getClass());
    String expectedErrMessage = String.format("InputOperator not found in OperatorGraph for %s. The available input" + " operators are: %s. Please check SystemStream configuration for the `SystemConsumer` and/or task.inputs" + " task configuration.", testSystemStream, mockOperatorImplGraph.getAllInputOperators());
    assertEquals(expectedErrMessage, throwableCaptor.getValue().getMessage());
}
Also used : OperatorImplGraph(org.apache.samza.operators.impl.OperatorImplGraph) OperatorSpecGraph(org.apache.samza.operators.OperatorSpecGraph) SystemStream(org.apache.samza.system.SystemStream) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Test(org.junit.Test)

Aggregations

OperatorImplGraph (org.apache.samza.operators.impl.OperatorImplGraph)3 IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Partition (org.apache.samza.Partition)1 SamzaApplication (org.apache.samza.application.SamzaApplication)1 StreamApplication (org.apache.samza.application.StreamApplication)1 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)1 ApplicationConfig (org.apache.samza.config.ApplicationConfig)1 Config (org.apache.samza.config.Config)1 JobConfig (org.apache.samza.config.JobConfig)1 JobCoordinatorConfig (org.apache.samza.config.JobCoordinatorConfig)1 MapConfig (org.apache.samza.config.MapConfig)1 TaskConfig (org.apache.samza.config.TaskConfig)1 SamzaContainer (org.apache.samza.container.SamzaContainer)1 TaskInstance (org.apache.samza.container.TaskInstance)1 TaskName (org.apache.samza.container.TaskName)1