Search in sources :

Example 1 with MessageStream

use of org.apache.samza.operators.MessageStream in project samza by apache.

the class ExecutionPlanner method findReachableJoins.

/**
 * Recursively walks the operator specs registered on a stream and records every join that is
 * reachable from the given source {@link StreamEdge}. All collection arguments are updated in place.
 *
 * @param inputMessageStream the {@link MessageStream} whose registered operator specs are examined next;
 *                           it is cast to {@link MessageStreamImpl}
 * @param sourceStreamEdge the source {@link StreamEdge} this traversal started from
 * @param joinSpecToStreamEdges multimap from a join spec to the source {@link StreamEdge}s that reach it
 * @param streamEdgeToJoinSpecs multimap from a source {@link StreamEdge} to the join specs it reaches
 * @param outputStreamToJoinSpec map from a join's output stream to the spec chosen to represent that join
 * @param joinQ queue that receives each join spec the first time it is reached from an edge whose
 *              partition count is greater than zero
 * @param visited join specs that have already been added to {@code joinQ}
 */
private static void findReachableJoins(MessageStream inputMessageStream, StreamEdge sourceStreamEdge, Multimap<OperatorSpec, StreamEdge> joinSpecToStreamEdges, Multimap<StreamEdge, OperatorSpec> streamEdgeToJoinSpecs, Map<MessageStream, OperatorSpec> outputStreamToJoinSpec, Queue<OperatorSpec> joinQ, Set<OperatorSpec> visited) {
    Collection<OperatorSpec> registeredSpecs = ((MessageStreamImpl) inputMessageStream).getRegisteredOperatorSpecs();
    for (OperatorSpec current : registeredSpecs) {
        MessageStream downstream = current.getNextStream();
        if (current instanceof PartialJoinOperatorSpec) {
            // A join is made of two partial join operators. The one that is seen first for a
            // given output stream becomes the representative spec for the whole join, so that
            // the inputs of both sides end up recorded under a single key.
            OperatorSpec alreadyRegistered = outputStreamToJoinSpec.get(downstream);
            final OperatorSpec joinSpec;
            if (alreadyRegistered != null) {
                joinSpec = alreadyRegistered;
            } else {
                joinSpec = current;
                outputStreamToJoinSpec.put(downstream, current);
            }

            // Record the relationship in both directions.
            streamEdgeToJoinSpecs.put(sourceStreamEdge, joinSpec);
            joinSpecToStreamEdges.put(joinSpec, sourceStreamEdge);

            // Only joins with a known (positive) input partition count are queued, and only once.
            boolean alreadyQueued = visited.contains(joinSpec);
            if (!alreadyQueued && sourceStreamEdge.getPartitionCount() > 0) {
                joinQ.add(joinSpec);
                visited.add(joinSpec);
            }
        }
        // Keep descending while the operator has a downstream stream.
        if (downstream != null) {
            findReachableJoins(downstream, sourceStreamEdge, joinSpecToStreamEdges, streamEdgeToJoinSpecs, outputStreamToJoinSpec, joinQ, visited);
        }
    }
}
Also used : OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec) MessageStreamImpl(org.apache.samza.operators.MessageStreamImpl) MessageStream(org.apache.samza.operators.MessageStream) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec)

Example 2 with MessageStream

use of org.apache.samza.operators.MessageStream in project samza by apache.

the class TestJobGraphJsonGenerator method test.

/**
 * Builds a stream graph with three inputs, two joins and two outputs, plans it with
 * {@link ExecutionPlanner}, serializes the plan to JSON and checks the element counts of the
 * deserialized {@code JobGraphJson}.
 *
 * @throws Exception declared by the test; nothing in the body catches exceptions
 */
@Test
public void test() throws Exception {
    /**
     * the graph looks like the following. number of partitions in parentheses. quotes indicate expected value.
     *
     *                               input1 (64) -> map -> join -> output1 (8)
     *                                                       |
     *          input2 (16) -> partitionBy ("64") -> filter -|
     *                                                       |
     * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
     *
     */
    // job-level config: only the job name and the default system are set
    Map<String, String> configMap = new HashMap<>();
    configMap.put(JobConfig.JOB_NAME(), "test-app");
    configMap.put(JobConfig.JOB_DEFAULT_SYSTEM(), "test-system");
    Config config = new MapConfig(configMap);
    // input1/output1 live on "system1"; input2, input3 and output2 live on "system2"
    StreamSpec input1 = new StreamSpec("input1", "input1", "system1");
    StreamSpec input2 = new StreamSpec("input2", "input2", "system2");
    StreamSpec input3 = new StreamSpec("input3", "input3", "system2");
    StreamSpec output1 = new StreamSpec("output1", "output1", "system1");
    StreamSpec output2 = new StreamSpec("output2", "output2", "system2");
    // the runner is a mock that resolves each stream id to the spec created above
    ApplicationRunner runner = mock(ApplicationRunner.class);
    when(runner.getStreamSpec("input1")).thenReturn(input1);
    when(runner.getStreamSpec("input2")).thenReturn(input2);
    when(runner.getStreamSpec("input3")).thenReturn(input3);
    when(runner.getStreamSpec("output1")).thenReturn(output1);
    when(runner.getStreamSpec("output2")).thenReturn(output2);
    // intermediate streams used in tests
    // NOTE(review): the numeric suffixes presumably have to match the ids the graph generates for
    // its partitionBy operators, which would make them sensitive to operator creation order - confirm
    when(runner.getStreamSpec("test-app-1-partition_by-0")).thenReturn(new StreamSpec("test-app-1-partition_by-0", "test-app-1-partition_by-0", "default-system"));
    when(runner.getStreamSpec("test-app-1-partition_by-1")).thenReturn(new StreamSpec("test-app-1-partition_by-1", "test-app-1-partition_by-1", "default-system"));
    when(runner.getStreamSpec("test-app-1-partition_by-4")).thenReturn(new StreamSpec("test-app-1-partition_by-4", "test-app-1-partition_by-4", "default-system"));
    // set up external partition count
    Map<String, Integer> system1Map = new HashMap<>();
    system1Map.put("input1", 64);
    system1Map.put("output1", 8);
    Map<String, Integer> system2Map = new HashMap<>();
    system2Map.put("input2", 16);
    system2Map.put("input3", 32);
    system2Map.put("output2", 16);
    // createSystemAdmin is defined outside this method; presumably it returns an admin that
    // reports the partition counts in the given map - verify against the helper
    Map<String, SystemAdmin> systemAdmins = new HashMap<>();
    SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
    SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
    systemAdmins.put("system1", systemAdmin1);
    systemAdmins.put("system2", systemAdmin2);
    StreamManager streamManager = new StreamManager(systemAdmins);
    // build the operator graph drawn in the diagram at the top of the method
    StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
    BiFunction mockBuilder = mock(BiFunction.class);
    MessageStream m1 = streamGraph.getInputStream("input1", mockBuilder).map(m -> m);
    MessageStream m2 = streamGraph.getInputStream("input2", mockBuilder).partitionBy(m -> "haha").filter(m -> true);
    MessageStream m3 = streamGraph.getInputStream("input3", mockBuilder).filter(m -> true).partitionBy(m -> "hehe").map(m -> m);
    Function mockFn = mock(Function.class);
    OutputStream<Object, Object, Object> outputStream1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
    OutputStream<Object, Object, Object> outputStream2 = streamGraph.getOutputStream("output2", mockFn, mockFn);
    // m2 is consumed three times: by both joins and by a no-op sink
    m1.join(m2, mock(JoinFunction.class), Duration.ofHours(2)).sendTo(outputStream1);
    m2.sink((message, collector, coordinator) -> {
    });
    m3.join(m2, mock(JoinFunction.class), Duration.ofHours(1)).sendTo(outputStream2);
    // plan the graph and render the plan as JSON (also printed to stdout)
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    ExecutionPlan plan = planner.plan(streamGraph);
    String json = plan.getPlanAsJson();
    System.out.println(json);
    // deserialize
    ObjectMapper mapper = new ObjectMapper();
    JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
    // expected element counts for the first job's operator graph and for the plan as a whole
    assertTrue(nodes.jobs.get(0).operatorGraph.inputStreams.size() == 5);
    assertTrue(nodes.jobs.get(0).operatorGraph.operators.size() == 13);
    assertTrue(nodes.sourceStreams.size() == 3);
    assertTrue(nodes.sinkStreams.size() == 2);
    assertTrue(nodes.intermediateStreams.size() == 2);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) BiFunction(java.util.function.BiFunction) JobConfig(org.apache.samza.config.JobConfig) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Function(java.util.function.Function) TestExecutionPlanner.createSystemAdmin(org.apache.samza.execution.TestExecutionPlanner.createSystemAdmin) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl) Duration(java.time.Duration) Map(java.util.Map) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) OutputStream(org.apache.samza.operators.OutputStream) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) MessageStream(org.apache.samza.operators.MessageStream) Mockito.mock(org.mockito.Mockito.mock) StreamSpec(org.apache.samza.system.StreamSpec) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) BiFunction(java.util.function.BiFunction) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Function(java.util.function.Function) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) BiFunction(java.util.function.BiFunction) MessageStream(org.apache.samza.operators.MessageStream) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl) MapConfig(org.apache.samza.config.MapConfig) TestExecutionPlanner.createSystemAdmin(org.apache.samza.execution.TestExecutionPlanner.createSystemAdmin) SystemAdmin(org.apache.samza.system.SystemAdmin) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) Test(org.junit.Test)

Example 3 with MessageStream

use of org.apache.samza.operators.MessageStream in project samza by apache.

the class TestOperatorImplGraph method testBroadcastChain.

/**
 * Attaches a filter and a map directly to the same input stream and checks that the input
 * operator of the resulting {@link OperatorImplGraph} has exactly those two operators registered.
 */
@Test
public void testBroadcastChain() {
    final String streamId = "input";
    final String systemName = "input-system";
    final String physicalName = "input-stream";

    // Job and stream configuration returned by the mocked job context.
    HashMap<String, String> jobConfigs = new HashMap<>();
    jobConfigs.put(JobConfig.JOB_NAME, "test-job");
    jobConfigs.put(JobConfig.JOB_ID, "1");
    StreamTestUtils.addStreamConfigs(jobConfigs, streamId, systemName, physicalName);
    Config config = new MapConfig(jobConfigs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);

    // One input stream with two operators hanging off it side by side (not chained).
    StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(systemName, "mockFactoryClass");
        GenericInputDescriptor streamDescriptor = systemDescriptor.getInputDescriptor(streamId, mock(Serde.class));
        MessageStream<Object> source = appDesc.getInputStream(streamDescriptor);
        source.filter(mock(FilterFunction.class));
        source.map(mock(MapFunction.class));
    }, config);

    Clock clock = mock(Clock.class);
    OperatorImplGraph implGraph = new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, clock);
    SystemStream inputSystemStream = new SystemStream(systemName, physicalName);
    InputOperatorImpl inputOperator = implGraph.getInputOperator(inputSystemStream);

    // Both operators must be registered directly on the input operator.
    assertEquals(2, inputOperator.registeredOperators.size());
    assertTrue(inputOperator.registeredOperators.stream()
        .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.FILTER));
    assertTrue(inputOperator.registeredOperators.stream()
        .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.MAP));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) 
ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) FilterFunction(org.apache.samza.operators.functions.FilterFunction) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) MapFunction(org.apache.samza.operators.functions.MapFunction) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) 
MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 4 with MessageStream

use of org.apache.samza.operators.MessageStream in project samza by apache.

the class TestOperatorImplGraph method testGetOutputToInputStreams.

/**
 * Builds an application with three inputs, two joins and two repartition ({@code partitionBy})
 * stages, then checks the map returned by
 * {@code OperatorImplGraph.getIntermediateToInputStreamsMap}: the "p2" intermediate stream must map
 * to input1 and input2, and the "p1" intermediate stream must map to input3 only.
 */
@Test
public void testGetOutputToInputStreams() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputStreamId3 = "input3";
    String inputSystem = "input-system";
    String outputStreamId1 = "output1";
    String outputStreamId2 = "output2";
    String outputSystem = "output-system";
    // Ids of the intermediate streams created by the two partitionBy calls ("p1" and "p2") below.
    String intStreamId1 = "test-app-1-partition_by-p1";
    String intStreamId2 = "test-app-1-partition_by-p2";
    String intSystem = "test-system";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "test-app");
    // The intermediate streams are looked up on the job's default system.
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
        GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
        GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
        GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
        MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
        MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
        // input3 is repartitioned ("p1") before it takes part in a join.
        MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3).filter(m -> true).partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1").map(m -> m);
        OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
        OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);
        // input1 joined with input2, then repartitioned ("p2") before reaching output1.
        messageStream1.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1").partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2").sendTo(outputStream1);
        // the repartitioned input3 joined with input2, sent straight to output2.
        messageStream3.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(outputStream2);
    }, config);
    Multimap<SystemStream, SystemStream> outputToInput = OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));
    // "p2" sits downstream of the input1/input2 join, so both inputs must be listed for it.
    Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
    // JUnit's assertEquals takes (expected, actual); keeping that order gives correct failure messages.
    assertEquals(2, inputs.size());
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));
    // "p1" is fed by input3 only.
    inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
    assertEquals(1, inputs.size());
    assertEquals(new SystemStream(inputSystem, inputStreamId3), inputs.iterator().next());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) 
ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) StreamConfig(org.apache.samza.config.StreamConfig) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) 
MessageStream(org.apache.samza.operators.MessageStream) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 5 with MessageStream

use of org.apache.samza.operators.MessageStream in project samza by apache.

the class TestPartitionByOperatorSpec method testPartitionByWithNoSerde.

/**
 * Repartitions the test input stream with {@code partitionBy} and checks both sides of the
 * repartition: the {@link InputOperatorSpec} registered for the intermediate stream and the
 * {@link PartitionByOperatorSpec} registered on the original input.
 */
@Test
public void testPartitionByWithNoSerde() {
    MapFunction<Object, String> keyFn = m -> m.toString();
    MapFunction<Object, Object> valueFn = m -> m;
    StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
        // NOTE(review): the serde is an unstubbed Mockito mock; presumably that is why the key and
        // value serdes asserted below are null - confirm against partitionBy's implementation.
        inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
    }, getConfig());

    // The same id is expected both as the intermediate stream's key in getInputOperators() and as
    // the op id of the partitionBy operator.
    String expectedOpId = String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

    // Input operator created for the intermediate (repartitioned) stream.
    InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators().get(expectedOpId);
    assertNotNull(inputOpSpec);
    assertNull(inputOpSpec.getKeySerde());
    assertNull(inputOpSpec.getValueSerde());
    assertTrue(inputOpSpec.isKeyed());
    assertNull(inputOpSpec.getScheduledFn());
    assertNull(inputOpSpec.getWatermarkFn());

    // First operator registered on the original input must be the partitionBy operator.
    InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
    Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
    assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);
    PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;

    // JUnit's assertEquals takes (expected, actual); keeping that order gives correct failure messages.
    assertEquals(expectedOpId, reparOpSpec.getOpId());
    assertEquals(keyFn, reparOpSpec.getKeyFunction());
    assertEquals(valueFn, reparOpSpec.getValueFunction());
    // The operator writes to an output stream whose id equals its own op id.
    assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
    assertNull(reparOpSpec.getScheduledFn());
    assertNull(reparOpSpec.getWatermarkFn());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) ScheduledFunction(org.apache.samza.operators.functions.ScheduledFunction) Assert.assertNotNull(org.junit.Assert.assertNotNull) Collection(java.util.Collection) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) Scheduler(org.apache.samza.operators.Scheduler) Serde(org.apache.samza.serializers.Serde) Test(org.junit.Test) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) OperatorSpecGraph(org.apache.samza.operators.OperatorSpecGraph) MapFunction(org.apache.samza.operators.functions.MapFunction) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Assert.assertNull(org.junit.Assert.assertNull) Map(java.util.Map) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) MapConfig(org.apache.samza.config.MapConfig) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MessageStream(org.apache.samza.operators.MessageStream) Mockito.mock(org.mockito.Mockito.mock) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MessageStream(org.apache.samza.operators.MessageStream) Test(org.junit.Test)

Aggregations

MessageStream (org.apache.samza.operators.MessageStream)34 Config (org.apache.samza.config.Config)22 KVSerde (org.apache.samza.serializers.KVSerde)21 Duration (java.time.Duration)19 HashMap (java.util.HashMap)19 OutputStream (org.apache.samza.operators.OutputStream)19 KV (org.apache.samza.operators.KV)18 Map (java.util.Map)17 ArrayList (java.util.ArrayList)16 List (java.util.List)16 StringSerde (org.apache.samza.serializers.StringSerde)16 Test (org.junit.Test)16 Collection (java.util.Collection)14 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)14 JobConfig (org.apache.samza.config.JobConfig)14 MapConfig (org.apache.samza.config.MapConfig)14 Windows (org.apache.samza.operators.windows.Windows)13 Collections (java.util.Collections)12 JoinFunction (org.apache.samza.operators.functions.JoinFunction)12 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)12