Search in sources :

Example 16 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class TestJobGraphJsonGenerator method testRepartitionedJoinStreamApplication.

@Test
public void testRepartitionedJoinStreamApplication() throws Exception {
    /**
     * the graph looks like the following.
     * number in parentheses () indicates number of stream partitions.
     * number in parentheses in quotes ("") indicates expected partition count.
     * number in square brackets [] indicates operator ID.
     *
     * input3 (32) -> filter [7] -> partitionBy [8] ("64") -> map [10] -> join [14] -> sendTo(output2) [15] (16)
     *                                                                   |
     *              input2 (16) -> partitionBy [3] ("64") -> filter [5] -| -> sink [13]
     *                                                                   |
     *                                         input1 (64) -> map [1] -> join [11] -> sendTo(output1) [12] (8)
     */
    Map<String, String> configMap = new HashMap<>();
    configMap.put(JobConfig.JOB_NAME, "test-app");
    configMap.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
    StreamTestUtils.addStreamConfigs(configMap, "input1", "system1", "input1");
    StreamTestUtils.addStreamConfigs(configMap, "input2", "system2", "input2");
    StreamTestUtils.addStreamConfigs(configMap, "input3", "system2", "input3");
    StreamTestUtils.addStreamConfigs(configMap, "output1", "system1", "output1");
    StreamTestUtils.addStreamConfigs(configMap, "output2", "system2", "output2");
    Config config = new MapConfig(configMap);
    // set up external partition count
    Map<String, Integer> system1Map = new HashMap<>();
    system1Map.put("input1", 64);
    system1Map.put("output1", 8);
    Map<String, Integer> system2Map = new HashMap<>();
    system2Map.put("input2", 16);
    system2Map.put("input3", 32);
    system2Map.put("output2", 16);
    SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
    SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
    SystemAdmins systemAdmins = mock(SystemAdmins.class);
    when(systemAdmins.getSystemAdmin("system1")).thenReturn(systemAdmin1);
    when(systemAdmins.getSystemAdmin("system2")).thenReturn(systemAdmin2);
    StreamManager streamManager = new StreamManager(systemAdmins);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        KVSerde<Object, Object> kvSerde = new KVSerde<>(new NoOpSerde(), new NoOpSerde());
        String mockSystemFactoryClass = "factory.class.name";
        GenericSystemDescriptor system1 = new GenericSystemDescriptor("system1", mockSystemFactoryClass);
        GenericSystemDescriptor system2 = new GenericSystemDescriptor("system2", mockSystemFactoryClass);
        GenericInputDescriptor<KV<Object, Object>> input1Descriptor = system1.getInputDescriptor("input1", kvSerde);
        GenericInputDescriptor<KV<Object, Object>> input2Descriptor = system2.getInputDescriptor("input2", kvSerde);
        GenericInputDescriptor<KV<Object, Object>> input3Descriptor = system2.getInputDescriptor("input3", kvSerde);
        GenericOutputDescriptor<KV<Object, Object>> output1Descriptor = system1.getOutputDescriptor("output1", kvSerde);
        GenericOutputDescriptor<KV<Object, Object>> output2Descriptor = system2.getOutputDescriptor("output2", kvSerde);
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor).map(m -> m);
        MessageStream<KV<Object, Object>> messageStream2 = appDesc.getInputStream(input2Descriptor).partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1").filter(m -> true);
        MessageStream<KV<Object, Object>> messageStream3 = appDesc.getInputStream(input3Descriptor).filter(m -> true).partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p2").map(m -> m);
        OutputStream<KV<Object, Object>> outputStream1 = appDesc.getOutputStream(output1Descriptor);
        OutputStream<KV<Object, Object>> outputStream2 = appDesc.getOutputStream(output2Descriptor);
        messageStream1.join(messageStream2, (JoinFunction<Object, KV<Object, Object>, KV<Object, Object>, KV<Object, Object>>) mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1").sendTo(outputStream1);
        messageStream2.sink((message, collector, coordinator) -> {
        });
        messageStream3.join(messageStream2, (JoinFunction<Object, KV<Object, Object>, KV<Object, Object>, KV<Object, Object>>) mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(outputStream2);
    }, config);
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    ExecutionPlan plan = planner.plan(graphSpec);
    String json = plan.getPlanAsJson();
    System.out.println(json);
    // deserialize
    ObjectMapper mapper = new ObjectMapper();
    JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
    assertEquals(5, nodes.jobs.get(0).operatorGraph.inputStreams.size());
    assertEquals(11, nodes.jobs.get(0).operatorGraph.operators.size());
    assertEquals(3, nodes.sourceStreams.size());
    assertEquals(2, nodes.sinkStreams.size());
    assertEquals(2, nodes.intermediateStreams.size());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) StringSerde(org.apache.samza.serializers.StringSerde) HashSet(java.util.HashSet) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) Duration(java.time.Duration) Map(java.util.Map) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) Before(org.junit.Before) Windows(org.apache.samza.operators.windows.Windows) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) Matchers(org.hamcrest.Matchers) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Collectors(java.util.stream.Collectors) Mockito(org.mockito.Mockito) OperatorSpecs(org.apache.samza.operators.spec.OperatorSpecs) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) Assert(org.junit.Assert) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) KVSerde(org.apache.samza.serializers.KVSerde) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) SystemAdmins(org.apache.samza.system.SystemAdmins) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) KV(org.apache.samza.operators.KV) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) JoinFunction(org.apache.samza.operators.functions.JoinFunction) NoOpSerde(org.apache.samza.serializers.NoOpSerde) SystemAdmin(org.apache.samza.system.SystemAdmin) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 17 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class TestExecutionPlanner method testMaxPartitionLimit.

@Test
public void testMaxPartitionLimit() {
    int partitionLimit = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> input1 = appDesc.getInputStream(input4Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        input1.partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1").map(kv -> kv).sendTo(output1);
    }, config);
    JobGraph jobGraph = (JobGraph) planner.plan(graphSpec);
    // Partitions should be the same as input1
    jobGraph.getIntermediateStreams().forEach(edge -> {
        // max of input1 and output1
        assertEquals(partitionLimit, edge.getPartitionCount());
    });
}
Also used : Arrays(java.util.Arrays) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Duration(java.time.Duration) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) Table(org.apache.samza.table.Table) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) Mockito.mock(org.mockito.Mockito.mock) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) Test(org.junit.Test)

Aggregations

MessageStream (org.apache.samza.operators.MessageStream)17 OutputStream (org.apache.samza.operators.OutputStream)17 Config (org.apache.samza.config.Config)16 Duration (java.time.Duration)14 KV (org.apache.samza.operators.KV)12 Windows (org.apache.samza.operators.windows.Windows)12 KVSerde (org.apache.samza.serializers.KVSerde)12 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)11 StringSerde (org.apache.samza.serializers.StringSerde)11 HashMap (java.util.HashMap)10 Map (java.util.Map)10 Collections (java.util.Collections)9 JobConfig (org.apache.samza.config.JobConfig)9 MapConfig (org.apache.samza.config.MapConfig)9 JoinFunction (org.apache.samza.operators.functions.JoinFunction)9 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)9 Test (org.junit.Test)9 Collection (java.util.Collection)8 List (java.util.List)8 Set (java.util.Set)8