Search in sources:

Example 26 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class EndOfStreamIntegrationTest method testPipeline.

/**
 * Runs a map -> partitionBy -> sink pipeline over the in-memory "PageView" stream and
 * checks that the sink received as many messages as were generated.
 */
@Test
public void testPipeline() {
    class PipelineApplication implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd =
                sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            // Drop the key, re-key by member id through the "p1" repartition step, then
            // collect every value that arrives at the sink.
            appDescriptor.getInputStream(isd)
                .map(KV::getValue)
                .partitionBy(PageView::getMemberId, pv -> pv,
                    KVSerde.of(new IntegerSerde(), new TestTableData.PageViewJsonSerde()), "p1")
                .sink((m, collector, coordinator) -> {
                    RECEIVED.add(m.getValue());
                });
        }
    }
    int numPageViews = 40;
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<TestTableData.PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
    // NOTE(review): the second argument (4) is presumably the partition count - confirm
    // against TestTableData.generatePartitionedPageViews.
    TestRunner.of(new PipelineApplication())
        .addInputStream(inputDescriptor, TestTableData.generatePartitionedPageViews(numPageViews, 4))
        .run(Duration.ofSeconds(10));
    // JUnit's assertEquals takes (expected, actual); keeping that order makes the failure
    // message report the right values.
    assertEquals(numPageViews, RECEIVED.size());
}
Also used : InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) TestRunner(org.apache.samza.test.framework.TestRunner) List(java.util.List) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData(org.apache.samza.test.table.TestTableData) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) KVSerde(org.apache.samza.serializers.KVSerde) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Assert.assertEquals(org.junit.Assert.assertEquals) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Example 27 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestStreamApplicationDescriptorImpl method testSetDefaultSystemDescriptorAfterGettingInputStream.

/**
 * Expects an IllegalStateException when withDefaultSystem is called after an input
 * stream has already been obtained from the application descriptor.
 */
@Test(expected = IllegalStateException.class)
public void testSetDefaultSystemDescriptorAfterGettingInputStream() {
    GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
    String inputStreamId = "test-stream-1";
    GenericInputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(inputStreamId, mock(Serde.class));
    new StreamApplicationDescriptorImpl(app -> {
        app.getInputStream(inputDescriptor);
        // this call is the one expected to throw
        app.withDefaultSystem(systemDescriptor);
    }, getConfig());
}
Also used : Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) KVSerde(org.apache.samza.serializers.KVSerde) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 28 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestStreamApplicationDescriptorImpl method testGetInputStreamWithValueSerde.

/**
 * Registers an input descriptor built with only a (mock) value serde and checks the
 * resulting input operator spec: op code, stream id, registered descriptor, a NoOpSerde
 * key serde, and the supplied value serde.
 */
@Test
public void testGetInputStreamWithValueSerde() {
    final String inputStreamId = "test-stream-1";
    final Serde valueSerde = mock(Serde.class);
    final GenericInputDescriptor inputDescriptor =
        new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass").getInputDescriptor(inputStreamId, valueSerde);
    final StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(app -> {
        app.getInputStream(inputDescriptor);
    }, getConfig());

    final InputOperatorSpec registeredSpec = appDescriptor.getInputOperators().get(inputStreamId);

    // identity of the registered operator and descriptor
    assertEquals(OpCode.INPUT, registeredSpec.getOpCode());
    assertEquals(inputStreamId, registeredSpec.getStreamId());
    assertEquals(inputDescriptor, appDescriptor.getInputDescriptors().get(inputStreamId));
    // serdes: key serde is a NoOpSerde, value serde is the one that was passed in
    assertTrue(registeredSpec.getKeySerde() instanceof NoOpSerde);
    assertEquals(valueSerde, registeredSpec.getValueSerde());
}
Also used : Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) KVSerde(org.apache.samza.serializers.KVSerde) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 29 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestStreamApplicationDescriptorImpl method testGetInputStreamWithNullSerde.

/**
 * Expects an IllegalArgumentException when an input descriptor is created with a null
 * serde and then registered with the application descriptor.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetInputStreamWithNullSerde() {
    GenericInputDescriptor nullSerdeDescriptor =
        new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass").getInputDescriptor("mockStreamId", null);
    new StreamApplicationDescriptorImpl(app -> {
        app.getInputStream(nullSerdeDescriptor);
    }, getConfig());
}
Also used : GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 30 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestJobGraphJsonGenerator method testRepartitionedWindowStreamApplication.

/**
 * Plans an application that repartitions "PageView" (on the "hdfs" system) by country,
 * counts per key in a 10-second tumbling window, and sends the counts to "PageViewCount"
 * (on the "kafka" system). The generated plan JSON is then deserialized and its stream
 * and operator counts and output stream ids are checked.
 *
 * @throws Exception if planning or JSON deserialization fails
 */
@Test
public void testRepartitionedWindowStreamApplication() throws Exception {
    Map<String, String> configMap = new HashMap<>();
    configMap.put(JobConfig.JOB_NAME, "test-app");
    configMap.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
    StreamTestUtils.addStreamConfigs(configMap, "PageView", "hdfs", "hdfs:/user/dummy/PageViewEvent");
    StreamTestUtils.addStreamConfigs(configMap, "PageViewCount", "kafka", "PageViewCount");
    Config config = new MapConfig(configMap);

    // set up external partition count
    Map<String, Integer> system1Map = new HashMap<>();
    system1Map.put("hdfs:/user/dummy/PageViewEvent", 512);
    Map<String, Integer> system2Map = new HashMap<>();
    system2Map.put("PageViewCount", 16);
    SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
    SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
    SystemAdmins systemAdmins = mock(SystemAdmins.class);
    when(systemAdmins.getSystemAdmin("hdfs")).thenReturn(systemAdmin1);
    when(systemAdmins.getSystemAdmin("kafka")).thenReturn(systemAdmin2);
    StreamManager streamManager = new StreamManager(systemAdmins);

    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        KVSerde<String, PageViewEvent> pvSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));
        GenericSystemDescriptor inputSystem = new GenericSystemDescriptor("hdfs", "mockSystemFactoryClass");
        GenericInputDescriptor<KV<String, PageViewEvent>> pageView = inputSystem.getInputDescriptor("PageView", pvSerde);
        KVSerde<String, Long> pvcSerde = KVSerde.of(new StringSerde(), new LongSerde());
        GenericSystemDescriptor outputSystem = new GenericSystemDescriptor("kafka", "mockSystemFactoryClass");
        GenericOutputDescriptor<KV<String, Long>> pageViewCount = outputSystem.getOutputDescriptor("PageViewCount", pvcSerde);
        MessageStream<KV<String, PageViewEvent>> inputStream = appDesc.getInputStream(pageView);
        OutputStream<KV<String, Long>> outputStream = appDesc.getOutputStream(pageViewCount);
        inputStream
            .partitionBy(kv -> kv.getValue().getCountry(), kv -> kv.getValue(), pvSerde, "keyed-by-country")
            .window(Windows.keyedTumblingWindow(kv -> kv.getValue().getCountry(), Duration.ofSeconds(10L),
                () -> 0L, (m, c) -> c + 1L, new StringSerde(), new LongSerde()), "count-by-country")
            .map(pane -> new KV<>(pane.getKey().getKey(), pane.getMessage()))
            .sendTo(outputStream);
    }, config);

    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    ExecutionPlan plan = planner.plan(graphSpec);
    String json = plan.getPlanAsJson();
    System.out.println(json);

    // deserialize
    ObjectMapper mapper = new ObjectMapper();
    JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
    JobGraphJsonGenerator.OperatorGraphJson operatorGraphJson = nodes.jobs.get(0).operatorGraph;
    assertEquals(2, operatorGraphJson.inputStreams.size());
    assertEquals(4, operatorGraphJson.operators.size());
    assertEquals(1, nodes.sourceStreams.size());
    assertEquals(1, nodes.sinkStreams.size());
    assertEquals(1, nodes.intermediateStreams.size());

    // verify partitionBy op output to the intermediate stream of the same id
    // (expected value first, so a failure message reports the right values)
    assertEquals("test-app-1-partition_by-keyed-by-country",
        operatorGraphJson.operators.get("test-app-1-partition_by-keyed-by-country").get("outputStreamId"));
    assertEquals("PageViewCount",
        operatorGraphJson.operators.get("test-app-1-send_to-5").get("outputStreamId"));
}
Also used : LongSerde(org.apache.samza.serializers.LongSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) StringSerde(org.apache.samza.serializers.StringSerde) HashSet(java.util.HashSet) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) Duration(java.time.Duration) Map(java.util.Map) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) Before(org.junit.Before) Windows(org.apache.samza.operators.windows.Windows) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) Matchers(org.hamcrest.Matchers) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Collectors(java.util.stream.Collectors) Mockito(org.mockito.Mockito) OperatorSpecs(org.apache.samza.operators.spec.OperatorSpecs) 
StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) Assert(org.junit.Assert) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) StringSerde(org.apache.samza.serializers.StringSerde) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) SystemAdmins(org.apache.samza.system.SystemAdmins) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) KV(org.apache.samza.operators.KV) SystemAdmin(org.apache.samza.system.SystemAdmin) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Aggregations

GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)31 Test (org.junit.Test)29 NoOpSerde (org.apache.samza.serializers.NoOpSerde)23 KV (org.apache.samza.operators.KV)20 KVSerde (org.apache.samza.serializers.KVSerde)20 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)20 HashMap (java.util.HashMap)19 Duration (java.time.Duration)18 Map (java.util.Map)17 MapConfig (org.apache.samza.config.MapConfig)17 Serde (org.apache.samza.serializers.Serde)17 List (java.util.List)16 IntegerSerde (org.apache.samza.serializers.IntegerSerde)16 ArrayList (java.util.ArrayList)14 Function (java.util.function.Function)12 StreamApplication (org.apache.samza.application.StreamApplication)12 JobConfig (org.apache.samza.config.JobConfig)12 Collectors (java.util.stream.Collectors)11 Config (org.apache.samza.config.Config)11 StringSerde (org.apache.samza.serializers.StringSerde)11