Search in sources :

Example 51 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class EndOfStreamIntegrationTest method testPipeline.

@Test
public void testPipeline() {
    class PipelineApplication implements StreamApplication {

        @Override
        public void describe(StreamApplicationDescriptor appDescriptor) {
            DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
            GenericInputDescriptor<KV<String, PageView>> isd = sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
            appDescriptor.getInputStream(isd).map(KV::getValue).partitionBy(PageView::getMemberId, pv -> pv, KVSerde.of(new IntegerSerde(), new TestTableData.PageViewJsonSerde()), "p1").sink((m, collector, coordinator) -> {
                RECEIVED.add(m.getValue());
            });
        }
    }
    int numPageViews = 40;
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<TestTableData.PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(new PipelineApplication()).addInputStream(inputDescriptor, TestTableData.generatePartitionedPageViews(numPageViews, 4)).run(Duration.ofSeconds(10));
    assertEquals(RECEIVED.size(), numPageViews);
}
Also used : InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) TestRunner(org.apache.samza.test.framework.TestRunner) List(java.util.List) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData(org.apache.samza.test.table.TestTableData) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) KVSerde(org.apache.samza.serializers.KVSerde) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Assert.assertEquals(org.junit.Assert.assertEquals) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Example 52 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class StreamApplicationIntegrationTest method testStatefulJoinWithLocalTable.

@Test
public void testStatefulJoinWithLocalTable() {
    Random random = new Random();
    List<KV<String, TestTableData.PageView>> pageViews = Arrays.asList(TestTableData.generatePageViews(10)).stream().map(x -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], x)).collect(Collectors.toList());
    List<KV<String, TestTableData.Profile>> profiles = Arrays.asList(TestTableData.generateProfiles(10)).stream().map(x -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], x)).collect(Collectors.toList());
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<KV<String, TestTableData.PageView>> pageViewStreamDesc = isd.getInputDescriptor("PageView", new NoOpSerde<KV<String, TestTableData.PageView>>());
    InMemoryInputDescriptor<KV<String, TestTableData.Profile>> profileStreamDesc = isd.getInputDescriptor("Profile", new NoOpSerde<KV<String, TestTableData.Profile>>()).shouldBootstrap();
    InMemoryOutputDescriptor<TestTableData.EnrichedPageView> outputStreamDesc = isd.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());
    InMemoryOutputDescriptor<String> joinKeysDescriptor = isd.getOutputDescriptor("JoinPageKeys", new NoOpSerde<>());
    TestRunner.of(new PageViewProfileViewJoinApplication()).addInputStream(pageViewStreamDesc, pageViews).addInputStream(profileStreamDesc, profiles).addOutputStream(outputStreamDesc, 1).addOutputStream(joinKeysDescriptor, 1).run(Duration.ofSeconds(2));
    Assert.assertEquals(10, TestRunner.consumeStream(outputStreamDesc, Duration.ofSeconds(1)).get(0).size());
    Assert.assertEquals(10, TestRunner.consumeStream(joinKeysDescriptor, Duration.ofSeconds(1)).get(0).size());
}
Also used : RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) TestTableData(org.apache.samza.test.table.TestTableData) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) Logger(org.slf4j.Logger) TestData(org.apache.samza.test.controlmessages.TestData) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PageViewToProfileJoinFunction(org.apache.samza.test.table.PageViewToProfileJoinFunction) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Random(java.util.Random) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Example 53 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestContext method testSyncTaskWithMultiplePartitionMultithreadedWithCustomIME.

@Test
public void testSyncTaskWithMultiplePartitionMultithreadedWithCustomIME() throws Exception {
    Map<Integer, List<KV>> inputPartitionData = new HashMap<>();
    Map<Integer, List<KV>> inputPartitionIME = new HashMap<>();
    Map<Integer, List<Integer>> expectedOutputPartitionData = new HashMap<>();
    genData(inputPartitionData, expectedOutputPartitionData);
    for (Map.Entry<Integer, List<KV>> entry : inputPartitionData.entrySet()) {
        Integer partitionId = entry.getKey();
        List<KV> messages = entry.getValue();
        SystemStreamPartition ssp = new SystemStreamPartition("test", "input", new Partition(partitionId));
        inputPartitionIME.put(partitionId, new ArrayList<>());
        int offset = 0;
        for (KV message : messages) {
            IncomingMessageEnvelope ime = new IncomingMessageEnvelope(ssp, String.valueOf(offset++), message.key, message.getValue());
            inputPartitionIME.get(partitionId).add(KV.of(message.key, ime));
        }
    }
    syncTaskWithMultiplePartitionMultithreadedHelper(inputPartitionIME, expectedOutputPartitionData);
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KV(org.apache.samza.operators.KV) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 54 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestContext method syncTaskWithMultiplePartitionMultithreadedHelper.

void syncTaskWithMultiplePartitionMultithreadedHelper(Map<Integer, List<KV>> inputPartitionData, Map<Integer, List<Integer>> expectedOutputPartitionData) throws Exception {
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<KV> imid = isd.getInputDescriptor("input", new NoOpSerde<KV>());
    InMemoryOutputDescriptor<Integer> imod = isd.getOutputDescriptor("output", new NoOpSerde<Integer>());
    TestRunner.of(MyStreamTestTask.class).addInputStream(imid, inputPartitionData).addOutputStream(imod, 5).addConfig("job.container.thread.pool.size", "4").addExternalContext(new TestContext(10)).run(Duration.ofSeconds(2));
    StreamAssert.containsInOrder(expectedOutputPartitionData, imod, Duration.ofMillis(1000));
}
Also used : KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)

Example 55 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestContext method testSyncTaskWithMultiplePartition.

@Test
public void testSyncTaskWithMultiplePartition() throws Exception {
    Map<Integer, List<KV>> inputPartitionData = new HashMap<>();
    Map<Integer, List<Integer>> expectedOutputPartitionData = new HashMap<>();
    genData(inputPartitionData, expectedOutputPartitionData);
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<KV> imid = isd.getInputDescriptor("input", new NoOpSerde<KV>());
    InMemoryOutputDescriptor<Integer> imod = isd.getOutputDescriptor("output", new NoOpSerde<Integer>());
    TestRunner.of(MyStreamTestTask.class).addInputStream(imid, inputPartitionData).addOutputStream(imod, 5).addExternalContext(new TestContext(10)).run(Duration.ofSeconds(2));
    StreamAssert.containsInOrder(expectedOutputPartitionData, imod, Duration.ofMillis(1000));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Aggregations

KV (org.apache.samza.operators.KV)68 Test (org.junit.Test)38 StringSerde (org.apache.samza.serializers.StringSerde)33 KVSerde (org.apache.samza.serializers.KVSerde)30 HashMap (java.util.HashMap)28 NoOpSerde (org.apache.samza.serializers.NoOpSerde)26 List (java.util.List)25 Duration (java.time.Duration)24 ArrayList (java.util.ArrayList)24 StreamApplication (org.apache.samza.application.StreamApplication)22 Config (org.apache.samza.config.Config)22 Map (java.util.Map)20 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)20 Table (org.apache.samza.table.Table)19 MapConfig (org.apache.samza.config.MapConfig)18 MessageStream (org.apache.samza.operators.MessageStream)18 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)18 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)17 Collectors (java.util.stream.Collectors)16 SamzaException (org.apache.samza.SamzaException)16