Search in sources:

Example 21 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class MyStatefulApplication method describe.

/**
 * Declares this application's input stream, task factory and state tables.
 *
 * <p>One table descriptor is registered for every entry of {@code storeToChangelog}; each is
 * wired to the changelog topic mapped to that store name, with a changelog replication factor
 * of 1.
 *
 * @param appDescriptor descriptor on which the input stream, task factory and tables are declared
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
    // The same String/String serde is used for the input stream and for every table.
    KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());
    KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(inputSystem);
    KafkaInputDescriptor<KV<String, String>> inputDescriptor =
        kafkaSystem.getInputDescriptor(inputTopic, stringPairSerde);

    // The task is handed the names of all stores it may use.
    TaskApplicationDescriptor configured = appDescriptor
        .withInputStream(inputDescriptor)
        .withTaskFactory((StreamTaskFactory) () -> new MyTask(storeToChangelog.keySet()));

    storeToChangelog.forEach((name, changelog) -> {
        RocksDbTableDescriptor<String, String> tableDescriptor =
            new RocksDbTableDescriptor<>(name, stringPairSerde)
                .withChangelogStream(changelog)
                .withChangelogReplicationFactor(1);
        configured.withTable(tableDescriptor);
    });
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) TaskApplicationDescriptor(org.apache.samza.application.descriptors.TaskApplicationDescriptor) RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)

Example 22 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestRepartitionWindowApp method testRepartitionedSessionWindowCounter.

/**
 * Runs {@code RepartitionWindowApp} over five in-memory page views and asserts that the output
 * stream contains exactly "userId1 4" and "userId2 1", in any order.
 */
@Test
public void testRepartitionedSessionWindowCounter() throws Exception {
    // Input messages grouped under integer keys 0..2 (presumably partition ids - confirm against TestRunner).
    List<KV<String, PageView>> firstGroup = ImmutableList.of(
        KV.of("userId1", new PageView("india", "5.com", "userId1")),
        KV.of("userId1", new PageView("india", "2.com", "userId1")));
    List<KV<String, PageView>> secondGroup = ImmutableList.of(
        KV.of("userId2", new PageView("china", "4.com", "userId2")),
        KV.of("userId1", new PageView("india", "3.com", "userId1")));
    List<KV<String, PageView>> thirdGroup = ImmutableList.of(
        KV.of("userId1", new PageView("india", "1.com", "userId1")));

    Map<Integer, List<KV<String, PageView>>> inputByKey = new HashMap<>();
    inputByKey.put(0, firstGroup);
    inputByKey.put(1, secondGroup);
    inputByKey.put(2, thirdGroup);

    InMemorySystemDescriptor system = new InMemorySystemDescriptor(SYSTEM);
    InMemoryInputDescriptor<KV<String, PageView>> pageViewInput =
        system.getInputDescriptor(INPUT_TOPIC, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));

    // Strictly speaking the output message type is KV, since a KV is handed to sendTo. However,
    // StreamAssert.containsInAnyOrder needs the declared type to match the messages actually
    // observed, and the high-level sendTo splits the KV so that only the "V" part is emitted.
    // TestRunner only uses NoOpSerde, so declaring String here instead of KV is harmless.
    InMemoryOutputDescriptor<String> countOutput = system.getOutputDescriptor(OUTPUT_TOPIC, new NoOpSerde<>());

    TestRunner.of(new RepartitionWindowApp())
        .addInputStream(pageViewInput, inputByKey)
        .addOutputStream(countOutput, 1)
        .addConfig("task.window.ms", "1000")
        .run(Duration.ofSeconds(10));

    StreamAssert.containsInAnyOrder(
        Arrays.asList("userId1 4", "userId2 1"), countOutput, Duration.ofSeconds(1));
}
Also used : PageView(org.apache.samza.test.operator.data.PageView) HashMap(java.util.HashMap) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Test(org.junit.Test)

Example 23 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class SessionWindowApp method describe.

/**
 * Builds the pipeline: read page views, drop those whose user id equals {@code FILTER_KEY},
 * group the rest into keyed session windows (keyed by user id, 3 second duration argument),
 * and send a (user id, number of messages in the window) pair to the output stream.
 *
 * @param appDescriptor descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KafkaInputDescriptor<PageView> inputDescriptor = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

    KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
    KafkaOutputDescriptor<KV<String, Integer>> outputDescriptor = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

    MessageStream<PageView> views = appDescriptor.getInputStream(inputDescriptor);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(outputDescriptor);

    views
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedSessionWindow(
                PageView::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(PageView.class)),
            "sessionWindow")
        // Emit the window's key together with how many messages the window collected.
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 24 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TumblingWindowApp method describe.

/**
 * Builds the pipeline: read page views, drop those whose user id equals {@code FILTER_KEY},
 * group the rest into keyed tumbling windows (keyed by user id, 3 second duration argument),
 * and send a (user id, number of messages in the window) pair to the output stream.
 *
 * @param appDescriptor descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KafkaInputDescriptor<PageView> inputDescriptor = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

    KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
    KafkaOutputDescriptor<KV<String, Integer>> outputDescriptor = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

    MessageStream<PageView> views = appDescriptor.getInputStream(inputDescriptor);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(outputDescriptor);

    views
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedTumblingWindow(
                PageView::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(PageView.class)),
            "tumblingWindow")
        // Emit the window's key together with how many messages the window collected.
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 25 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestAvroRelConversion method testComplexUnionConversionShouldWorkWithBothStringAndIntTypes.

/**
 * Checks that the {@code non_nullable_union_value} field of {@code ComplexUnion} survives
 * conversion to a rel message and an encode/decode round trip, first when it holds
 * {@code testStrValue} and then when it holds the Integer 123.
 */
@Test
public void testComplexUnionConversionShouldWorkWithBothStringAndIntTypes() throws Exception {
    // ComplexUnion is a nested avro non-nullable union-type with both String and Integer type
    final String unionField = "non_nullable_union_value";
    final Integer intValue = Integer.valueOf(123);

    GenericData.Record avroRecord = new GenericData.Record(ComplexUnion.SCHEMA$);
    ComplexUnion specificUnion = new ComplexUnion();

    // --- String branch of the union ---
    avroRecord.put(unionField, testStrValue);
    specificUnion.non_nullable_union_value = testStrValue;

    byte[] stringGenericBytes = bytesFromGenericRecord(avroRecord);
    GenericRecord stringGenericDecoded = genericRecordFromBytes(stringGenericBytes, ComplexUnion.SCHEMA$);
    SamzaSqlRelMessage stringMessage =
        complexUnionAvroRelConverter.convertToRelMessage(new KV<>("key", stringGenericDecoded));
    Assert.assertEquals(
        testStrValue,
        stringMessage.getSamzaSqlRelRecord().getField(unionField).get().toString());

    byte[] stringSpecificBytes = encodeAvroSpecificRecord(ComplexUnion.class, specificUnion);
    GenericRecord stringSpecificDecoded = genericRecordFromBytes(stringSpecificBytes, ComplexUnion.SCHEMA$);
    Assert.assertEquals(testStrValue, stringSpecificDecoded.get(unionField).toString());

    // --- Integer branch of the union (same record and specific object, value overwritten) ---
    avroRecord.put(unionField, intValue);
    specificUnion.non_nullable_union_value = intValue;

    byte[] intGenericBytes = bytesFromGenericRecord(avroRecord);
    GenericRecord intGenericDecoded = genericRecordFromBytes(intGenericBytes, ComplexUnion.SCHEMA$);
    SamzaSqlRelMessage intMessage =
        complexUnionAvroRelConverter.convertToRelMessage(new KV<>("key", intGenericDecoded));
    Assert.assertEquals(intValue, intMessage.getSamzaSqlRelRecord().getField(unionField).get());

    byte[] intSpecificBytes = encodeAvroSpecificRecord(ComplexUnion.class, specificUnion);
    GenericRecord intSpecificDecoded = genericRecordFromBytes(intSpecificBytes, ComplexUnion.SCHEMA$);
    Assert.assertEquals(intValue, intSpecificDecoded.get(unionField));
}
Also used : ComplexUnion(org.apache.samza.sql.avro.schemas.ComplexUnion) ComplexRecord(org.apache.samza.sql.avro.schemas.ComplexRecord) StreetNumRecord(org.apache.samza.sql.avro.schemas.StreetNumRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) SimpleRecord(org.apache.samza.sql.avro.schemas.SimpleRecord) AddressRecord(org.apache.samza.sql.avro.schemas.AddressRecord) KV(org.apache.samza.operators.KV) GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Aggregations

KV (org.apache.samza.operators.KV): 68, Test (org.junit.Test): 38, StringSerde (org.apache.samza.serializers.StringSerde): 33, KVSerde (org.apache.samza.serializers.KVSerde): 30, HashMap (java.util.HashMap): 28, NoOpSerde (org.apache.samza.serializers.NoOpSerde): 26, List (java.util.List): 25, Duration (java.time.Duration): 24, ArrayList (java.util.ArrayList): 24, StreamApplication (org.apache.samza.application.StreamApplication): 22, Config (org.apache.samza.config.Config): 22, Map (java.util.Map): 20, StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor): 20, Table (org.apache.samza.table.Table): 19, MapConfig (org.apache.samza.config.MapConfig): 18, MessageStream (org.apache.samza.operators.MessageStream): 18, GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor): 18, InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor): 17, Collectors (java.util.stream.Collectors): 16, SamzaException (org.apache.samza.SamzaException): 16