Search in sources :

Example 26 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class MergeExample method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KVSerde<String, PageViewEvent> serde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
    KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<KV<String, PageViewEvent>> isd1 = trackingSystem.getInputDescriptor("pageViewStream1", serde);
    KafkaInputDescriptor<KV<String, PageViewEvent>> isd2 = trackingSystem.getInputDescriptor("pageViewStream2", serde);
    KafkaInputDescriptor<KV<String, PageViewEvent>> isd3 = trackingSystem.getInputDescriptor("pageViewStream3", serde);
    KafkaOutputDescriptor<KV<String, PageViewEvent>> osd = trackingSystem.getOutputDescriptor("mergedStream", serde);
    MessageStream.mergeAll(ImmutableList.of(appDescriptor.getInputStream(isd1), appDescriptor.getInputStream(isd2), appDescriptor.getInputStream(isd3))).sendTo(appDescriptor.getOutputStream(osd));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)

Example 27 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class PageViewCounterExample method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor = trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, PageViewCount>> outputStreamDescriptor = trackingSystem.getOutputDescriptor("pageViewEventPerMember", KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));
    MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
    OutputStream<KV<String, PageViewCount>> pageViewEventPerMemberStream = appDescriptor.getOutputStream(outputStreamDescriptor);
    SupplierFunction<Integer> initialValue = () -> 0;
    FoldLeftFunction<PageViewEvent, Integer> foldLeftFn = (m, c) -> c + 1;
    pageViewEvents.window(Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), initialValue, foldLeftFn, null, null).setEarlyTrigger(Triggers.repeat(Triggers.count(5))).setAccumulationMode(AccumulationMode.DISCARDING), "tumblingWindow").map(windowPane -> KV.of(windowPane.getKey().getKey(), buildPageViewCount(windowPane))).sendTo(pageViewEventPerMemberStream);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) StringSerde(org.apache.samza.serializers.StringSerde) PageViewCount(org.apache.samza.example.models.PageViewCount) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Example 28 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestSamzaSqlRemoteTableJoinFunction method testWithInnerJoinWithTableOnRight.

@Test
public void testWithInnerJoinWithTableOnRight() {
    Map<String, String> props = new HashMap<>();
    SystemStream ss = new SystemStream("test", "nestedRecord");
    props.put(String.format(ConfigBasedAvroRelSchemaProviderFactory.CFG_SOURCE_SCHEMA, ss.getSystem(), ss.getStream()), SimpleRecord.SCHEMA$.toString());
    ConfigBasedAvroRelSchemaProviderFactory factory = new ConfigBasedAvroRelSchemaProviderFactory();
    AvroRelSchemaProvider schemaProvider = (AvroRelSchemaProvider) factory.create(ss, new MapConfig(props));
    AvroRelConverter relConverter = new AvroRelConverter(ss, schemaProvider, new MapConfig());
    SamzaRelTableKeyConverter relTableKeyConverter = new SampleRelTableKeyConverter();
    String remoteTableName = "testDb.testTable.$table";
    GenericData.Record tableRecord = new GenericData.Record(SimpleRecord.SCHEMA$);
    tableRecord.put("id", 1);
    tableRecord.put("name", "name1");
    SamzaSqlRelMessage streamMsg = new SamzaSqlRelMessage(streamFieldNames, streamFieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
    SamzaSqlRelMessage tableMsg = relConverter.convertToRelMessage(new KV(tableRecord.get("id"), tableRecord));
    JoinRelType joinRelType = JoinRelType.INNER;
    List<Integer> streamKeyIds = Arrays.asList(1);
    List<Integer> tableKeyIds = Arrays.asList(0);
    KV<Object, GenericRecord> record = KV.of(tableRecord.get("id"), tableRecord);
    JoinInputNode mockTableInputNode = mock(JoinInputNode.class);
    when(mockTableInputNode.getKeyIds()).thenReturn(tableKeyIds);
    when(mockTableInputNode.isPosOnRight()).thenReturn(true);
    when(mockTableInputNode.getFieldNames()).thenReturn(tableMsg.getSamzaSqlRelRecord().getFieldNames());
    when(mockTableInputNode.getSourceName()).thenReturn(remoteTableName);
    JoinInputNode mockStreamInputNode = mock(JoinInputNode.class);
    when(mockStreamInputNode.getKeyIds()).thenReturn(streamKeyIds);
    when(mockStreamInputNode.isPosOnRight()).thenReturn(false);
    when(mockStreamInputNode.getFieldNames()).thenReturn(streamFieldNames);
    SamzaSqlRemoteTableJoinFunction joinFn = new SamzaSqlRemoteTableJoinFunction(relConverter, relTableKeyConverter, mockStreamInputNode, mockTableInputNode, joinRelType, 0);
    SamzaSqlRelMessage outMsg = joinFn.apply(streamMsg, record);
    Assert.assertEquals(outMsg.getSamzaSqlRelRecord().getFieldValues().size(), outMsg.getSamzaSqlRelRecord().getFieldNames().size());
    List<String> expectedFieldNames = new ArrayList<>(streamFieldNames);
    expectedFieldNames.addAll(tableMsg.getSamzaSqlRelRecord().getFieldNames());
    List<Object> expectedFieldValues = new ArrayList<>(streamFieldValues);
    expectedFieldValues.addAll(tableMsg.getSamzaSqlRelRecord().getFieldValues());
    Assert.assertEquals(expectedFieldNames, outMsg.getSamzaSqlRelRecord().getFieldNames());
    Assert.assertEquals(expectedFieldValues, outMsg.getSamzaSqlRelRecord().getFieldValues());
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AvroRelConverter(org.apache.samza.sql.avro.AvroRelConverter) SampleRelTableKeyConverter(org.apache.samza.sql.util.SampleRelTableKeyConverter) GenericRecord(org.apache.avro.generic.GenericRecord) SimpleRecord(org.apache.samza.sql.avro.schemas.SimpleRecord) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) SamzaRelTableKeyConverter(org.apache.samza.sql.interfaces.SamzaRelTableKeyConverter) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) GenericData(org.apache.avro.generic.GenericData) JoinRelType(org.apache.calcite.rel.core.JoinRelType) AvroRelSchemaProvider(org.apache.samza.sql.avro.AvroRelSchemaProvider) ConfigBasedAvroRelSchemaProviderFactory(org.apache.samza.sql.avro.ConfigBasedAvroRelSchemaProviderFactory) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Example 29 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class KeyValueStoreExample method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor = trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, StatsOutput>> outputStreamDescriptor = trackingSystem.getOutputDescriptor("pageViewEventPerMember", KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));
    appDescriptor.withDefaultSystem(trackingSystem);
    MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
    OutputStream<KV<String, StatsOutput>> pageViewEventPerMember = appDescriptor.getOutputStream(outputStreamDescriptor);
    pageViewEvents.partitionBy(pve -> pve.getMemberId(), pve -> pve, KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy").map(KV::getValue).flatMap(new MyStatsCounter()).map(stats -> KV.of(stats.memberId, stats)).sendTo(pageViewEventPerMember);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) Collection(java.util.Collection) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) TimeUnit(java.util.concurrent.TimeUnit) Context(org.apache.samza.context.Context) List(java.util.List) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Example 30 with KV

use of org.apache.samza.operators.KV in project beam by apache.

the class SamzaTestStreamTranslator method createInputDescriptor.

@SuppressWarnings("unchecked")
private static <T> GenericInputDescriptor<KV<?, OpMessage<T>>> createInputDescriptor(String id, String encodedTestStream, SerializableFunction<String, TestStream<T>> testStreamDecoder) {
    final Map<String, String> systemConfig = ImmutableMap.of(ENCODED_TEST_STREAM, encodedTestStream, TEST_STREAM_DECODER, Base64Serializer.serializeUnchecked(testStreamDecoder));
    final GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(id, SamzaTestStreamSystemFactory.class.getName()).withSystemConfigs(systemConfig);
    // The KvCoder is needed here for Samza not to crop the key.
    final Serde<KV<?, OpMessage<T>>> kvSerde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>());
    return systemDescriptor.getInputDescriptor(id, kvSerde);
}
Also used : NoOpSerde(org.apache.samza.serializers.NoOpSerde) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KV(org.apache.samza.operators.KV) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor)

Aggregations

KV (org.apache.samza.operators.KV)68 Test (org.junit.Test)38 StringSerde (org.apache.samza.serializers.StringSerde)33 KVSerde (org.apache.samza.serializers.KVSerde)30 HashMap (java.util.HashMap)28 NoOpSerde (org.apache.samza.serializers.NoOpSerde)26 List (java.util.List)25 Duration (java.time.Duration)24 ArrayList (java.util.ArrayList)24 StreamApplication (org.apache.samza.application.StreamApplication)22 Config (org.apache.samza.config.Config)22 Map (java.util.Map)20 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)20 Table (org.apache.samza.table.Table)19 MapConfig (org.apache.samza.config.MapConfig)18 MessageStream (org.apache.samza.operators.MessageStream)18 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)18 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)17 Collectors (java.util.stream.Collectors)16 SamzaException (org.apache.samza.SamzaException)16