Search in sources :

Example 26 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class TestLargeMessageSafeKeyValueStores, method setup:

/**
 * Assembles the store stack under test before each case runs.
 *
 * <p>{@code typeOfStore} selects the raw byte store. That store is wrapped in a
 * {@code LoggedStore} whose collector throws when a logged message is longer than
 * {@code maxMessageSize}. {@code storeConfig} then selects how the serde, cache and
 * large-message layers are arranged on top, and the result is finally wrapped in a
 * {@code NullSafeKeyValueStore}.
 *
 * @throws IllegalArgumentException if {@code typeOfStore} or {@code storeConfig} is not one of
 *     the values handled below
 */
@Before
public void setup() {
    // Step 1: the raw byte[] -> byte[] store at the bottom of the stack.
    final KeyValueStore<byte[], byte[]> rawStore;
    switch (typeOfStore) {
        case "inmemory":
            rawStore = new InMemoryKeyValueStore(keyValueStoreMetrics);
            break;
        case "rocksdb":
            org.rocksdb.Options rocksOptions = new org.rocksdb.Options()
                .setCreateIfMissing(true)
                .setCompressionType(org.rocksdb.CompressionType.SNAPPY_COMPRESSION);
            rawStore = new RocksDbKeyValueStore(dir, rocksOptions, new MapConfig(), false, storeName, new WriteOptions(), new FlushOptions(), keyValueStoreMetrics);
            break;
        default:
            throw new IllegalArgumentException("Type of store undefined: " + typeOfStore);
    }

    // Step 2: a collector that sends nothing anywhere; it only fails on oversized payloads.
    MessageCollector sizeCheckingCollector = outgoing -> {
        byte[] payload = (byte[]) outgoing.getMessage();
        if (payload.length > maxMessageSize) {
            throw new SamzaException("Logged store message size " + payload.length + " for store " + storeName + " was larger than the maximum allowed message size " + maxMessageSize + ".");
        }
    };
    loggedStore = new LoggedStore<>(rawStore, systemStreamPartition, sizeCheckingCollector, loggedStoreMetrics);

    // Step 3: layer serde / cache / large-message handling in the configured order.
    switch (storeConfig) {
        case "serde":
            {
                // No cache: large-message guard directly over the logged store, serde on top.
                KeyValueStore<byte[], byte[]> guardedStore = new LargeMessageSafeStore(loggedStore, storeName, dropLargeMessage, maxMessageSize);
                store = new SerializedKeyValueStore<>(guardedStore, stringSerde, stringSerde, serializedKeyValueStoreMetrics);
                break;
            }
        case "cache-then-serde":
            {
                // The guard is only inserted when large messages are to be dropped;
                // otherwise the serde layer sits straight on the logged store.
                KeyValueStore<byte[], byte[]> bytesStore = dropLargeMessage
                    ? new LargeMessageSafeStore(loggedStore, storeName, dropLargeMessage, maxMessageSize)
                    : loggedStore;
                KeyValueStore<String, String> typedStore = new SerializedKeyValueStore<>(bytesStore, stringSerde, stringSerde, serializedKeyValueStoreMetrics);
                store = new CachedStore<>(typedStore, cacheSize, batchSize, cachedStoreMetrics);
                break;
            }
        // Here the cache wraps the logged store and the guard is always installed above it,
        // whatever the value of dropLargeMessage.
        // NOTE(review): the original comment at this spot appears truncated; its surviving text
        // was "large messages are expected and StorageConfig.DISALLOW_LARGE_MESSAGES is true."
        case "serde-then-cache":
            {
                KeyValueStore<byte[], byte[]> cachedBytesStore = new CachedStore<>(loggedStore, cacheSize, batchSize, cachedStoreMetrics);
                KeyValueStore<byte[], byte[]> guardedStore = new LargeMessageSafeStore(cachedBytesStore, storeName, dropLargeMessage, maxMessageSize);
                store = new SerializedKeyValueStore<>(guardedStore, stringSerde, stringSerde, serializedKeyValueStoreMetrics);
                break;
            }
        default:
            throw new IllegalArgumentException("Store config undefined: " + storeConfig);
    }

    // Step 4: outermost wrapper, applied for every configuration.
    store = new NullSafeKeyValueStore<>(store);
}
Also used : Arrays(java.util.Arrays) RunWith(org.junit.runner.RunWith) FlushOptions(org.rocksdb.FlushOptions) Random(java.util.Random) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) MessageCollector(org.apache.samza.task.MessageCollector) After(org.junit.After) MapConfig(org.apache.samza.config.MapConfig) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) Int(scala.Int) Collection(java.util.Collection) Partition(org.apache.samza.Partition) Test(org.junit.Test) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) WriteOptions(org.rocksdb.WriteOptions) File(java.io.File) SamzaException(org.apache.samza.SamzaException) List(java.util.List) InMemoryKeyValueStore(org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStore) Assert(org.junit.Assert) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) InMemoryKeyValueStore(org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStore) FlushOptions(org.rocksdb.FlushOptions) SamzaException(org.apache.samza.SamzaException) WriteOptions(org.rocksdb.WriteOptions) MessageCollector(org.apache.samza.task.MessageCollector) MapConfig(org.apache.samza.config.MapConfig) InMemoryKeyValueStore(org.apache.samza.storage.kv.inmemory.InMemoryKeyValueStore) Before(org.junit.Before)

Example 27 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class SessionWindowApp, method describe:

/**
 * Describes the session-window pipeline on the given application descriptor.
 *
 * <p>Page views are read from {@code INPUT_TOPIC} on the {@code SYSTEM} Kafka system, and
 * those whose user id equals {@code FILTER_KEY} are dropped. The rest are grouped by user id
 * with {@code Windows.keyedSessionWindow} (called with {@code Duration.ofSeconds(3)}). Each
 * resulting pane is mapped to a (key, size of the pane's message) pair and sent to
 * {@code OUTPUT_TOPIC}.
 *
 * @param appDescriptor descriptor on which the input stream, output stream and operators
 *     are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);

    // Input side: JSON-encoded PageView messages.
    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KafkaInputDescriptor<PageView> inputDescriptor = kafkaSystem.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

    // Output side: String key paired with an Integer value.
    KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
    KafkaOutputDescriptor<KV<String, Integer>> outputDescriptor = kafkaSystem.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

    MessageStream<PageView> views = appDescriptor.getInputStream(inputDescriptor);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(outputDescriptor);

    views
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)),
            "sessionWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 28 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class TumblingWindowApp, method describe:

/**
 * Describes the tumbling-window pipeline on the given application descriptor.
 *
 * <p>Page views are read from {@code INPUT_TOPIC} on the {@code SYSTEM} Kafka system, and
 * those whose user id equals {@code FILTER_KEY} are dropped. The rest are grouped by user id
 * with {@code Windows.keyedTumblingWindow} (called with {@code Duration.ofSeconds(3)}). Each
 * resulting pane is mapped to a (key, size of the pane's message) pair and sent to
 * {@code OUTPUT_TOPIC}.
 *
 * @param appDescriptor descriptor on which the input stream, output stream and operators
 *     are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);

    // Input side: JSON-encoded PageView messages.
    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KafkaInputDescriptor<PageView> inputDescriptor = kafkaSystem.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

    // Output side: String key paired with an Integer value.
    KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
    KafkaOutputDescriptor<KV<String, Integer>> outputDescriptor = kafkaSystem.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

    MessageStream<PageView> views = appDescriptor.getInputStream(inputDescriptor);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(outputDescriptor);

    views
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)),
            "tumblingWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 29 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class MergeExample, method describe:

/**
 * Describes a pipeline that merges three page-view input streams into one output stream.
 *
 * <p>All four streams live on the Kafka system named {@code "tracking"} and share one
 * {@code KVSerde} (a {@code StringSerde("UTF-8")} key with a JSON {@code PageViewEvent}
 * value). The inputs {@code pageViewStream1}..{@code pageViewStream3} are combined with
 * {@code MessageStream.mergeAll} and sent to {@code mergedStream}.
 *
 * @param appDescriptor descriptor on which the input and output streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KVSerde<String, PageViewEvent> eventSerde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
    KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

    KafkaInputDescriptor<KV<String, PageViewEvent>> firstInput = tracking.getInputDescriptor("pageViewStream1", eventSerde);
    KafkaInputDescriptor<KV<String, PageViewEvent>> secondInput = tracking.getInputDescriptor("pageViewStream2", eventSerde);
    KafkaInputDescriptor<KV<String, PageViewEvent>> thirdInput = tracking.getInputDescriptor("pageViewStream3", eventSerde);
    KafkaOutputDescriptor<KV<String, PageViewEvent>> mergedOutput = tracking.getOutputDescriptor("mergedStream", eventSerde);

    // Obtain the input streams in the same order as before (1, 2, 3), then the output stream.
    MessageStream<KV<String, PageViewEvent>> firstStream = appDescriptor.getInputStream(firstInput);
    MessageStream<KV<String, PageViewEvent>> secondStream = appDescriptor.getInputStream(secondInput);
    MessageStream<KV<String, PageViewEvent>> thirdStream = appDescriptor.getInputStream(thirdInput);

    MessageStream<KV<String, PageViewEvent>> merged = MessageStream.mergeAll(ImmutableList.of(firstStream, secondStream, thirdStream));
    merged.sendTo(appDescriptor.getOutputStream(mergedOutput));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)

Example 30 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class PageViewCounterExample, method describe:

/**
 * Describes a pipeline that counts page-view events per member in tumbling windows.
 *
 * <p>Events are read from {@code pageViewEvent} on the Kafka system named {@code "tracking"}
 * and keyed by {@code PageViewEvent::getMemberId}. They are folded in a keyed tumbling window
 * (called with {@code Duration.ofSeconds(10)}) that starts at 0 and adds 1 per event. The
 * window uses a repeating early trigger on a count of 5 and the {@code DISCARDING}
 * accumulation mode. Each pane is converted with {@code buildPageViewCount} and sent to
 * {@code pageViewEventPerMember}, keyed by the pane's key.
 *
 * @param appDescriptor descriptor on which the input stream, output stream and operators
 *     are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

    KafkaInputDescriptor<PageViewEvent> eventsDescriptor =
        tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, PageViewCount>> countsDescriptor =
        tracking.getOutputDescriptor("pageViewEventPerMember", KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

    MessageStream<PageViewEvent> events = appDescriptor.getInputStream(eventsDescriptor);
    OutputStream<KV<String, PageViewCount>> countsPerMember = appDescriptor.getOutputStream(countsDescriptor);

    // Fold definition: begin at zero, increment once per event seen in the window.
    SupplierFunction<Integer> zeroCount = () -> 0;
    FoldLeftFunction<PageViewEvent, Integer> incrementCount = (event, runningCount) -> runningCount + 1;

    events
        .window(
            // The two trailing arguments are passed as null, exactly as before
            // (presumably the key/value serdes -- confirm against the Windows API).
            Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), zeroCount, incrementCount, null, null)
                .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
                .setAccumulationMode(AccumulationMode.DISCARDING),
            "tumblingWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
        .sendTo(countsPerMember);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) StringSerde(org.apache.samza.serializers.StringSerde) PageViewCount(org.apache.samza.example.models.PageViewCount) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Aggregations

StringSerde (org.apache.samza.serializers.StringSerde): 52; Test (org.junit.Test): 32; KV (org.apache.samza.operators.KV): 25; KVSerde (org.apache.samza.serializers.KVSerde): 19; KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor): 14; Config (org.apache.samza.config.Config): 13; JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2): 13; StreamApplication (org.apache.samza.application.StreamApplication): 11; Duration (java.time.Duration): 10; StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor): 10; MessageStream (org.apache.samza.operators.MessageStream): 10; KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor): 10; ApplicationRunner (org.apache.samza.runtime.ApplicationRunner): 9; ApplicationRunners (org.apache.samza.runtime.ApplicationRunners): 9; NoOpSerde (org.apache.samza.serializers.NoOpSerde): 9; KafkaOutputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor): 9; TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor): 9; CommandLine (org.apache.samza.util.CommandLine): 9; OutputStream (org.apache.samza.operators.OutputStream): 8; Windows (org.apache.samza.operators.windows.Windows): 8