Search in sources :

Example 21 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class TestTimeSeriesStoreImpl, method testGetOnTimestampBoundaries.

/**
 * Verifies time-range reads against a store in which several values written at the same
 * timestamp are all retained (two values at timestamp 2 are both expected back).
 *
 * <p>Ranges are half-open, [start, end): the start timestamp is included and the end timestamp is
 * excluded, which is what the expected sizes below pin down.
 */
@Test
public void testGetOnTimestampBoundaries() {
    // Encode/decode the byte[] values with an explicit charset so the test does not depend on the
    // platform default (the key serde is already pinned to UTF-8).
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    TimeSeriesStore<String, byte[]> timeSeriesStore = newTimeSeriesStore(new StringSerde("UTF-8"), true);
    // insert three entries with key "hello": one at timestamp 1 and two at timestamp 2
    timeSeriesStore.put("hello", "world-1".getBytes(utf8), 1L);
    timeSeriesStore.put("hello", "world-1".getBytes(utf8), 2L);
    timeSeriesStore.put("hello", "world-2".getBytes(utf8), 2L);
    // read from time-range [0,1) should return no entries
    List<TimestampedValue<byte[]>> values = readStore(timeSeriesStore, "hello", 0L, 1L);
    Assert.assertEquals(0, values.size());
    // read from time-range [1,2) should return one entry
    values = readStore(timeSeriesStore, "hello", 1L, 2L);
    Assert.assertEquals(1, values.size());
    Assert.assertEquals("world-1", new String(values.get(0).getValue(), utf8));
    // read from time-range [2,3) should return two entries
    values = readStore(timeSeriesStore, "hello", 2L, 3L);
    Assert.assertEquals(2, values.size());
    Assert.assertEquals("world-1", new String(values.get(0).getValue(), utf8));
    Assert.assertEquals(2L, values.get(0).getTimestamp());
    // read from time-range [0,3) should return three entries
    values = readStore(timeSeriesStore, "hello", 0L, 3L);
    Assert.assertEquals(3, values.size());
    // read from time-range [2,999999) should return two entries
    values = readStore(timeSeriesStore, "hello", 2L, 999999L);
    Assert.assertEquals(2, values.size());
    // read from time-range [3,4) should return no entries
    values = readStore(timeSeriesStore, "hello", 3L, 4L);
    Assert.assertEquals(0, values.size());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) TimestampedValue(org.apache.samza.util.TimestampedValue) Test(org.junit.Test)

Example 22 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class TestTimeSeriesStoreImpl, method testGetOnTimestampBoundariesWithOverwriteMode.

/**
 * Verifies time-range reads against a store in overwrite mode: when two values are written at the
 * same timestamp, only the most recently written one is expected back.
 *
 * <p>Ranges are half-open, [start, end): the start timestamp is included and the end timestamp is
 * excluded, which is what the expected sizes below pin down.
 */
@Test
public void testGetOnTimestampBoundariesWithOverwriteMode() {
    // Encode/decode the byte[] values with an explicit charset so the test does not depend on the
    // platform default (the key serde is already pinned to UTF-8).
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    // instantiate a store in overwrite mode
    TimeSeriesStore<String, byte[]> timeSeriesStore = newTimeSeriesStore(new StringSerde("UTF-8"), false);
    // write key "hello" once at timestamp 1 and twice at timestamp 2 (the second write at 2 replaces the first)
    timeSeriesStore.put("hello", "world-1".getBytes(utf8), 1L);
    timeSeriesStore.put("hello", "world-1".getBytes(utf8), 2L);
    timeSeriesStore.put("hello", "world-2".getBytes(utf8), 2L);
    // read from time-range [0,1) should return no entries
    List<TimestampedValue<byte[]>> values = readStore(timeSeriesStore, "hello", 0L, 1L);
    Assert.assertEquals(0, values.size());
    // read from time-range [1,2) should return one entry
    values = readStore(timeSeriesStore, "hello", 1L, 2L);
    Assert.assertEquals(1, values.size());
    Assert.assertEquals("world-1", new String(values.get(0).getValue(), utf8));
    // read from time-range [2,3) should return the most recent entry
    values = readStore(timeSeriesStore, "hello", 2L, 3L);
    Assert.assertEquals(1, values.size());
    Assert.assertEquals("world-2", new String(values.get(0).getValue(), utf8));
    Assert.assertEquals(2L, values.get(0).getTimestamp());
    // read from time-range [0,3) should return two entries
    values = readStore(timeSeriesStore, "hello", 0L, 3L);
    Assert.assertEquals(2, values.size());
    // read from time-range [2,999999) should return one entry
    values = readStore(timeSeriesStore, "hello", 2L, 999999L);
    Assert.assertEquals(1, values.size());
    // read from time-range [3,4) should return no entries
    values = readStore(timeSeriesStore, "hello", 3L, 4L);
    Assert.assertEquals(0, values.size());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) TimestampedValue(org.apache.samza.util.TimestampedValue) Test(org.junit.Test)

Example 23 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class TestCouchbaseRemoteTableEndToEnd, method testEndToEnd.

/**
 * End-to-end test of Couchbase-backed remote tables.
 *
 * <p>Flow: four age documents keyed by user name are seeded into the input bucket; an application
 * reads user names from an in-memory stream, joins each against the input table and sends the join
 * result to the output table; finally the output bucket is checked for one JSON document per user
 * carrying the name and the age.
 */
@Test
public void testEndToEnd() {
    // Explicit charset: the seeded bytes must not depend on the platform default encoding.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    Bucket inputBucket = cluster.openBucket(inputBucketName);
    try {
        inputBucket.upsert(ByteArrayDocument.create("Alice", "20".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("Bob", "30".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("Chris", "40".getBytes(utf8)));
        inputBucket.upsert(ByteArrayDocument.create("David", "50".getBytes(utf8)));
    } finally {
        // Release the bucket even when seeding fails.
        inputBucket.close();
    }
    List<String> users = Arrays.asList("Alice", "Bob", "Chris", "David");
    final StreamApplication app = appDesc -> {
        DelegatingSystemDescriptor inputSystemDescriptor = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<String> inputDescriptor = inputSystemDescriptor.getInputDescriptor("User", new NoOpSerde<>());
        // Both table functions are pointed at the ports exposed by the Couchbase mock.
        CouchbaseTableReadFunction<String> readFunction = new CouchbaseTableReadFunction<>(inputBucketName, String.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(inputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort()).withSerde(new StringSerde());
        CouchbaseTableWriteFunction<JsonObject> writeFunction = new CouchbaseTableWriteFunction<>(outputBucketName, JsonObject.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(outputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort());
        // NOTE(review): the two descriptors below are declared with raw types, which yields unchecked
        // conversions at getTable(); parameterizing them needs the builder signatures confirmed first.
        RemoteTableDescriptor inputTableDesc = new RemoteTableDescriptor<String, String, Void>("input-table").withReadFunction(readFunction).withRateLimiterDisabled();
        Table<KV<String, String>> inputTable = appDesc.getTable(inputTableDesc);
        RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<String, JsonObject, Object>("output-table").withReadFunction(new NoOpTableReadFunction<>()).withWriteFunction(writeFunction).withRateLimiterDisabled();
        Table<KV<String, JsonObject>> outputTable = appDesc.getTable(outputTableDesc);
        // Key each user name by itself so it can be joined against the table, then write the join output.
        appDesc.getInputStream(inputDescriptor).map(k -> KV.of(k, k)).join(inputTable, new JoinFunction()).sendTo(outputTable);
    };
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    // The stream is fed with the plain user-name strings in `users`, so the descriptor is typed as String.
    InMemoryInputDescriptor<String> inputDescriptor = isd.getInputDescriptor("User", new NoOpSerde<>());
    TestRunner.of(app).addInputStream(inputDescriptor, users).run(Duration.ofSeconds(10));
    Bucket outputBucket = cluster.openBucket(outputBucketName);
    try {
        Assert.assertEquals("{\"name\":\"Alice\",\"age\":\"20\"}", outputBucket.get("Alice").content().toString());
        Assert.assertEquals("{\"name\":\"Bob\",\"age\":\"30\"}", outputBucket.get("Bob").content().toString());
        Assert.assertEquals("{\"name\":\"Chris\",\"age\":\"40\"}", outputBucket.get("Chris").content().toString());
        Assert.assertEquals("{\"name\":\"David\",\"age\":\"50\"}", outputBucket.get("David").content().toString());
    } finally {
        // Release the bucket even when an assertion fails.
        outputBucket.close();
    }
}
Also used : CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) CouchbaseEnvironment(com.couchbase.client.java.env.CouchbaseEnvironment) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) JsonObject(com.couchbase.client.java.document.json.JsonObject) BucketConfiguration(com.couchbase.mock.BucketConfiguration) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) DefaultCouchbaseEnvironment(com.couchbase.client.java.env.DefaultCouchbaseEnvironment) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) CouchbaseMock(com.couchbase.mock.CouchbaseMock) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) After(org.junit.After) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Before(org.junit.Before) Table(org.apache.samza.table.Table) ByteArrayDocument(com.couchbase.client.java.document.ByteArrayDocument) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Test(org.junit.Test) TestRunner(org.apache.samza.test.framework.TestRunner) Bucket(com.couchbase.client.java.Bucket) List(java.util.List) CouchbaseCluster(com.couchbase.client.java.CouchbaseCluster) Cluster(com.couchbase.client.java.Cluster) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) 
StreamApplication(org.apache.samza.application.StreamApplication) JsonObject(com.couchbase.client.java.document.json.JsonObject) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Bucket(com.couchbase.client.java.Bucket) CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) Test(org.junit.Test)

Example 24 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class RepartitionJoinWindowApp, method describe.

/**
 * Wires up the processing graph: page views and ad clicks are each repartitioned by view id,
 * joined with a one-minute TTL, repartitioned by user id, grouped into 3-second keyed session
 * windows, and the per-window message count is emitted to the configured output topic.
 *
 * <p>The ids of the three repartition streams are recorded in {@code intermediateStreamIds}.
 *
 * <p>NOTE: the original author's remark is that tests publish messages before the application
 * starts and the checkpoint topic is empty on start, so {@code offset.default = oldest} is
 * required; that setting is not applied in this method and is presumably configured elsewhere.
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    Config jobConfig = appDescriptor.getConfig();
    String pageViewTopic = jobConfig.get(INPUT_TOPIC_1_CONFIG_KEY);
    String adClickTopic = jobConfig.get(INPUT_TOPIC_2_CONFIG_KEY);
    String sinkTopic = jobConfig.get(OUTPUT_TOPIC_CONFIG_KEY);

    // Input descriptors and streams.
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
    KafkaInputDescriptor<AdClick> adClickInput = kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));
    MessageStream<PageView> pageViewStream = appDescriptor.getInputStream(pageViewInput);
    MessageStream<AdClick> adClickStream = appDescriptor.getInputStream(adClickInput);

    // Repartition page views by view id, then drop the key again.
    KVSerde<String, PageView> pageViewKvSerde = new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class));
    MessageStream<KV<String, PageView>> pageViewsKeyedByViewId =
        pageViewStream.partitionBy(PageView::getViewId, pv -> pv, pageViewKvSerde, "pageViewsByViewId");
    MessageStream<PageView> pageViewsByViewId = pageViewsKeyedByViewId.map(KV::getValue);

    // Same for ad clicks.
    KVSerde<String, AdClick> adClickKvSerde = new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class));
    MessageStream<KV<String, AdClick>> adClicksKeyedByViewId =
        adClickStream.partitionBy(AdClick::getViewId, ac -> ac, adClickKvSerde, "adClicksByViewId");
    MessageStream<AdClick> adClicksByViewId = adClicksKeyedByViewId.map(KV::getValue);

    // Join the two co-partitioned streams; entries are kept for one minute.
    MessageStream<UserPageAdClick> joined = pageViewsByViewId.join(
        adClicksByViewId,
        new UserPageViewAdClicksJoiner(),
        new StringSerde(),
        new JsonSerdeV2<>(PageView.class),
        new JsonSerdeV2<>(AdClick.class),
        Duration.ofMinutes(1),
        "pageViewAdClickJoin");

    // Repartition the join output by user id.
    MessageStream<KV<String, UserPageAdClick>> joinedKeyedByUserId = joined.partitionBy(
        UserPageAdClick::getUserId,
        upac -> upac,
        KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
        "userPageAdClicksByUserId");

    // Session-window per user and reduce every pane to (user id, number of messages in the pane).
    MessageStream<UserPageAdClick> joinedByUserId = joinedKeyedByUserId.map(KV::getValue);
    MessageStream<KV<String, String>> sessionSizes = joinedByUserId
        .window(
            Windows.keyedSessionWindow(
                UserPageAdClick::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(UserPageAdClick.class)),
            "userAdClickWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())));

    // NOTE(review): the sink addresses the system by the literal "kafka" while the inputs use SYSTEM —
    // confirm the two are meant to be the same value.
    sessionSizes.sink((kv, collector, coordinator) -> {
        // Request a commit before emitting the result.
        coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        SystemStream sinkStream = new SystemStream("kafka", sinkTopic);
        collector.send(new OutgoingMessageEnvelope(sinkStream, null, kv.getKey(), kv.getValue()));
    });

    // Remember the stream ids of the three repartition streams.
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsKeyedByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksKeyedByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedKeyedByUserId).getStreamId());
}
Also used : Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) PageView(org.apache.samza.test.operator.data.PageView) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) Config(org.apache.samza.config.Config) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope)

Example 25 with StringSerde

use of org.apache.samza.serializers.StringSerde in project samza by apache.

In class MyStatefulApplication, method describe.

/**
 * Registers the Kafka input stream, the task factory, and one RocksDB table per entry of
 * {@code storeToChangelog} (store name mapped to changelog topic) on the given descriptor.
 *
 * @param appDescriptor descriptor of the task application being described
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(inputSystem);
    // One string/string serde instance is shared by the input stream and every table.
    KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());
    KafkaInputDescriptor<KV<String, String>> inputDescriptor = kafkaSystem.getInputDescriptor(inputTopic, stringKvSerde);

    // Each task is handed the set of store names.
    StreamTaskFactory taskFactory = () -> new MyTask(storeToChangelog.keySet());
    TaskApplicationDescriptor configured = appDescriptor
        .withInputStream(inputDescriptor)
        .withTaskFactory(taskFactory);

    // Every store gets a changelog-backed table with replication factor 1.
    storeToChangelog.forEach((storeName, changelogTopic) ->
        configured.withTable(
            new RocksDbTableDescriptor<>(storeName, stringKvSerde)
                .withChangelogStream(changelogTopic)
                .withChangelogReplicationFactor(1)));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) TaskApplicationDescriptor(org.apache.samza.application.descriptors.TaskApplicationDescriptor) RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)

Aggregations

StringSerde (org.apache.samza.serializers.StringSerde)52 Test (org.junit.Test)32 KV (org.apache.samza.operators.KV)25 KVSerde (org.apache.samza.serializers.KVSerde)19 KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)14 Config (org.apache.samza.config.Config)13 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)13 StreamApplication (org.apache.samza.application.StreamApplication)11 Duration (java.time.Duration)10 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)10 MessageStream (org.apache.samza.operators.MessageStream)10 KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor)10 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)9 ApplicationRunners (org.apache.samza.runtime.ApplicationRunners)9 NoOpSerde (org.apache.samza.serializers.NoOpSerde)9 KafkaOutputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor)9 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)9 CommandLine (org.apache.samza.util.CommandLine)9 OutputStream (org.apache.samza.operators.OutputStream)8 Windows (org.apache.samza.operators.windows.Windows)8