Use of org.apache.samza.serializers.StringSerde in project samza by apache.
From the class TestTimeSeriesStoreImpl, method testGetOnTimestampBoundaries:
@Test
public void testGetOnTimestampBoundaries() {
TimeSeriesStore<String, byte[]> timeSeriesStore = newTimeSeriesStore(new StringSerde("UTF-8"), true);
// insert entries for key "hello": one at timestamp 1 and two at timestamp 2 (both retained, since this store keeps duplicate entries per timestamp)
timeSeriesStore.put("hello", "world-1".getBytes(), 1L);
timeSeriesStore.put("hello", "world-1".getBytes(), 2L);
timeSeriesStore.put("hello", "world-2".getBytes(), 2L);
// read from time-range [0,1) should return no entries
List<TimestampedValue<byte[]>> values = readStore(timeSeriesStore, "hello", 0L, 1L);
Assert.assertEquals(0, values.size());
// read from time-range [1,2) should return one entry
values = readStore(timeSeriesStore, "hello", 1L, 2L);
Assert.assertEquals(1, values.size());
Assert.assertEquals("world-1", new String(values.get(0).getValue()));
// read from time-range [2,3) should return two entries
values = readStore(timeSeriesStore, "hello", 2L, 3L);
Assert.assertEquals(2, values.size());
Assert.assertEquals("world-1", new String(values.get(0).getValue()));
Assert.assertEquals(2L, values.get(0).getTimestamp());
// read from time-range [0,3) should return three entries
values = readStore(timeSeriesStore, "hello", 0L, 3L);
Assert.assertEquals(3, values.size());
// read from time-range [2,999999) should return two entries
values = readStore(timeSeriesStore, "hello", 2L, 999999L);
Assert.assertEquals(2, values.size());
// read from time-range [3,4) should return no entries
values = readStore(timeSeriesStore, "hello", 3L, 4L);
Assert.assertEquals(0, values.size());
}
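The test reads time ranges through a readStore helper that is not included in this excerpt. A minimal sketch of such a helper, assuming TimeSeriesStore#get(key, startTimestamp, endTimestamp) returns a ClosableIterator<TimestampedValue<V>> over the half-open range [startTimestamp, endTimestamp):
private static List<TimestampedValue<byte[]>> readStore(TimeSeriesStore<String, byte[]> store, String key, long startTimestamp, long endTimestamp) {
  // assumption: get() scans the half-open range [startTimestamp, endTimestamp)
  ClosableIterator<TimestampedValue<byte[]>> iterator = store.get(key, startTimestamp, endTimestamp);
  List<TimestampedValue<byte[]>> values = new ArrayList<>();
  iterator.forEachRemaining(values::add);
  // close the iterator so the underlying store can release its resources
  iterator.close();
  return values;
}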
Use of org.apache.samza.serializers.StringSerde in project samza by apache.
From the class TestTimeSeriesStoreImpl, method testGetOnTimestampBoundariesWithOverwriteMode:
@Test
public void testGetOnTimestampBoundariesWithOverwriteMode() {
// instantiate a store in overwrite mode
TimeSeriesStore<String, byte[]> timeSeriesStore = newTimeSeriesStore(new StringSerde("UTF-8"), false);
// insert entries for key "hello": one at timestamp 1 and two at timestamp 2 (the second write at timestamp 2 overwrites the first)
timeSeriesStore.put("hello", "world-1".getBytes(), 1L);
timeSeriesStore.put("hello", "world-1".getBytes(), 2L);
timeSeriesStore.put("hello", "world-2".getBytes(), 2L);
// read from time-range [0,1) should return no entries
List<TimestampedValue<byte[]>> values = readStore(timeSeriesStore, "hello", 0L, 1L);
Assert.assertEquals(0, values.size());
// read from time-range [1,2) should return one entry
values = readStore(timeSeriesStore, "hello", 1L, 2L);
Assert.assertEquals(1, values.size());
Assert.assertEquals("world-1", new String(values.get(0).getValue()));
// read from time-range [2,3) should return the most recent entry
values = readStore(timeSeriesStore, "hello", 2L, 3L);
Assert.assertEquals(1, values.size());
Assert.assertEquals("world-2", new String(values.get(0).getValue()));
Assert.assertEquals(2L, values.get(0).getTimestamp());
// read from time-range [0,3) should return two entries
values = readStore(timeSeriesStore, "hello", 0L, 3L);
Assert.assertEquals(2, values.size());
// read from time-range [2,999999) should return one entry
values = readStore(timeSeriesStore, "hello", 2L, 999999L);
Assert.assertEquals(1, values.size());
// read from time-range [3,4) should return no entries
values = readStore(timeSeriesStore, "hello", 3L, 4L);
Assert.assertEquals(0, values.size());
}
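Both TestTimeSeriesStoreImpl tests construct the store with a StringSerde key serde, the class whose usages this page collects. For reference, StringSerde simply encodes and decodes strings as byte arrays (UTF-8 by default); a quick illustrative round trip:
StringSerde keySerde = new StringSerde("UTF-8");
byte[] keyBytes = keySerde.toBytes("hello");     // the key bytes stored under each timestamp
String restored = keySerde.fromBytes(keyBytes);  // "hello"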
Use of org.apache.samza.serializers.StringSerde in project samza by apache.
From the class TestCouchbaseRemoteTableEndToEnd, method testEndToEnd:
@Test
public void testEndToEnd() {
Bucket inputBucket = cluster.openBucket(inputBucketName);
inputBucket.upsert(ByteArrayDocument.create("Alice", "20".getBytes()));
inputBucket.upsert(ByteArrayDocument.create("Bob", "30".getBytes()));
inputBucket.upsert(ByteArrayDocument.create("Chris", "40".getBytes()));
inputBucket.upsert(ByteArrayDocument.create("David", "50".getBytes()));
inputBucket.close();
List<String> users = Arrays.asList("Alice", "Bob", "Chris", "David");
final StreamApplication app = appDesc -> {
DelegatingSystemDescriptor inputSystemDescriptor = new DelegatingSystemDescriptor("test");
GenericInputDescriptor<String> inputDescriptor = inputSystemDescriptor.getInputDescriptor("User", new NoOpSerde<>());
CouchbaseTableReadFunction<String> readFunction =
    new CouchbaseTableReadFunction<>(inputBucketName, String.class, "couchbase://127.0.0.1")
        .withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(inputBucketName))
        .withBootstrapHttpDirectPort(couchbaseMock.getHttpPort())
        .withSerde(new StringSerde());
CouchbaseTableWriteFunction<JsonObject> writeFunction =
    new CouchbaseTableWriteFunction<>(outputBucketName, JsonObject.class, "couchbase://127.0.0.1")
        .withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(outputBucketName))
        .withBootstrapHttpDirectPort(couchbaseMock.getHttpPort());
RemoteTableDescriptor inputTableDesc =
    new RemoteTableDescriptor<String, String, Void>("input-table")
        .withReadFunction(readFunction)
        .withRateLimiterDisabled();
Table<KV<String, String>> inputTable = appDesc.getTable(inputTableDesc);
RemoteTableDescriptor outputTableDesc =
    new RemoteTableDescriptor<String, JsonObject, Object>("output-table")
        .withReadFunction(new NoOpTableReadFunction<>())
        .withWriteFunction(writeFunction)
        .withRateLimiterDisabled();
Table<KV<String, JsonObject>> outputTable = appDesc.getTable(outputTableDesc);
appDesc.getInputStream(inputDescriptor)
    .map(k -> KV.of(k, k))
    .join(inputTable, new JoinFunction())
    .sendTo(outputTable);
};
InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
InMemoryInputDescriptor<String> inputDescriptor = isd.getInputDescriptor("User", new NoOpSerde<>());
TestRunner.of(app).addInputStream(inputDescriptor, users).run(Duration.ofSeconds(10));
Bucket outputBucket = cluster.openBucket(outputBucketName);
Assert.assertEquals("{\"name\":\"Alice\",\"age\":\"20\"}", outputBucket.get("Alice").content().toString());
Assert.assertEquals("{\"name\":\"Bob\",\"age\":\"30\"}", outputBucket.get("Bob").content().toString());
Assert.assertEquals("{\"name\":\"Chris\",\"age\":\"40\"}", outputBucket.get("Chris").content().toString());
Assert.assertEquals("{\"name\":\"David\",\"age\":\"50\"}", outputBucket.get("David").content().toString());
outputBucket.close();
}
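The join above uses a JoinFunction class that is not part of this excerpt. A hedged sketch of a StreamTableJoinFunction that would produce the asserted output documents; the actual class in the test may differ:
static class JoinFunction implements StreamTableJoinFunction<String, KV<String, String>, KV<String, String>, KV<String, JsonObject>> {
  @Override
  public KV<String, JsonObject> apply(KV<String, String> message, KV<String, String> record) {
    // build {"name": <user>, "age": <value looked up from the input bucket>}
    JsonObject user = JsonObject.create().put("name", message.getKey()).put("age", record.getValue());
    return KV.of(message.getKey(), user);
  }

  @Override
  public String getMessageKey(KV<String, String> message) {
    return message.getKey();
  }

  @Override
  public String getRecordKey(KV<String, String> record) {
    return record.getKey();
  }
}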
Use of org.apache.samza.serializers.StringSerde in project samza by apache.
From the class RepartitionJoinWindowApp, method describe:
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
// offset.default = oldest is required for these tests since the checkpoint topic is empty on start and messages are
// published before the application is run
Config config = appDescriptor.getConfig();
String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));
MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);
MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews.partitionBy(PageView::getViewId, pv -> pv,
    new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
MessageStream<PageView> pageViewsRepartitionedByViewIdValueOnly = pageViewsRepartitionedByViewId.map(KV::getValue);
MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks.partitionBy(AdClick::getViewId, ac -> ac,
    new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);
MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueOnly.join(adClicksRepartitionedByViewIdValueOnly,
    new UserPageViewAdClicksJoiner(), new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
    Duration.ofMinutes(1), "pageViewAdClickJoin");
MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks.partitionBy(UserPageAdClick::getUserId, upac -> upac,
    KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");
userPageAdClicksByUserId
    .map(KV::getValue)
    .window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
        new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow")
    .map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size())))
    .sink((message, messageCollector, taskCoordinator) -> {
      taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
      messageCollector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
    });
intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}
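UserPageViewAdClicksJoiner is referenced above but not shown in this excerpt. A sketch of what such a JoinFunction could look like, keyed on viewId as the repartitioning implies; the UserPageAdClick constructor and the getUserId/getPageId/getAdId getters are assumptions, not taken from the source:
private static class UserPageViewAdClicksJoiner implements JoinFunction<String, PageView, AdClick, UserPageAdClick> {
  @Override
  public UserPageAdClick apply(PageView pv, AdClick ac) {
    // hypothetical constructor; the real POJO's fields are not visible here
    return new UserPageAdClick(ac.getUserId(), pv.getPageId(), ac.getAdId());
  }

  @Override
  public String getFirstKey(PageView pv) {
    return pv.getViewId();
  }

  @Override
  public String getSecondKey(AdClick ac) {
    return ac.getViewId();
  }
}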
Use of org.apache.samza.serializers.StringSerde in project samza by apache.
From the class MyStatefulApplication, method describe:
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(inputSystem);
KVSerde<String, String> serde = KVSerde.of(new StringSerde(), new StringSerde());
KafkaInputDescriptor<KV<String, String>> isd = ksd.getInputDescriptor(inputTopic, serde);
TaskApplicationDescriptor desc = appDescriptor
    .withInputStream(isd)
    .withTaskFactory((StreamTaskFactory) () -> new MyTask(storeToChangelog.keySet()));
storeToChangelog.forEach((storeName, changelogTopic) -> {
RocksDbTableDescriptor<String, String> td = new RocksDbTableDescriptor<>(storeName, serde)
    .withChangelogStream(changelogTopic)
    .withChangelogReplicationFactor(1);
desc.withTable(td);
});
}
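MyTask is only referenced through the task factory above. A hedged sketch of a stateful task that writes every incoming record into each of the configured stores, assuming each table's backing store can be looked up by name via TaskContext#getStore; the real MyTask may differ:
class MyTask implements StreamTask, InitableTask {
  private final Set<String> storeNames;
  private final List<KeyValueStore<String, String>> stores = new ArrayList<>();

  MyTask(Set<String> storeNames) {
    this.storeNames = storeNames;
  }

  @Override
  public void init(Context context) {
    // look up each RocksDB store declared via RocksDbTableDescriptor in describe()
    storeNames.forEach(name -> stores.add((KeyValueStore<String, String>) context.getTaskContext().getStore(name)));
  }

  @Override
  public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) {
    // with KVSerde.of(new StringSerde(), new StringSerde()), key and message arrive as Strings
    stores.forEach(store -> store.put((String) envelope.getKey(), (String) envelope.getMessage()));
  }
}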