use of org.apache.samza.application.descriptors.StreamApplicationDescriptor in project samza by apache.
the class WatermarkIntegrationTest method testWatermark.
/**
 * Runs a repartitioning application that reads the "PageView" stream from the "test" system and then checks
 * the input/output watermarks recorded by the PARTITION_BY and SINK operators of the tasks named
 * "Partition 0" and "Partition 1".
 *
 * @throws Exception if building, running or inspecting the application fails
 */
@Test
public void testWatermark() throws Exception {
  Map<String, String> configs = new HashMap<>();
  // Runner, job identity and coordination.
  configs.put(ApplicationConfig.APP_RUNNER_CLASS, MockLocalApplicationRunner.class.getName());
  configs.put("systems.test.samza.factory", TestSystemFactory.class.getName());
  configs.put("streams.PageView.samza.system", "test");
  configs.put("streams.PageView.partitionCount", String.valueOf(PARTITION_COUNT));
  configs.put(JobConfig.JOB_NAME, "test-watermark-job");
  configs.put(JobConfig.PROCESSOR_ID, "1");
  configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, PassthroughJobCoordinatorFactory.class.getName());
  configs.put(TaskConfig.GROUPER_FACTORY, SingleContainerGrouperFactory.class.getName());
  // Kafka is the job's default system.
  configs.put("systems.kafka.samza.factory", "org.apache.samza.system.kafka.KafkaSystemFactory");
  configs.put("systems.kafka.producer.bootstrap.servers", bootstrapUrl());
  configs.put("systems.kafka.consumer.zookeeper.connect", zkConnect());
  configs.put("systems.kafka.samza.key.serde", "int");
  configs.put("systems.kafka.samza.msg.serde", "json");
  configs.put("systems.kafka.default.stream.replication.factor", "1");
  configs.put("job.default.system", "kafka");
  // Serde registry.
  configs.put("serializers.registry.int.class", IntegerSerdeFactory.class.getName());
  configs.put("serializers.registry.string.class", StringSerdeFactory.class.getName());
  configs.put("serializers.registry.json.class", PageViewJsonSerdeFactory.class.getName());

  // Filled by the sink below; this test does not assert on its contents.
  List<PageView> received = new ArrayList<>();

  // Reads PageView, repartitions by member id and hands every repartitioned value to the sink.
  class TestStreamApp implements StreamApplication {
    @Override
    public void describe(StreamApplicationDescriptor appDescriptor) {
      DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("test");
      GenericInputDescriptor<KV<String, PageView>> isd =
          sd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
      appDescriptor.getInputStream(isd)
          .map(KV::getValue)
          .partitionBy(pv -> pv.getMemberId(), pv -> pv, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1")
          .sink((m, collector, coordinator) -> {
            received.add(m.getValue());
          });
    }
  }

  Config config = new MapConfig(configs);
  final ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new TestStreamApp(), config);
  executeRun(runner, config);

  // processors are only available when the app is running
  Map<String, StreamOperatorTask> tasks = getTaskOperationGraphs((MockLocalApplicationRunner) runner);

  // wait for the completion to ensure that all tasks are actually initialized and the OperatorImplGraph is
  // initialized
  runner.waitForFinish();

  // JUnit's contract is assertEquals(expected, actual); the expected watermark goes first so that a failure
  // message reports the values the right way round.
  StreamOperatorTask task0 = tasks.get("Partition 0");
  OperatorImplGraph graph = TestStreamOperatorTask.getOperatorImplGraph(task0);
  OperatorImpl pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
  assertEquals(4, TestOperatorImpl.getInputWatermark(pb));
  assertEquals(4, TestOperatorImpl.getOutputWatermark(pb));
  OperatorImpl sink = getOperator(graph, OperatorSpec.OpCode.SINK);
  assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
  assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));

  StreamOperatorTask task1 = tasks.get("Partition 1");
  graph = TestStreamOperatorTask.getOperatorImplGraph(task1);
  pb = getOperator(graph, OperatorSpec.OpCode.PARTITION_BY);
  assertEquals(3, TestOperatorImpl.getInputWatermark(pb));
  assertEquals(3, TestOperatorImpl.getOutputWatermark(pb));
  sink = getOperator(graph, OperatorSpec.OpCode.SINK);
  assertEquals(3, TestOperatorImpl.getInputWatermark(sink));
  assertEquals(3, TestOperatorImpl.getOutputWatermark(sink));
}
use of org.apache.samza.application.descriptors.StreamApplicationDescriptor in project samza by apache.
the class QueryTranslator method sendToOutputStream.
/**
 * Sends the translated output of a query to its sink.
 *
 * <p>The message stream registered for {@code node} is mapped through an {@code OutputMapFunction} and then
 * sent either to the table described by the sink's configuration or, when the sink has no table descriptor,
 * to an output stream on the sink's system. For stream sinks, system messages from every known input stream
 * are forwarded as well when {@code sqlConfig.isProcessSystemEvents()} is set.
 *
 * @param queryLogicalId logical id of the query, passed to the output map function
 * @param logicalOpId logical id of the operator, passed to the output map function
 * @param sinkStream source name used to look up the sink's I/O configuration
 * @param appDesc application descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node relational node whose registered stream is sent to the sink
 * @param queryId id of the query, passed to the output map function
 * @throws SamzaException if the sink is a table and the application descriptor returns no table for it
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relMessages = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> mappedOutput =
      relMessages.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));

  Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();
  if (sinkTableDescriptor.isPresent()) {
    // Table sink: write straight into the table and finish.
    Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
    if (sinkTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    mappedOutput.sendTo(sinkTable);
    return;
  }

  // Stream sink: reuse one system descriptor per system and one output stream per sink source.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String sinkSystem = sinkConfig.getSystemName();
  DelegatingSystemDescriptor sinkSystemDescriptor =
      systemDescriptors.computeIfAbsent(sinkSystem, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> sinkDescriptor =
      sinkSystemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), passThroughSerde);
  OutputStream sink =
      outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(sinkDescriptor));
  mappedOutput.sendTo(sink);

  // Process system events only if the output is a stream.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> input : inputMsgStreams.values()) {
    MessageStream<KV<Object, Object>> systemEvents = input
        .filter(message -> message.getMetadata().isSystemMessage())
        .map(SamzaSqlInputMessage::getKeyAndMessageKV);
    systemEvents.sendTo(sink);
  }
}
use of org.apache.samza.application.descriptors.StreamApplicationDescriptor in project samza by apache.
the class ScanTranslator method translate.
// ScanMapFunction
/**
 * Translates a table scan into a stream of {@code SamzaSqlRelMessage}s and registers that stream in the
 * translator context under the scan's id.
 *
 * <p>Nothing is registered when the source is backed by a {@code RemoteTableDescriptor} or a
 * {@code CachingTableDescriptor}. Otherwise the input stream for the source is created once, cached in
 * {@code inputMsgStreams}, and shared by every scan of the same source.
 *
 * @param tableScan the scan to translate
 * @param queryLogicalId logical id of the query, passed to the scan map function
 * @param logicalOpId logical id of the operator, passed to the scan map function
 * @param context translator context providing the application descriptor and the stream registry
 * @param systemDescriptors system descriptors created so far, keyed by system name; may be added to
 * @param inputMsgStreams input message streams created so far, keyed by source; may be added to
 * @throws SamzaException if a system descriptor already exists for the system with a transformer that is
 *     not a {@code SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId,
    final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors,
    Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
  StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();
  String sourceName = SqlIOConfig.getSourceFromSourceParts(tableScan.getTable().getQualifiedName());
  Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

  SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
  final String systemName = ioConfig.getSystemName();
  final String streamId = ioConfig.getStreamId();
  final String source = ioConfig.getSource();

  // Remote and caching tables get no input stream here; every other source (including one that carries a
  // different kind of table descriptor) still needs an input descriptor so that it can be read as a stream.
  if (ioConfig.getTableDescriptor().isPresent()) {
    Object tableDescriptor = ioConfig.getTableDescriptor().get();
    if (tableDescriptor instanceof RemoteTableDescriptor || tableDescriptor instanceof CachingTableDescriptor) {
      return;
    }
  }

  DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
  if (systemDescriptor != null) {
    /* In SamzaSQL no system descriptor should be set up by the user, so an existing one is expected only for
     * fan-out (the same input stream used by several SQL statements) or when the same input appears twice in
     * one statement (e.g., select ... from input as i1, input as i2 ...). Anything else is an error. */
    if (systemDescriptor.getTransformer().isPresent()
        && !(systemDescriptor.getTransformer().get() instanceof SamzaSqlInputTransformer)) {
      throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
    }
  } else {
    // First use of this system: install the wrapper input transformer (SamzaSqlInputTransformer).
    systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
    systemDescriptors.put(systemName, systemDescriptor);
  }

  InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
  if (!inputMsgStreams.containsKey(source)) {
    MessageStream<SamzaSqlInputMessage> newInput = appDescriptor.getInputStream(inputDescriptor);
    inputMsgStreams.put(source, newInput.map(new SystemMessageMapperFunction(source, queryId)));
  }

  MessageStream<SamzaSqlInputMessage> sharedInput = inputMsgStreams.get(source);
  MessageStream<SamzaSqlRelMessage> relMessages = sharedInput
      .filter(new FilterSystemMessageFunction(sourceName, queryId))
      .map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
  context.registerMessageStream(tableScan.getId(), relMessages);
}
use of org.apache.samza.application.descriptors.StreamApplicationDescriptor in project samza by apache.
the class TestSchedulerFunction method testImmediateTimer.
/**
 * Runs an application that maps the in-memory "test-input" stream through a {@code TestFunction}, feeds it
 * the integers 1 to 5, and asserts that {@code timerFired} is set after the run.
 */
@Test
public void testImmediateTimer() {
  final InMemorySystemDescriptor system = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<Integer> input = system.getInputDescriptor("test-input", new IntegerSerde());

  // The whole application is a single map stage over the input stream.
  StreamApplication app = appDescriptor -> {
    appDescriptor.getInputStream(input).map(new TestFunction());
  };

  TestRunner.of(app)
      .addInputStream(input, Arrays.asList(1, 2, 3, 4, 5))
      .run(Duration.ofSeconds(1));

  assertTrue(timerFired.get());
}
use of org.apache.samza.application.descriptors.StreamApplicationDescriptor in project samza by apache.
the class RepartitionJoinWindowApp method describe.
/**
 * Describes a job that repartitions page views and ad clicks by view id, joins them, repartitions the joined
 * records by user id, counts them per user in 3-second keyed session windows and sends each count to the
 * configured output topic.
 *
 * <p>The ids of the three intermediate (repartition) streams are added to {@code intermediateStreamIds}.
 *
 * <p>Note carried over from the original code: tests need offset.default = oldest because the checkpoint
 * topic is empty on start and messages are published before the application is run. That setting is not
 * applied in this method.
 *
 * @param appDescriptor descriptor supplying the job config and the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Config config = appDescriptor.getConfig();
  String pageViewTopic = config.get(INPUT_TOPIC_1_CONFIG_KEY);
  String adClickTopic = config.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> adClickInput =
      kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  MessageStream<AdClick> adClicks = appDescriptor.getInputStream(adClickInput);

  // Repartition both inputs by view id; the join below works on the plain values.
  MessageStream<KV<String, PageView>> pageViewsByViewId = pageViews.partitionBy(
      PageView::getViewId, pv -> pv,
      new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
  MessageStream<PageView> pageViewValues = pageViewsByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksByViewId = adClicks.partitionBy(
      AdClick::getViewId, ac -> ac,
      new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
  MessageStream<AdClick> adClickValues = adClicksByViewId.map(KV::getValue);

  MessageStream<UserPageAdClick> joined = pageViewValues.join(
      adClickValues, new UserPageViewAdClicksJoiner(),
      new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
      Duration.ofMinutes(1), "pageViewAdClickJoin");

  // Repartition the joined records by user id before windowing per user.
  MessageStream<KV<String, UserPageAdClick>> joinedByUserId = joined.partitionBy(
      UserPageAdClick::getUserId, upac -> upac,
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");

  joinedByUserId
      .map(KV::getValue)
      .window(
          Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
              new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
          "userAdClickWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())))
      .sink((countByUser, collector, coordinator) -> {
        // Request a commit for all tasks in the container, then emit the per-user count.
        coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        collector.send(new OutgoingMessageEnvelope(
            new SystemStream("kafka", outputTopic), null, countByUser.getKey(), countByUser.getValue()));
      });

  // Record the ids of the three repartition streams.
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedByUserId).getStreamId());
}
Aggregations