use of org.apache.samza.operators.KV in project samza by apache.
the class AvroRelConverter method convertToRelMessage.
/**
* Converts the nested avro object in SamzaMessage to relational message corresponding to
* the tableName with relational schema.
*/
@Override
public SamzaSqlRelMessage convertToRelMessage(KV<Object, Object> samzaMessage) {
List<String> payloadFieldNames = new ArrayList<>();
List<Object> payloadFieldValues = new ArrayList<>();
Object value = samzaMessage.getValue();
if (value instanceof IndexedRecord) {
fetchFieldNamesAndValuesFromIndexedRecord((IndexedRecord) value, payloadFieldNames, payloadFieldValues, payloadSchema);
} else if (value == null) {
// If the payload is null, set each record value as null
payloadFieldNames.addAll(payloadSchema.getFields().stream().map(Schema.Field::name).collect(Collectors.toList()));
IntStream.range(0, payloadFieldNames.size()).forEach(x -> payloadFieldValues.add(null));
} else {
String msg = "Avro message converter doesn't support messages of type " + value.getClass();
LOG.error(msg);
throw new SamzaException(msg);
}
return new SamzaSqlRelMessage(samzaMessage.getKey(), payloadFieldNames, payloadFieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestStandaloneIntegrationApplication method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
String systemName = "testSystemName";
String inputStreamName = appDescriptor.getConfig().get("input.stream.name");
String outputStreamName = "standaloneIntegrationTestKafkaOutputTopic";
LOGGER.info("Publishing message from: {} to: {}.", inputStreamName, outputStreamName);
KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(systemName);
KVSerde<Object, Object> noOpSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
KafkaInputDescriptor<KV<Object, Object>> isd = kafkaSystemDescriptor.getInputDescriptor(inputStreamName, noOpSerde);
KafkaOutputDescriptor<KV<Object, Object>> osd = kafkaSystemDescriptor.getOutputDescriptor(outputStreamName, noOpSerde);
appDescriptor.getInputStream(isd).sendTo(appDescriptor.getOutputStream(osd));
}
use of org.apache.samza.operators.KV in project samza by apache.
the class QueryTranslator method sendToOutputStream.
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
if (!tableDescriptor.isPresent()) {
KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
String systemName = sinkConfig.getSystemName();
DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
outputStream.sendTo(stm);
// Process system events only if the output is a stream.
if (sqlConfig.isProcessSystemEvents()) {
for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
systemEventStream.sendTo(stm);
}
}
} else {
Table outputTable = appDesc.getTable(tableDescriptor.get());
if (outputTable == null) {
String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
throw new SamzaException(msg);
}
outputStream.sendTo(outputTable);
}
}
use of org.apache.samza.operators.KV in project samza by apache.
the class RepartitionJoinWindowApp method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
// offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
// before the application is run
Config config = appDescriptor.getConfig();
String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));
MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);
MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews.partitionBy(PageView::getViewId, pv -> pv, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
MessageStream<PageView> pageViewsRepartitionedByViewIdValueONly = pageViewsRepartitionedByViewId.map(KV::getValue);
MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks.partitionBy(AdClick::getViewId, ac -> ac, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);
MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueONly.join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(), new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class), Duration.ofMinutes(1), "pageViewAdClickJoin");
MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks.partitionBy(UserPageAdClick::getUserId, upac -> upac, KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");
userPageAdClicksByUserId.map(KV::getValue).window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow").map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size()))).sink((message, messageCollector, taskCoordinator) -> {
taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
messageCollector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
});
intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestRunner method initializeInMemoryInputStream.
/**
* Creates an in memory stream with {@link InMemorySystemFactory} and feeds its partition with stream of messages
* @param partitionData key of the map represents partitionId and value represents messages in the partition
* @param descriptor describes a stream to initialize with the in memory system
*/
private <StreamMessageType> void initializeInMemoryInputStream(InMemoryInputDescriptor<?> descriptor, Map<Integer, Iterable<StreamMessageType>> partitionData) {
String systemName = descriptor.getSystemName();
String streamName = (String) descriptor.getPhysicalName().orElse(descriptor.getStreamId());
if (this.app instanceof LegacyTaskApplication) {
// for legacy applications that only specify task.class.
if (configs.containsKey(TaskConfig.INPUT_STREAMS)) {
configs.put(TaskConfig.INPUT_STREAMS, configs.get(TaskConfig.INPUT_STREAMS).concat("," + systemName + "." + streamName));
} else {
configs.put(TaskConfig.INPUT_STREAMS, systemName + "." + streamName);
}
}
InMemorySystemDescriptor imsd = (InMemorySystemDescriptor) descriptor.getSystemDescriptor();
imsd.withInMemoryScope(this.inMemoryScope);
addConfig(descriptor.toConfig());
addConfig(descriptor.getSystemDescriptor().toConfig());
addSerdeConfigs(descriptor);
StreamSpec spec = new StreamSpec(descriptor.getStreamId(), streamName, systemName, partitionData.size());
SystemFactory factory = new InMemorySystemFactory();
Config config = new MapConfig(descriptor.toConfig(), descriptor.getSystemDescriptor().toConfig());
factory.getAdmin(systemName, config).createStream(spec);
InMemorySystemProducer producer = (InMemorySystemProducer) factory.getProducer(systemName, config, null);
SystemStream sysStream = new SystemStream(systemName, streamName);
partitionData.forEach((partitionId, partition) -> {
partition.forEach(e -> {
Object key = e instanceof KV ? ((KV) e).getKey() : null;
Object value = e instanceof KV ? ((KV) e).getValue() : e;
if (value instanceof IncomingMessageEnvelope) {
producer.send((IncomingMessageEnvelope) value);
} else {
producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), key, value));
}
});
producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), null, new EndOfStreamMessage(null)));
});
}
Aggregations