Search in sources :

Example 16 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class AvroRelConverter method convertToRelMessage.

/**
 * Converts the nested avro object in SamzaMessage to relational message corresponding to
 * the tableName with relational schema.
 */
@Override
public SamzaSqlRelMessage convertToRelMessage(KV<Object, Object> samzaMessage) {
    List<String> payloadFieldNames = new ArrayList<>();
    List<Object> payloadFieldValues = new ArrayList<>();
    Object value = samzaMessage.getValue();
    if (value instanceof IndexedRecord) {
        fetchFieldNamesAndValuesFromIndexedRecord((IndexedRecord) value, payloadFieldNames, payloadFieldValues, payloadSchema);
    } else if (value == null) {
        // If the payload is null, set each record value as null
        payloadFieldNames.addAll(payloadSchema.getFields().stream().map(Schema.Field::name).collect(Collectors.toList()));
        IntStream.range(0, payloadFieldNames.size()).forEach(x -> payloadFieldValues.add(null));
    } else {
        String msg = "Avro message converter doesn't support messages of type " + value.getClass();
        LOG.error(msg);
        throw new SamzaException(msg);
    }
    return new SamzaSqlRelMessage(samzaMessage.getKey(), payloadFieldNames, payloadFieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
}
Also used : IntStream(java.util.stream.IntStream) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) ByteString(org.apache.calcite.avatica.util.ByteString) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) ByteBuffer(java.nio.ByteBuffer) SamzaException(org.apache.samza.SamzaException) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) List(java.util.List) Validate(org.apache.commons.lang3.Validate) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Config(org.apache.samza.config.Config) KV(org.apache.samza.operators.KV) IndexedRecord(org.apache.avro.generic.IndexedRecord) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) ByteString(org.apache.calcite.avatica.util.ByteString) SamzaException(org.apache.samza.SamzaException) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 17 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestStandaloneIntegrationApplication method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    String systemName = "testSystemName";
    String inputStreamName = appDescriptor.getConfig().get("input.stream.name");
    String outputStreamName = "standaloneIntegrationTestKafkaOutputTopic";
    LOGGER.info("Publishing message from: {} to: {}.", inputStreamName, outputStreamName);
    KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(systemName);
    KVSerde<Object, Object> noOpSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    KafkaInputDescriptor<KV<Object, Object>> isd = kafkaSystemDescriptor.getInputDescriptor(inputStreamName, noOpSerde);
    KafkaOutputDescriptor<KV<Object, Object>> osd = kafkaSystemDescriptor.getOutputDescriptor(outputStreamName, noOpSerde);
    appDescriptor.getInputStream(isd).sendTo(appDescriptor.getOutputStream(osd));
}
Also used : KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)

Example 18 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class QueryTranslator method sendToOutputStream.

private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
    if (!tableDescriptor.isPresent()) {
        KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
        String systemName = sinkConfig.getSystemName();
        DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
        GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
        OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
        outputStream.sendTo(stm);
        // Process system events only if the output is a stream.
        if (sqlConfig.isProcessSystemEvents()) {
            for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
                MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
                systemEventStream.sendTo(stm);
            }
        }
    } else {
        Table outputTable = appDesc.getTable(tableDescriptor.get());
        if (outputTable == null) {
            String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
            throw new SamzaException(msg);
        }
        outputStream.sendTo(outputTable);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 19 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class RepartitionJoinWindowApp method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
    // before the application is run
    Config config = appDescriptor.getConfig();
    String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
    String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
    String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
    KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
    KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));
    MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
    MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);
    MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews.partitionBy(PageView::getViewId, pv -> pv, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
    MessageStream<PageView> pageViewsRepartitionedByViewIdValueONly = pageViewsRepartitionedByViewId.map(KV::getValue);
    MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks.partitionBy(AdClick::getViewId, ac -> ac, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
    MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);
    MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueONly.join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(), new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class), Duration.ofMinutes(1), "pageViewAdClickJoin");
    MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks.partitionBy(UserPageAdClick::getUserId, upac -> upac, KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");
    userPageAdClicksByUserId.map(KV::getValue).window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow").map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size()))).sink((message, messageCollector, taskCoordinator) -> {
        taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        messageCollector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
    });
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}
Also used : Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) PageView(org.apache.samza.test.operator.data.PageView) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) Config(org.apache.samza.config.Config) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope)

Example 20 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestRunner method initializeInMemoryInputStream.

/**
 * Creates an in memory stream with {@link InMemorySystemFactory} and feeds its partition with stream of messages
 * @param partitionData key of the map represents partitionId and value represents messages in the partition
 * @param descriptor describes a stream to initialize with the in memory system
 */
private <StreamMessageType> void initializeInMemoryInputStream(InMemoryInputDescriptor<?> descriptor, Map<Integer, Iterable<StreamMessageType>> partitionData) {
    String systemName = descriptor.getSystemName();
    String streamName = (String) descriptor.getPhysicalName().orElse(descriptor.getStreamId());
    if (this.app instanceof LegacyTaskApplication) {
        // for legacy applications that only specify task.class.
        if (configs.containsKey(TaskConfig.INPUT_STREAMS)) {
            configs.put(TaskConfig.INPUT_STREAMS, configs.get(TaskConfig.INPUT_STREAMS).concat("," + systemName + "." + streamName));
        } else {
            configs.put(TaskConfig.INPUT_STREAMS, systemName + "." + streamName);
        }
    }
    InMemorySystemDescriptor imsd = (InMemorySystemDescriptor) descriptor.getSystemDescriptor();
    imsd.withInMemoryScope(this.inMemoryScope);
    addConfig(descriptor.toConfig());
    addConfig(descriptor.getSystemDescriptor().toConfig());
    addSerdeConfigs(descriptor);
    StreamSpec spec = new StreamSpec(descriptor.getStreamId(), streamName, systemName, partitionData.size());
    SystemFactory factory = new InMemorySystemFactory();
    Config config = new MapConfig(descriptor.toConfig(), descriptor.getSystemDescriptor().toConfig());
    factory.getAdmin(systemName, config).createStream(spec);
    InMemorySystemProducer producer = (InMemorySystemProducer) factory.getProducer(systemName, config, null);
    SystemStream sysStream = new SystemStream(systemName, streamName);
    partitionData.forEach((partitionId, partition) -> {
        partition.forEach(e -> {
            Object key = e instanceof KV ? ((KV) e).getKey() : null;
            Object value = e instanceof KV ? ((KV) e).getValue() : e;
            if (value instanceof IncomingMessageEnvelope) {
                producer.send((IncomingMessageEnvelope) value);
            } else {
                producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), key, value));
            }
        });
        producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), null, new EndOfStreamMessage(null)));
    });
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) SystemStream(org.apache.samza.system.SystemStream) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) InMemorySystemProducer(org.apache.samza.system.inmemory.InMemorySystemProducer)

Aggregations

KV (org.apache.samza.operators.KV)68 Test (org.junit.Test)38 StringSerde (org.apache.samza.serializers.StringSerde)33 KVSerde (org.apache.samza.serializers.KVSerde)30 HashMap (java.util.HashMap)28 NoOpSerde (org.apache.samza.serializers.NoOpSerde)26 List (java.util.List)25 Duration (java.time.Duration)24 ArrayList (java.util.ArrayList)24 StreamApplication (org.apache.samza.application.StreamApplication)22 Config (org.apache.samza.config.Config)22 Map (java.util.Map)20 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)20 Table (org.apache.samza.table.Table)19 MapConfig (org.apache.samza.config.MapConfig)18 MessageStream (org.apache.samza.operators.MessageStream)18 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)18 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)17 Collectors (java.util.stream.Collectors)16 SamzaException (org.apache.samza.SamzaException)16