Search in sources:

Example 1 with SamzaSqlRelMessage

use of org.apache.samza.sql.data.SamzaSqlRelMessage in project samza by apache.

the class AvroRelConverter method convertToRelMessage.

/**
 * Builds a relational message from the payload carried by {@code samzaMessage}.
 *
 * <ul>
 *   <li>An {@link IndexedRecord} payload is flattened into field names and values via
 *       {@code fetchFieldNamesAndValuesFromIndexedRecord}, using {@code payloadSchema}.</li>
 *   <li>A {@code null} payload yields one {@code null} value per field of {@code payloadSchema}.</li>
 *   <li>Any other payload type is logged and rejected.</li>
 * </ul>
 *
 * @param samzaMessage key/value pair whose value is the payload to convert
 * @return a relational message carrying the original key, the collected field names and values,
 *         and metadata constructed with {@code (0L, 0L)}
 * @throws SamzaException if the payload is neither {@code null} nor an {@link IndexedRecord}
 */
@Override
public SamzaSqlRelMessage convertToRelMessage(KV<Object, Object> samzaMessage) {
    final Object payload = samzaMessage.getValue();

    // Reject unsupported payload types up front; null and IndexedRecord are handled below.
    if (payload != null && !(payload instanceof IndexedRecord)) {
        String msg = "Avro message converter doesn't support messages of type " + payload.getClass();
        LOG.error(msg);
        throw new SamzaException(msg);
    }

    final List<String> names = new ArrayList<>();
    final List<Object> values = new ArrayList<>();
    if (payload == null) {
        // No payload: keep the schema's column layout but leave every value null.
        for (Schema.Field field : payloadSchema.getFields()) {
            names.add(field.name());
            values.add(null);
        }
    } else {
        fetchFieldNamesAndValuesFromIndexedRecord((IndexedRecord) payload, names, values, payloadSchema);
    }

    return new SamzaSqlRelMessage(samzaMessage.getKey(), names, values, new SamzaSqlRelMsgMetadata(0L, 0L));
}
Also used : IntStream(java.util.stream.IntStream) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) Logger(org.slf4j.Logger) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) ByteString(org.apache.calcite.avatica.util.ByteString) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) ByteBuffer(java.nio.ByteBuffer) SamzaException(org.apache.samza.SamzaException) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) List(java.util.List) Validate(org.apache.commons.lang3.Validate) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Config(org.apache.samza.config.Config) KV(org.apache.samza.operators.KV) IndexedRecord(org.apache.avro.generic.IndexedRecord) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) ByteString(org.apache.calcite.avatica.util.ByteString) SamzaException(org.apache.samza.SamzaException) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 2 with SamzaSqlRelMessage

use of org.apache.samza.sql.data.SamzaSqlRelMessage in project samza by apache.

the class LogicalAggregateTranslator method translate.

/**
 * Translates {@code aggregate} into a keyed tumbling-window count over its input stream and
 * registers the resulting stream in {@code context} under the aggregate's id.
 *
 * <p>Each emitted message reuses the field-name and field-value lists obtained from the window
 * key's record, appending the first aggregate field name and the window's count to them.
 *
 * @param aggregate the logical aggregate node to translate
 * @param context   translator context supplying the input stream and receiving the output stream
 */
void translate(final LogicalAggregate aggregate, final TranslatorContext context) {
    validateAggregateFunctions(aggregate);
    final MessageStream<SamzaSqlRelMessage> source = context.getMessageStream(aggregate.getInput().getId());

    // At this point, the assumption is that only count function is supported:
    // start every window at zero and add one per message.
    final SupplierFunction<Long> zeroCount = () -> 0L;
    final FoldLeftFunction<SamzaSqlRelMessage, Long> incrementCount = (message, count) -> count + 1;
    final List<String> aggregateColumns = getAggFieldNames(aggregate);

    final MessageStream<SamzaSqlRelMessage> meteredSource =
        source.map(new TranslatorInputMetricsMapFunction(logicalOpId));
    final Duration windowLength =
        Duration.ofMillis(context.getExecutionContext().getSamzaSqlApplicationConfig().getWindowDurationMs());
    final String windowId = changeLogStorePrefix + "_tumblingWindow_" + logicalOpId;

    final MessageStream<SamzaSqlRelMessage> counted = meteredSource
        .window(
            Windows.keyedTumblingWindow(message -> message, windowLength, zeroCount, incrementCount,
                    new SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde(), new LongSerde())
                .setAccumulationMode(AccumulationMode.DISCARDING),
            windowId)
        .map(pane -> {
            // The window is keyed by the whole message, so the key carries the group-by fields.
            final SamzaSqlRelMessage groupKey = pane.getKey().getKey();
            final List<String> names = groupKey.getSamzaSqlRelRecord().getFieldNames();
            final List<Object> values = groupKey.getSamzaSqlRelRecord().getFieldValues();
            // Appended in place to the lists returned by the key's record.
            names.add(aggregateColumns.get(0));
            values.add(pane.getMessage());
            return new SamzaSqlRelMessage(names, values, new SamzaSqlRelMsgMetadata(0L, 0L));
        });

    context.registerMessageStream(aggregate.getId(), counted);
    // Attach the output-metrics function downstream of the registered stream; its result is not kept.
    counted.map(new TranslatorOutputMetricsMapFunction(logicalOpId));
}
Also used : SqlKind(org.apache.calcite.sql.SqlKind) Windows(org.apache.samza.operators.windows.Windows) Logger(org.slf4j.Logger) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LoggerFactory(org.slf4j.LoggerFactory) SamzaException(org.apache.samza.SamzaException) ArrayList(java.util.ArrayList) List(java.util.List) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) LongSerde(org.apache.samza.serializers.LongSerde) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 3 with SamzaSqlRelMessage

use of org.apache.samza.sql.data.SamzaSqlRelMessage in project samza by apache.

the class QueryTranslator method sendToOutputStream.

/**
 * Sends the relational stream registered for {@code node} to the sink identified by
 * {@code sinkStream}.
 *
 * <p>If the sink's IO config carries a table descriptor, the mapped stream is sent to that table.
 * Otherwise it is sent to an output stream obtained from {@code appDesc}; in that case, when
 * {@code sqlConfig.isProcessSystemEvents()} is true, system messages from every known input
 * stream are forwarded to the same output stream.
 *
 * @param queryLogicalId    passed through to {@code OutputMapFunction}
 * @param logicalOpId       passed through to {@code OutputMapFunction}
 * @param sinkStream        key used to look up the sink's {@link SqlIOConfig}
 * @param appDesc           application descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node              relational node whose stream is written out
 * @param queryId           passed through to {@code OutputMapFunction}
 * @throws SamzaException if the sink is a table and {@code appDesc.getTable} returns {@code null}
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    final SqlIOConfig sinkIoConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    final MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
    final MessageStream<KV<Object, Object>> kvStream =
        relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    final Optional<TableDescriptor> sinkTable = sinkIoConfig.getTableDescriptor();

    // Table sink: resolve the table and write to it; nothing else to wire up.
    if (sinkTable.isPresent()) {
        final Table table = appDesc.getTable(sinkTable.get());
        if (table == null) {
            throw new SamzaException("Failed to obtain table descriptor of " + sinkIoConfig.getSource());
        }
        kvStream.sendTo(table);
        return;
    }

    // Stream sink: reuse the system descriptor and output stream if already created for this sink.
    final KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    final String sinkSystem = sinkIoConfig.getSystemName();
    final DelegatingSystemDescriptor systemDescriptor =
        systemDescriptors.computeIfAbsent(sinkSystem, DelegatingSystemDescriptor::new);
    final GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
        systemDescriptor.getOutputDescriptor(sinkIoConfig.getStreamId(), passThroughSerde);
    final OutputStream sink =
        outputMsgStreams.computeIfAbsent(sinkIoConfig.getSource(), unusedKey -> appDesc.getOutputStream(outputDescriptor));
    kvStream.sendTo(sink);

    // Process system events only if the output is a stream.
    if (!sqlConfig.isProcessSystemEvents()) {
        return;
    }
    for (MessageStream<SamzaSqlInputMessage> input : inputMsgStreams.values()) {
        final MessageStream<KV<Object, Object>> systemEvents = input
            .filter(message -> message.getMetadata().isSystemMessage())
            .map(SamzaSqlInputMessage::getKeyAndMessageKV);
        systemEvents.sendTo(sink);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) 
ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 4 with SamzaSqlRelMessage

use of org.apache.samza.sql.data.SamzaSqlRelMessage in project samza by apache.

the class ScanTranslator method translate.

/**
 * Translates {@code tableScan} into a stream of {@link SamzaSqlRelMessage}s and registers it in
 * {@code context} under the scan's id.
 *
 * <p>When the source's table descriptor is a {@link RemoteTableDescriptor} or a
 * {@link CachingTableDescriptor}, the method returns without creating or registering any stream.
 * Otherwise it makes sure a {@link DelegatingSystemDescriptor} exists for the source's system,
 * obtains (or reuses) the input message stream for the source, filters it through
 * {@code FilterSystemMessageFunction} and maps it through {@code ScanMapFunction}.
 *
 * @param tableScan         scan node being translated
 * @param queryLogicalId    passed through to {@code ScanMapFunction}
 * @param logicalOpId       passed through to {@code ScanMapFunction}
 * @param context           translator context supplying the application descriptor and receiving
 *                          the resulting stream
 * @param systemDescriptors system descriptors keyed by system name; a new entry is added when the
 *                          source's system has none yet
 * @param inputMsgStreams   input streams keyed by source; a new entry is added when the source has
 *                          none yet
 * @throws IllegalArgumentException if {@code relMsgConverters} has no entry for the scanned source
 * @throws NullPointerException     if {@code systemStreamConfig} has no entry for the scanned source
 * @throws SamzaException           if the system already has a transformer that is not a
 *                                  {@link SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
    StreamApplicationDescriptor streamAppDesc = context.getStreamAppDescriptor();
    List<String> tableNameParts = tableScan.getTable().getQualifiedName();
    String sourceName = SqlIOConfig.getSourceFromSourceParts(tableNameParts);
    // Let Validate format the message lazily: nothing is formatted on the success path, and the
    // source name is treated as an argument rather than as part of a format string.
    Validate.isTrue(relMsgConverters.containsKey(sourceName), "Unknown source %s", sourceName);
    SqlIOConfig sqlIOConfig = systemStreamConfig.get(sourceName);
    // The check above covers relMsgConverters only; fail with a clear message if the IO config
    // map has no matching entry instead of an anonymous NPE on the next dereference.
    Validate.notNull(sqlIOConfig, "No IO config found for source %s", sourceName);
    final String systemName = sqlIOConfig.getSystemName();
    final String streamId = sqlIOConfig.getStreamId();
    final String source = sqlIOConfig.getSource();
    final boolean isRemoteTable = sqlIOConfig.getTableDescriptor()
        .filter(descriptor -> descriptor instanceof RemoteTableDescriptor || descriptor instanceof CachingTableDescriptor)
        .isPresent();
    // Remote and caching tables get no input stream here; any other source (including other
    // kinds of table) continues below and gets an input stream descriptor.
    if (isRemoteTable) {
        return;
    }
    // set the wrapper input transformer (SamzaSqlInputTransformer) in system descriptor
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
    if (systemDescriptor == null) {
        systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
        systemDescriptors.put(systemName, systemDescriptor);
    } else {
        /* in SamzaSQL, there should be no systemDescriptor setup by user, so this branch happens only
         * in case of Fan-OUT (i.e., same input stream used in multiple sql statements), or when same input
         * used twice in same sql statement (e.g., select ... from input as i1, input as i2 ...), o.w., throw error */
        if (systemDescriptor.getTransformer().isPresent()) {
            InputTransformer existingTransformer = systemDescriptor.getTransformer().get();
            if (!(existingTransformer instanceof SamzaSqlInputTransformer)) {
                throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
            }
        }
    }
    InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
    // Create the input stream for this source only once; later scans of the same source reuse it.
    if (!inputMsgStreams.containsKey(source)) {
        MessageStream<SamzaSqlInputMessage> inputMsgStream = streamAppDesc.getInputStream(inputDescriptor);
        inputMsgStreams.put(source, inputMsgStream.map(new SystemMessageMapperFunction(source, queryId)));
    }
    MessageStream<SamzaSqlRelMessage> samzaSqlRelMessageStream = inputMsgStreams.get(source).filter(new FilterSystemMessageFunction(sourceName, queryId)).map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
    context.registerMessageStream(tableScan.getId(), samzaSqlRelMessageStream);
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) SamzaException(org.apache.samza.SamzaException) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 5 with SamzaSqlRelMessage

use of org.apache.samza.sql.data.SamzaSqlRelMessage in project samza by apache.

the class TestAvroRelConversion method testComplexUnionConversionShouldWorkWithBothStringAndIntTypes.

/**
 * Checks that the {@code non_nullable_union_value} field of {@code ComplexUnion} keeps its value
 * through serialization and {@code convertToRelMessage}, first with a String and then with an
 * Integer, for both the generic-record and specific-record encodings.
 */
@Test
public void testComplexUnionConversionShouldWorkWithBothStringAndIntTypes() throws Exception {
    final String unionField = "non_nullable_union_value";
    final Integer intValue = Integer.valueOf(123);

    // ComplexUnion is a nested avro non-nullable union-type with both String and Integer type.
    // --- String branch of the union ---
    GenericData.Record genericSource = new GenericData.Record(ComplexUnion.SCHEMA$);
    genericSource.put(unionField, testStrValue);
    ComplexUnion specificSource = new ComplexUnion();
    specificSource.non_nullable_union_value = testStrValue;

    byte[] stringGenericBytes = bytesFromGenericRecord(genericSource);
    GenericRecord stringFromGeneric = genericRecordFromBytes(stringGenericBytes, ComplexUnion.SCHEMA$);
    SamzaSqlRelMessage stringMessage =
        complexUnionAvroRelConverter.convertToRelMessage(new KV<>("key", stringFromGeneric));
    Assert.assertEquals(testStrValue,
        stringMessage.getSamzaSqlRelRecord().getField(unionField).get().toString());

    byte[] stringSpecificBytes = encodeAvroSpecificRecord(ComplexUnion.class, specificSource);
    GenericRecord stringFromSpecific = genericRecordFromBytes(stringSpecificBytes, ComplexUnion.SCHEMA$);
    Assert.assertEquals(testStrValue, stringFromSpecific.get(unionField).toString());

    // --- Integer branch of the union, reusing the same source records ---
    genericSource.put(unionField, intValue);
    specificSource.non_nullable_union_value = intValue;

    byte[] intGenericBytes = bytesFromGenericRecord(genericSource);
    GenericRecord intFromGeneric = genericRecordFromBytes(intGenericBytes, ComplexUnion.SCHEMA$);
    SamzaSqlRelMessage intMessage =
        complexUnionAvroRelConverter.convertToRelMessage(new KV<>("key", intFromGeneric));
    Assert.assertEquals(intValue, intMessage.getSamzaSqlRelRecord().getField(unionField).get());

    byte[] intSpecificBytes = encodeAvroSpecificRecord(ComplexUnion.class, specificSource);
    GenericRecord intFromSpecific = genericRecordFromBytes(intSpecificBytes, ComplexUnion.SCHEMA$);
    Assert.assertEquals(intValue, intFromSpecific.get(unionField));
}
Also used : ComplexUnion(org.apache.samza.sql.avro.schemas.ComplexUnion) ComplexRecord(org.apache.samza.sql.avro.schemas.ComplexRecord) StreetNumRecord(org.apache.samza.sql.avro.schemas.StreetNumRecord) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) SimpleRecord(org.apache.samza.sql.avro.schemas.SimpleRecord) AddressRecord(org.apache.samza.sql.avro.schemas.AddressRecord) KV(org.apache.samza.operators.KV) GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Aggregations

SamzaSqlRelMessage (org.apache.samza.sql.data.SamzaSqlRelMessage)28 Test (org.junit.Test)19 SamzaSqlRelMsgMetadata (org.apache.samza.sql.data.SamzaSqlRelMsgMetadata)14 ArrayList (java.util.ArrayList)13 GenericData (org.apache.avro.generic.GenericData)10 GenericRecord (org.apache.avro.generic.GenericRecord)10 HashMap (java.util.HashMap)9 IndexedRecord (org.apache.avro.generic.IndexedRecord)7 AddressRecord (org.apache.samza.sql.avro.schemas.AddressRecord)7 SimpleRecord (org.apache.samza.sql.avro.schemas.SimpleRecord)7 StreetNumRecord (org.apache.samza.sql.avro.schemas.StreetNumRecord)7 Schema (org.apache.avro.Schema)6 KV (org.apache.samza.operators.KV)6 MessageStream (org.apache.samza.operators.MessageStream)6 SamzaSqlRelRecord (org.apache.samza.sql.SamzaSqlRelRecord)6 List (java.util.List)5 ByteString (org.apache.calcite.avatica.util.ByteString)5 RelNode (org.apache.calcite.rel.RelNode)5 JoinRelType (org.apache.calcite.rel.core.JoinRelType)5 Logger (org.slf4j.Logger)5