Search in sources :

Example 1 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class SamzaExecutor method getTableSchema.

/**
 * Looks up the SQL schema of a table.
 *
 * <p>A Samza SQL config is built for a freshly allocated execution id, the table's source
 * info is resolved through the configured IO resolver, and the schema is obtained from the
 * {@code RelSchemaProvider} plugin named by that source info.
 *
 * @param context   execution context; not read by this method
 * @param tableName name of the table whose schema is requested
 * @return the schema reported by the table's {@code RelSchemaProvider}
 * @throws ExecutorException wrapping any {@code SamzaException} raised while resolving the schema
 */
@Override
public SqlSchema getTableSchema(ExecutionContext context, String tableName) throws ExecutorException {
    // Currently the shell works only for systems that have Avro schemas.
    Map<String, String> configEntries = fetchSamzaSqlConfig(execIdSeq.incrementAndGet());
    Config samzaSqlConfig = new MapConfig(configEntries);
    try {
        SqlIOResolver resolver = SamzaSqlApplicationConfig.createIOResolver(samzaSqlConfig);
        SqlIOConfig sourceInfo = resolver.fetchSourceInfo(tableName);
        // The factory lambda binds the provider to the system stream of the resolved source.
        RelSchemaProvider provider = SamzaSqlApplicationConfig.initializePlugin(
            "RelSchemaProvider",
            sourceInfo.getRelSchemaProviderName(),
            samzaSqlConfig,
            SamzaSqlApplicationConfig.CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN,
            (factory, cfg) -> ((RelSchemaProviderFactory) factory).create(sourceInfo.getSystemStream(), cfg));
        return provider.getSqlSchema();
    } catch (SamzaException ex) {
        throw new ExecutorException(ex);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) ExecutorException(org.apache.samza.sql.client.exceptions.ExecutorException) SqlSchema(org.apache.samza.sql.schema.SqlSchema) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) SqlIOResolver(org.apache.samza.sql.interfaces.SqlIOResolver) RelSchemaProvider(org.apache.samza.sql.interfaces.RelSchemaProvider) SamzaException(org.apache.samza.SamzaException)

Example 2 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class QueryPlanner method registerSourceSchemas.

/**
 * Registers one table per configured source beneath {@code rootSchema}.
 *
 * <p>Every source part except the last names a nested sub-schema, which is reused when it
 * already exists and created otherwise. The last part names the table, which is built from
 * the relational schema obtained for the source's {@code RelSchemaProvider}.
 *
 * @param rootSchema schema under which the sub-schemas and tables are added
 */
private void registerSourceSchemas(SchemaPlus rootSchema) {
    RelSchemaConverter converter = new RelSchemaConverter();
    for (SqlIOConfig ioConfig : systemStreamConfigBySource.values()) {
        List<String> parts = ioConfig.getSourceParts();
        RelSchemaProvider provider = relSchemaProviders.get(ioConfig.getSource());
        if (parts.isEmpty()) {
            // A source with no name parts has nothing to register.
            continue;
        }
        int lastIndex = parts.size() - 1;

        // Descend through the leading parts, creating sub-schemas that do not exist yet.
        SchemaPlus parent = rootSchema;
        for (String schemaName : parts.subList(0, lastIndex)) {
            SchemaPlus child = parent.getSubSchema(schemaName);
            if (child == null) {
                child = parent.add(schemaName, new AbstractSchema());
            }
            parent = child;
        }

        // The final part is the table: fetch the stream's relational schema and register it.
        String tableName = parts.get(lastIndex);
        RelDataType rowType = getSourceRelSchema(provider, converter);
        parent.add(tableName, createTableFromRelSchema(rowType));
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) AbstractSchema(org.apache.calcite.schema.impl.AbstractSchema) SchemaPlus(org.apache.calcite.schema.SchemaPlus) RelSchemaProvider(org.apache.samza.sql.interfaces.RelSchemaProvider) RelDataType(org.apache.calcite.rel.type.RelDataType)

Example 3 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class QueryTranslator method sendToOutputStream.

/**
 * Routes the output of {@code node} to the sink named {@code sinkStream}.
 *
 * <p>When the sink's config carries a table descriptor, the mapped output is sent to that
 * table. Otherwise it is sent to an output stream of the sink's system, and, if system-event
 * processing is enabled, the system messages of every registered input stream are sent to
 * the same output stream.
 *
 * @param queryLogicalId    logical id of the query, passed to the output map function
 * @param logicalOpId       logical id of the operator, passed to the output map function
 * @param sinkStream        name of the sink, used to look up its IO config
 * @param appDesc           application descriptor supplying output streams and tables
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node              relational node whose message stream is the output
 * @param queryId           id of the query, passed to the output map function
 * @throws SamzaException if the sink is a table and the application descriptor returns no table for it
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> outputStream = relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();

    // Table sink: write to the table and finish.
    if (tableDescriptor.isPresent()) {
        Table outputTable = appDesc.getTable(tableDescriptor.get());
        if (outputTable == null) {
            throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
        }
        outputStream.sendTo(outputTable);
        return;
    }

    // Stream sink: reuse (or lazily create) the system descriptor and the output stream.
    KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.computeIfAbsent(sinkConfig.getSystemName(), DelegatingSystemDescriptor::new);
    GenericOutputDescriptor<KV<Object, Object>> outputDescriptor = systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
    OutputStream sink = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
    outputStream.sendTo(sink);

    // System events are processed only when the output is a stream.
    if (!sqlConfig.isProcessSystemEvents()) {
        return;
    }
    for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
        MessageStream<KV<Object, Object>> systemEvents = inputStream
            .filter(message -> message.getMetadata().isSystemMessage())
            .map(SamzaSqlInputMessage::getKeyAndMessageKV);
        systemEvents.sendTo(sink);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) 
ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 4 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class ScanTranslator method translate.

// ScanMapFunction
/**
 * Translates a table scan into a message stream and registers it with the translator context.
 *
 * <p>For a source backed by a remote or caching table descriptor, no input stream is set up
 * and the method returns immediately. Otherwise the source's system descriptor is created
 * (with a {@code SamzaSqlInputTransformer}) or reused, the input stream for the source is
 * created once and cached in {@code inputMsgStreams}, and the filtered, mapped stream is
 * registered under the id of {@code tableScan}.
 *
 * @param tableScan         scan node being translated
 * @param queryLogicalId    logical id of the query, passed to the scan map function
 * @param logicalOpId       logical id of the operator, passed to the scan map function
 * @param context           translator context supplying the application descriptor and receiving the stream
 * @param systemDescriptors system descriptors keyed by system name; may be added to
 * @param inputMsgStreams   input streams keyed by source; may be added to
 * @throws SamzaException if an existing system descriptor carries a transformer that is not a
 *                        {@code SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
    StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();
    String sourceName = SqlIOConfig.getSourceFromSourceParts(tableScan.getTable().getQualifiedName());
    Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

    SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
    final String systemName = ioConfig.getSystemName();
    final String streamId = ioConfig.getStreamId();
    final String source = ioConfig.getSource();

    // A remote (or caching) table descriptor gets no input stream from this method.
    final boolean isRemoteTable = ioConfig.getTableDescriptor()
        .filter(descriptor -> descriptor instanceof RemoteTableDescriptor || descriptor instanceof CachingTableDescriptor)
        .isPresent();
    if (isRemoteTable) {
        return;
    }

    // Set the wrapper input transformer (SamzaSqlInputTransformer) in the system descriptor.
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
    if (systemDescriptor == null) {
        systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
        systemDescriptors.put(systemName, systemDescriptor);
    } else if (systemDescriptor.getTransformer().isPresent()
        && !(systemDescriptor.getTransformer().get() instanceof SamzaSqlInputTransformer)) {
        // In SamzaSQL no system descriptor should be set up by the user, so an existing one is
        // expected only for fan-out (the same input used by several statements) or when one
        // statement reads the same input twice; any other transformer is an error.
        throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
    }

    InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
    // Create the input stream for this source only on first use; later scans share it.
    if (!inputMsgStreams.containsKey(source)) {
        MessageStream<SamzaSqlInputMessage> rawInput = appDescriptor.getInputStream(inputDescriptor);
        inputMsgStreams.put(source, rawInput.map(new SystemMessageMapperFunction(source, queryId)));
    }

    MessageStream<SamzaSqlRelMessage> relMessages = inputMsgStreams.get(source)
        .filter(new FilterSystemMessageFunction(sourceName, queryId))
        .map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
    context.registerMessageStream(tableScan.getId(), relMessages);
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) SamzaException(org.apache.samza.SamzaException) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 5 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class SamzaSqlApplicationRunner method computeSamzaConfigs.

/**
 * Derives the Samza configuration for running the SQL statements found in {@code config}.
 *
 * <p>The result contains the serialized SQL statements, a stream-to-system mapping plus the
 * per-stream config for every input and output stream of those statements, all entries of
 * {@code config} (applied after the derived entries, so they replace derived values on key
 * collisions), and finally the application runner class.
 *
 * @param localRunner {@code true} to select {@code LocalApplicationRunner}, {@code false} to
 *                    select {@code RemoteApplicationRunner}; it is unboxed, so {@code null}
 *                    raises a {@code NullPointerException}
 * @param config      user-supplied configuration containing the SQL statements
 * @return a new config holding the derived and the supplied entries
 */
public static Config computeSamzaConfigs(Boolean localRunner, Config config) {
    Map<String, String> mergedConfig = new HashMap<>();

    // TODO: Introduce an API to return a dsl string containing one or more sql statements
    List<String> sqlStatements = SamzaSqlDslConverter.fetchSqlFromConfig(config);

    // The statements travel inside the config because the SQL file may not be available
    // on all the node managers.
    mergedConfig.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, SamzaSqlApplicationConfig.serializeSqlStmts(sqlStatements));

    List<String> sources = new LinkedList<>();
    List<String> sinks = new LinkedList<>();
    SamzaSqlApplicationConfig.populateSystemStreamsAndGetRelRoots(sqlStatements, config, sources, sinks);

    // Populate the stream-to-system mapping config for input and output system streams.
    SqlIOResolver ioResolver = SamzaSqlApplicationConfig.createIOResolver(config);
    for (String source : sources) {
        addStreamSystemMapping(mergedConfig, ioResolver.fetchSourceInfo(source));
    }
    for (String sink : sinks) {
        addStreamSystemMapping(mergedConfig, ioResolver.fetchSinkInfo(sink));
    }

    mergedConfig.putAll(config);

    Class<?> runnerClass = localRunner ? LocalApplicationRunner.class : RemoteApplicationRunner.class;
    mergedConfig.put(ApplicationConfig.APP_RUNNER_CLASS, runnerClass.getName());

    LOG.info("New Samza configs: " + mergedConfig);
    return new MapConfig(mergedConfig);
}

/** Adds the stream-to-system mapping and the stream's own config entries to {@code target}. */
private static void addStreamSystemMapping(Map<String, String> target, SqlIOConfig ioConfig) {
    target.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, ioConfig.getStreamId()), ioConfig.getSystemName());
    target.putAll(ioConfig.getConfig());
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) RemoteApplicationRunner(org.apache.samza.runtime.RemoteApplicationRunner) HashMap(java.util.HashMap) SqlIOResolver(org.apache.samza.sql.interfaces.SqlIOResolver) MapConfig(org.apache.samza.config.MapConfig) LinkedList(java.util.LinkedList) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner)

Aggregations

SqlIOConfig (org.apache.samza.sql.interfaces.SqlIOConfig)8 SamzaException (org.apache.samza.SamzaException)5 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)4 SamzaSqlRelMessage (org.apache.samza.sql.data.SamzaSqlRelMessage)4 RelNode (org.apache.calcite.rel.RelNode)3 TableScan (org.apache.calcite.rel.core.TableScan)3 LogicalFilter (org.apache.calcite.rel.logical.LogicalFilter)3 LogicalProject (org.apache.calcite.rel.logical.LogicalProject)3 MessageStream (org.apache.samza.operators.MessageStream)3 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 HepRelVertex (org.apache.calcite.plan.hep.HepRelVertex)2 RelDataType (org.apache.calcite.rel.type.RelDataType)2 RexCall (org.apache.calcite.rex.RexCall)2 RexInputRef (org.apache.calcite.rex.RexInputRef)2 RexNode (org.apache.calcite.rex.RexNode)2