use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.
the class SamzaExecutor method getTableSchema.
@Override
public SqlSchema getTableSchema(ExecutionContext context, String tableName) throws ExecutorException {
/*
* currently Shell works only for systems that has Avro schemas
*/
int execId = execIdSeq.incrementAndGet();
Map<String, String> staticConfigs = fetchSamzaSqlConfig(execId);
Config samzaSqlConfig = new MapConfig(staticConfigs);
SqlSchema sqlSchema;
try {
SqlIOResolver ioResolver = SamzaSqlApplicationConfig.createIOResolver(samzaSqlConfig);
SqlIOConfig sourceInfo = ioResolver.fetchSourceInfo(tableName);
RelSchemaProvider schemaProvider = SamzaSqlApplicationConfig.initializePlugin("RelSchemaProvider", sourceInfo.getRelSchemaProviderName(), samzaSqlConfig, SamzaSqlApplicationConfig.CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN, (o, c) -> ((RelSchemaProviderFactory) o).create(sourceInfo.getSystemStream(), c));
sqlSchema = schemaProvider.getSqlSchema();
} catch (SamzaException ex) {
throw new ExecutorException(ex);
}
return sqlSchema;
}
use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.
the class QueryPlanner method registerSourceSchemas.
private void registerSourceSchemas(SchemaPlus rootSchema) {
RelSchemaConverter relSchemaConverter = new RelSchemaConverter();
for (SqlIOConfig ssc : systemStreamConfigBySource.values()) {
SchemaPlus previousLevelSchema = rootSchema;
List<String> sourceParts = ssc.getSourceParts();
RelSchemaProvider relSchemaProvider = relSchemaProviders.get(ssc.getSource());
for (int sourcePartIndex = 0; sourcePartIndex < sourceParts.size(); sourcePartIndex++) {
String sourcePart = sourceParts.get(sourcePartIndex);
if (sourcePartIndex < sourceParts.size() - 1) {
SchemaPlus sourcePartSchema = previousLevelSchema.getSubSchema(sourcePart);
if (sourcePartSchema == null) {
sourcePartSchema = previousLevelSchema.add(sourcePart, new AbstractSchema());
}
previousLevelSchema = sourcePartSchema;
} else {
// If the source part is the last one, then fetch the schema corresponding to the stream and register.
RelDataType relationalSchema = getSourceRelSchema(relSchemaProvider, relSchemaConverter);
previousLevelSchema.add(sourcePart, createTableFromRelSchema(relationalSchema));
break;
}
}
}
}
use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.
the class QueryTranslator method sendToOutputStream.
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
if (!tableDescriptor.isPresent()) {
KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
String systemName = sinkConfig.getSystemName();
DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
outputStream.sendTo(stm);
// Process system events only if the output is a stream.
if (sqlConfig.isProcessSystemEvents()) {
for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
systemEventStream.sendTo(stm);
}
}
} else {
Table outputTable = appDesc.getTable(tableDescriptor.get());
if (outputTable == null) {
String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
throw new SamzaException(msg);
}
outputStream.sendTo(outputTable);
}
}
use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.
the class ScanTranslator method translate.
// ScanMapFunction
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
StreamApplicationDescriptor streamAppDesc = context.getStreamAppDescriptor();
List<String> tableNameParts = tableScan.getTable().getQualifiedName();
String sourceName = SqlIOConfig.getSourceFromSourceParts(tableNameParts);
Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));
SqlIOConfig sqlIOConfig = systemStreamConfig.get(sourceName);
final String systemName = sqlIOConfig.getSystemName();
final String streamId = sqlIOConfig.getStreamId();
final String source = sqlIOConfig.getSource();
final boolean isRemoteTable = sqlIOConfig.getTableDescriptor().isPresent() && (sqlIOConfig.getTableDescriptor().get() instanceof RemoteTableDescriptor || sqlIOConfig.getTableDescriptor().get() instanceof CachingTableDescriptor);
// descriptor to load the local table.
if (isRemoteTable) {
return;
}
// set the wrapper input transformer (SamzaSqlInputTransformer) in system descriptor
DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
if (systemDescriptor == null) {
systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
systemDescriptors.put(systemName, systemDescriptor);
} else {
/* in SamzaSQL, there should be no systemDescriptor setup by user, so this branch happens only
* in case of Fan-OUT (i.e., same input stream used in multiple sql statements), or when same input
* used twice in same sql statement (e.g., select ... from input as i1, input as i2 ...), o.w., throw error */
if (systemDescriptor.getTransformer().isPresent()) {
InputTransformer existingTransformer = systemDescriptor.getTransformer().get();
if (!(existingTransformer instanceof SamzaSqlInputTransformer)) {
throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
}
}
}
InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
if (!inputMsgStreams.containsKey(source)) {
MessageStream<SamzaSqlInputMessage> inputMsgStream = streamAppDesc.getInputStream(inputDescriptor);
inputMsgStreams.put(source, inputMsgStream.map(new SystemMessageMapperFunction(source, queryId)));
}
MessageStream<SamzaSqlRelMessage> samzaSqlRelMessageStream = inputMsgStreams.get(source).filter(new FilterSystemMessageFunction(sourceName, queryId)).map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
context.registerMessageStream(tableScan.getId(), samzaSqlRelMessageStream);
}
use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.
the class SamzaSqlApplicationRunner method computeSamzaConfigs.
public static Config computeSamzaConfigs(Boolean localRunner, Config config) {
Map<String, String> newConfig = new HashMap<>();
// TODO: Introduce an API to return a dsl string containing one or more sql statements
List<String> dslStmts = SamzaSqlDslConverter.fetchSqlFromConfig(config);
// This is needed because the SQL file may not be available in all the node managers.
String sqlJson = SamzaSqlApplicationConfig.serializeSqlStmts(dslStmts);
newConfig.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, sqlJson);
List<String> inputSystemStreams = new LinkedList<>();
List<String> outputSystemStreams = new LinkedList<>();
SamzaSqlApplicationConfig.populateSystemStreamsAndGetRelRoots(dslStmts, config, inputSystemStreams, outputSystemStreams);
SqlIOResolver ioResolver = SamzaSqlApplicationConfig.createIOResolver(config);
// Populate stream to system mapping config for input and output system streams
for (String source : inputSystemStreams) {
SqlIOConfig inputSystemStreamConfig = ioResolver.fetchSourceInfo(source);
newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, inputSystemStreamConfig.getStreamId()), inputSystemStreamConfig.getSystemName());
newConfig.putAll(inputSystemStreamConfig.getConfig());
}
for (String sink : outputSystemStreams) {
SqlIOConfig outputSystemStreamConfig = ioResolver.fetchSinkInfo(sink);
newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, outputSystemStreamConfig.getStreamId()), outputSystemStreamConfig.getSystemName());
newConfig.putAll(outputSystemStreamConfig.getConfig());
}
newConfig.putAll(config);
if (localRunner) {
newConfig.put(ApplicationConfig.APP_RUNNER_CLASS, LocalApplicationRunner.class.getName());
} else {
newConfig.put(ApplicationConfig.APP_RUNNER_CLASS, RemoteApplicationRunner.class.getName());
}
LOG.info("New Samza configs: " + newConfig);
return new MapConfig(newConfig);
}
Aggregations