Use of org.apache.samza.operators.spec.StoreDescriptor in project samza by apache.
From the class JobNodeConfigurationGenerator, method generateJobConfig:
JobConfig generateJobConfig(JobNode jobNode, String executionPlanJson) {
  if (jobNode.isLegacyTaskApplication()) {
    return new JobConfig(jobNode.getConfig());
  }

  Map<String, String> generatedConfig = new HashMap<>();
  // set up job name and job ID
  generatedConfig.put(JobConfig.JOB_NAME, jobNode.getJobName());
  generatedConfig.put(JobConfig.JOB_ID, jobNode.getJobId());

  Map<String, StreamEdge> inEdges = jobNode.getInEdges();
  Map<String, StreamEdge> outEdges = jobNode.getOutEdges();
  Collection<OperatorSpec> reachableOperators = jobNode.getReachableOperators();
  List<StoreDescriptor> stores = getStoreDescriptors(reachableOperators);
  Map<String, TableDescriptor> reachableTables = getReachableTables(reachableOperators, jobNode);

  // config passed by the JobPlanner: user-provided + system-stream descriptor config + misc. other config
  Config originalConfig = jobNode.getConfig();

  // check all inputs to the node for broadcast and input streams
  final Set<String> inputs = new HashSet<>();
  final Set<String> broadcastInputs = new HashSet<>();
  for (StreamEdge inEdge : inEdges.values()) {
    String formattedSystemStream = inEdge.getName();
    if (inEdge.isBroadcast()) {
      if (inEdge.getPartitionCount() > 1) {
        broadcastInputs.add(formattedSystemStream + "#[0-" + (inEdge.getPartitionCount() - 1) + "]");
      } else {
        broadcastInputs.add(formattedSystemStream + "#0");
      }
    } else {
      inputs.add(formattedSystemStream);
    }
  }
  configureBroadcastInputs(generatedConfig, originalConfig, broadcastInputs);

  // compute window and join operator intervals in this node
  configureWindowInterval(generatedConfig, originalConfig, reachableOperators);

  // set store configuration for stateful operators
  stores.forEach(sd -> generatedConfig.putAll(sd.getStorageConfigs()));

  // set the execution plan in json
  generatedConfig.put(CONFIG_INTERNAL_EXECUTION_PLAN, executionPlanJson);

  // write intermediate input/output streams to configs
  inEdges.values().stream().filter(StreamEdge::isIntermediate)
      .forEach(intermediateEdge -> generatedConfig.putAll(intermediateEdge.generateConfig()));

  // write serialized serde instances and stream, store, and table serdes to configs
  // serde configuration generation has to happen before table configuration, since the serde configuration
  // is required when generating configurations for some TableProviders (e.g. local store backed tables)
  configureSerdes(generatedConfig, inEdges, outEdges, stores, reachableTables.keySet(), jobNode);

  // generate table configuration and potential side input configuration
  configureTables(generatedConfig, originalConfig, reachableTables, inputs);

  // generate the task.inputs configuration
  generatedConfig.put(TaskConfig.INPUT_STREAMS, Joiner.on(',').join(inputs));

  LOG.info("Job {} has generated configs {}", jobNode.getJobNameAndId(), generatedConfig);
  return new JobConfig(mergeConfig(originalConfig, generatedConfig));
}
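
The getStoreDescriptors helper called near the top is not shown on this page. A minimal sketch of what it plausibly does, assuming StatefulOperatorSpec is the interface through which a stateful operator exposes its StoreDescriptors:

import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.samza.operators.spec.OperatorSpec;
import org.apache.samza.operators.spec.StatefulOperatorSpec;
import org.apache.samza.operators.spec.StoreDescriptor;

// Sketch: collect the StoreDescriptors declared by every stateful operator
// reachable from this JobNode (assumes StatefulOperatorSpec#getStoreDescriptors).
List<StoreDescriptor> getStoreDescriptors(Collection<OperatorSpec> reachableOperators) {
  return reachableOperators.stream()
      .filter(operatorSpec -> operatorSpec instanceof StatefulOperatorSpec)
      .map(operatorSpec -> ((StatefulOperatorSpec) operatorSpec).getStoreDescriptors())
      .flatMap(Collection::stream)
      .collect(Collectors.toList());
}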
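
configureBroadcastInputs is likewise elided here. A hedged sketch, assuming it joins the formatted partition-range entries built above (e.g. "system.stream#[0-3]" for a four-partition broadcast stream, "system.stream#0" for a single partition) into Samza's task.broadcast.inputs property:

import java.util.Map;
import java.util.Set;
import com.google.common.base.Joiner;
import org.apache.samza.config.Config;
import org.apache.samza.config.TaskConfig;

// Sketch: write the broadcast entries under task.broadcast.inputs
// (assumed here to be TaskConfig.BROADCAST_INPUT_STREAMS); merging with any
// pre-existing user-configured broadcast streams is omitted for brevity.
void configureBroadcastInputs(Map<String, String> configs, Config config, Set<String> broadcastStreams) {
  if (!broadcastStreams.isEmpty()) {
    configs.put(TaskConfig.BROADCAST_INPUT_STREAMS, Joiner.on(',').join(broadcastStreams));
  }
}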
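
The stores.forEach(...) line is where StoreDescriptor actually contributes to the job configuration: each descriptor flattens itself into plain stores.* properties via getStorageConfigs(). An illustrative, hypothetical example of the resulting entries for a RocksDB-backed store named "my-store" (store name, serde names, and changelog stream are invented for illustration):

import java.util.Map;
import com.google.common.collect.ImmutableMap;

// Roughly the shape of the map returned by StoreDescriptor#getStorageConfigs()
// for one store; the key names follow Samza's standard stores.* configuration.
Map<String, String> exampleStorageConfigs = ImmutableMap.of(
    "stores.my-store.factory", "org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory",
    "stores.my-store.key.serde", "my-store-key-serde",
    "stores.my-store.msg.serde", "my-store-msg-serde",
    "stores.my-store.changelog", "kafka.my-store-changelog");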