Search in sources :

Example 1 with StoreDescriptor

Use of org.apache.samza.operators.spec.StoreDescriptor in the Apache Samza project.

Class JobNodeConfigurationGenerator, method generateJobConfig.

/**
 * Builds the {@link JobConfig} for one job node.
 *
 * <p>For a legacy task application the node's own config is returned as-is, wrapped in a
 * {@code JobConfig}. Otherwise a map of generated entries is assembled (job name/ID, broadcast
 * inputs, window interval, store configs, execution plan JSON, intermediate stream configs,
 * serdes, tables, and task inputs) and combined with the node's config via {@code mergeConfig}.
 *
 * @param jobNode the job node to generate configuration for
 * @param executionPlanJson the execution plan in JSON form, stored under
 *     {@code CONFIG_INTERNAL_EXECUTION_PLAN}
 * @return a {@code JobConfig} wrapping either the node's config (legacy task application) or the
 *     result of {@code mergeConfig(originalConfig, generatedConfig)}
 */
JobConfig generateJobConfig(JobNode jobNode, String executionPlanJson) {
    // Legacy task applications bypass config generation entirely.
    if (jobNode.isLegacyTaskApplication()) {
        return new JobConfig(jobNode.getConfig());
    }

    Map<String, String> generated = new HashMap<>();

    // Job identity.
    generated.put(JobConfig.JOB_NAME, jobNode.getJobName());
    generated.put(JobConfig.JOB_ID, jobNode.getJobId());

    Map<String, StreamEdge> incomingEdges = jobNode.getInEdges();
    Map<String, StreamEdge> outgoingEdges = jobNode.getOutEdges();
    Collection<OperatorSpec> operators = jobNode.getReachableOperators();
    List<StoreDescriptor> storeDescriptors = getStoreDescriptors(operators);
    Map<String, TableDescriptor> tables = getReachableTables(operators, jobNode);

    // Config handed over by the JobPlanner: user-provided + system-stream descriptor config + misc. other config.
    Config plannerConfig = jobNode.getConfig();

    // Split the node's input edges into plain inputs and broadcast inputs.
    Set<String> plainInputs = new HashSet<>();
    Set<String> broadcastSpecs = new HashSet<>();
    for (StreamEdge edge : incomingEdges.values()) {
        String streamName = edge.getName();
        if (!edge.isBroadcast()) {
            plainInputs.add(streamName);
            continue;
        }
        // Broadcast entries carry a partition suffix: "#[0-(count-1)]" for several partitions, "#0" otherwise.
        String partitionSuffix = edge.getPartitionCount() > 1
            ? "#[0-" + (edge.getPartitionCount() - 1) + "]"
            : "#0";
        broadcastSpecs.add(streamName + partitionSuffix);
    }
    configureBroadcastInputs(generated, plannerConfig, broadcastSpecs);

    // Window and join operator intervals for this node.
    configureWindowInterval(generated, plannerConfig, operators);

    // Store configuration for stateful operators.
    for (StoreDescriptor descriptor : storeDescriptors) {
        generated.putAll(descriptor.getStorageConfigs());
    }

    // Execution plan, as JSON.
    generated.put(CONFIG_INTERNAL_EXECUTION_PLAN, executionPlanJson);

    // Configs of intermediate input streams.
    for (StreamEdge edge : incomingEdges.values()) {
        if (edge.isIntermediate()) {
            generated.putAll(edge.generateConfig());
        }
    }

    // Serde configuration must be generated before table configuration: some TableProviders
    // (i.e. local store backed tables) require it when generating their own configuration.
    configureSerdes(generated, incomingEdges, outgoingEdges, storeDescriptors, tables.keySet(), jobNode);

    // Table configuration and potential side input configuration.
    configureTables(generated, plannerConfig, tables, plainInputs);

    // task.inputs is written only after configureTables has been given the input set.
    String joinedInputs = Joiner.on(',').join(plainInputs);
    generated.put(TaskConfig.INPUT_STREAMS, joinedInputs);

    LOG.info("Job {} has generated configs {}", jobNode.getJobNameAndId(), generated);
    return new JobConfig(mergeConfig(plannerConfig, generated));
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) StorageConfig(org.apache.samza.config.StorageConfig) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) JavaTableConfig(org.apache.samza.config.JavaTableConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) StoreDescriptor(org.apache.samza.operators.spec.StoreDescriptor) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StatefulOperatorSpec(org.apache.samza.operators.spec.StatefulOperatorSpec) HashSet(java.util.HashSet)

Aggregations

HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 ApplicationConfig (org.apache.samza.config.ApplicationConfig)1 Config (org.apache.samza.config.Config)1 JavaTableConfig (org.apache.samza.config.JavaTableConfig)1 JobConfig (org.apache.samza.config.JobConfig)1 MapConfig (org.apache.samza.config.MapConfig)1 SerializerConfig (org.apache.samza.config.SerializerConfig)1 StorageConfig (org.apache.samza.config.StorageConfig)1 StreamConfig (org.apache.samza.config.StreamConfig)1 TaskConfig (org.apache.samza.config.TaskConfig)1 JoinOperatorSpec (org.apache.samza.operators.spec.JoinOperatorSpec)1 OperatorSpec (org.apache.samza.operators.spec.OperatorSpec)1 StatefulOperatorSpec (org.apache.samza.operators.spec.StatefulOperatorSpec)1 StoreDescriptor (org.apache.samza.operators.spec.StoreDescriptor)1 WindowOperatorSpec (org.apache.samza.operators.spec.WindowOperatorSpec)1 LocalTableDescriptor (org.apache.samza.table.descriptors.LocalTableDescriptor)1 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)1