Search in sources :

Example 36 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

Method getNext of the class DagManagerUtils.

/**
 * Computes the set of dag nodes that are ready to be executed next.
 *
 * <p>The walk begins at the start nodes of the dag and only descends into the children of nodes whose status is
 * {@link ExecutionStatus#COMPLETE}. A visited node whose status is {@link ExecutionStatus#PENDING},
 * {@link ExecutionStatus#PENDING_RETRY} or {@link ExecutionStatus#PENDING_RESUME} is selected only when every one
 * of its parents is {@link ExecutionStatus#COMPLETE}. If a {@link ExecutionStatus#FAILED} or
 * {@link ExecutionStatus#CANCELLED} node is reached while the failure option of the dag is
 * {@link FailureOption#FINISH_RUNNING}, an empty set is returned immediately.
 *
 * @param dag the dag to traverse.
 * @return the nodes eligible for execution; may be empty.
 */
static Set<DagNode<JobExecutionPlan>> getNext(Dag<JobExecutionPlan> dag) {
    Set<DagNode<JobExecutionPlan>> readyNodes = new HashSet<>();
    LinkedList<DagNode<JobExecutionPlan>> frontier = Lists.newLinkedList(dag.getStartNodes());
    FailureOption failureOption = getFailureOption(dag);

    while (!frontier.isEmpty()) {
        DagNode<JobExecutionPlan> current = frontier.poll();
        ExecutionStatus status = getExecutionStatus(current);

        if (status == ExecutionStatus.COMPLETE) {
            // A finished node unlocks its children as candidates for the next round.
            frontier.addAll(dag.getChildren(current));
            continue;
        }

        if (status == ExecutionStatus.FAILED || status == ExecutionStatus.CANCELLED) {
            switch (failureOption) {
                case FINISH_RUNNING:
                    // Nothing new may be scheduled once a node has failed or been cancelled.
                    return new HashSet<>();
                case FINISH_ALL_POSSIBLE:
                default:
                    // Keep scanning the remaining candidates.
                    break;
            }
            continue;
        }

        boolean awaitingExecution = status == ExecutionStatus.PENDING
            || status == ExecutionStatus.PENDING_RETRY
            || status == ExecutionStatus.PENDING_RESUME;
        if (!awaitingExecution) {
            continue;
        }

        // A waiting node is runnable only when all of its parents are COMPLETE.
        List<DagNode<JobExecutionPlan>> parents = dag.getParents(current);
        boolean everyParentComplete = parents.stream()
            .allMatch(parent -> getExecutionStatus(parent) == ExecutionStatus.COMPLETE);
        if (everyParentComplete) {
            readyNodes.add(current);
        }
    }
    return readyNodes;
}
Also used : FailureOption(org.apache.gobblin.service.modules.orchestration.DagManager.FailureOption) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) ExecutionStatus(org.apache.gobblin.service.ExecutionStatus) HashSet(java.util.HashSet)

Example 37 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

Method concatenate of the class FlowGraphPath.

/**
 * Joins two {@link Dag}s, with {@code dagLeft} as the parent and {@code dagRight} as the child.
 *
 * <p>When the forkable end nodes of the parent have dependency nodes, the {@link ConfigurationKeys#JOB_DEPENDENCIES}
 * entry of every start node {@link JobSpec} in the child dag is overwritten with the comma-separated job names of
 * those dependency nodes before the dags are concatenated.
 *
 * @param dagLeft The parent dag.
 * @param dagRight The child dag.
 * @return The concatenated dag with modified {@link ConfigurationKeys#JOB_DEPENDENCIES}.
 */
@VisibleForTesting
static Dag<JobExecutionPlan> concatenate(Dag<JobExecutionPlan> dagLeft, Dag<JobExecutionPlan> dagRight) {
    // Fork nodes: the end nodes of the parent dag that are forkable.
    Set<DagNode<JobExecutionPlan>> forkNodes = dagLeft.getEndNodes()
        .stream()
        .filter(candidate -> isNodeForkable(candidate))
        .collect(Collectors.toSet());

    Set<DagNode<JobExecutionPlan>> dependencyNodes = dagLeft.getDependencyNodes(forkNodes);
    if (dependencyNodes.isEmpty()) {
        // No dependencies to record on the child dag; concatenate as-is.
        return dagLeft.concatenate(dagRight, forkNodes);
    }

    // Comma-separated job names of the nodes the child dag has to wait for.
    String dependencies = Joiner.on(",").join(
        dependencyNodes.stream()
            .map(parent -> parent.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY))
            .iterator());

    for (DagNode<JobExecutionPlan> startNode : dagRight.getStartNodes()) {
        JobSpec childSpec = startNode.getValue().getJobSpec();
        Config withDependencies = childSpec.getConfig()
            .withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(dependencies));
        childSpec.setConfig(withDependencies);
    }
    return dagLeft.concatenate(dagRight, forkNodes);
}
Also used : JobExecutionPlanDagFactory(org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory) Getter(lombok.Getter) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) URISyntaxException(java.net.URISyntaxException) ConfigValueFactory(com.typesafe.config.ConfigValueFactory) ConfigUtils(org.apache.gobblin.util.ConfigUtils) ArrayList(java.util.ArrayList) DatasetDescriptor(org.apache.gobblin.service.modules.dataset.DatasetDescriptor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Files(com.google.common.io.Files) Optional(com.google.common.base.Optional) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) URI(java.net.URI) FlowEdge(org.apache.gobblin.service.modules.flowgraph.FlowEdge) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) Iterator(java.util.Iterator) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) Config(com.typesafe.config.Config) Set(java.util.Set) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) List(java.util.List) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) VisibleForTesting(com.google.common.annotations.VisibleForTesting) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Joiner(com.google.common.base.Joiner) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) JobSpec(org.apache.gobblin.runtime.api.JobSpec) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 38 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

Method convertHopToDag of the class FlowGraphPath.

/**
 * Builds the {@link Dag} of {@link JobExecutionPlan}s for a single hop described by a {@link FlowEdgeContext}.
 *
 * <p>The job configs are resolved from the {@link FlowTemplate} of the edge using the merged config and the
 * input/output dataset descriptors of the context. One {@link JobExecutionPlan} is created per resolved config, the
 * job dependencies are then updated using a template-name-to-job-name mapping, and the plans are assembled into a dag.
 *
 * @param flowEdgeContext an instance of {@link FlowEdgeContext}.
 * @param sysConfig environment config.
 * @return a {@link Dag} of {@link JobExecutionPlan}s associated with the {@link FlowEdge}.
 */
private Dag<JobExecutionPlan> convertHopToDag(FlowEdgeContext flowEdgeContext, Config sysConfig) throws SpecNotFoundException, JobTemplate.TemplateException, URISyntaxException {
    FlowEdge edge = flowEdgeContext.getEdge();
    FlowTemplate template = edge.getFlowTemplate();
    DatasetDescriptor inputDescriptor = flowEdgeContext.getInputDatasetDescriptor();
    DatasetDescriptor outputDescriptor = flowEdgeContext.getOutputDatasetDescriptor();
    Config mergedConfig = flowEdgeContext.getMergedConfig();
    SpecExecutor executor = flowEdgeContext.getSpecExecutor();

    // Resolve the job configs of this hop from the flow template.
    List<Config> jobConfigs = template.getResolvedJobConfigs(mergedConfig, inputDescriptor, outputDescriptor);
    int jobCount = jobConfigs.size();
    List<JobExecutionPlan> plans = new ArrayList<>(jobCount);
    Map<String, String> jobNameByTemplate = Maps.newHashMapWithExpectedSize(jobCount);

    // Turn every resolved job config into an execution plan and remember which job name each template produced.
    for (Config jobConfig : jobConfigs) {
        JobExecutionPlan.Factory planFactory = new JobExecutionPlan.Factory();
        JobExecutionPlan plan = planFactory.createPlan(flowSpec, jobConfig, executor, flowExecutionId, sysConfig);
        plans.add(plan);

        String jobName = plan.getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
        jobNameByTemplate.put(getJobTemplateName(plan), jobName);
    }

    updateJobDependencies(plans, jobNameByTemplate);
    JobExecutionPlanDagFactory dagFactory = new JobExecutionPlanDagFactory();
    return dagFactory.createDag(plans);
}
Also used : FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) DatasetDescriptor(org.apache.gobblin.service.modules.dataset.DatasetDescriptor) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) JobExecutionPlanDagFactory(org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)

Example 39 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

Method compileFlow of the class MockedSpecCompiler.

/**
 * Compiles the given spec into a mock {@link Dag} containing {@code NUMBER_OF_JOBS} job execution plans.
 *
 * <p>Each generated job copies the flow name and flow group of the spec, derives its job name and job group by
 * appending {@code "_" + jobNumber} (job numbers start at 1), and shares a flow execution id taken from the current
 * time in milliseconds.
 *
 * @param spec the spec to compile; it is cast to {@link FlowSpec}.
 * @return the dag of generated plans, or {@code null} when the flow name equals {@code UNCOMPILABLE_FLOW}
 *         (case-insensitively).
 */
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
    FlowSpec flow = (FlowSpec) spec;
    String flowName = (String) flow.getConfigAsProperties().get(ConfigurationKeys.FLOW_NAME_KEY);
    if (flowName.equalsIgnoreCase(UNCOMPILABLE_FLOW)) {
        return null;
    }

    List<JobExecutionPlan> plans = new ArrayList<>();
    long flowExecutionId = System.currentTimeMillis();

    for (int index = 0; index < NUMBER_OF_JOBS; index++) {
        // Job numbers are 1-based.
        int jobNumber = index + 1;
        String specUri = "/foo/bar/spec/" + jobNumber;

        Properties jobProperties = new Properties();
        jobProperties.put(ConfigurationKeys.FLOW_NAME_KEY, flowName);
        jobProperties.put(ConfigurationKeys.FLOW_GROUP_KEY,
            flow.getConfigAsProperties().get(ConfigurationKeys.FLOW_GROUP_KEY));
        jobProperties.put(ConfigurationKeys.JOB_NAME_KEY,
            flow.getConfigAsProperties().get(ConfigurationKeys.FLOW_NAME_KEY) + "_" + jobNumber);
        jobProperties.put(ConfigurationKeys.JOB_GROUP_KEY,
            flow.getConfigAsProperties().get(ConfigurationKeys.FLOW_GROUP_KEY) + "_" + jobNumber);
        jobProperties.put(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, flowExecutionId);

        JobSpec jobSpec = JobSpec.builder(specUri)
            .withConfig(ConfigUtils.propertiesToConfig(jobProperties))
            .withVersion("1")
            .withDescription("Spec Description")
            .build();
        plans.add(new JobExecutionPlan(jobSpec, new InMemorySpecExecutor(ConfigFactory.empty())));
    }

    return new JobExecutionPlanDagFactory().createDag(plans);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) ArrayList(java.util.ArrayList) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Properties(java.util.Properties) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) JobExecutionPlanDagFactory(org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)

Aggregations

JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan)39 Config (com.typesafe.config.Config)22 FlowSpec (org.apache.gobblin.runtime.api.FlowSpec)21 Test (org.testng.annotations.Test)21 JobSpec (org.apache.gobblin.runtime.api.JobSpec)15 ArrayList (java.util.ArrayList)12 Dag (org.apache.gobblin.service.modules.flowgraph.Dag)12 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)10 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)8 JobExecutionPlanDagFactory (org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)8 URI (java.net.URI)7 Spec (org.apache.gobblin.runtime.api.Spec)6 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)6 IOException (java.io.IOException)5 DagNode (org.apache.gobblin.service.modules.flowgraph.Dag.DagNode)5 File (java.io.File)4 HashSet (java.util.HashSet)4 Path (org.apache.hadoop.fs.Path)4 Joiner (com.google.common.base.Joiner)3 Optional (com.google.common.base.Optional)3