Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.
The class DagManagerUtils, method getNext.
/**
 * Traverse the dag to determine the next set of nodes to be executed. It starts with the startNodes of the dag and
 * identifies each node yet to be executed and for which each of its parent nodes is in the {@link ExecutionStatus#COMPLETE}
 * state.
 */
static Set<DagNode<JobExecutionPlan>> getNext(Dag<JobExecutionPlan> dag) {
  Set<DagNode<JobExecutionPlan>> nextNodesToExecute = new HashSet<>();
  LinkedList<DagNode<JobExecutionPlan>> nodesToExpand = Lists.newLinkedList(dag.getStartNodes());
  FailureOption failureOption = getFailureOption(dag);
  while (!nodesToExpand.isEmpty()) {
    DagNode<JobExecutionPlan> node = nodesToExpand.poll();
    ExecutionStatus executionStatus = getExecutionStatus(node);
    boolean addFlag = true;
    if (executionStatus == ExecutionStatus.PENDING || executionStatus == ExecutionStatus.PENDING_RETRY
        || executionStatus == ExecutionStatus.PENDING_RESUME) {
      // Add a node to be executed next only if all of its parent nodes are COMPLETE.
      List<DagNode<JobExecutionPlan>> parentNodes = dag.getParents(node);
      for (DagNode<JobExecutionPlan> parentNode : parentNodes) {
        if (getExecutionStatus(parentNode) != ExecutionStatus.COMPLETE) {
          addFlag = false;
          break;
        }
      }
      if (addFlag) {
        nextNodesToExecute.add(node);
      }
    } else if (executionStatus == ExecutionStatus.COMPLETE) {
      // Explore the children of a COMPLETE node as the next candidates for execution.
      nodesToExpand.addAll(dag.getChildren(node));
    } else if ((executionStatus == ExecutionStatus.FAILED) || (executionStatus == ExecutionStatus.CANCELLED)) {
      switch (failureOption) {
        case FINISH_RUNNING:
          return new HashSet<>();
        case FINISH_ALL_POSSIBLE:
        default:
          break;
      }
    }
  }
  return nextNodesToExecute;
}
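The caller decides what to do with the returned set. Below is a minimal driver-loop sketch; it assumes the calling class sits in the same package as DagManagerUtils (getNext is package-private), and submitJob is a hypothetical placeholder for the real submission path.

```java
package org.apache.gobblin.service.modules.orchestration;

import java.util.Set;

import org.apache.gobblin.service.modules.flowgraph.Dag;
import org.apache.gobblin.service.modules.flowgraph.Dag.DagNode;
import org.apache.gobblin.service.modules.spec.JobExecutionPlan;

public class NextNodesSketch {

  // Hypothetical hook; a real DagManager hands the node's JobSpec to a SpecProducer.
  void submitJob(DagNode<JobExecutionPlan> node) {
    // ...
  }

  void advance(Dag<JobExecutionPlan> dag) {
    // All nodes in a PENDING* state whose parents are all COMPLETE.
    Set<DagNode<JobExecutionPlan>> ready = DagManagerUtils.getNext(dag);
    // An empty set means nothing is ready yet, or a FAILED/CANCELLED node
    // combined with FailureOption.FINISH_RUNNING cut the traversal short.
    for (DagNode<JobExecutionPlan> node : ready) {
      submitJob(node);
    }
  }
}
```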
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.
The class FlowGraphPath, method concatenate.
/**
 * Concatenate two {@link Dag}s. Modify the {@link ConfigurationKeys#JOB_DEPENDENCIES} in the {@link JobSpec}s of the child
 * {@link Dag} to reflect the concatenation operation.
 * @param dagLeft The parent dag.
 * @param dagRight The child dag.
 * @return The concatenated dag with modified {@link ConfigurationKeys#JOB_DEPENDENCIES}.
 */
@VisibleForTesting
static Dag<JobExecutionPlan> concatenate(Dag<JobExecutionPlan> dagLeft, Dag<JobExecutionPlan> dagRight) {
  // Compute the fork nodes: the set of nodes with no dependents in the concatenated dag.
  Set<DagNode<JobExecutionPlan>> forkNodes = dagLeft.getEndNodes().stream()
      .filter(endNode -> isNodeForkable(endNode))
      .collect(Collectors.toSet());
  Set<DagNode<JobExecutionPlan>> dependencyNodes = dagLeft.getDependencyNodes(forkNodes);
  if (!dependencyNodes.isEmpty()) {
    List<String> dependenciesList = dependencyNodes.stream()
        .map(dagNode -> dagNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY))
        .collect(Collectors.toList());
    String dependencies = Joiner.on(",").join(dependenciesList);
    // Each start node of the child dag now declares the parent dag's dependency nodes as its dependencies.
    for (DagNode<JobExecutionPlan> childNode : dagRight.getStartNodes()) {
      JobSpec jobSpec = childNode.getValue().getJobSpec();
      jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(dependencies)));
    }
  }
  return dagLeft.concatenate(dagRight, forkNodes);
}
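A hedged usage sketch: build the two dags with JobExecutionPlanDagFactory and join them. It assumes the caller sits in FlowGraphPath's package (concatenate is package-private) and that the two plan lists have already been resolved elsewhere.

```java
package org.apache.gobblin.service.modules.flow;

import java.util.List;

import org.apache.gobblin.service.modules.flowgraph.Dag;
import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
import org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory;

public class ConcatenateSketch {

  static Dag<JobExecutionPlan> join(List<JobExecutionPlan> left, List<JobExecutionPlan> right) {
    Dag<JobExecutionPlan> dagLeft = new JobExecutionPlanDagFactory().createDag(left);
    Dag<JobExecutionPlan> dagRight = new JobExecutionPlanDagFactory().createDag(right);
    // concatenate() rewrites job.dependencies on dagRight's start nodes so they
    // name the dagLeft jobs they now run after, then stitches the two dags together.
    return FlowGraphPath.concatenate(dagLeft, dagRight);
  }
}
```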
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.
The class FlowGraphPath, method convertHopToDag.
/**
 * Given an instance of {@link FlowEdge}, this method returns a {@link Dag} of {@link JobExecutionPlan}s that moves data
 * from the source of the {@link FlowEdge} to the destination of the {@link FlowEdge}.
 * @param flowEdgeContext an instance of {@link FlowEdgeContext}.
 * @param sysConfig environment config.
 * @return a {@link Dag} of {@link JobExecutionPlan}s associated with the {@link FlowEdge}.
 */
private Dag<JobExecutionPlan> convertHopToDag(FlowEdgeContext flowEdgeContext, Config sysConfig)
    throws SpecNotFoundException, JobTemplate.TemplateException, URISyntaxException {
  FlowTemplate flowTemplate = flowEdgeContext.getEdge().getFlowTemplate();
  DatasetDescriptor inputDatasetDescriptor = flowEdgeContext.getInputDatasetDescriptor();
  DatasetDescriptor outputDatasetDescriptor = flowEdgeContext.getOutputDatasetDescriptor();
  Config mergedConfig = flowEdgeContext.getMergedConfig();
  SpecExecutor specExecutor = flowEdgeContext.getSpecExecutor();
  // Get the resolved job configs from the flow template.
  List<Config> resolvedJobConfigs = flowTemplate.getResolvedJobConfigs(mergedConfig, inputDatasetDescriptor, outputDatasetDescriptor);
  List<JobExecutionPlan> jobExecutionPlans = new ArrayList<>(resolvedJobConfigs.size());
  Map<String, String> templateToJobNameMap = Maps.newHashMapWithExpectedSize(resolvedJobConfigs.size());
  // Iterate over each resolved job config and convert the config to a JobSpec.
  // Note: flowSpec and flowExecutionId are instance fields of FlowGraphPath.
  for (Config resolvedJobConfig : resolvedJobConfigs) {
    JobExecutionPlan jobExecutionPlan =
        new JobExecutionPlan.Factory().createPlan(flowSpec, resolvedJobConfig, specExecutor, flowExecutionId, sysConfig);
    jobExecutionPlans.add(jobExecutionPlan);
    templateToJobNameMap.put(getJobTemplateName(jobExecutionPlan),
        jobExecutionPlan.getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY));
  }
  updateJobDependencies(jobExecutionPlans, templateToJobNameMap);
  return new JobExecutionPlanDagFactory().createDag(jobExecutionPlans);
}
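The template-to-job-name map feeds updateJobDependencies, which has to translate dependency declarations written against template names into the final resolved job names. The body below is an assumption about what that translation looks like, not the actual Gobblin implementation.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigValueFactory;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.service.modules.spec.JobExecutionPlan;

public class DependencyTranslationSketch {

  // Assumed logic: rewrite each plan's job.dependencies from template names to job names.
  static void updateJobDependencies(List<JobExecutionPlan> plans, Map<String, String> templateToJobName) {
    for (JobExecutionPlan plan : plans) {
      Config config = plan.getJobSpec().getConfig();
      if (!config.hasPath(ConfigurationKeys.JOB_DEPENDENCIES)) {
        continue;
      }
      // Dependencies are declared against template names; map each one to the
      // resolved job name so the dag edges line up with the final JobSpecs.
      List<String> resolved = new ArrayList<>();
      for (String dep : Splitter.on(",").trimResults()
          .split(config.getString(ConfigurationKeys.JOB_DEPENDENCIES))) {
        resolved.add(templateToJobName.get(dep));
      }
      plan.getJobSpec().setConfig(config.withValue(ConfigurationKeys.JOB_DEPENDENCIES,
          ConfigValueFactory.fromAnyRef(Joiner.on(",").join(resolved))));
    }
  }
}
```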
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.
The class MockedSpecCompiler, method compileFlow.
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
  String flowName = (String) ((FlowSpec) spec).getConfigAsProperties().get(ConfigurationKeys.FLOW_NAME_KEY);
  if (flowName.equalsIgnoreCase(UNCOMPILABLE_FLOW)) {
    return null;
  }
  List<JobExecutionPlan> jobExecutionPlans = new ArrayList<>();
  long flowExecutionId = System.currentTimeMillis();
  int i = 0;
  while (i++ < NUMBER_OF_JOBS) {
    String specUri = "/foo/bar/spec/" + i;
    Properties properties = new Properties();
    properties.put(ConfigurationKeys.FLOW_NAME_KEY, flowName);
    properties.put(ConfigurationKeys.FLOW_GROUP_KEY, ((FlowSpec) spec).getConfigAsProperties().get(ConfigurationKeys.FLOW_GROUP_KEY));
    properties.put(ConfigurationKeys.JOB_NAME_KEY, ((FlowSpec) spec).getConfigAsProperties().get(ConfigurationKeys.FLOW_NAME_KEY) + "_" + i);
    properties.put(ConfigurationKeys.JOB_GROUP_KEY, ((FlowSpec) spec).getConfigAsProperties().get(ConfigurationKeys.FLOW_GROUP_KEY) + "_" + i);
    properties.put(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, flowExecutionId);
    JobSpec jobSpec = JobSpec.builder(specUri)
        .withConfig(ConfigUtils.propertiesToConfig(properties))
        .withVersion("1")
        .withDescription("Spec Description")
        .build();
    jobExecutionPlans.add(new JobExecutionPlan(jobSpec, new InMemorySpecExecutor(ConfigFactory.empty())));
  }
  return new JobExecutionPlanDagFactory().createDag(jobExecutionPlans);
}
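A usage sketch for a test, assuming FlowSpec exposes a builder as in Gobblin's test sources, and that NUMBER_OF_JOBS and the single-argument MockedSpecCompiler constructor are visible to the test; these details are assumptions.

```java
import org.testng.Assert;

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigValueFactory;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.runtime.api.FlowSpec;
import org.apache.gobblin.service.modules.flowgraph.Dag;
import org.apache.gobblin.service.modules.spec.JobExecutionPlan;

public class MockedSpecCompilerSketch {

  public void compilesFixedNumberOfJobs() {
    Config flowConfig = ConfigFactory.empty()
        .withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef("myFlow"))
        .withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef("myGroup"));
    FlowSpec flowSpec = FlowSpec.builder().withConfig(flowConfig).build();

    Dag<JobExecutionPlan> dag = new MockedSpecCompiler(ConfigFactory.empty()).compileFlow(flowSpec);

    // One dag node per mocked job; none of the generated JobSpecs declare dependencies.
    Assert.assertEquals(dag.getNodes().size(), MockedSpecCompiler.NUMBER_OF_JOBS);
  }
}
```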