Use of org.apache.gobblin.service.modules.flowgraph.FlowGraph in the incubator-gobblin project by Apache.
Class MultiHopFlowCompiler, method compileFlow.
/**
 * Compiles a {@link FlowSpec} into a DAG of {@link JobExecutionPlan}s.
 *
 * <p>The source and destination node ids are read from the flow config and resolved against the
 * flow graph. The flow spec is then split via {@code splitFlowSpec}; for each resulting spec the
 * data movement to every destination node is checked with the data movement authorizer, a path is
 * computed through the flow graph, and the path's DAG is merged into the result. Authorization and
 * path finding run while holding the read lock of {@code rwLock}.
 *
 * <p>Failures are not thrown to the caller: a compilation error is recorded on a flow spec and
 * {@code null} is returned.
 *
 * @param spec an instance of {@link FlowSpec}; must be non-null.
 * @return A DAG of {@link JobExecutionPlan}s, which encapsulates the compiled {@link org.apache.gobblin.runtime.api.JobSpec}s
 * together with the {@link SpecExecutor} where the job can be executed, or {@code null} if a source or
 * destination node is missing from the flow graph, the data movement is not authorized, no path is
 * found, or an exception occurs during compilation.
 */
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
  Preconditions.checkNotNull(spec);
  Preconditions.checkArgument(spec instanceof FlowSpec, "MultiHopFlowCompiler only accepts FlowSpecs");
  long startTime = System.nanoTime();

  FlowSpec flowSpec = (FlowSpec) spec;
  String source = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY, "");
  // Raw destination config value; used only for messages. The individual ids are parsed below.
  String destination = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY, "");

  DataNode sourceNode = this.flowGraph.getNode(source);
  if (sourceNode == null) {
    flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", source));
    return null;
  }

  List<String> destNodeIds = ConfigUtils.getStringList(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
  List<DataNode> destNodes = destNodeIds.stream().map(this.flowGraph::getNode).collect(Collectors.toList());
  if (destNodes.contains(null)) {
    // destNodes is index-aligned with destNodeIds, so the first null marks the first unknown id.
    flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", destNodeIds.get(destNodes.indexOf(null))));
    return null;
  }

  log.info(String.format("Compiling flow for source: %s and destination: %s", source, destination));
  List<FlowSpec> flowSpecs = splitFlowSpec(flowSpec);
  Dag<JobExecutionPlan> jobExecutionPlanDag = new Dag<>(new ArrayList<>());

  // Acquire the lock BEFORE entering the try block: if lock() itself fails, the finally clause
  // must not attempt to release a lock this thread never held.
  this.rwLock.readLock().lock();
  try {
    for (FlowSpec datasetFlowSpec : flowSpecs) {
      for (DataNode destNode : destNodes) {
        long authStartTime = System.nanoTime();
        try {
          boolean authorized = this.dataMovementAuthorizer.isMovementAuthorized(flowSpec, sourceNode, destNode);
          Instrumented.updateTimer(dataAuthorizationTimer, System.nanoTime() - authStartTime, TimeUnit.NANOSECONDS);
          if (!authorized) {
            String message = String.format("Data movement is not authorized for flow: %s, source: %s, destination: %s", flowSpec.getUri().toString(), source, destination);
            log.error(message);
            // NOTE(review): the error is recorded on datasetFlowSpec, not flowSpec, and the failure
            // meter is not marked on this path - confirm both are intentional.
            datasetFlowSpec.addCompilationError(source, destination, message);
            return null;
          }
        } catch (Exception e) {
          Instrumented.markMeter(flowCompilationFailedMeter);
          datasetFlowSpec.addCompilationError(source, destination, Throwables.getStackTraceAsString(e));
          return null;
        }
      }
      // Compute the path from source to destination.
      FlowGraphPath flowGraphPath = flowGraph.findPath(datasetFlowSpec);
      if (flowGraphPath != null) {
        // Convert the path into a Dag of JobExecutionPlans.
        jobExecutionPlanDag = jobExecutionPlanDag.merge(flowGraphPath.asDag(this.config));
      }
    }

    if (jobExecutionPlanDag.isEmpty()) {
      Instrumented.markMeter(flowCompilationFailedMeter);
      String message = String.format("No path found from source: %s and destination: %s", source, destination);
      log.info(message);
      // Only add the generic message when no error with errorPriority 0 has been recorded already
      // (presumably priority 0 marks the most specific errors - verify against the error type).
      if (flowSpec.getCompilationErrors().stream().noneMatch(compilationError -> compilationError.errorPriority == 0)) {
        flowSpec.addCompilationError(source, destination, message);
      }
      return null;
    }
  } catch (PathFinder.PathFinderException | SpecNotFoundException | JobTemplate.TemplateException | URISyntaxException | ReflectiveOperationException e) {
    Instrumented.markMeter(flowCompilationFailedMeter);
    String message = String.format("Exception encountered while compiling flow for source: %s and destination: %s, %s", source, destination, Throwables.getStackTraceAsString(e));
    log.error(message, e);
    flowSpec.addCompilationError(source, destination, message);
    return null;
  } finally {
    this.rwLock.readLock().unlock();
  }

  // The success meter and the overall compilation timer are updated only on the success path.
  Instrumented.markMeter(flowCompilationSuccessFulMeter);
  Instrumented.updateTimer(flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
  return jobExecutionPlanDag;
}
Aggregations