use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.
the class MultiHopFlowCompilerTest method testUnresolvedFlow.
/**
 * Compiles the flow described by {@code flow/flow5.conf} between HDFS-1 and HDFS-3 and verifies that
 * compilation fails: the compiler must return a {@code null} DAG, record exactly one distinct
 * compilation error message, and at least one recorded error must mention
 * {@link AzkabanProjectConfig#USER_TO_PROXY}.
 */
@Test(dependsOnMethods = "testCompileCombinedDatasetFlow")
public void testUnresolvedFlow() throws Exception {
    FlowSpec spec = createFlowSpec("flow/flow5.conf", "HDFS-1", "HDFS-3", false, false);
    Dag<JobExecutionPlan> dag = specCompiler.compileFlow(spec);
    Assert.assertNull(dag);
    // Distinct error messages are counted, so repeated identical errors still count as one.
    Assert.assertEquals(spec.getCompilationErrors().stream().map(c -> c.errorMessage).collect(Collectors.toSet()).size(), 1);
    // The result of anyMatch must be asserted; a bare stream expression verifies nothing.
    Assert.assertTrue(
        spec.getCompilationErrors().stream().anyMatch(s -> s.errorMessage.contains(AzkabanProjectConfig.USER_TO_PROXY)),
        "Expected a compilation error mentioning " + AzkabanProjectConfig.USER_TO_PROXY);
}
use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.
the class IdentityFlowToJobSpecCompilerTest method testCompilerWithoutTemplateCatalog.
/**
 * Compiles the FlowSpec produced by {@code initFlowSpec()} with the compiler that has no template
 * catalog and checks the resulting single-node DAG: the node carries a {@link JobSpec} whose config
 * lacks the {@code testProperty*} keys and the job schedule, but carries the flow source, job/flow
 * name and group, and a flow execution id.
 */
@Test
public void testCompilerWithoutTemplateCatalog() {
    FlowSpec inputSpec = initFlowSpec();

    // Compile the flow.
    Dag<JobExecutionPlan> compiledDag = this.compilerWithoutTemplateCalague.compileFlow(inputSpec);

    // Shape of the DAG: exactly one node, which is also the only start node.
    Assert.assertNotNull(compiledDag, "Expected non null dag.");
    Assert.assertTrue(compiledDag.getNodes().size() == 1, "Exepected 1 executor for FlowSpec.");
    Assert.assertEquals(compiledDag.getStartNodes().size(), 1);

    // The start node must wrap a JobSpec.
    Dag.DagNode<JobExecutionPlan> startNode = compiledDag.getStartNodes().get(0);
    Spec compiledSpec = startNode.getValue().getJobSpec();
    Assert.assertTrue(compiledSpec instanceof JobSpec, "Expected JobSpec compiled from FlowSpec.");
    JobSpec compiledJob = (JobSpec) compiledSpec;

    // None of the test properties may appear in the compiled job config.
    for (String absentKey : new String[] {"testProperty1", "testProperty2", "testProperty3"}) {
        Assert.assertTrue(!compiledJob.getConfig().hasPath(absentKey));
    }

    // Flow identity is propagated; the schedule is not.
    Assert.assertEquals(compiledJob.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY), TEST_SOURCE_NAME);
    Assert.assertFalse(compiledJob.getConfig().hasPath(ConfigurationKeys.JOB_SCHEDULE_KEY));
    Assert.assertEquals(compiledJob.getConfig().getString(ConfigurationKeys.JOB_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(compiledJob.getConfig().getString(ConfigurationKeys.JOB_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertEquals(compiledJob.getConfig().getString(ConfigurationKeys.FLOW_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(compiledJob.getConfig().getString(ConfigurationKeys.FLOW_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertTrue(compiledJob.getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));

    // A single-node DAG has no edges out of its start node.
    Assert.assertEquals(compiledDag.getChildren(startNode).size(), 0);
}
use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.
the class FSDagStateStoreTest method testWriteCheckpoint.
/**
 * Writes a two-node DAG through the state store, reads the checkpoint file back, and verifies that
 * the deserialized DAG has the same structure (one parent, one child) and that every node keeps
 * its flow group, flow name, flow execution id and RUNNING status.
 */
@Test
public void testWriteCheckpoint() throws IOException, URISyntaxException {
    long executionId = System.currentTimeMillis();
    String groupId = "0";

    // Persist a freshly built DAG.
    Dag<JobExecutionPlan> original = DagTestUtils.buildDag(groupId, executionId);
    this._dagStateStore.writeCheckpoint(original);

    // Locate the checkpoint file and load it back.
    File checkpointFile =
        new File(this.checkpointDir, DagManagerUtils.generateDagId(original) + FSDagStateStore.DAG_FILE_EXTENSION);
    FSDagStateStore fsStore = (FSDagStateStore) this._dagStateStore;
    Dag<JobExecutionPlan> restored = fsStore.getDag(checkpointFile);

    // Structural checks: two nodes, one start, one end.
    Assert.assertEquals(restored.getNodes().size(), 2);
    Assert.assertEquals(restored.getStartNodes().size(), 1);
    Assert.assertEquals(restored.getEndNodes().size(), 1);

    // The single edge must run from the start node to the end node.
    Dag.DagNode<JobExecutionPlan> endNode = restored.getEndNodes().get(0);
    Dag.DagNode<JobExecutionPlan> startNode = restored.getStartNodes().get(0);
    Assert.assertEquals(restored.getParentChildMap().size(), 1);
    Assert.assertTrue(restored.getParentChildMap().get(startNode).contains(endNode));

    // Per-node payload checks (node count was asserted to be 2 above).
    for (Dag.DagNode<JobExecutionPlan> node : restored.getNodes()) {
        JobExecutionPlan executionPlan = node.getValue();
        Config restoredConfig = executionPlan.getJobSpec().getConfig();
        Assert.assertEquals(restoredConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY), "group" + groupId);
        Assert.assertEquals(restoredConfig.getString(ConfigurationKeys.FLOW_NAME_KEY), "flow" + groupId);
        Assert.assertEquals(restoredConfig.getLong(ConfigurationKeys.FLOW_EXECUTION_ID_KEY), executionId);
        Assert.assertEquals(executionPlan.getExecutionStatus(), ExecutionStatus.RUNNING);
    }
}
use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.
the class Orchestrator method orchestrate.
/**
 * Compiles the given spec into a DAG of {@link JobExecutionPlan}s and hands it off for execution.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Block until the topology catalog's init latch is released and the spec compiler is healthy.</li>
 *   <li>Register a per-flow "compiled" gauge the first time a flow URI is seen.</li>
 *   <li>If {@code canRun} rejects the flow, set the gauge to SKIPPED, emit a FLOW_FAILED event
 *       and return.</li>
 *   <li>Compile the flow. On a null/empty DAG, set the gauge to FAILED, emit FLOW_COMPILE_FAILED
 *       and return.</li>
 *   <li>Otherwise set the gauge to SUCCESSFUL, stop the FLOW_COMPILED timer, and either pass the DAG
 *       to the DagManager (if present) or add each job spec to its executor's producer directly.</li>
 * </ol>
 *
 * @param spec the spec to orchestrate; must be a {@link FlowSpec}
 * @throws RuntimeException if {@code spec} is not a {@link FlowSpec}
 * @throws Exception anything thrown while waiting, compiling, or by {@code DagManager.addDag} (rethrown
 *         after a FLOW_FAILED event is emitted when an event submitter is present)
 */
public void orchestrate(Spec spec) throws Exception {
// Add below waiting because TopologyCatalog and FlowCatalog service can be launched at the same time
this.topologyCatalog.get().getInitComplete().await();
// Wait for the SpecCompiler to become healthy.
this.getSpecCompiler().awaitHealthy();
// Start of the interval reported to flowOrchestrationTimer at the end of the method.
long startTime = System.nanoTime();
if (spec instanceof FlowSpec) {
Config flowConfig = ((FlowSpec) spec).getConfig();
String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY);
String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY);
// Lazily create and register one gauge per flow URI; the gauge reads the latest state from flowGauges.
if (!flowGauges.containsKey(spec.getUri().toString())) {
String flowCompiledGaugeName = MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, flowGroup, flowName, ServiceMetricNames.COMPILED);
flowGauges.put(spec.getUri().toString(), new FlowCompiledState());
ContextAwareGauge<Integer> gauge = RootMetricContext.get().newContextAwareGauge(flowCompiledGaugeName, () -> flowGauges.get(spec.getUri().toString()).state.value);
RootMetricContext.get().register(flowCompiledGaugeName, gauge);
}
// If the FlowSpec disallows concurrent executions, then check if another instance of the flow is already
// running. If so, return immediately.
boolean allowConcurrentExecution = ConfigUtils.getBoolean(flowConfig, ConfigurationKeys.FLOW_ALLOW_CONCURRENT_EXECUTION, this.flowConcurrencyFlag);
if (!canRun(flowName, flowGroup, allowConcurrentExecution)) {
_log.warn("Another instance of flowGroup: {}, flowName: {} running; Skipping flow execution since " + "concurrent executions are disabled for this flow.", flowGroup, flowName);
flowGauges.get(spec.getUri().toString()).setState(CompiledState.SKIPPED);
Instrumented.markMeter(this.skippedFlowsMeter);
// Send FLOW_FAILED event
Map<String, String> flowMetadata = TimingEventUtils.getFlowMetadata((FlowSpec) spec);
flowMetadata.put(TimingEvent.METADATA_MESSAGE, "Flow failed because another instance is running and concurrent " + "executions are disabled. Set flow.allowConcurrentExecution to true in the flow spec to change this behaviour.");
if (this.eventSubmitter.isPresent()) {
new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata);
}
// Skipped flows return here, so neither the success meter nor the orchestration timer below is updated.
return;
}
// The FLOW_COMPILED timing event is created before compilation and stopped only on the success path.
Optional<TimingEvent> flowCompilationTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.FlowTimings.FLOW_COMPILED));
// NOTE(review): this reads the specCompiler field directly while the health check above goes through
// getSpecCompiler(); presumably both refer to the same compiler — confirm.
Dag<JobExecutionPlan> jobExecutionPlanDag = specCompiler.compileFlow(spec);
Map<String, String> flowMetadata = TimingEventUtils.getFlowMetadata((FlowSpec) spec);
if (jobExecutionPlanDag == null || jobExecutionPlanDag.isEmpty()) {
// For scheduled flows, we do not insert the flowExecutionId into the FlowSpec. As a result, if the flow
// compilation fails (i.e. we are unable to find a path), the metadata will not have flowExecutionId.
// In this case, the current time is used as the flow executionId.
flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, Long.toString(System.currentTimeMillis()));
String message = "Flow was not compiled successfully.";
if (!((FlowSpec) spec).getCompilationErrors().isEmpty()) {
message = message + " Compilation errors encountered: " + ((FlowSpec) spec).getCompilationErrors();
}
flowMetadata.put(TimingEvent.METADATA_MESSAGE, message);
Optional<TimingEvent> flowCompileFailedTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.FlowTimings.FLOW_COMPILE_FAILED));
Instrumented.markMeter(this.flowOrchestrationFailedMeter);
flowGauges.get(spec.getUri().toString()).setState(CompiledState.FAILED);
_log.warn("Cannot determine an executor to run on for Spec: " + spec);
if (flowCompileFailedTimer.isPresent()) {
flowCompileFailedTimer.get().stop(flowMetadata);
}
return;
} else {
flowGauges.get(spec.getUri().toString()).setState(CompiledState.SUCCESSFUL);
}
// If it is a scheduled flow (and hence, does not have flowExecutionId in the FlowSpec) and the flow compilation is successful,
// retrieve the flowExecutionId from the JobSpec.
// get(0) is safe here: the null/empty DAG case returned above.
flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, jobExecutionPlanDag.getNodes().get(0).getValue().getJobSpec().getConfigAsProperties().getProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
if (flowCompilationTimer.isPresent()) {
flowCompilationTimer.get().stop(flowMetadata);
}
if (this.dagManager.isPresent()) {
try {
// Send the dag to the DagManager.
this.dagManager.get().addDag(jobExecutionPlanDag, true, true);
} catch (Exception ex) {
if (this.eventSubmitter.isPresent()) {
// pronounce failed before stack unwinds, to ensure flow not marooned in `COMPILED` state; (failure likely attributable to DB connection/failover)
String failureMessage = "Failed to add Job Execution Plan due to: " + ex.getMessage();
flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage);
new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata);
}
throw ex;
}
} else {
// Schedule all compiled JobSpecs on their respective Executor
for (Dag.DagNode<JobExecutionPlan> dagNode : jobExecutionPlanDag.getNodes()) {
DagManagerUtils.incrementJobAttempt(dagNode);
JobExecutionPlan jobExecutionPlan = dagNode.getValue();
// Run this spec on selected executor
// Declared outside the try so the catch block can include it in the error log (null if lookup failed).
SpecProducer producer = null;
try {
producer = jobExecutionPlan.getSpecExecutor().getProducer().get();
Spec jobSpec = jobExecutionPlan.getJobSpec();
if (!((JobSpec) jobSpec).getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY)) {
_log.warn("JobSpec does not contain flowExecutionId.");
}
Map<String, String> jobMetadata = TimingEventUtils.getJobMetadata(flowMetadata, jobExecutionPlan);
_log.info(String.format("Going to orchestrate JobSpec: %s on Executor: %s", jobSpec, producer));
Optional<TimingEvent> jobOrchestrationTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.LauncherTimings.JOB_ORCHESTRATED));
producer.addSpec(jobSpec);
if (jobOrchestrationTimer.isPresent()) {
jobOrchestrationTimer.get().stop(jobMetadata);
}
} catch (Exception e) {
// Failures are logged per job and not rethrown, so the loop continues with the remaining nodes and
// the success meter below is still marked. NOTE(review): confirm this best-effort behaviour is intended.
_log.error("Cannot successfully setup spec: " + jobExecutionPlan.getJobSpec() + " on executor: " + producer + " for flow: " + spec, e);
}
}
}
} else {
Instrumented.markMeter(this.flowOrchestrationFailedMeter);
throw new RuntimeException("Spec not of type FlowSpec, cannot orchestrate: " + spec);
}
// Reached only when compilation succeeded and hand-off did not throw.
Instrumented.markMeter(this.flowOrchestrationSuccessFulMeter);
Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
}
use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.
the class MultiHopFlowCompiler method compileFlow.
/**
 * Compiles a {@link FlowSpec} into a DAG of {@link JobExecutionPlan}s.
 *
 * <p>The source and destination node ids are read from the flow config and resolved against the
 * flow graph. The spec is then split via {@code splitFlowSpec}; for each resulting spec, data movement
 * to every destination node is authorized, a path is looked up in the flow graph, and the path's DAG is
 * merged into the result. Path finding and DAG construction happen while holding the read lock.
 *
 * <p>On any failure a compilation error is recorded on a spec and {@code null} is returned.
 *
 * @param spec an instance of {@link FlowSpec}.
 * @return A DAG of {@link JobExecutionPlan}s, which encapsulates the compiled {@link org.apache.gobblin.runtime.api.JobSpec}s
 * together with the {@link SpecExecutor} where the job can be executed, or {@code null} if the source or a
 * destination node is unknown, data movement is not authorized, no path is found, or an exception occurs.
 * @throws NullPointerException if {@code spec} is null
 * @throws IllegalArgumentException if {@code spec} is not a {@link FlowSpec}
 */
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
    Preconditions.checkNotNull(spec);
    Preconditions.checkArgument(spec instanceof FlowSpec, "MultiHopFlowCompiler only accepts FlowSpecs");
    long startTime = System.nanoTime();
    FlowSpec flowSpec = (FlowSpec) spec;
    String source = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY, "");
    String destination = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY, "");

    // Resolve the source node; an unknown id is a compilation error.
    DataNode sourceNode = this.flowGraph.getNode(source);
    if (sourceNode == null) {
        flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", source));
        return null;
    }

    // The destination key may hold several ids; every one of them must resolve to a node.
    List<String> destNodeIds = ConfigUtils.getStringList(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
    List<DataNode> destNodes = destNodeIds.stream().map(this.flowGraph::getNode).collect(Collectors.toList());
    if (destNodes.contains(null)) {
        flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", destNodeIds.get(destNodes.indexOf(null))));
        return null;
    }

    log.info(String.format("Compiling flow for source: %s and destination: %s", source, destination));
    List<FlowSpec> flowSpecs = splitFlowSpec(flowSpec);
    Dag<JobExecutionPlan> jobExecutionPlanDag = new Dag<>(new ArrayList<>());

    // Acquire the lock BEFORE entering the try block: if lock() itself failed inside the try, the
    // finally clause would attempt to unlock a lock this thread does not hold.
    this.rwLock.readLock().lock();
    try {
        for (FlowSpec datasetFlowSpec : flowSpecs) {
            for (DataNode destNode : destNodes) {
                long authStartTime = System.nanoTime();
                try {
                    boolean authorized = this.dataMovementAuthorizer.isMovementAuthorized(flowSpec, sourceNode, destNode);
                    Instrumented.updateTimer(dataAuthorizationTimer, System.nanoTime() - authStartTime, TimeUnit.NANOSECONDS);
                    if (!authorized) {
                        String message = String.format("Data movement is not authorized for flow: %s, source: %s, destination: %s", flowSpec.getUri().toString(), source, destination);
                        log.error(message);
                        // NOTE(review): the error is recorded on the split spec, not on flowSpec, and the
                        // failure meter is not marked on this branch — confirm both are intended.
                        datasetFlowSpec.addCompilationError(source, destination, message);
                        return null;
                    }
                } catch (Exception e) {
                    Instrumented.markMeter(flowCompilationFailedMeter);
                    datasetFlowSpec.addCompilationError(source, destination, Throwables.getStackTraceAsString(e));
                    return null;
                }
            }
            // Compute the path from source to destination.
            FlowGraphPath flowGraphPath = flowGraph.findPath(datasetFlowSpec);
            if (flowGraphPath != null) {
                // Convert the path into a Dag of JobExecutionPlans.
                jobExecutionPlanDag = jobExecutionPlanDag.merge(flowGraphPath.asDag(this.config));
            }
        }
        if (jobExecutionPlanDag.isEmpty()) {
            Instrumented.markMeter(flowCompilationFailedMeter);
            String message = String.format("No path found from source: %s and destination: %s", source, destination);
            log.info(message);
            // Add the generic "no path" error only when no priority-0 error has been recorded already.
            if (flowSpec.getCompilationErrors().stream().noneMatch(compilationError -> compilationError.errorPriority == 0)) {
                flowSpec.addCompilationError(source, destination, message);
            }
            return null;
        }
    } catch (PathFinder.PathFinderException | SpecNotFoundException | JobTemplate.TemplateException | URISyntaxException | ReflectiveOperationException e) {
        Instrumented.markMeter(flowCompilationFailedMeter);
        String message = String.format("Exception encountered while compiling flow for source: %s and destination: %s, %s", source, destination, Throwables.getStackTraceAsString(e));
        log.error(message, e);
        flowSpec.addCompilationError(source, destination, message);
        return null;
    } finally {
        this.rwLock.readLock().unlock();
    }
    Instrumented.markMeter(flowCompilationSuccessFulMeter);
    Instrumented.updateTimer(flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
    return jobExecutionPlanDag;
}
Aggregations