Search in sources :

Example 11 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class MultiHopFlowCompilerTest method testCompileMultiDatasetFlow.

@Test(dependsOnMethods = "testMulticastPath")
public void testCompileMultiDatasetFlow() throws Exception {
    // Compile flow3 from HDFS-1 to HDFS-3. The expectation is one copy -> retention chain per dataset,
    // i.e. 3 independent chains of 2 jobs each.
    FlowSpec flowSpec = createFlowSpec("flow/flow3.conf", "HDFS-1", "HDFS-3", true, false);
    Dag<JobExecutionPlan> compiledDag = specCompiler.compileFlow(flowSpec);

    Assert.assertEquals(compiledDag.getNodes().size(), 6);
    Assert.assertEquals(compiledDag.getEndNodes().size(), 3);
    Assert.assertEquals(compiledDag.getStartNodes().size(), 3);

    // Each start node is expected to carry a job name beginning with the Distcp (copy) prefix.
    String expectedCopyPrefix = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
        .join("testFlowGroup", "testFlowName", "Distcp", "HDFS-1", "HDFS-3", "hdfsToHdfs");
    for (DagNode<JobExecutionPlan> startNode : compiledDag.getStartNodes()) {
        String actualName = startNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
        Assert.assertTrue(actualName.startsWith(expectedCopyPrefix));
    }

    // Each end node is expected to carry a job name beginning with the SnapshotRetention prefix.
    String expectedRetentionPrefix = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
        .join("testFlowGroup", "testFlowName", "SnapshotRetention", "HDFS-3", "HDFS-3", "hdfsRetention");
    for (DagNode<JobExecutionPlan> endNode : compiledDag.getEndNodes()) {
        String actualName = endNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
        Assert.assertTrue(actualName.startsWith(expectedRetentionPrefix));
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Test(org.testng.annotations.Test)

Example 12 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class FlowGraphPathTest method buildDag.

/**
 * Builds a {@link Dag} of {@link JobExecutionPlan}s from a run of generated test jobs.
 *
 * <p>One job is created for every id in {@code [startNodeId, startNodeId + numNodes)}. The job with id
 * {@code n} is named {@code "job" + n}; every job except the first declares {@code "job" + (n - 1)}
 * under {@link ConfigurationKeys#JOB_DEPENDENCIES}.
 *
 * @param numNodes number of jobs to generate
 * @param startNodeId id assigned to the first generated job
 * @param isForkable when {@code true}, the last generated job gets
 *        {@link ConfigurationKeys#JOB_FORK_ON_CONCAT} set to {@code true}
 * @return the Dag created by {@link JobExecutionPlanDagFactory} from the generated plans
 * @throws URISyntaxException if one of the generated job names is not a valid URI
 */
public Dag<JobExecutionPlan> buildDag(int numNodes, int startNodeId, boolean isForkable) throws URISyntaxException {
    // Settings shared by every job; the flow execution id is sampled once so all jobs agree on it.
    Config sharedConfig = ConfigBuilder.create()
        .addPrimitive(ConfigurationKeys.FLOW_GROUP_KEY, "group0")
        .addPrimitive(ConfigurationKeys.FLOW_NAME_KEY, "flow0")
        .addPrimitive(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, System.currentTimeMillis())
        .addPrimitive(ConfigurationKeys.JOB_GROUP_KEY, "group0")
        .build();

    int lastNodeId = startNodeId + numNodes - 1;
    List<JobExecutionPlan> plans = new ArrayList<>();
    for (int nodeId = startNodeId; nodeId <= lastNodeId; nodeId++) {
        String idText = Integer.toString(nodeId);
        String jobName = "job" + idText;

        Config nodeConfig = sharedConfig.withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef(jobName));
        if (isForkable && nodeId == lastNodeId) {
            nodeConfig = nodeConfig.withValue(ConfigurationKeys.JOB_FORK_ON_CONCAT, ConfigValueFactory.fromAnyRef(true));
        }
        if (nodeId > startNodeId) {
            // Depend on the job generated in the previous iteration.
            nodeConfig = nodeConfig.withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef("job" + (nodeId - 1)));
        }

        JobSpec jobSpec = JobSpec.builder("test_job" + idText)
            .withVersion(idText)
            .withConfig(nodeConfig)
            .withTemplate(new URI(jobName))
            .build();
        SpecExecutor executor = InMemorySpecExecutor.createDummySpecExecutor(new URI(jobName));
        plans.add(new JobExecutionPlan(jobSpec, executor));
    }
    return new JobExecutionPlanDagFactory().createDag(plans);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) URI(java.net.URI) JobExecutionPlanDagFactory(org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)

Example 13 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class FSDagStateStoreTest method testWriteCheckpoint.

@Test
public void testWriteCheckpoint() throws IOException, URISyntaxException {
    long expectedExecutionId = System.currentTimeMillis();
    String flowGroupId = "0";
    String expectedFlowGroup = "group" + flowGroupId;
    String expectedFlowName = "flow" + flowGroupId;

    // Persist a freshly built dag, then read it back from the checkpoint file it should have produced.
    Dag<JobExecutionPlan> originalDag = DagTestUtils.buildDag(flowGroupId, expectedExecutionId);
    this._dagStateStore.writeCheckpoint(originalDag);
    File checkpointFile =
        new File(this.checkpointDir, DagManagerUtils.generateDagId(originalDag) + FSDagStateStore.DAG_FILE_EXTENSION);
    Dag<JobExecutionPlan> restoredDag = ((FSDagStateStore) this._dagStateStore).getDag(checkpointFile);

    // Shape of the restored dag: two nodes, a single start node and a single end node.
    Assert.assertEquals(restoredDag.getNodes().size(), 2);
    Assert.assertEquals(restoredDag.getStartNodes().size(), 1);
    Assert.assertEquals(restoredDag.getEndNodes().size(), 1);

    // The single edge must run from the start node to the end node.
    Dag.DagNode<JobExecutionPlan> endNode = restoredDag.getEndNodes().get(0);
    Dag.DagNode<JobExecutionPlan> startNode = restoredDag.getStartNodes().get(0);
    Assert.assertEquals(restoredDag.getParentChildMap().size(), 1);
    Assert.assertTrue(restoredDag.getParentChildMap().get(startNode).contains(endNode));

    // Flow-level settings and execution status must be intact on every restored node
    // (the node count was already asserted to be 2 above).
    for (Dag.DagNode<JobExecutionPlan> node : restoredDag.getNodes()) {
        JobExecutionPlan restoredPlan = node.getValue();
        Config restoredConfig = restoredPlan.getJobSpec().getConfig();
        Assert.assertEquals(restoredConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY), expectedFlowGroup);
        Assert.assertEquals(restoredConfig.getString(ConfigurationKeys.FLOW_NAME_KEY), expectedFlowName);
        Assert.assertEquals(restoredConfig.getLong(ConfigurationKeys.FLOW_EXECUTION_ID_KEY), expectedExecutionId);
        Assert.assertEquals(restoredPlan.getExecutionStatus(), ExecutionStatus.RUNNING);
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) File(java.io.File) Test(org.testng.annotations.Test)

Example 14 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class FSDagStateStoreTest method testCleanUp.

@Test(dependsOnMethods = "testWriteCheckpoint")
public void testCleanUp() throws IOException, URISyntaxException {
    String flowGroupId = "0";
    long executionId = System.currentTimeMillis();
    Dag<JobExecutionPlan> checkpointedDag = DagTestUtils.buildDag(flowGroupId, executionId);

    // Round 1: write a checkpoint, then remove it by passing the dag itself.
    this._dagStateStore.writeCheckpoint(checkpointedDag);
    File checkpointFile =
        new File(this.checkpointDir, DagManagerUtils.generateDagId(checkpointedDag) + FSDagStateStore.DAG_FILE_EXTENSION);
    Assert.assertTrue(checkpointFile.exists());
    this._dagStateStore.cleanUp(checkpointedDag);
    Assert.assertFalse(checkpointFile.exists());

    // Round 2: write the checkpoint again, then remove it by passing the dag id.
    this._dagStateStore.writeCheckpoint(checkpointedDag);
    Assert.assertTrue(checkpointFile.exists());
    this._dagStateStore.cleanUp(DagManagerUtils.generateDagId(checkpointedDag));
    Assert.assertFalse(checkpointFile.exists());
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) File(java.io.File) Test(org.testng.annotations.Test)

Example 15 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class Orchestrator method orchestrate.

/**
 * Compiles the given spec into a {@link Dag} of {@link JobExecutionPlan}s and hands it off for execution.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Block until the topology catalog reports init complete and the spec compiler reports healthy.</li>
 *   <li>On first sight of the spec's URI, register a gauge that exposes the flow's compiled state.</li>
 *   <li>If {@code canRun} rejects the flow, mark it {@code SKIPPED}, emit a {@code FLOW_FAILED} event
 *       (when an event submitter is present) and return.</li>
 *   <li>Compile the flow. On a null or empty Dag, mark it {@code FAILED}, emit {@code FLOW_COMPILE_FAILED}
 *       (when an event submitter is present) and return.</li>
 *   <li>If a DagManager is present, pass the Dag to it; otherwise add each job spec directly to the
 *       producer of its spec executor.</li>
 * </ol>
 *
 * <p>The success meter and orchestration timer at the end are only updated when none of the early returns
 * above was taken and no exception propagated.
 *
 * @param spec the spec to orchestrate; only {@link FlowSpec} instances are supported
 * @throws Exception if {@code spec} is not a {@link FlowSpec} (a {@link RuntimeException}), or if adding the
 *         Dag to the DagManager fails (the original exception is rethrown); other calls made here may also throw
 */
public void orchestrate(Spec spec) throws Exception {
    // Add below waiting because TopologyCatalog and FlowCatalog service can be launched at the same time
    this.topologyCatalog.get().getInitComplete().await();
    // Wait for the SpecCompiler to become healthy.
    this.getSpecCompiler().awaitHealthy();
    long startTime = System.nanoTime();
    if (spec instanceof FlowSpec) {
        Config flowConfig = ((FlowSpec) spec).getConfig();
        String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY);
        String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY);
        // First time this spec URI is seen: create its state holder and register a gauge that reads from it.
        if (!flowGauges.containsKey(spec.getUri().toString())) {
            String flowCompiledGaugeName = MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, flowGroup, flowName, ServiceMetricNames.COMPILED);
            flowGauges.put(spec.getUri().toString(), new FlowCompiledState());
            ContextAwareGauge<Integer> gauge = RootMetricContext.get().newContextAwareGauge(flowCompiledGaugeName, () -> flowGauges.get(spec.getUri().toString()).state.value);
            RootMetricContext.get().register(flowCompiledGaugeName, gauge);
        }
        // If the FlowSpec disallows concurrent executions, then check if another instance of the flow is already
        // running. If so, return immediately.
        // The per-flow setting falls back to this.flowConcurrencyFlag when the flow config does not specify it.
        boolean allowConcurrentExecution = ConfigUtils.getBoolean(flowConfig, ConfigurationKeys.FLOW_ALLOW_CONCURRENT_EXECUTION, this.flowConcurrencyFlag);
        if (!canRun(flowName, flowGroup, allowConcurrentExecution)) {
            _log.warn("Another instance of flowGroup: {}, flowName: {} running; Skipping flow execution since " + "concurrent executions are disabled for this flow.", flowGroup, flowName);
            flowGauges.get(spec.getUri().toString()).setState(CompiledState.SKIPPED);
            Instrumented.markMeter(this.skippedFlowsMeter);
            // Send FLOW_FAILED event
            Map<String, String> flowMetadata = TimingEventUtils.getFlowMetadata((FlowSpec) spec);
            flowMetadata.put(TimingEvent.METADATA_MESSAGE, "Flow failed because another instance is running and concurrent " + "executions are disabled. Set flow.allowConcurrentExecution to true in the flow spec to change this behaviour.");
            if (this.eventSubmitter.isPresent()) {
                new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata);
            }
            return;
        }
        // The FLOW_COMPILED timer is created before compilation and is only stopped further below on the success path.
        Optional<TimingEvent> flowCompilationTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.FlowTimings.FLOW_COMPILED));
        Dag<JobExecutionPlan> jobExecutionPlanDag = specCompiler.compileFlow(spec);
        Map<String, String> flowMetadata = TimingEventUtils.getFlowMetadata((FlowSpec) spec);
        // A null or empty Dag is treated as a compilation failure.
        if (jobExecutionPlanDag == null || jobExecutionPlanDag.isEmpty()) {
            // For scheduled flows, we do not insert the flowExecutionId into the FlowSpec. As a result, if the flow
            // compilation fails (i.e. we are unable to find a path), the metadata will not have flowExecutionId.
            // In this case, the current time is used as the flow executionId.
            flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, Long.toString(System.currentTimeMillis()));
            String message = "Flow was not compiled successfully.";
            if (!((FlowSpec) spec).getCompilationErrors().isEmpty()) {
                message = message + " Compilation errors encountered: " + ((FlowSpec) spec).getCompilationErrors();
            }
            flowMetadata.put(TimingEvent.METADATA_MESSAGE, message);
            Optional<TimingEvent> flowCompileFailedTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.FlowTimings.FLOW_COMPILE_FAILED));
            Instrumented.markMeter(this.flowOrchestrationFailedMeter);
            flowGauges.get(spec.getUri().toString()).setState(CompiledState.FAILED);
            _log.warn("Cannot determine an executor to run on for Spec: " + spec);
            if (flowCompileFailedTimer.isPresent()) {
                flowCompileFailedTimer.get().stop(flowMetadata);
            }
            return;
        } else {
            flowGauges.get(spec.getUri().toString()).setState(CompiledState.SUCCESSFUL);
        }
        // If it is a scheduled flow (and hence, does not have flowExecutionId in the FlowSpec) and the flow compilation is successful,
        // retrieve the flowExecutionId from the JobSpec.
        // The id is read from the first node of the Dag only.
        flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, jobExecutionPlanDag.getNodes().get(0).getValue().getJobSpec().getConfigAsProperties().getProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
        if (flowCompilationTimer.isPresent()) {
            flowCompilationTimer.get().stop(flowMetadata);
        }
        if (this.dagManager.isPresent()) {
            try {
                // Send the dag to the DagManager.
                this.dagManager.get().addDag(jobExecutionPlanDag, true, true);
            } catch (Exception ex) {
                if (this.eventSubmitter.isPresent()) {
                    // pronounce failed before stack unwinds, to ensure flow not marooned in `COMPILED` state; (failure likely attributable to DB connection/failover)
                    String failureMessage = "Failed to add Job Execution Plan due to: " + ex.getMessage();
                    flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage);
                    new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata);
                }
                // Rethrow so the caller sees the failure; the success meter below is not reached.
                throw ex;
            }
        } else {
            // Schedule all compiled JobSpecs on their respective Executor
            for (Dag.DagNode<JobExecutionPlan> dagNode : jobExecutionPlanDag.getNodes()) {
                DagManagerUtils.incrementJobAttempt(dagNode);
                JobExecutionPlan jobExecutionPlan = dagNode.getValue();
                // Run this spec on selected executor
                // Declared outside the try so the catch block can include it in the error message (it may still be null there).
                SpecProducer producer = null;
                try {
                    producer = jobExecutionPlan.getSpecExecutor().getProducer().get();
                    Spec jobSpec = jobExecutionPlan.getJobSpec();
                    // A missing flowExecutionId is only logged; the job spec is still added to the producer.
                    if (!((JobSpec) jobSpec).getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY)) {
                        _log.warn("JobSpec does not contain flowExecutionId.");
                    }
                    Map<String, String> jobMetadata = TimingEventUtils.getJobMetadata(flowMetadata, jobExecutionPlan);
                    _log.info(String.format("Going to orchestrate JobSpec: %s on Executor: %s", jobSpec, producer));
                    Optional<TimingEvent> jobOrchestrationTimer = this.eventSubmitter.transform(submitter -> new TimingEvent(submitter, TimingEvent.LauncherTimings.JOB_ORCHESTRATED));
                    producer.addSpec(jobSpec);
                    if (jobOrchestrationTimer.isPresent()) {
                        jobOrchestrationTimer.get().stop(jobMetadata);
                    }
                } catch (Exception e) {
                    // NOTE(review): the failure is logged and the loop moves on to the next node, so the
                    // success meter below is still marked even if some job specs could not be added.
                    _log.error("Cannot successfully setup spec: " + jobExecutionPlan.getJobSpec() + " on executor: " + producer + " for flow: " + spec, e);
                }
            }
        }
    } else {
        // Anything other than a FlowSpec is counted as a failed orchestration and rejected.
        Instrumented.markMeter(this.flowOrchestrationFailedMeter);
        throw new RuntimeException("Spec not of type FlowSpec, cannot orchestrate: " + spec);
    }
    // Reached only when no early return was taken and no exception propagated above.
    Instrumented.markMeter(this.flowOrchestrationSuccessFulMeter);
    Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) TimingEvent(org.apache.gobblin.metrics.event.TimingEvent) InvocationTargetException(java.lang.reflect.InvocationTargetException) IOException(java.io.IOException) SpecProducer(org.apache.gobblin.runtime.api.SpecProducer) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) Spec(org.apache.gobblin.runtime.api.Spec)

Aggregations

JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan)39 Config (com.typesafe.config.Config)22 FlowSpec (org.apache.gobblin.runtime.api.FlowSpec)21 Test (org.testng.annotations.Test)21 JobSpec (org.apache.gobblin.runtime.api.JobSpec)15 ArrayList (java.util.ArrayList)12 Dag (org.apache.gobblin.service.modules.flowgraph.Dag)12 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)10 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)8 JobExecutionPlanDagFactory (org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)8 URI (java.net.URI)7 Spec (org.apache.gobblin.runtime.api.Spec)6 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)6 IOException (java.io.IOException)5 DagNode (org.apache.gobblin.service.modules.flowgraph.Dag.DagNode)5 File (java.io.File)4 HashSet (java.util.HashSet)4 Path (org.apache.hadoop.fs.Path)4 Joiner (com.google.common.base.Joiner)3 Optional (com.google.common.base.Optional)3