Search in sources :

Example 26 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class DagManagerTest method setUp.

/**
 * Prepares the fixtures shared by the tests: wipes the dag state store directory, creates an
 * {@code FSDagStateStore} over it, mocks the {@code JobStatusRetriever}, builds the
 * {@code DagManager.DagManagerThread} under test, and finally pulls the thread's internal
 * {@code jobToDag}, {@code dagToJobs}, {@code dags} and {@code failedDagIds} collections out via
 * reflection so that tests can inspect them.
 *
 * @throws Exception if the directory cannot be deleted or a reflected field cannot be read
 */
@BeforeClass
public void setUp() throws Exception {
    // Start from an empty state store directory.
    File stateStoreDir = new File(this.dagStateStoreDir);
    FileUtils.deleteDirectory(stateStoreDir);

    Config stateStoreConfig = ConfigFactory.empty()
        .withValue(FSDagStateStore.DAG_STATESTORE_DIR, ConfigValueFactory.fromAnyRef(this.dagStateStoreDir));
    this._dagStateStore = new FSDagStateStore(stateStoreConfig, new HashMap<>());
    DagStateStore failedDagStore = new InMemoryDagStateStore();

    this._jobStatusRetriever = Mockito.mock(JobStatusRetriever.class);

    // Queues handed to the thread; tests feed dags through them.
    this.queue = new LinkedBlockingQueue<>();
    this.cancelQueue = new LinkedBlockingQueue<>();
    this.resumeQueue = new LinkedBlockingQueue<>();

    MetricContext metrics =
        Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());

    this._dagManagerThread = new DagManager.DagManagerThread(
        _jobStatusRetriever,
        _dagStateStore,
        failedDagStore,
        queue,
        cancelQueue,
        resumeQueue,
        true,
        5,
        new HashMap<>(),
        new HashSet<>(),
        metrics.contextAwareMeter("successMeter"),
        metrics.contextAwareMeter("failedMeter"),
        START_SLA_DEFAULT);

    // Expose the thread's bookkeeping collections; the casts are unchecked because reflection
    // only yields Object.
    this.jobToDag = (Map<DagNode<JobExecutionPlan>, Dag<JobExecutionPlan>>) readDagManagerThreadField("jobToDag");
    this.dagToJobs = (Map<String, LinkedList<DagNode<JobExecutionPlan>>>) readDagManagerThreadField("dagToJobs");
    this.dags = (Map<String, Dag<JobExecutionPlan>>) readDagManagerThreadField("dags");
    this.failedDagIds = (Set<String>) readDagManagerThreadField("failedDagIds");
}

/**
 * Reads the named declared field of {@code DagManager.DagManagerThread} from the thread instance
 * under test, making it accessible first.
 *
 * @param fieldName name of the field declared on {@code DagManager.DagManagerThread}
 * @return the current value of that field on {@code this._dagManagerThread}
 * @throws NoSuchFieldException if no such field is declared
 * @throws IllegalAccessException if the field cannot be read
 */
private Object readDagManagerThreadField(String fieldName) throws NoSuchFieldException, IllegalAccessException {
    Field reflected = DagManager.DagManagerThread.class.getDeclaredField(fieldName);
    reflected.setAccessible(true);
    return reflected.get(this._dagManagerThread);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobStatusRetriever(org.apache.gobblin.service.monitoring.JobStatusRetriever) LinkedList(java.util.LinkedList) Field(java.lang.reflect.Field) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) MetricContext(org.apache.gobblin.metrics.MetricContext) File(java.io.File) HashSet(java.util.HashSet) BeforeClass(org.testng.annotations.BeforeClass)

Example 27 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class DagManagerTest method testDagManagerWithBadFlowSLAConfig.

/**
 * Submits a single-job dag whose config carries the flow SLA value {@code "1h"}, runs the dag
 * manager thread once against a mocked COMPLETE job status, and checks that the thread's
 * {@code dags}, {@code jobToDag} and {@code dagToJobs} collections are all empty afterwards.
 *
 * @throws URISyntaxException if the job name cannot be turned into a URI
 * @throws IOException declared by the original test; propagated from the helpers it calls
 */
@Test(dependsOnMethods = "testJobStartSLAKilledDag")
public void testDagManagerWithBadFlowSLAConfig() throws URISyntaxException, IOException {
    final long flowExecutionId = System.currentTimeMillis();
    final String flowGroup = "group0";
    final String flowName = "flow0";
    final String jobName = "job0";

    // Create a config with an improperly formatted Flow SLA time e.g. "1h"
    // NOTE(review): the flow group/name values end up as "groupgroup0" / "flowflow0" because the
    // locals already carry the prefix - confirm whether that is intended.
    Config slaJobConfig = ConfigBuilder.create()
        .addPrimitive(ConfigurationKeys.FLOW_GROUP_KEY, "group" + flowGroup)
        .addPrimitive(ConfigurationKeys.FLOW_NAME_KEY, "flow" + flowName)
        .addPrimitive(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, flowExecutionId)
        .addPrimitive(ConfigurationKeys.JOB_GROUP_KEY, flowGroup)
        .addPrimitive(ConfigurationKeys.JOB_NAME_KEY, jobName)
        .addPrimitive(ConfigurationKeys.FLOW_FAILURE_OPTION, "FINISH_RUNNING")
        .addPrimitive(ConfigurationKeys.GOBBLIN_FLOW_SLA_TIME, "1h")
        .build();

    // Wrap the config in a one-element list of execution plans.
    List<JobExecutionPlan> plans = new ArrayList<>();
    JobSpec jobSpec = JobSpec.builder("test_job" + jobName)
        .withVersion("0")
        .withConfig(slaJobConfig)
        .withTemplate(new URI(jobName))
        .build();
    SpecExecutor dummyExecutor = MockedSpecExecutor.createDummySpecExecutor(new URI(jobName));
    plans.add(new JobExecutionPlan(jobSpec, dummyExecutor));

    Dag<JobExecutionPlan> singleJobDag = new JobExecutionPlanDagFactory().createDag(plans);
    // NOTE(review): the generated id is never read below; the call is kept to preserve behavior.
    String dagId = DagManagerUtils.generateDagId(singleJobDag);

    // Add a dag to the queue of dags
    this.queue.offer(singleJobDag);

    // Job should have been run normally without breaking on SLA check, so we can just mark as completed for status
    Iterator<JobStatus> completedStatus = getMockJobStatus(
        flowName, flowGroup, flowExecutionId + 1, jobName, flowGroup, String.valueOf(ExecutionStatus.COMPLETE));
    Mockito.when(
            _jobStatusRetriever.getJobStatusesForFlowExecution(
                Mockito.anyString(), Mockito.anyString(), Mockito.anyLong(), Mockito.anyString(), Mockito.anyString()))
        .thenReturn(completedStatus);

    // Run the thread once. Job should run without crashing thread on SLA check and cleanup
    this._dagManagerThread.run();

    Assert.assertEquals(this.dags.size(), 0);
    Assert.assertEquals(this.jobToDag.size(), 0);
    Assert.assertEquals(this.dagToJobs.size(), 0);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) URI(java.net.URI) JobStatus(org.apache.gobblin.service.monitoring.JobStatus) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) MockedSpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.MockedSpecExecutor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) JobExecutionPlanDagFactory(org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory) Test(org.testng.annotations.Test)

Example 28 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class IdentityFlowToJobSpecCompilerTest method testNoJobSpecCompilation.

/**
 * Compiles a FlowSpec initialised with the identifiers {@code "unsupportedSource"} and
 * {@code "unsupportedSink"} and checks that the compiler returns a non-null dag containing no nodes.
 */
@Test
public void testNoJobSpecCompilation() {
    FlowSpec flowSpec = initFlowSpec(TEST_FLOW_GROUP, TEST_FLOW_NAME, "unsupportedSource", "unsupportedSink");
    // Run compiler on flowSpec
    Dag<JobExecutionPlan> jobExecutionPlanDag = this.compilerWithTemplateCalague.compileFlow(flowSpec);
    // Assert pre-requisites
    Assert.assertNotNull(jobExecutionPlanDag, "Expected non null dag.");
    // assertEquals (TestNG order: actual, expected, message) reports the actual node count on
    // failure, and the message now matches the expectation of zero nodes.
    Assert.assertEquals(jobExecutionPlanDag.getNodes().size(), 0, "Expected 0 executors for FlowSpec.");
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Test(org.testng.annotations.Test)

Example 29 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class IdentityFlowToJobSpecCompilerTest method testCompilerWithTemplateCatalog.

/**
 * Compiles the default FlowSpec returned by {@code initFlowSpec()} and checks that the resulting dag
 * has exactly one node, that the node's JobSpec carries the expected config values, and that the
 * start node has no children.
 */
@Test
public void testCompilerWithTemplateCatalog() {
    FlowSpec flowSpec = initFlowSpec();
    // Run compiler on flowSpec
    Dag<JobExecutionPlan> jobExecutionPlanDag = this.compilerWithTemplateCalague.compileFlow(flowSpec);
    // Assert pre-requisites
    Assert.assertNotNull(jobExecutionPlanDag, "Expected non null dag.");
    // assertEquals (TestNG order: actual, expected, message) reports the actual node count on failure.
    Assert.assertEquals(jobExecutionPlanDag.getNodes().size(), 1, "Expected 1 executor for FlowSpec.");
    // Assert FlowSpec compilation
    Dag.DagNode<JobExecutionPlan> dagNode = jobExecutionPlanDag.getStartNodes().get(0);
    Spec spec = dagNode.getValue().getJobSpec();
    Assert.assertTrue(spec instanceof JobSpec, "Expected JobSpec compiled from FlowSpec.");
    // Assert JobSpec properties
    JobSpec jobSpec = (JobSpec) spec;
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty1"), "testValue1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty2"), "test.Value1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty3"), "100");
    Assert.assertEquals(jobSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY), TEST_SOURCE_NAME);
    // The compiled job is not expected to carry a schedule key.
    Assert.assertFalse(jobSpec.getConfig().hasPath(ConfigurationKeys.JOB_SCHEDULE_KEY));
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertTrue(jobSpec.getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
    // Assert the start node has no children.
    Assert.assertEquals(jobExecutionPlanDag.getChildren(dagNode).size(), 0);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobSpec(org.apache.gobblin.runtime.api.JobSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Spec(org.apache.gobblin.runtime.api.Spec) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Test(org.testng.annotations.Test)

Example 30 with JobExecutionPlan

use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by apache.

the class MultiHopFlowCompilerTest method testCompileFlowWithRetention.

/**
 * Compiles {@code flow/flow1.conf} from {@code LocalFS-1} to {@code ADLS-1} (both boolean arguments
 * of {@code createFlowSpec} set to {@code true}) and walks the resulting dag hop by hop, starting at
 * the start nodes and following children. Each of the first four hops must contain two nodes and
 * the fifth hop one node, and every node's job name must start with one of the names expected for
 * that hop. After the fifth hop there must be no further children.
 *
 * @throws URISyntaxException declared by the original test; propagated from the helpers it calls
 * @throws IOException declared by the original test; propagated from the helpers it calls
 */
@Test(dependsOnMethods = "testCompileFlow")
public void testCompileFlowWithRetention() throws URISyntaxException, IOException {
    FlowSpec spec = createFlowSpec("flow/flow1.conf", "LocalFS-1", "ADLS-1", true, true);
    Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
    Assert.assertEquals(jobDag.getNodes().size(), 9);
    Assert.assertEquals(jobDag.getStartNodes().size(), 2);
    Assert.assertEquals(jobDag.getEndNodes().size(), 5);
    String flowGroup = "testFlowGroup";
    String flowName = "testFlowName";
    List<DagNode<JobExecutionPlan>> currentHopNodes = jobDag.getStartNodes();
    // Parallel lists: index i describes the i-th expected job; indices (i, i + 1) form one hop.
    List<String> expectedJobNames = Lists.newArrayList("SnapshotRetention", "Distcp", "SnapshotRetention", "ConvertToJsonAndEncrypt", "SnapshotRetention", "Distcp", "SnapshotRetention", "DistcpToADL", "SnapshotRetention");
    List<String> sourceNodes = Lists.newArrayList("LocalFS-1", "LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1");
    List<String> destinationNodes = Lists.newArrayList("LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1", "ADLS-1");
    List<String> edgeNames = Lists.newArrayList("localRetention", "localToHdfs", "hdfsRetention", "hdfsConvertToJsonAndEncrypt", "hdfsRetention", "hdfsToHdfs", "hdfsRetention", "hdfsToAdl", "hdfsRemoteRetention");
    for (int i = 0; i < 9; i += 2) {
        // Every hop but the last (i == 8) pairs two jobs.
        boolean hasSecondJob = i < 8;
        Assert.assertEquals(currentHopNodes.size(), hasSecondJob ? 2 : 1);

        Set<String> jobNames = new HashSet<>();
        jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR).join(flowGroup, flowName, expectedJobNames.get(i), sourceNodes.get(i), destinationNodes.get(i), edgeNames.get(i)));
        if (hasSecondJob) {
            jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR).join(flowGroup, flowName, expectedJobNames.get(i + 1), sourceNodes.get(i + 1), destinationNodes.get(i + 1), edgeNames.get(i + 1)));
        }

        // Collect the children of this hop; they become the next hop.
        List<DagNode<JobExecutionPlan>> nextHopNodes = new ArrayList<>();
        for (DagNode<JobExecutionPlan> dagNode : currentHopNodes) {
            Config jobConfig = dagNode.getValue().getJobSpec().getConfig();
            String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
            // Prefix match: the actual job name only has to start with one of the expected names.
            Assert.assertTrue(jobNames.stream().anyMatch(jobName::startsWith));
            log.warn(jobName);
            nextHopNodes.addAll(jobDag.getChildren(dagNode));
        }
        currentHopNodes = nextHopNodes;
    }
    // The children of the last hop must be empty. The previous check looked at a list that was
    // re-created empty at the end of every iteration, so it could never fail.
    Assert.assertEquals(currentHopNodes.size(), 0);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Aggregations

JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan)39 Config (com.typesafe.config.Config)22 FlowSpec (org.apache.gobblin.runtime.api.FlowSpec)21 Test (org.testng.annotations.Test)21 JobSpec (org.apache.gobblin.runtime.api.JobSpec)15 ArrayList (java.util.ArrayList)12 Dag (org.apache.gobblin.service.modules.flowgraph.Dag)12 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)10 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)8 JobExecutionPlanDagFactory (org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory)8 URI (java.net.URI)7 Spec (org.apache.gobblin.runtime.api.Spec)6 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)6 IOException (java.io.IOException)5 DagNode (org.apache.gobblin.service.modules.flowgraph.Dag.DagNode)5 File (java.io.File)4 HashSet (java.util.HashSet)4 Path (org.apache.hadoop.fs.Path)4 Joiner (com.google.common.base.Joiner)3 Optional (com.google.common.base.Optional)3