Search in sources :

Example 6 with Dag

use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.

the class FSDagStateStore method getDags.

/**
 * {@inheritDoc}
 *
 * <p>Lists the files in the checkpoint directory whose names end with
 * {@code DAG_FILE_EXTENSION} and converts each one to a {@link Dag} via {@code getDag(File)}.
 *
 * @return the dags read from the checkpoint directory; empty if no matching file exists
 * @throws IOException if the checkpoint directory cannot be listed, e.g. because it does not
 *         exist, is not a directory, or an I/O error occurs while listing it
 */
@Override
public List<Dag<JobExecutionPlan>> getDags() throws IOException {
    List<Dag<JobExecutionPlan>> runningDags = Lists.newArrayList();
    File dagCheckpointFolder = new File(this.dagCheckpointDir);
    // File#listFiles returns null (not an empty array) when the path is not a directory or an
    // I/O error occurs; report that as an IOException instead of failing with a
    // NullPointerException in the loop below.
    File[] dagFiles = dagCheckpointFolder.listFiles((dir, name) -> name.endsWith(DAG_FILE_EXTENSION));
    if (dagFiles == null) {
        throw new IOException("Unable to list dag checkpoint files in " + dagCheckpointFolder);
    }
    for (File file : dagFiles) {
        runningDags.add(getDag(file));
    }
    return runningDags;
}
Also used : Dag(org.apache.gobblin.service.modules.flowgraph.Dag) File(java.io.File)

Example 7 with Dag

use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.

the class JobExecutionPlanDagFactoryTest method testCreateDag.

@Test
public void testCreateDag() throws Exception {
    // Build one JobExecutionPlan per job template; the spec URI is the template's file name
    // without its extension.
    List<JobExecutionPlan> plans = new ArrayList<>();
    for (JobTemplate template : this.jobTemplates) {
        String specUri = Files.getNameWithoutExtension(new Path(template.getUri()).getName());
        plans.add(new JobExecutionPlan(
            JobSpec.builder(specUri)
                .withConfig(template.getRawTemplateConfig())
                .withVersion("1")
                .withTemplate(template.getUri())
                .build(),
            specExecutor));
    }

    // Turn the plans into a DAG.
    Dag<JobExecutionPlan> dag = new JobExecutionPlanDagFactory().createDag(plans);

    // Overall shape: a single start node, a single end node, four nodes in total.
    Assert.assertEquals(dag.getStartNodes().size(), 1);
    Assert.assertEquals(dag.getEndNodes().size(), 1);
    Assert.assertEquals(dag.getNodes().size(), 4);

    // The start node is job1 and points at the job1.job template.
    Dag.DagNode<JobExecutionPlan> startNode = dag.getStartNodes().get(0);
    Assert.assertEquals(new Path(startNode.getValue().getJobSpec().getUri()).getName(), "job1");
    Assert.assertEquals(new Path(startNode.getValue().getJobSpec().getTemplateURI().get()).getName(), "job1.job");

    // The end node is job4 and points at the job4.job template.
    Dag.DagNode<JobExecutionPlan> endNode = dag.getEndNodes().get(0);
    Assert.assertEquals(new Path(endNode.getValue().getJobSpec().getUri()).getName(), "job4");
    Assert.assertEquals(new Path(endNode.getValue().getJobSpec().getTemplateURI().get()).getName(), "job4.job");

    // Every child of the start node must lead to job4; collect the child names on the way.
    Set<String> childNames = new HashSet<>();
    for (Dag.DagNode<JobExecutionPlan> child : dag.getChildren(startNode)) {
        childNames.add(new Path(child.getValue().getJobSpec().getUri()).getName());
        Dag.DagNode<JobExecutionPlan> grandChild = dag.getChildren(child).get(0);
        Assert.assertEquals(new Path(grandChild.getValue().getJobSpec().getUri()).getName(), "job4");
    }
    Assert.assertTrue(childNames.contains("job2"));
    Assert.assertTrue(childNames.contains("job3"));
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 8 with Dag

use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.

the class MysqlDagStateStoreTest method testWriteCheckpointAndGet.

/**
 * Writes two dags with {@code writeCheckpoint} and verifies that they can be read back one at a
 * time via {@code getDag}, enumerated via {@code getDagIds}, and loaded together via
 * {@code getDags}.
 */
@Test
public void testWriteCheckpointAndGet() throws Exception {
    Dag<JobExecutionPlan> dag_0 = DagTestUtils.buildDag("random_0", 123L);
    Dag<JobExecutionPlan> dag_1 = DagTestUtils.buildDag("random_1", 456L);
    _dagStateStore.writeCheckpoint(dag_0);
    _dagStateStore.writeCheckpoint(dag_1);
    // Verify get one dag
    Dag<JobExecutionPlan> dag = _dagStateStore.getDag(DagManagerUtils.generateDagId(dag_0));
    Assert.assertEquals(dag.getNodes().get(0), dag_0.getNodes().get(0));
    Assert.assertEquals(dag.getNodes().get(1), dag_0.getNodes().get(1));
    // Verify get dagIds
    Set<String> dagIds = _dagStateStore.getDagIds();
    Assert.assertEquals(dagIds.size(), 2);
    Assert.assertTrue(dagIds.contains(DagManagerUtils.generateDagId(dag_0)));
    Assert.assertTrue(dagIds.contains(DagManagerUtils.generateDagId(dag_1)));
    // Verify get all dags
    List<Dag<JobExecutionPlan>> dags = _dagStateStore.getDags();
    Assert.assertEquals(dags.size(), 2);
    // Verify dag contents.
    // NOTE(review): index 0 is treated as dag_0 and index 1 as dag_1, i.e. this relies on
    // getDags() returning the dags in the order they were written -- confirm.
    Dag<JobExecutionPlan> dagDeserialized = dags.get(0);
    assertDeserializedDag(dagDeserialized, "random_0", 123L);
    // The job future is checked for the first dag only, as before (the original asserted it
    // twice in a row per node; once is sufficient).
    for (int i = 0; i < 2; i++) {
        JobExecutionPlan plan = dagDeserialized.getNodes().get(i).getValue();
        Assert.assertTrue(Boolean.parseBoolean(plan.getJobFuture().get().get().toString()));
    }
    assertDeserializedDag(dags.get(1), "random_1", 456L);
}

/**
 * Asserts that {@code dag} is a two-node dag with one start node, one end node and a single
 * parent-to-child edge between them, and that both nodes carry the expected flow group, flow
 * name, flow execution id and a RUNNING execution status.
 *
 * @param dag the dag returned by the state store
 * @param flowSuffix suffix appended to {@code "group"} and {@code "flow"} to form the expected
 *        flow group and flow name
 * @param flowExecutionId the expected flow execution id
 */
private static void assertDeserializedDag(Dag<JobExecutionPlan> dag, String flowSuffix, long flowExecutionId)
        throws Exception {
    Assert.assertEquals(dag.getNodes().size(), 2);
    Assert.assertEquals(dag.getStartNodes().size(), 1);
    Assert.assertEquals(dag.getEndNodes().size(), 1);
    Dag.DagNode<JobExecutionPlan> child = dag.getEndNodes().get(0);
    Dag.DagNode<JobExecutionPlan> parent = dag.getStartNodes().get(0);
    Assert.assertEquals(dag.getParentChildMap().size(), 1);
    Assert.assertTrue(dag.getParentChildMap().get(parent).contains(child));
    for (int i = 0; i < 2; i++) {
        JobExecutionPlan plan = dag.getNodes().get(i).getValue();
        Config jobConfig = plan.getJobSpec().getConfig();
        Assert.assertEquals(jobConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY), "group" + flowSuffix);
        Assert.assertEquals(jobConfig.getString(ConfigurationKeys.FLOW_NAME_KEY), "flow" + flowSuffix);
        Assert.assertEquals(jobConfig.getLong(ConfigurationKeys.FLOW_EXECUTION_ID_KEY), flowExecutionId);
        Assert.assertEquals(plan.getExecutionStatus(), ExecutionStatus.RUNNING);
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) Test(org.testng.annotations.Test)

Example 9 with Dag

use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.

the class DagManagerTest method setUp.

@BeforeClass
public void setUp() throws Exception {
    // Remove any state-store directory left on disk before creating the store.
    File stateStoreDir = new File(this.dagStateStoreDir);
    FileUtils.deleteDirectory(stateStoreDir);

    Config storeConfig = ConfigFactory.empty()
        .withValue(FSDagStateStore.DAG_STATESTORE_DIR, ConfigValueFactory.fromAnyRef(this.dagStateStoreDir));
    this._dagStateStore = new FSDagStateStore(storeConfig, new HashMap<>());
    DagStateStore failedDagStateStore = new InMemoryDagStateStore();
    this._jobStatusRetriever = Mockito.mock(JobStatusRetriever.class);

    this.queue = new LinkedBlockingQueue<>();
    this.cancelQueue = new LinkedBlockingQueue<>();
    this.resumeQueue = new LinkedBlockingQueue<>();

    MetricContext metricContext =
        Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());
    this._dagManagerThread = new DagManager.DagManagerThread(
        _jobStatusRetriever,
        _dagStateStore,
        failedDagStateStore,
        queue,
        cancelQueue,
        resumeQueue,
        true,
        5,
        new HashMap<>(),
        new HashSet<>(),
        metricContext.contextAwareMeter("successMeter"),
        metricContext.contextAwareMeter("failedMeter"),
        START_SLA_DEFAULT);

    // Grab the thread's internal bookkeeping collections reflectively so tests can inspect them.
    this.jobToDag = (Map<DagNode<JobExecutionPlan>, Dag<JobExecutionPlan>>) readDagManagerThreadField("jobToDag");
    this.dagToJobs = (Map<String, LinkedList<DagNode<JobExecutionPlan>>>) readDagManagerThreadField("dagToJobs");
    this.dags = (Map<String, Dag<JobExecutionPlan>>) readDagManagerThreadField("dags");
    this.failedDagIds = (Set<String>) readDagManagerThreadField("failedDagIds");
}

/**
 * Reads the value of the named field declared on {@code DagManager.DagManagerThread} from
 * {@code this._dagManagerThread}, making the field accessible first.
 */
private Object readDagManagerThreadField(String fieldName) throws ReflectiveOperationException {
    Field field = DagManager.DagManagerThread.class.getDeclaredField(fieldName);
    field.setAccessible(true);
    return field.get(this._dagManagerThread);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobStatusRetriever(org.apache.gobblin.service.monitoring.JobStatusRetriever) LinkedList(java.util.LinkedList) Field(java.lang.reflect.Field) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) MetricContext(org.apache.gobblin.metrics.MetricContext) File(java.io.File) HashSet(java.util.HashSet) BeforeClass(org.testng.annotations.BeforeClass)

Example 10 with Dag

use of org.apache.gobblin.service.modules.flowgraph.Dag in project incubator-gobblin by apache.

the class IdentityFlowToJobSpecCompilerTest method testCompilerWithTemplateCatalog.

/**
 * Compiles the FlowSpec returned by {@code initFlowSpec()} with the compiler held in
 * {@code compilerWithTemplateCalague} and verifies that the result is a single-node dag whose
 * JobSpec carries the expected test properties, flow/job names and groups, a flow execution id,
 * and no job schedule.
 */
@Test
public void testCompilerWithTemplateCatalog() {
    FlowSpec flowSpec = initFlowSpec();
    // Run compiler on flowSpec
    Dag<JobExecutionPlan> jobExecutionPlanDag = this.compilerWithTemplateCalague.compileFlow(flowSpec);
    // Assert pre-requisites
    Assert.assertNotNull(jobExecutionPlanDag, "Expected non null dag.");
    // assertEquals (rather than assertTrue on a comparison) reports the actual node count on failure.
    Assert.assertEquals(jobExecutionPlanDag.getNodes().size(), 1, "Expected 1 executor for FlowSpec.");
    // Assert FlowSpec compilation
    Dag.DagNode<JobExecutionPlan> dagNode = jobExecutionPlanDag.getStartNodes().get(0);
    Spec spec = dagNode.getValue().getJobSpec();
    Assert.assertTrue(spec instanceof JobSpec, "Expected JobSpec compiled from FlowSpec.");
    // Assert JobSpec properties
    JobSpec jobSpec = (JobSpec) spec;
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty1"), "testValue1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty2"), "test.Value1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty3"), "100");
    Assert.assertEquals(jobSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY), TEST_SOURCE_NAME);
    Assert.assertFalse(jobSpec.getConfig().hasPath(ConfigurationKeys.JOB_SCHEDULE_KEY));
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertTrue(jobSpec.getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
    // Assert the start node has no children.
    Assert.assertEquals(jobExecutionPlanDag.getChildren(dagNode).size(), 0);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobSpec(org.apache.gobblin.runtime.api.JobSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Spec(org.apache.gobblin.runtime.api.Spec) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Test(org.testng.annotations.Test)

Aggregations

Dag (org.apache.gobblin.service.modules.flowgraph.Dag): 14, JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan): 12, Config (com.typesafe.config.Config): 7, FlowSpec (org.apache.gobblin.runtime.api.FlowSpec): 7, JobSpec (org.apache.gobblin.runtime.api.JobSpec): 6, Spec (org.apache.gobblin.runtime.api.Spec): 6, Test (org.testng.annotations.Test): 6, IOException (java.io.IOException): 5, TopologySpec (org.apache.gobblin.runtime.api.TopologySpec): 5, File (java.io.File): 4, ArrayList (java.util.ArrayList): 4, Path (org.apache.hadoop.fs.Path): 4, Joiner (com.google.common.base.Joiner): 3, Optional (com.google.common.base.Optional): 3, InvocationTargetException (java.lang.reflect.InvocationTargetException): 3, URISyntaxException (java.net.URISyntaxException): 3, List (java.util.List): 3, Collectors (java.util.stream.Collectors): 3, ConfigurationKeys (org.apache.gobblin.configuration.ConfigurationKeys): 3, SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor): 3