Search in sources :

Example 1 with DagNode

use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.

the class DagTest method testInitialize.

/**
 * Verifies that a {@link Dag} built from a list of linked nodes reports the expected start nodes, end nodes
 * and children.
 *
 * <p>Graph under test: val1 is the parent of val2 and val3; val2 is a parent of val4; val3 is a parent of
 * val4 and val5.
 */
@Test
public void testInitialize() {
    DagNode<String> dagNode1 = new DagNode<>("val1");
    DagNode<String> dagNode2 = new DagNode<>("val2");
    DagNode<String> dagNode3 = new DagNode<>("val3");
    DagNode<String> dagNode4 = new DagNode<>("val4");
    DagNode<String> dagNode5 = new DagNode<>("val5");
    dagNode2.addParentNode(dagNode1);
    dagNode3.addParentNode(dagNode1);
    dagNode4.addParentNode(dagNode2);
    dagNode4.addParentNode(dagNode3);
    dagNode5.addParentNode(dagNode3);
    List<DagNode<String>> dagNodeList = Lists.newArrayList(dagNode1, dagNode2, dagNode3, dagNode4, dagNode5);
    Dag<String> dag = new Dag<>(dagNodeList);

    // Test startNodes and endNodes
    Assert.assertEquals(dag.getStartNodes().size(), 1);
    Assert.assertEquals(dag.getStartNodes().get(0).getValue(), "val1");
    Assert.assertEquals(dag.getEndNodes().size(), 2);
    Assert.assertEquals(dag.getEndNodes().get(0).getValue(), "val4");
    Assert.assertEquals(dag.getEndNodes().get(1).getValue(), "val5");

    // Parameterized (not raw) so the getChildren calls below are type-checked.
    DagNode<String> startNode = dag.getStartNodes().get(0);
    Assert.assertEquals(dag.getChildren(startNode).size(), 2);
    Set<String> childSet = new HashSet<>();
    for (DagNode<String> node : dag.getChildren(startNode)) {
        childSet.add(node.getValue());
    }
    Assert.assertTrue(childSet.contains("val2"));
    Assert.assertTrue(childSet.contains("val3"));

    // Re-read the two children from the dag itself; the checks below rely on val2 being listed before val3.
    dagNode2 = dag.getChildren(startNode).get(0);
    dagNode3 = dag.getChildren(startNode).get(1);
    Assert.assertEquals(dag.getChildren(dagNode2).size(), 1);
    Assert.assertEquals(dag.getChildren(dagNode2).get(0).getValue(), "val4");

    // A dedicated set keeps the second-level check independent of the first-level values collected above.
    Set<String> grandChildSet = new HashSet<>();
    for (DagNode<String> node : dag.getChildren(dagNode3)) {
        grandChildSet.add(node.getValue());
    }
    Assert.assertTrue(grandChildSet.contains("val4"));
    Assert.assertTrue(grandChildSet.contains("val5"));

    // Ensure end nodes have no children
    Assert.assertEquals(dag.getChildren(dagNode4).size(), 0);
    Assert.assertEquals(dag.getChildren(dagNode5).size(), 0);
}
Also used : DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 2 with DagNode

use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.

the class MultiHopFlowCompilerTest method testMulticastPath.

/**
 * Compiles the flow described by {@code flow/flow2.conf} from LocalFS-1 to the two destinations HDFS-3 and HDFS-4
 * and checks the shape of the resulting dag: four nodes, two start nodes, two end nodes, and job names on each
 * hop that begin with one of the expected prefixes.
 */
@Test(dependsOnMethods = "testCompileFlowSingleHop")
public void testMulticastPath() throws IOException, URISyntaxException {
    FlowSpec flowSpec = createFlowSpec("flow/flow2.conf", "LocalFS-1", "HDFS-3,HDFS-4", false, false);
    Dag<JobExecutionPlan> compiledDag = this.specCompiler.compileFlow(flowSpec);
    Assert.assertEquals(compiledDag.getNodes().size(), 4);
    Assert.assertEquals(compiledDag.getEndNodes().size(), 2);
    Assert.assertEquals(compiledDag.getStartNodes().size(), 2);

    // Every expected job-name prefix is built with the same separator.
    Joiner nameJoiner = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR);

    // First hop must be from LocalFS to HDFS-1 and HDFS-2
    Set<String> firstHopPrefixes = new HashSet<>();
    firstHopPrefixes.add(nameJoiner.join("testFlowGroup", "testFlowName", "Distcp", "LocalFS-1", "HDFS-1", "localToHdfs"));
    firstHopPrefixes.add(nameJoiner.join("testFlowGroup", "testFlowName", "Distcp", "LocalFS-1", "HDFS-2", "localToHdfs"));
    for (DagNode<JobExecutionPlan> startNode : compiledDag.getStartNodes()) {
        String jobName = startNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
        Assert.assertTrue(firstHopPrefixes.stream().anyMatch(prefix -> jobName.startsWith(prefix)));
    }

    // Second hop must be from HDFS-1/HDFS-2 to HDFS-3/HDFS-4 respectively.
    Set<String> secondHopPrefixes = new HashSet<>();
    secondHopPrefixes.add(nameJoiner.join("testFlowGroup", "testFlowName", "Distcp", "HDFS-1", "HDFS-3", "hdfsToHdfs"));
    secondHopPrefixes.add(nameJoiner.join("testFlowGroup", "testFlowName", "Distcp", "HDFS-2", "HDFS-4", "hdfsToHdfs"));
    for (DagNode<JobExecutionPlan> startNode : compiledDag.getStartNodes()) {
        List<DagNode<JobExecutionPlan>> children = compiledDag.getChildren(startNode);
        Assert.assertEquals(children.size(), 1);
        String jobName = children.get(0).getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY);
        Assert.assertTrue(secondHopPrefixes.stream().anyMatch(prefix -> jobName.startsWith(prefix)));
    }
}
Also used : DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 3 with DagNode

use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.

the class DagManagerTest method setUp.

/**
 * Builds the fixtures used by this test class.
 *
 * <p>Deletes the directory named by {@code dagStateStoreDir}, creates the dag state stores, a mocked
 * {@link JobStatusRetriever} and the three queues, constructs a {@code DagManager.DagManagerThread} from them,
 * and finally reads the thread's {@code jobToDag}, {@code dagToJobs}, {@code dags} and {@code failedDagIds}
 * fields through reflection so tests can inspect them.
 */
@BeforeClass
public void setUp() throws Exception {
    File stateStoreDirectory = new File(this.dagStateStoreDir);
    FileUtils.deleteDirectory(stateStoreDirectory);

    Config stateStoreConfig = ConfigFactory.empty()
        .withValue(FSDagStateStore.DAG_STATESTORE_DIR, ConfigValueFactory.fromAnyRef(this.dagStateStoreDir));
    this._dagStateStore = new FSDagStateStore(stateStoreConfig, new HashMap<>());
    DagStateStore failedStore = new InMemoryDagStateStore();
    this._jobStatusRetriever = Mockito.mock(JobStatusRetriever.class);

    this.queue = new LinkedBlockingQueue<>();
    this.cancelQueue = new LinkedBlockingQueue<>();
    this.resumeQueue = new LinkedBlockingQueue<>();

    MetricContext metrics =
        Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());
    this._dagManagerThread = new DagManager.DagManagerThread(
        _jobStatusRetriever,
        _dagStateStore,
        failedStore,
        queue,
        cancelQueue,
        resumeQueue,
        true,
        5,
        new HashMap<>(),
        new HashSet<>(),
        metrics.contextAwareMeter("successMeter"),
        metrics.contextAwareMeter("failedMeter"),
        START_SLA_DEFAULT);

    // The bookkeeping collections are read reflectively; each field is made accessible before it is read.
    Class<?> threadClass = DagManager.DagManagerThread.class;

    Field jobToDagAccessor = threadClass.getDeclaredField("jobToDag");
    jobToDagAccessor.setAccessible(true);
    this.jobToDag = (Map<DagNode<JobExecutionPlan>, Dag<JobExecutionPlan>>) jobToDagAccessor.get(this._dagManagerThread);

    Field dagToJobsAccessor = threadClass.getDeclaredField("dagToJobs");
    dagToJobsAccessor.setAccessible(true);
    this.dagToJobs = (Map<String, LinkedList<DagNode<JobExecutionPlan>>>) dagToJobsAccessor.get(this._dagManagerThread);

    Field dagsAccessor = threadClass.getDeclaredField("dags");
    dagsAccessor.setAccessible(true);
    this.dags = (Map<String, Dag<JobExecutionPlan>>) dagsAccessor.get(this._dagManagerThread);

    Field failedDagIdsAccessor = threadClass.getDeclaredField("failedDagIds");
    failedDagIdsAccessor.setAccessible(true);
    this.failedDagIds = (Set<String>) failedDagIdsAccessor.get(this._dagManagerThread);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Config(com.typesafe.config.Config) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) JobStatusRetriever(org.apache.gobblin.service.monitoring.JobStatusRetriever) LinkedList(java.util.LinkedList) Field(java.lang.reflect.Field) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) MetricContext(org.apache.gobblin.metrics.MetricContext) File(java.io.File) HashSet(java.util.HashSet) BeforeClass(org.testng.annotations.BeforeClass)

Example 4 with DagNode

use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.

the class MultiHopFlowCompilerTest method testCompileFlowWithRetention.

/**
 * Compiles the flow described by {@code flow/flow1.conf} from LocalFS-1 to ADLS-1 and walks the resulting dag
 * hop by hop, starting at the start nodes and following children.
 *
 * <p>At every hop the test checks how many nodes the hop contains (two for the first four hops, one for the
 * last) and that each node's job name begins with one of the prefixes expected for that hop. After the last
 * hop it checks that no further children remain.
 */
@Test(dependsOnMethods = "testCompileFlow")
public void testCompileFlowWithRetention() throws URISyntaxException, IOException {
    FlowSpec spec = createFlowSpec("flow/flow1.conf", "LocalFS-1", "ADLS-1", true, true);
    Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
    Assert.assertEquals(jobDag.getNodes().size(), 9);
    Assert.assertEquals(jobDag.getStartNodes().size(), 2);
    Assert.assertEquals(jobDag.getEndNodes().size(), 5);
    String flowGroup = "testFlowGroup";
    String flowName = "testFlowName";
    List<DagNode<JobExecutionPlan>> currentHopNodes = jobDag.getStartNodes();
    // The four lists are index-aligned: entries i and i + 1 describe the two jobs expected on the hop starting at i.
    List<String> expectedJobNames = Lists.newArrayList("SnapshotRetention", "Distcp", "SnapshotRetention", "ConvertToJsonAndEncrypt", "SnapshotRetention", "Distcp", "SnapshotRetention", "DistcpToADL", "SnapshotRetention");
    List<String> sourceNodes = Lists.newArrayList("LocalFS-1", "LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1");
    List<String> destinationNodes = Lists.newArrayList("LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1", "ADLS-1");
    List<String> edgeNames = Lists.newArrayList("localRetention", "localToHdfs", "hdfsRetention", "hdfsConvertToJsonAndEncrypt", "hdfsRetention", "hdfsToHdfs", "hdfsRetention", "hdfsToAdl", "hdfsRemoteRetention");
    List<DagNode<JobExecutionPlan>> nextHopNodes = new ArrayList<>();
    for (int i = 0; i < 9; i += 2) {
        if (i < 8) {
            Assert.assertEquals(currentHopNodes.size(), 2);
        } else {
            // The final hop has a single expected job.
            Assert.assertEquals(currentHopNodes.size(), 1);
        }
        Set<String> jobNames = new HashSet<>();
        jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR).join(flowGroup, flowName, expectedJobNames.get(i), sourceNodes.get(i), destinationNodes.get(i), edgeNames.get(i)));
        if (i < 8) {
            jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR).join(flowGroup, flowName, expectedJobNames.get(i + 1), sourceNodes.get(i + 1), destinationNodes.get(i + 1), edgeNames.get(i + 1)));
        }
        for (DagNode<JobExecutionPlan> dagNode : currentHopNodes) {
            Config jobConfig = dagNode.getValue().getJobSpec().getConfig();
            String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
            Assert.assertTrue(jobNames.stream().anyMatch(jobName::startsWith));
            log.warn(jobName);
            nextHopNodes.addAll(jobDag.getChildren(dagNode));
        }
        currentHopNodes = nextHopNodes;
        nextHopNodes = new ArrayList<>();
    }
    // currentHopNodes now holds the children collected from the last hop. nextHopNodes was just replaced by a
    // fresh empty list, so asserting on it would always pass; the meaningful check is on currentHopNodes.
    Assert.assertEquals(currentHopNodes.size(), 0);
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 5 with DagNode

use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.

the class DagManagerUtils method getNext.

/**
 * Determines which nodes of the dag should be executed next.
 *
 * <p>The walk begins at the dag's start nodes. A node whose status is {@link ExecutionStatus#PENDING},
 * {@link ExecutionStatus#PENDING_RETRY} or {@link ExecutionStatus#PENDING_RESUME} is selected when every one of
 * its parents is {@link ExecutionStatus#COMPLETE}. A {@link ExecutionStatus#COMPLETE} node is not selected itself;
 * its children are queued for inspection instead. When a {@link ExecutionStatus#FAILED} or
 * {@link ExecutionStatus#CANCELLED} node is met and the dag's failure option is {@code FINISH_RUNNING}, an empty
 * set is returned immediately; for any other failure option the node is simply skipped.
 *
 * @param dag the dag to inspect
 * @return the nodes that are ready to run, possibly empty
 */
static Set<DagNode<JobExecutionPlan>> getNext(Dag<JobExecutionPlan> dag) {
    Set<DagNode<JobExecutionPlan>> readyNodes = new HashSet<>();
    LinkedList<DagNode<JobExecutionPlan>> frontier = Lists.newLinkedList(dag.getStartNodes());
    FailureOption failureOption = getFailureOption(dag);

    while (!frontier.isEmpty()) {
        DagNode<JobExecutionPlan> current = frontier.poll();
        ExecutionStatus status = getExecutionStatus(current);

        boolean awaitingExecution = status == ExecutionStatus.PENDING
            || status == ExecutionStatus.PENDING_RETRY
            || status == ExecutionStatus.PENDING_RESUME;
        if (awaitingExecution) {
            // Ready only when no parent is still outstanding; allMatch stops at the first incomplete parent.
            boolean allParentsComplete = dag.getParents(current).stream()
                .allMatch(parent -> getExecutionStatus(parent) == ExecutionStatus.COMPLETE);
            if (allParentsComplete) {
                readyNodes.add(current);
            }
            continue;
        }

        if (status == ExecutionStatus.COMPLETE) {
            // Children of a finished node are the next candidates to look at.
            frontier.addAll(dag.getChildren(current));
            continue;
        }

        if (status == ExecutionStatus.FAILED || status == ExecutionStatus.CANCELLED) {
            switch (failureOption) {
                case FINISH_RUNNING:
                    // Nothing new may be started once a node has failed or been cancelled.
                    return new HashSet<>();
                case FINISH_ALL_POSSIBLE:
                default:
                    break;
            }
        }
    }
    return readyNodes;
}
Also used : FailureOption(org.apache.gobblin.service.modules.orchestration.DagManager.FailureOption) DagNode(org.apache.gobblin.service.modules.flowgraph.Dag.DagNode) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) ExecutionStatus(org.apache.gobblin.service.ExecutionStatus) HashSet(java.util.HashSet)

Aggregations

DagNode (org.apache.gobblin.service.modules.flowgraph.Dag.DagNode)6 HashSet (java.util.HashSet)5 JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan)5 Config (com.typesafe.config.Config)4 FlowSpec (org.apache.gobblin.runtime.api.FlowSpec)3 Test (org.testng.annotations.Test)3 ArrayList (java.util.ArrayList)2 Dag (org.apache.gobblin.service.modules.flowgraph.Dag)2 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Joiner (com.google.common.base.Joiner)1 Optional (com.google.common.base.Optional)1 Maps (com.google.common.collect.Maps)1 Files (com.google.common.io.Files)1 ConfigValueFactory (com.typesafe.config.ConfigValueFactory)1 File (java.io.File)1 Field (java.lang.reflect.Field)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 HashMap (java.util.HashMap)1