Use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.
From the class DagTest, method testInitialize:
@Test
public void testInitialize() {
  // Build a five-node dag:
  //        val1
  //       /    \
  //    val2    val3
  //       \   /    \
  //        val4    val5
  DagNode<String> dagNode1 = new DagNode<>("val1");
  DagNode<String> dagNode2 = new DagNode<>("val2");
  DagNode<String> dagNode3 = new DagNode<>("val3");
  DagNode<String> dagNode4 = new DagNode<>("val4");
  DagNode<String> dagNode5 = new DagNode<>("val5");
  dagNode2.addParentNode(dagNode1);
  dagNode3.addParentNode(dagNode1);
  dagNode4.addParentNode(dagNode2);
  dagNode4.addParentNode(dagNode3);
  dagNode5.addParentNode(dagNode3);
  List<DagNode<String>> dagNodeList = Lists.newArrayList(dagNode1, dagNode2, dagNode3, dagNode4, dagNode5);
  Dag<String> dag = new Dag<>(dagNodeList);

  // Test startNodes and endNodes
  Assert.assertEquals(dag.getStartNodes().size(), 1);
  Assert.assertEquals(dag.getStartNodes().get(0).getValue(), "val1");
  Assert.assertEquals(dag.getEndNodes().size(), 2);
  Assert.assertEquals(dag.getEndNodes().get(0).getValue(), "val4");
  Assert.assertEquals(dag.getEndNodes().get(1).getValue(), "val5");

  DagNode startNode = dag.getStartNodes().get(0);
  Assert.assertEquals(dag.getChildren(startNode).size(), 2);
  Set<String> childSet = new HashSet<>();
  for (DagNode<String> node : dag.getChildren(startNode)) {
    childSet.add(node.getValue());
  }
  Assert.assertTrue(childSet.contains("val2"));
  Assert.assertTrue(childSet.contains("val3"));

  // Note: the assertions below rely on getChildren() returning children in
  // insertion order, i.e. index 0 is "val2" and index 1 is "val3".
  dagNode2 = dag.getChildren(startNode).get(0);
  dagNode3 = dag.getChildren(startNode).get(1);
  Assert.assertEquals(dag.getChildren(dagNode2).size(), 1);
  Assert.assertEquals(dag.getChildren(dagNode2).get(0).getValue(), "val4");
  for (DagNode<String> node : dag.getChildren(dagNode3)) {
    childSet.add(node.getValue());
  }
  Assert.assertTrue(childSet.contains("val4"));
  Assert.assertTrue(childSet.contains("val5"));

  // Ensure end nodes have no children
  Assert.assertEquals(dag.getChildren(dagNode4).size(), 0);
  Assert.assertEquals(dag.getChildren(dagNode5).size(), 0);
}
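For context, the start/end classification these assertions exercise reduces to a simple rule: a node with no parents is a start node, and a node that is no other node's parent is an end node. A minimal, self-contained sketch of that rule (the Node type and both helpers are hypothetical illustrations, not Gobblin's actual Dag implementation):

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical sketch: classify the nodes of a parent-linked DAG.
static final class Node {
  final String value;
  final List<Node> parents = new ArrayList<>();
  Node(String value) { this.value = value; }
}

// Start nodes: nodes with no parents.
static List<Node> startNodes(List<Node> nodes) {
  List<Node> starts = new ArrayList<>();
  for (Node n : nodes) {
    if (n.parents.isEmpty()) {
      starts.add(n);
    }
  }
  return starts;
}

// End nodes: nodes that never appear as another node's parent.
static List<Node> endNodes(List<Node> nodes) {
  Set<Node> allParents = new HashSet<>();
  for (Node n : nodes) {
    allParents.addAll(n.parents);
  }
  List<Node> ends = new ArrayList<>();
  for (Node n : nodes) {
    if (!allParents.contains(n)) {
      ends.add(n);
    }
  }
  return ends;
}

Applied to the five nodes above, this rule yields val1 as the only start node and val4/val5 as the end nodes, matching the test's assertions.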
Use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.
From the class MultiHopFlowCompilerTest, method testMulticastPath:
@Test(dependsOnMethods = "testCompileFlowSingleHop")
public void testMulticastPath() throws IOException, URISyntaxException {
  FlowSpec spec = createFlowSpec("flow/flow2.conf", "LocalFS-1", "HDFS-3,HDFS-4", false, false);
  Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
  Assert.assertEquals(jobDag.getNodes().size(), 4);
  Assert.assertEquals(jobDag.getEndNodes().size(), 2);
  Assert.assertEquals(jobDag.getStartNodes().size(), 2);

  // First hop must be from LocalFS to HDFS-1 and HDFS-2
  Set<String> jobNames = new HashSet<>();
  jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join("testFlowGroup", "testFlowName", "Distcp", "LocalFS-1", "HDFS-1", "localToHdfs"));
  jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join("testFlowGroup", "testFlowName", "Distcp", "LocalFS-1", "HDFS-2", "localToHdfs"));
  for (DagNode<JobExecutionPlan> dagNode : jobDag.getStartNodes()) {
    Config jobConfig = dagNode.getValue().getJobSpec().getConfig();
    String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
    Assert.assertTrue(jobNames.stream().anyMatch(jobName::startsWith));
  }

  // Second hop must be from HDFS-1/HDFS-2 to HDFS-3/HDFS-4 respectively.
  jobNames = new HashSet<>();
  jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join("testFlowGroup", "testFlowName", "Distcp", "HDFS-1", "HDFS-3", "hdfsToHdfs"));
  jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join("testFlowGroup", "testFlowName", "Distcp", "HDFS-2", "HDFS-4", "hdfsToHdfs"));
  for (DagNode<JobExecutionPlan> dagNode : jobDag.getStartNodes()) {
    List<DagNode<JobExecutionPlan>> nextNodes = jobDag.getChildren(dagNode);
    Assert.assertEquals(nextNodes.size(), 1);
    Config jobConfig = nextNodes.get(0).getValue().getJobSpec().getConfig();
    String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
    Assert.assertTrue(jobNames.stream().anyMatch(jobName::startsWith));
  }
}
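Taken together, the assertions pin down the compiled dag's shape: four Distcp jobs forming two parallel two-hop chains, one per multicast destination:

  Distcp (LocalFS-1 -> HDFS-1) --> Distcp (HDFS-1 -> HDFS-3)
  Distcp (LocalFS-1 -> HDFS-2) --> Distcp (HDFS-2 -> HDFS-4)

Each chain's first job is a start node and its last job an end node, which is why getStartNodes() and getEndNodes() each return two entries.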
Use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.
From the class DagManagerTest, method setUp:
@BeforeClass
public void setUp() throws Exception {
  FileUtils.deleteDirectory(new File(this.dagStateStoreDir));
  Config config = ConfigFactory.empty()
      .withValue(FSDagStateStore.DAG_STATESTORE_DIR, ConfigValueFactory.fromAnyRef(this.dagStateStoreDir));
  this._dagStateStore = new FSDagStateStore(config, new HashMap<>());
  DagStateStore failedDagStateStore = new InMemoryDagStateStore();
  this._jobStatusRetriever = Mockito.mock(JobStatusRetriever.class);
  this.queue = new LinkedBlockingQueue<>();
  this.cancelQueue = new LinkedBlockingQueue<>();
  this.resumeQueue = new LinkedBlockingQueue<>();
  MetricContext metricContext = Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());
  this._dagManagerThread = new DagManager.DagManagerThread(_jobStatusRetriever, _dagStateStore, failedDagStateStore,
      queue, cancelQueue, resumeQueue, true, 5, new HashMap<>(), new HashSet<>(),
      metricContext.contextAwareMeter("successMeter"), metricContext.contextAwareMeter("failedMeter"), START_SLA_DEFAULT);

  // Use reflection to expose DagManagerThread's private bookkeeping structures
  // so the tests can assert on its internal state.
  Field jobToDagField = DagManager.DagManagerThread.class.getDeclaredField("jobToDag");
  jobToDagField.setAccessible(true);
  this.jobToDag = (Map<DagNode<JobExecutionPlan>, Dag<JobExecutionPlan>>) jobToDagField.get(this._dagManagerThread);

  Field dagToJobsField = DagManager.DagManagerThread.class.getDeclaredField("dagToJobs");
  dagToJobsField.setAccessible(true);
  this.dagToJobs = (Map<String, LinkedList<DagNode<JobExecutionPlan>>>) dagToJobsField.get(this._dagManagerThread);

  Field dagsField = DagManager.DagManagerThread.class.getDeclaredField("dags");
  dagsField.setAccessible(true);
  this.dags = (Map<String, Dag<JobExecutionPlan>>) dagsField.get(this._dagManagerThread);

  Field failedDagIdsField = DagManager.DagManagerThread.class.getDeclaredField("failedDagIds");
  failedDagIdsField.setAccessible(true);
  this.failedDagIds = (Set<String>) failedDagIdsField.get(this._dagManagerThread);
}
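The four lookups above repeat the same getDeclaredField/setAccessible/get pattern. A small generic helper could fold each into a one-liner; a sketch, assuming every field is declared directly on DagManagerThread (as it is here) rather than inherited:

// Hypothetical helper, not part of Gobblin: read a private field off an object.
@SuppressWarnings("unchecked")
private static <T> T readPrivateField(Object target, String fieldName) throws ReflectiveOperationException {
  java.lang.reflect.Field field = target.getClass().getDeclaredField(fieldName);
  field.setAccessible(true);
  return (T) field.get(target);
}

With it, each lookup in setUp shrinks to a call such as this.jobToDag = readPrivateField(this._dagManagerThread, "jobToDag");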
Use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.
From the class MultiHopFlowCompilerTest, method testCompileFlowWithRetention:
@Test(dependsOnMethods = "testCompileFlow")
public void testCompileFlowWithRetention() throws URISyntaxException, IOException {
  FlowSpec spec = createFlowSpec("flow/flow1.conf", "LocalFS-1", "ADLS-1", true, true);
  Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
  Assert.assertEquals(jobDag.getNodes().size(), 9);
  Assert.assertEquals(jobDag.getStartNodes().size(), 2);
  Assert.assertEquals(jobDag.getEndNodes().size(), 5);

  String flowGroup = "testFlowGroup";
  String flowName = "testFlowName";
  List<DagNode<JobExecutionPlan>> currentHopNodes = jobDag.getStartNodes();
  List<String> expectedJobNames = Lists.newArrayList("SnapshotRetention", "Distcp", "SnapshotRetention",
      "ConvertToJsonAndEncrypt", "SnapshotRetention", "Distcp", "SnapshotRetention", "DistcpToADL", "SnapshotRetention");
  List<String> sourceNodes = Lists.newArrayList("LocalFS-1", "LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1");
  List<String> destinationNodes = Lists.newArrayList("LocalFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-1", "HDFS-3", "HDFS-3", "ADLS-1", "ADLS-1");
  List<String> edgeNames = Lists.newArrayList("localRetention", "localToHdfs", "hdfsRetention", "hdfsConvertToJsonAndEncrypt",
      "hdfsRetention", "hdfsToHdfs", "hdfsRetention", "hdfsToAdl", "hdfsRemoteRetention");

  // Walk the dag hop by hop. Every hop except the last pairs a retention job
  // with a data job; the final hop is the lone retention job on ADLS-1.
  List<DagNode<JobExecutionPlan>> nextHopNodes = new ArrayList<>();
  for (int i = 0; i < 9; i += 2) {
    if (i < 8) {
      Assert.assertEquals(currentHopNodes.size(), 2);
    } else {
      Assert.assertEquals(currentHopNodes.size(), 1);
    }
    Set<String> jobNames = new HashSet<>();
    jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
        .join(flowGroup, flowName, expectedJobNames.get(i), sourceNodes.get(i), destinationNodes.get(i), edgeNames.get(i)));
    if (i < 8) {
      jobNames.add(Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
          .join(flowGroup, flowName, expectedJobNames.get(i + 1), sourceNodes.get(i + 1), destinationNodes.get(i + 1), edgeNames.get(i + 1)));
    }
    for (DagNode<JobExecutionPlan> dagNode : currentHopNodes) {
      Config jobConfig = dagNode.getValue().getJobSpec().getConfig();
      String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
      Assert.assertTrue(jobNames.stream().anyMatch(jobName::startsWith));
      log.warn(jobName);
      nextHopNodes.addAll(jobDag.getChildren(dagNode));
    }
    currentHopNodes = nextHopNodes;
    nextHopNodes = new ArrayList<>();
  }
  // After the final hop there should be nothing left to visit.
  Assert.assertEquals(nextHopNodes.size(), 0);
}
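The manual hop-walking above generalizes to a short level-order traversal over Dag's public getStartNodes()/getChildren() API. A sketch, not part of Gobblin (caveat: a node with multiple parents, as in a diamond, would be collected once per parent):

// Hypothetical sketch: collect a dag's nodes hop by hop, starting from the start nodes.
static <T> List<List<DagNode<T>>> nodesByHop(Dag<T> dag) {
  List<List<DagNode<T>>> hops = new ArrayList<>();
  List<DagNode<T>> current = dag.getStartNodes();
  while (!current.isEmpty()) {
    hops.add(current);
    List<DagNode<T>> next = new ArrayList<>();
    for (DagNode<T> node : current) {
      next.addAll(dag.getChildren(node));
    }
    current = next;
  }
  return hops;
}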
Use of org.apache.gobblin.service.modules.flowgraph.Dag.DagNode in project incubator-gobblin by apache.
From the class DagManagerUtils, method getNext:
/**
 * Traverse the dag to determine the next set of nodes to be executed. It starts with the startNodes of the dag and
 * identifies each node yet to be executed and for which each of its parent nodes is in the {@link ExecutionStatus#COMPLETE}
 * state.
 */
static Set<DagNode<JobExecutionPlan>> getNext(Dag<JobExecutionPlan> dag) {
  Set<DagNode<JobExecutionPlan>> nextNodesToExecute = new HashSet<>();
  LinkedList<DagNode<JobExecutionPlan>> nodesToExpand = Lists.newLinkedList(dag.getStartNodes());
  FailureOption failureOption = getFailureOption(dag);

  while (!nodesToExpand.isEmpty()) {
    DagNode<JobExecutionPlan> node = nodesToExpand.poll();
    ExecutionStatus executionStatus = getExecutionStatus(node);
    boolean addFlag = true;
    if (executionStatus == ExecutionStatus.PENDING || executionStatus == ExecutionStatus.PENDING_RETRY
        || executionStatus == ExecutionStatus.PENDING_RESUME) {
      // Add a node to be executed next, only if all of its parent nodes are COMPLETE.
      List<DagNode<JobExecutionPlan>> parentNodes = dag.getParents(node);
      for (DagNode<JobExecutionPlan> parentNode : parentNodes) {
        if (getExecutionStatus(parentNode) != ExecutionStatus.COMPLETE) {
          addFlag = false;
          break;
        }
      }
      if (addFlag) {
        nextNodesToExecute.add(node);
      }
    } else if (executionStatus == ExecutionStatus.COMPLETE) {
      // Explore the children of a COMPLETE node as the next candidates for execution.
      nodesToExpand.addAll(dag.getChildren(node));
    } else if ((executionStatus == ExecutionStatus.FAILED) || (executionStatus == ExecutionStatus.CANCELLED)) {
      switch (failureOption) {
        case FINISH_RUNNING:
          return new HashSet<>();
        case FINISH_ALL_POSSIBLE:
        default:
          break;
      }
    }
  }
  return nextNodesToExecute;
}
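A hedged sketch of how a caller might drive this method (the submitJob hook below is hypothetical; the actual submission logic lives elsewhere in DagManager):

// Hypothetical driver loop, not DagManager's actual code: poll the dag for
// runnable nodes and hand each to a (hypothetical) submission hook.
Set<DagNode<JobExecutionPlan>> nextNodes = DagManagerUtils.getNext(dag);
for (DagNode<JobExecutionPlan> dagNode : nextNodes) {
  submitJob(dagNode); // hypothetical: submit the node's JobExecutionPlan for execution
}

An empty result means either that no node is currently runnable or, under the FINISH_RUNNING failure option, that a FAILED or CANCELLED node has halted further scheduling.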