use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache.
the class BFSPathFinder method findPathUnicast.
/**
 * Searches breadth-first for a path from the source node to {@code destNode}.
 *
 * <p>The frontier is a FIFO queue of {@link FlowEdgeContext}s seeded with the edges that
 * {@code getNextEdges} returns for the source node. Each dequeued edge is tested with
 * {@code isPathFound}; if it does not complete a path, the edges leaving its destination node are
 * enqueued, skipping any edge context already recorded in {@code pathMap}. By design, candidate
 * {@link FlowEdge}s whose output {@link DatasetDescriptor} matches the destination descriptor are
 * meant to be considered first so that dataset transformations happen closest to the source; that
 * ordering is the responsibility of {@code getNextEdges}.
 *
 * @param destNode the node the path must end at
 * @return the {@link FlowEdgeContext}s produced by {@code constructPath} for the first edge that
 *         satisfies {@code isPathFound}; an empty list if the source node equals {@code destNode}
 *         and the destination descriptor contains the source descriptor; {@code null} if either
 *         node is inactive or the queue is exhausted without finding a path
 */
public List<FlowEdgeContext> findPathUnicast(DataNode destNode) {
  // Every search starts with a fresh map from an edge context to the edge context it was reached from.
  this.pathMap = new HashMap<>();

  // Guard 1: both endpoints must be active, otherwise no path is computed.
  boolean bothEndpointsActive = srcNode.isActive() && destNode.isActive();
  if (!bothEndpointsActive) {
    log.warn("Either source node {} or destination node {} is inactive; skipping path computation.", this.srcNode.getId(), destNode.getId());
    return null;
  }

  // Guard 2: already at the target with a compatible dataset descriptor, so the path is empty.
  if (srcNode.equals(destNode)) {
    if (destDatasetDescriptor.contains(srcDatasetDescriptor)) {
      return new ArrayList<>(0);
    }
  }

  // Seed the frontier with the edges leaving the source; seed edges are recorded as their own parent.
  List<FlowEdgeContext> seedEdges = getNextEdges(srcNode, srcDatasetDescriptor, destDatasetDescriptor);
  LinkedList<FlowEdgeContext> frontier = new LinkedList<>(seedEdges);
  for (FlowEdgeContext seed : frontier) {
    this.pathMap.put(seed, seed);
  }

  while (!frontier.isEmpty()) {
    // Take from the head and append at the tail: first-in, first-out.
    FlowEdgeContext parent = frontier.removeFirst();
    DataNode reachedNode = this.flowGraph.getNode(parent.getEdge().getDest());
    DatasetDescriptor reachedDescriptor = parent.getOutputDatasetDescriptor();

    // Stop as soon as this edge lands on the destination with an acceptable descriptor.
    if (isPathFound(reachedNode, destNode, reachedDescriptor, destDatasetDescriptor)) {
      return constructPath(parent);
    }

    // Otherwise expand the reached node and enqueue each outgoing edge not seen before.
    for (FlowEdgeContext child : getNextEdges(reachedNode, reachedDescriptor, destDatasetDescriptor)) {
      if (this.pathMap.containsKey(child)) {
        continue;
      }
      frontier.addLast(child);
      // Remember which edge led to this child.
      this.pathMap.put(child, parent);
    }
  }

  // Frontier exhausted without reaching the destination.
  return null;
}
use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache.
the class MultiHopFlowCompilerTest method testGitFlowGraphMonitorService.
/**
 * Checks that a node file pushed to the monitored git repository shows up as a {@link DataNode}
 * in the compiler's flow graph.
 *
 * <p>The test creates a bare remote repository, clones it, pushes an empty base commit, starts a
 * {@code MultiHopFlowCompiler} configured to monitor the remote, then pushes a properties file for
 * {@code node1} and waits before inspecting the flow graph.
 */
@Test(dependsOnMethods = "testMissingDestinationNodeError")
public void testGitFlowGraphMonitorService() throws IOException, GitAPIException, URISyntaxException, InterruptedException {
  File remoteDir = new File(TESTDIR + "/remote");
  File cloneDir = new File(TESTDIR + "/clone");
  File flowGraphDir = new File(cloneDir, "/gobblin-flowgraph");

  // Clean up
  cleanUpDir(TESTDIR);

  // Create a bare repository
  RepositoryCache.FileKey fileKey = RepositoryCache.FileKey.exact(remoteDir, FS.DETECTED);
  Repository remoteRepo = fileKey.open(false);
  remoteRepo.create(true);
  Git gitForPush = Git.cloneRepository()
      .setURI(remoteRepo.getDirectory().getAbsolutePath())
      .setDirectory(cloneDir)
      .call();

  // Push an empty commit as a base for detecting changes
  gitForPush.commit().setMessage("First commit").call();
  RefSpec masterRefSpec = new RefSpec("master");
  gitForPush.push().setRemote("origin").setRefSpecs(masterRefSpec).call();

  URI flowTemplateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
  String monitorPrefix = GitFlowGraphMonitor.GIT_FLOWGRAPH_MONITOR_PREFIX + ".";
  Config config = ConfigBuilder.create()
      .addPrimitive(monitorPrefix + ConfigurationKeys.GIT_MONITOR_REPO_URI, remoteRepo.getDirectory().getAbsolutePath())
      .addPrimitive(monitorPrefix + ConfigurationKeys.GIT_MONITOR_REPO_DIR, TESTDIR + "/git-flowgraph")
      .addPrimitive(monitorPrefix + ConfigurationKeys.GIT_MONITOR_POLLING_INTERVAL, 5)
      .addPrimitive(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, flowTemplateCatalogUri.toString())
      .build();

  // Create a MultiHopFlowCompiler instance
  specCompiler = new MultiHopFlowCompiler(config, Optional.absent(), false);
  specCompiler.setActive(true);

  // Ensure node1 is not present in the graph
  Assert.assertNull(specCompiler.getFlowGraph().getNode("node1"));

  // Write a new node file into the clone
  File nodeDir = new File(flowGraphDir, "node1");
  File nodeFile = new File(nodeDir, "node1.properties");
  nodeDir.mkdirs();
  nodeFile.createNewFile();
  Files.write(FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam1=val1" + "\n", nodeFile, Charsets.UTF_8);

  // add, commit, push node
  gitForPush.add().addFilepattern(formNodeFilePath(flowGraphDir, nodeDir.getName(), nodeFile.getName())).call();
  gitForPush.commit().setMessage("Node commit").call();
  gitForPush.push().setRemote("origin").setRefSpecs(masterRefSpec).call();

  // Polling is every 5 seconds, so wait twice as long and check
  TimeUnit.SECONDS.sleep(10);

  // Test that a DataNode is added to FlowGraph. Assert presence first so that a node that was not
  // picked up fails with a clear assertion message rather than a NullPointerException.
  DataNode dataNode = specCompiler.getFlowGraph().getNode("node1");
  Assert.assertNotNull(dataNode, "node1 was not added to the FlowGraph after the wait");
  Assert.assertEquals(dataNode.getId(), "node1");
  Assert.assertEquals(dataNode.getRawConfig().getString("param1"), "val1");
}
use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache.
the class GitFlowGraphMonitorTest method testAddNode.
/**
 * Adds the node1 and node2 files, processes the git changes, and checks that both nodes are in
 * the flow graph, active, and carrying the parameter written to their file.
 */
@Test
public void testAddNode() throws IOException, GitAPIException {
  String file1Contents = FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam1=value1\n";
  String file2Contents = FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam2=value2\n";
  addNode(this.node1Dir, this.node1File, file1Contents);
  addNode(this.node2Dir, this.node2File, file2Contents);
  this.gitFlowGraphMonitor.processGitConfigChanges();

  // Two nodes were added above, so both must be verified (the previous bound of 1 skipped node2).
  final int nodeCount = 2;
  for (int i = 0; i < nodeCount; i++) {
    String nodeId = "node" + (i + 1);
    String paramKey = "param" + (i + 1);
    String paramValue = "value" + (i + 1);

    // Check if nodes have been added to the FlowGraph; fail with a clear message if one is missing.
    DataNode dataNode = this.flowGraph.getNode(nodeId);
    Assert.assertNotNull(dataNode, nodeId + " was not added to the FlowGraph");
    Assert.assertEquals(dataNode.getId(), nodeId);
    Assert.assertTrue(dataNode.isActive());
    Assert.assertEquals(dataNode.getRawConfig().getString(paramKey), paramValue);
  }
}
use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache.
the class GitFlowGraphMonitorTest method testChangesReorder.
/**
 * Pushes a commit that deletes node1 and edge1, followed by a commit that re-adds node1, then
 * processes both in a single {@code processGitConfigChanges()} call. Afterwards node1 must be
 * present in the flow graph with no edges.
 */
@Test(dependsOnMethods = "testRemoveNode")
public void testChangesReorder() throws GitAPIException, IOException, ExecutionException, InterruptedException {
  String node1Contents = FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam1=value1\n";
  String node2Contents = FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam2=value2\n";
  String edgeContents = buildEdgeFileContents("node1", "node2", "edge1", "value1");

  // Set up: write the two node files and the edge file into the working tree.
  createNewFile(this.node1Dir, this.node1File, node1Contents);
  createNewFile(this.node2Dir, this.node2File, node2Contents);
  createNewFile(this.edge1Dir, this.edge1File, edgeContents);

  // Repo-relative paths, reused by the add/rm commands below.
  String node1Path = formNodeFilePath(this.node1Dir.getName(), this.node1File.getName());
  String node2Path = formNodeFilePath(this.node2Dir.getName(), this.node2File.getName());
  String edge1Path = formEdgeFilePath(this.edge1Dir.getParentFile().getName(), this.edge1Dir.getName(), this.edge1File.getName());

  // First push: the two nodes.
  this.gitForPush.add().addFilepattern(node1Path).call();
  this.gitForPush.add().addFilepattern(node2Path).call();
  this.gitForPush.commit().setMessage("Add nodes commit").call();
  this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

  // Second push: the edge between them.
  this.gitForPush.add().addFilepattern(edge1Path).call();
  this.gitForPush.commit().setMessage("Add nodes and edges commit").call();
  this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

  this.gitFlowGraphMonitor.processGitConfigChanges();

  // Both nodes and the edge must now be in the graph.
  DataNode firstNode = this.flowGraph.getNode("node1");
  Assert.assertNotNull(firstNode);
  DataNode secondNode = this.flowGraph.getNode("node2");
  Assert.assertNotNull(secondNode);
  testIfEdgeSuccessfullyAdded("node1", "node2", "edge1", "value1");

  // Remove the node1 and edge1 files from the working tree.
  node1File.delete();
  edge1File.delete();

  // Commit 1: delete node1 and the edge node1->node2.
  this.gitForPush.rm().addFilepattern(node1Path).call();
  this.gitForPush.rm().addFilepattern(edge1Path).call();
  this.gitForPush.commit().setMessage("Delete node1 and edge1 commit").call();
  this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

  // Commit 2: add node1 back.
  createNewFile(this.node1Dir, this.node1File, node1Contents);
  this.gitForPush.add().addFilepattern(node1Path).call();
  this.gitForPush.commit().setMessage("Add node1 commit").call();
  this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

  // Both commits are processed together; node1 must remain, without any edges.
  this.gitFlowGraphMonitor.processGitConfigChanges();
  firstNode = this.flowGraph.getNode("node1");
  Assert.assertNotNull(firstNode);
  Assert.assertEquals(this.flowGraph.getEdges(firstNode).size(), 0);
}
use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache.
the class GitFlowGraphMonitorTest method testUpdateNode.
/**
 * Rewrites node1's file with a new {@code param1} value, processes the git changes, and checks
 * that the node in the flow graph reflects the new value.
 */
@Test(dependsOnMethods = "testUpdateEdge")
public void testUpdateNode() throws IOException, GitAPIException, URISyntaxException, ExecutionException, InterruptedException {
  // Update param1 value in node1 and check if updated node is added to the graph
  String fileContents = FlowGraphConfigurationKeys.DATA_NODE_IS_ACTIVE_KEY + "=true\nparam1=value3\n";
  addNode(this.node1Dir, this.node1File, fileContents);
  this.gitFlowGraphMonitor.processGitConfigChanges();

  // Check if node has been updated in the FlowGraph. Assert presence first so a missing node
  // fails with a clear assertion message rather than a NullPointerException.
  DataNode dataNode = this.flowGraph.getNode("node1");
  Assert.assertNotNull(dataNode, "node1 is missing from the FlowGraph after the update");
  Assert.assertEquals(dataNode.getId(), "node1");
  Assert.assertTrue(dataNode.isActive());
  Assert.assertEquals(dataNode.getRawConfig().getString("param1"), "value3");
}
Aggregations