Use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache:
the class GitFlowGraphMonitor, method addDataNode.
/**
 * Add a {@link DataNode} to the {@link FlowGraph}. The method uses the {@link FlowGraphConfigurationKeys#DATA_NODE_CLASS} config
 * to instantiate a {@link DataNode} from the node config file.
 * @param change {@link DiffEntry} describing the added or modified node config file in the Git repository.
 */
private void addDataNode(DiffEntry change) {
  if (checkFilePath(change.getNewPath(), NODE_FILE_DEPTH)) {
    Path nodeFilePath = new Path(this.repositoryDir, change.getNewPath());
    try {
      Config config = loadNodeFileWithOverrides(nodeFilePath);
      // DATA_NODE_CLASS selects the concrete DataNode implementation; fall back to the default class.
      Class<?> dataNodeClass = Class.forName(ConfigUtils.getString(config, FlowGraphConfigurationKeys.DATA_NODE_CLASS, FlowGraphConfigurationKeys.DEFAULT_DATA_NODE_CLASS));
      DataNode dataNode = (DataNode) GobblinConstructorUtils.invokeLongestConstructor(dataNodeClass, config);
      if (!this.flowGraph.addDataNode(dataNode)) {
        log.warn("Could not add DataNode {} to FlowGraph; skipping", dataNode.getId());
      } else {
        log.info("Added Datanode {} to FlowGraph", dataNode.getId());
      }
    } catch (Exception e) {
      // Pass the Throwable as the final argument with no placeholder so SLF4J logs the full stack trace
      // (a "{}" placeholder would consume it as a parameter and print only its toString()).
      log.warn("Could not add DataNode defined in {}", change.getNewPath(), e);
    }
  }
}
Use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache:
the class MultiHopFlowCompilerTest, method setUp.
/**
 * Build the test fixture: a {@link BaseFlowGraph} populated with DataNodes and FlowEdges read from
 * resource files, plus a {@link MultiHopFlowCompiler} over that graph.
 */
@BeforeClass
public void setUp() throws URISyntaxException, IOException, ReflectiveOperationException, FlowEdgeFactory.FlowEdgeCreationException {
// Create a FlowGraph
this.flowGraph = new BaseFlowGraph();
// Add DataNodes to the graph from the node properties files
URI dataNodesUri = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/datanodes").toURI();
FileSystem fs = FileSystem.get(dataNodesUri, new Configuration());
Path dataNodesPath = new Path(dataNodesUri);
// Node/edge files are Java-properties-style configs; setAllowMissing(false) fails fast on absent files.
ConfigParseOptions options = ConfigParseOptions.defaults().setSyntax(ConfigSyntax.PROPERTIES).setAllowMissing(false);
for (FileStatus fileStatus : fs.listStatus(dataNodesPath)) {
try (InputStream is = fs.open(fileStatus.getPath())) {
Config nodeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
// DATA_NODE_CLASS in each node file selects the DataNode implementation to instantiate.
Class dataNodeClass = Class.forName(ConfigUtils.getString(nodeConfig, FlowGraphConfigurationKeys.DATA_NODE_CLASS, FlowGraphConfigurationKeys.DEFAULT_DATA_NODE_CLASS));
DataNode dataNode = (DataNode) GobblinConstructorUtils.invokeLongestConstructor(dataNodeClass, nodeConfig);
this.flowGraph.addDataNode(dataNode);
}
}
URI specExecutorCatalogUri = this.getClass().getClassLoader().getResource("topologyspec_catalog").toURI();
Map<URI, TopologySpec> topologySpecMap = buildTopologySpecMap(specExecutorCatalogUri);
// Create a FSFlowTemplateCatalog instance
URI flowTemplateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
Properties properties = new Properties();
properties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, flowTemplateCatalogUri.toString());
Config config = ConfigFactory.parseProperties(properties);
Config templateCatalogCfg = config.withValue(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY, config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
FSFlowTemplateCatalog flowCatalog = new FSFlowTemplateCatalog(templateCatalogCfg);
// Add FlowEdges from the edge properties files
URI flowEdgesURI = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/flowedges").toURI();
fs = FileSystem.get(flowEdgesURI, new Configuration());
Path flowEdgesPath = new Path(flowEdgesURI);
for (FileStatus fileStatus : fs.listStatus(flowEdgesPath)) {
log.warn(fileStatus.getPath().toString());
try (InputStream is = fs.open(fileStatus.getPath())) {
Config flowEdgeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
Class flowEdgeFactoryClass = Class.forName(ConfigUtils.getString(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_FACTORY_CLASS, FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS));
// NOTE(review): the factory is constructed with `config` (the template-catalog config), not
// `flowEdgeConfig` — looks inconsistent with the node loop above; confirm this is intentional.
FlowEdgeFactory flowEdgeFactory = (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(flowEdgeFactoryClass, config);
// Resolve each named SpecExecutor for the edge from the topology spec catalog.
List<String> specExecutorNames = ConfigUtils.getStringList(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY);
List<SpecExecutor> specExecutors = new ArrayList<>();
for (String specExecutorName : specExecutorNames) {
specExecutors.add(topologySpecMap.get(new URI(specExecutorName)).getSpecExecutor());
}
FlowEdge edge = flowEdgeFactory.createFlowEdge(flowEdgeConfig, flowCatalog, specExecutors);
this.flowGraph.addFlowEdge(edge);
}
}
this.specCompiler = new MultiHopFlowCompiler(config, this.flowGraph);
}
Use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache:
the class GitFlowGraphMonitorTest, method testRemoveNode.
/**
 * Verify that deleting node config files from the Git repo removes the corresponding
 * DataNodes from the FlowGraph once the monitor processes the change.
 */
@Test(dependsOnMethods = "testRemoveEdge")
public void testRemoveNode() throws GitAPIException, IOException {
// Remove the backing config files for both nodes.
node1File.delete();
node2File.delete();
// Sanity check: both nodes must still be present in the graph before the delete is processed.
Assert.assertNotNull(this.flowGraph.getNode("node1"));
Assert.assertNotNull(this.flowGraph.getNode("node2"));
// Stage both deletions, then commit and push them to the remote.
String[] removedPaths = {
    formNodeFilePath(this.node1Dir.getName(), this.node1File.getName()),
    formNodeFilePath(this.node2Dir.getName(), this.node2File.getName())};
for (String removedPath : removedPaths) {
this.gitForPush.rm().addFilepattern(removedPath).call();
}
this.gitForPush.commit().setMessage("Node remove commit").call();
this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();
// Let the monitor pick up the pushed change, then confirm both nodes are gone.
this.gitFlowGraphMonitor.processGitConfigChanges();
Assert.assertNull(this.flowGraph.getNode("node1"));
Assert.assertNull(this.flowGraph.getNode("node2"));
}
Use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache:
the class AbstractPathFinder, method findPath.
/**
 * Find a path from the source to every destination node and assemble them into a single
 * {@link FlowGraphPath}; returns {@code null} if any destination is unreachable.
 */
@Override
public FlowGraphPath findPath() throws PathFinderException {
FlowGraphPath result = new FlowGraphPath(flowSpec, flowExecutionId);
// Every destination must be reachable; one unreachable destination fails the whole search.
for (DataNode destination : this.destNodes) {
List<FlowEdgeContext> unicastPath = findPathUnicast(destination);
if (unicastPath == null) {
log.error("Path to destination node {} could not be found for flow {}.", destination.getId(), flowSpec.getUri());
// No path to at least one of the destination nodes.
return null;
}
log.info("Path to destination node {} found for flow {}. Path - {}", destination.getId(), flowSpec.getUri(), unicastPath);
result.addPath(unicastPath);
}
return result;
}
Use of org.apache.gobblin.service.modules.flowgraph.DataNode in project incubator-gobblin by apache:
the class MultiHopFlowCompiler, method compileFlow.
/**
 * Compile a {@link FlowSpec} into a DAG of executable jobs by finding a path through the flow graph
 * from the flow's source node to each of its destination nodes.
 * @param spec an instance of {@link FlowSpec}.
 * @return A DAG of {@link JobExecutionPlan}s, which encapsulates the compiled {@link org.apache.gobblin.runtime.api.JobSpec}s
 * together with the {@link SpecExecutor} where the job can be executed; {@code null} if compilation fails.
 */
@Override
public Dag<JobExecutionPlan> compileFlow(Spec spec) {
  Preconditions.checkNotNull(spec);
  Preconditions.checkArgument(spec instanceof FlowSpec, "MultiHopFlowCompiler only accepts FlowSpecs");
  long startTime = System.nanoTime();
  FlowSpec flowSpec = (FlowSpec) spec;
  String source = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY, "");
  String destination = ConfigUtils.getString(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY, "");
  // Resolve the source node; compilation cannot proceed without it.
  DataNode sourceNode = this.flowGraph.getNode(source);
  if (sourceNode == null) {
    flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", source));
    return null;
  }
  // Resolve every destination node; report the first one missing from the graph.
  List<String> destNodeIds = ConfigUtils.getStringList(flowSpec.getConfig(), ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
  List<DataNode> destNodes = destNodeIds.stream().map(this.flowGraph::getNode).collect(Collectors.toList());
  if (destNodes.contains(null)) {
    flowSpec.addCompilationError(source, destination, String.format("Flowgraph does not have a node with id %s", destNodeIds.get(destNodes.indexOf(null))));
    return null;
  }
  log.info("Compiling flow for source: {} and destination: {}", source, destination);
  List<FlowSpec> flowSpecs = splitFlowSpec(flowSpec);
  Dag<JobExecutionPlan> jobExecutionPlanDag = new Dag<>(new ArrayList<>());
  // Acquire the read lock BEFORE the try block: if lock() itself were to fail inside the try,
  // the finally clause would attempt to unlock a lock that was never acquired.
  this.rwLock.readLock().lock();
  try {
    for (FlowSpec datasetFlowSpec : flowSpecs) {
      // Check data-movement authorization for every destination before computing any path.
      for (DataNode destNode : destNodes) {
        long authStartTime = System.nanoTime();
        try {
          boolean authorized = this.dataMovementAuthorizer.isMovementAuthorized(flowSpec, sourceNode, destNode);
          Instrumented.updateTimer(dataAuthorizationTimer, System.nanoTime() - authStartTime, TimeUnit.NANOSECONDS);
          if (!authorized) {
            String message = String.format("Data movement is not authorized for flow: %s, source: %s, destination: %s", flowSpec.getUri().toString(), source, destination);
            log.error(message);
            datasetFlowSpec.addCompilationError(source, destination, message);
            return null;
          }
        } catch (Exception e) {
          Instrumented.markMeter(flowCompilationFailedMeter);
          datasetFlowSpec.addCompilationError(source, destination, Throwables.getStackTraceAsString(e));
          return null;
        }
      }
      // Compute the path from source to destination.
      FlowGraphPath flowGraphPath = flowGraph.findPath(datasetFlowSpec);
      if (flowGraphPath != null) {
        // Convert the path into a Dag of JobExecutionPlans.
        jobExecutionPlanDag = jobExecutionPlanDag.merge(flowGraphPath.asDag(this.config));
      }
    }
    if (jobExecutionPlanDag.isEmpty()) {
      Instrumented.markMeter(flowCompilationFailedMeter);
      String message = String.format("No path found from source: %s and destination: %s", source, destination);
      log.info(message);
      // Only record the generic "no path" error if no higher-priority (priority 0) error exists.
      if (flowSpec.getCompilationErrors().stream().noneMatch(compilationError -> compilationError.errorPriority == 0)) {
        flowSpec.addCompilationError(source, destination, message);
      }
      return null;
    }
  } catch (PathFinder.PathFinderException | SpecNotFoundException | JobTemplate.TemplateException | URISyntaxException | ReflectiveOperationException e) {
    Instrumented.markMeter(flowCompilationFailedMeter);
    String message = String.format("Exception encountered while compiling flow for source: %s and destination: %s, %s", source, destination, Throwables.getStackTraceAsString(e));
    log.error(message, e);
    flowSpec.addCompilationError(source, destination, message);
    return null;
  } finally {
    this.rwLock.readLock().unlock();
  }
  Instrumented.markMeter(flowCompilationSuccessFulMeter);
  Instrumented.updateTimer(flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
  return jobExecutionPlanDag;
}
Aggregations