Search in sources :

Example 1 with FlowEdgeFactory

Use of org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory in the project incubator-gobblin by Apache.

From the method addFlowEdge of the class GitFlowGraphMonitor.

/**
 * Add a {@link FlowEdge} to the {@link FlowGraph}. The {@link FlowEdgeFactory} used to build the edge is
 * instantiated reflectively from the class named by
 * {@link FlowGraphConfigurationKeys#FLOW_EDGE_FACTORY_CLASS} in the edge config file, falling back to
 * {@link FlowGraphConfigurationKeys#DEFAULT_FLOW_EDGE_FACTORY_CLASS} when the key is not set.
 *
 * <p>This method never throws: if the {@code FlowTemplateCatalog} is absent, if the {@link FlowGraph} rejects
 * the edge, or if any exception is raised while loading or building the edge, a warning is logged and the
 * edge is skipped.
 *
 * @param change the {@link DiffEntry} whose new path identifies the edge config file, relative to the
 *               repository directory
 */
private void addFlowEdge(DiffEntry change) {
    // Only paths accepted by checkFilePath for EDGE_FILE_DEPTH are treated as edge files; others are ignored.
    if (checkFilePath(change.getNewPath(), EDGE_FILE_DEPTH)) {
        Path edgeFilePath = new Path(this.repositoryDir, change.getNewPath());
        try {
            Config edgeConfig = loadEdgeFileWithOverrides(edgeFilePath);
            List<SpecExecutor> specExecutors = getSpecExecutors(edgeConfig);
            Class<?> flowEdgeFactoryClass = Class.forName(ConfigUtils.getString(edgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_FACTORY_CLASS, FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS));
            FlowEdgeFactory flowEdgeFactory = (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(flowEdgeFactoryClass, edgeConfig);
            if (flowTemplateCatalog.isPresent()) {
                FlowEdge edge = flowEdgeFactory.createFlowEdge(edgeConfig, flowTemplateCatalog.get(), specExecutors);
                if (!this.flowGraph.addFlowEdge(edge)) {
                    log.warn("Could not add edge {} to FlowGraph; skipping", edge.getId());
                } else {
                    log.info("Added edge {} to FlowGraph", edge.getId());
                }
            } else {
                log.warn("Could not add edge defined in {} to FlowGraph as FlowTemplateCatalog is absent", change.getNewPath());
            }
        } catch (Exception e) {
            // Deliberately best-effort: one bad edge file must not abort processing. The exception is passed
            // as the trailing argument so the logger records the stack trace and cause, not just the message.
            log.warn("Could not add edge defined in {} due to exception {}", change.getNewPath(), e.getMessage(), e);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FlowEdge(org.apache.gobblin.service.modules.flowgraph.FlowEdge) FlowEdgeFactory(org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory) Config(com.typesafe.config.Config) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) URISyntaxException(java.net.URISyntaxException) GitAPIException(org.eclipse.jgit.api.errors.GitAPIException) IOException(java.io.IOException)

Example 2 with FlowEdgeFactory

Use of org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory in the project incubator-gobblin by Apache.

From the method setUp of the class MultiHopFlowCompilerTest.

@BeforeClass
public void setUp() throws URISyntaxException, IOException, ReflectiveOperationException, FlowEdgeFactory.FlowEdgeCreationException {
    // Start from an empty graph; nodes and edges are loaded from classpath resources below.
    this.flowGraph = new BaseFlowGraph();

    // --- DataNodes: one properties file per node under "flowgraph/datanodes" ---
    URI nodeDirUri = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/datanodes").toURI();
    FileSystem nodeFs = FileSystem.get(nodeDirUri, new Configuration());
    Path nodeDir = new Path(nodeDirUri);
    // The same parse options (PROPERTIES syntax, missing files not allowed) are used for node and edge files.
    ConfigParseOptions parseOptions = ConfigParseOptions.defaults().setSyntax(ConfigSyntax.PROPERTIES).setAllowMissing(false);
    for (FileStatus nodeFile : nodeFs.listStatus(nodeDir)) {
        try (InputStream nodeStream = nodeFs.open(nodeFile.getPath())) {
            InputStreamReader nodeReader = new InputStreamReader(nodeStream, Charsets.UTF_8);
            Config nodeConfig = ConfigFactory.parseReader(nodeReader, parseOptions);
            String dataNodeClassName = ConfigUtils.getString(nodeConfig, FlowGraphConfigurationKeys.DATA_NODE_CLASS, FlowGraphConfigurationKeys.DEFAULT_DATA_NODE_CLASS);
            Class dataNodeClass = Class.forName(dataNodeClassName);
            DataNode node = (DataNode) GobblinConstructorUtils.invokeLongestConstructor(dataNodeClass, nodeConfig);
            this.flowGraph.addDataNode(node);
        }
    }

    // --- Topology specs, keyed by URI, used to look up the SpecExecutors named by each edge ---
    URI topologyCatalogUri = this.getClass().getClassLoader().getResource("topologyspec_catalog").toURI();
    Map<URI, TopologySpec> topologySpecsByUri = buildTopologySpecMap(topologyCatalogUri);

    // --- Flow template catalog rooted at the "template_catalog" resource ---
    URI templateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    Properties catalogProps = new Properties();
    catalogProps.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, templateCatalogUri.toString());
    Config config = ConfigFactory.parseProperties(catalogProps);
    // Expose the catalog path under the general job-config path key as well.
    Config templateCatalogCfg = config.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog templateCatalog = new FSFlowTemplateCatalog(templateCatalogCfg);

    // --- FlowEdges: one properties file per edge under "flowgraph/flowedges" ---
    URI edgeDirUri = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/flowedges").toURI();
    FileSystem edgeFs = FileSystem.get(edgeDirUri, new Configuration());
    Path edgeDir = new Path(edgeDirUri);
    for (FileStatus edgeFile : edgeFs.listStatus(edgeDir)) {
        log.warn(edgeFile.getPath().toString());
        try (InputStream edgeStream = edgeFs.open(edgeFile.getPath())) {
            InputStreamReader edgeReader = new InputStreamReader(edgeStream, Charsets.UTF_8);
            Config flowEdgeConfig = ConfigFactory.parseReader(edgeReader, parseOptions);
            String factoryClassName = ConfigUtils.getString(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_FACTORY_CLASS, FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS);
            Class edgeFactoryClass = Class.forName(factoryClassName);
            // NOTE(review): the factory is constructed with the catalog-level config, not flowEdgeConfig — confirm intent.
            FlowEdgeFactory edgeFactory = (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(edgeFactoryClass, config);

            // Resolve each executor name listed in the edge file to its SpecExecutor via the topology map.
            List<SpecExecutor> executors = new ArrayList<>();
            for (String executorName : ConfigUtils.getStringList(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY)) {
                TopologySpec topologySpec = topologySpecsByUri.get(new URI(executorName));
                executors.add(topologySpec.getSpecExecutor());
            }

            FlowEdge flowEdge = edgeFactory.createFlowEdge(flowEdgeConfig, templateCatalog, executors);
            this.flowGraph.addFlowEdge(flowEdge);
        }
    }

    // The compiler under test is built over the populated graph.
    this.specCompiler = new MultiHopFlowCompiler(config, this.flowGraph);
}
Also used : Path(org.apache.hadoop.fs.Path) FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) FlowEdge(org.apache.gobblin.service.modules.flowgraph.FlowEdge) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) Properties(java.util.Properties) URI(java.net.URI) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) FlowEdgeFactory(org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory) DataNode(org.apache.gobblin.service.modules.flowgraph.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) AbstractSpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor) BeforeClass(org.testng.annotations.BeforeClass) AfterClass(org.testng.annotations.AfterClass) ConfigParseOptions(com.typesafe.config.ConfigParseOptions) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

Config (com.typesafe.config.Config)2 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)2 FlowEdge (org.apache.gobblin.service.modules.flowgraph.FlowEdge)2 FlowEdgeFactory (org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory)2 Path (org.apache.hadoop.fs.Path)2 ConfigParseOptions (com.typesafe.config.ConfigParseOptions)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 Properties (java.util.Properties)1 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)1 AbstractSpecExecutor (org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor)1 BaseFlowGraph (org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph)1 DataNode (org.apache.gobblin.service.modules.flowgraph.DataNode)1 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)1 FSFlowTemplateCatalog (org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog)1 Configuration (org.apache.hadoop.conf.Configuration)1