Search in sources :

Example 1 with BaseFlowGraph

use of org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph in project incubator-gobblin by apache.

the class MultiHopFlowCompilerTest method setUp.

/**
 * Builds the fixture used by this test class: a {@link BaseFlowGraph} populated with the
 * data nodes and flow edges described by the properties files under the
 * {@code flowgraph/datanodes} and {@code flowgraph/flowedges} classpath resources, and a
 * {@link MultiHopFlowCompiler} that operates on that graph.
 *
 * @throws URISyntaxException if a classpath resource URL or a spec executor name cannot be converted to a {@link URI}
 * @throws IOException if a resource directory cannot be listed or one of its files cannot be opened
 * @throws ReflectiveOperationException if a configured data node or flow edge factory class cannot be loaded or instantiated
 * @throws FlowEdgeFactory.FlowEdgeCreationException declared for the flow edge factory call
 * @throws IllegalStateException if a flow edge references a spec executor that is not in the topology catalog
 */
@BeforeClass
public void setUp() throws URISyntaxException, IOException, ReflectiveOperationException, FlowEdgeFactory.FlowEdgeCreationException {
    // Create a FlowGraph
    this.flowGraph = new BaseFlowGraph();
    // Add DataNodes to the graph from the node properties files
    URI dataNodesUri = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/datanodes").toURI();
    FileSystem fs = FileSystem.get(dataNodesUri, new Configuration());
    Path dataNodesPath = new Path(dataNodesUri);
    // Files are parsed as Java properties; a missing file is an error rather than an empty config.
    ConfigParseOptions options = ConfigParseOptions.defaults().setSyntax(ConfigSyntax.PROPERTIES).setAllowMissing(false);
    for (FileStatus fileStatus : fs.listStatus(dataNodesPath)) {
        try (InputStream is = fs.open(fileStatus.getPath())) {
            Config nodeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
            // The node implementation is chosen per file, falling back to the default data node class.
            Class<?> dataNodeClass = Class.forName(ConfigUtils.getString(nodeConfig, FlowGraphConfigurationKeys.DATA_NODE_CLASS, FlowGraphConfigurationKeys.DEFAULT_DATA_NODE_CLASS));
            DataNode dataNode = (DataNode) GobblinConstructorUtils.invokeLongestConstructor(dataNodeClass, nodeConfig);
            this.flowGraph.addDataNode(dataNode);
        }
    }
    URI specExecutorCatalogUri = this.getClass().getClassLoader().getResource("topologyspec_catalog").toURI();
    Map<URI, TopologySpec> topologySpecMap = buildTopologySpecMap(specExecutorCatalogUri);
    // Create a FSFlowTemplateCatalog instance
    URI flowTemplateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    Properties properties = new Properties();
    properties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, flowTemplateCatalogUri.toString());
    Config config = ConfigFactory.parseProperties(properties);
    // The template catalog reads its location from the general job-config path key, so mirror the value there.
    Config templateCatalogCfg = config.withValue(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY, config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog flowCatalog = new FSFlowTemplateCatalog(templateCatalogCfg);
    // Add FlowEdges from the edge properties files
    URI flowEdgesURI = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/flowedges").toURI();
    fs = FileSystem.get(flowEdgesURI, new Configuration());
    Path flowEdgesPath = new Path(flowEdgesURI);
    for (FileStatus fileStatus : fs.listStatus(flowEdgesPath)) {
        log.warn(fileStatus.getPath().toString());
        try (InputStream is = fs.open(fileStatus.getPath())) {
            Config flowEdgeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
            Class<?> flowEdgeFactoryClass = Class.forName(ConfigUtils.getString(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_FACTORY_CLASS, FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS));
            FlowEdgeFactory flowEdgeFactory = (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(flowEdgeFactoryClass, config);
            List<String> specExecutorNames = ConfigUtils.getStringList(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY);
            List<SpecExecutor> specExecutors = new ArrayList<>();
            for (String specExecutorName : specExecutorNames) {
                TopologySpec topologySpec = topologySpecMap.get(new URI(specExecutorName));
                if (topologySpec == null) {
                    // Fail with the offending name and file instead of an anonymous NullPointerException.
                    throw new IllegalStateException("Spec executor '" + specExecutorName + "' referenced by flow edge file "
                        + fileStatus.getPath() + " was not found in the topology spec catalog");
                }
                specExecutors.add(topologySpec.getSpecExecutor());
            }
            FlowEdge edge = flowEdgeFactory.createFlowEdge(flowEdgeConfig, flowCatalog, specExecutors);
            this.flowGraph.addFlowEdge(edge);
        }
    }
    this.specCompiler = new MultiHopFlowCompiler(config, this.flowGraph);
}
Also used : Path(org.apache.hadoop.fs.Path) FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) FlowEdge(org.apache.gobblin.service.modules.flowgraph.FlowEdge) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) Properties(java.util.Properties) URI(java.net.URI) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) FlowEdgeFactory(org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory) DataNode(org.apache.gobblin.service.modules.flowgraph.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) AbstractSpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor) BeforeClass(org.testng.annotations.BeforeClass) AfterClass(org.testng.annotations.AfterClass) ConfigParseOptions(com.typesafe.config.ConfigParseOptions) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with BaseFlowGraph

use of org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph in project incubator-gobblin by apache.

the class GitFlowGraphMonitorTest method setUp.

/**
 * Prepares the fixture for this test class: a bare remote Git repository with a clone
 * used for pushing, a template catalog, an empty {@link BaseFlowGraph}, and an activated
 * {@link GitFlowGraphMonitor} configured against that repository.
 *
 * @throws Exception if any step of the setup fails
 */
@BeforeClass
public void setUp() throws Exception {
    cleanUpDir(TEST_DIR);

    // Create a bare repository
    this.remoteRepo = RepositoryCache.FileKey.exact(remoteDir, FS.DETECTED).open(false);
    this.remoteRepo.create(true);
    String remoteRepoPath = this.remoteRepo.getDirectory().getAbsolutePath();
    this.gitForPush = Git.cloneRepository()
        .setURI(remoteRepoPath)
        .setDirectory(cloneDir)
        .call();

    // push an empty commit as a base for detecting changes
    this.gitForPush.commit().setMessage("First commit").call();
    this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

    ClassLoader resourceLoader = this.getClass().getClassLoader();
    URI topologyCatalogUri = resourceLoader.getResource("topologyspec_catalog").toURI();
    Map<URI, TopologySpec> specsByUri = MultiHopFlowCompilerTest.buildTopologySpecMap(topologyCatalogUri);

    // Every monitor setting lives under the same key prefix.
    String monitorKeyPrefix = GitFlowGraphMonitor.GIT_FLOWGRAPH_MONITOR_PREFIX + ".";
    this.config = ConfigBuilder.create()
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_REPO_URI, remoteRepoPath)
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_REPO_DIR, TEST_DIR + "/git-flowgraph")
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_POLLING_INTERVAL, 5)
        .build();

    // Create a FSFlowTemplateCatalog instance
    URI templateCatalogUri = resourceLoader.getResource("template_catalog").toURI();
    Properties catalogProps = new Properties();
    catalogProps.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, templateCatalogUri.toString());
    Config catalogBaseCfg = ConfigFactory.parseProperties(catalogProps);
    // Copy the catalog path onto the general job-config path key before building the catalog.
    Config templateCatalogCfg = catalogBaseCfg.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        catalogBaseCfg.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    this.flowCatalog = Optional.of(new FSFlowTemplateCatalog(templateCatalogCfg));

    // Create a FlowGraph instance with defaults
    this.flowGraph = new BaseFlowGraph();
    this.gitFlowGraphMonitor = new GitFlowGraphMonitor(
        this.config, this.flowCatalog, this.flowGraph, specsByUri, new CountDownLatch(1));
    this.gitFlowGraphMonitor.setActive(true);
}
Also used : FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) Config(com.typesafe.config.Config) RepositoryCache(org.eclipse.jgit.lib.RepositoryCache) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) URI(java.net.URI) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

Config (com.typesafe.config.Config)2 URI (java.net.URI)2 Properties (java.util.Properties)2 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)2 BaseFlowGraph (org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph)2 FSFlowTemplateCatalog (org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog)2 BeforeClass (org.testng.annotations.BeforeClass)2 ConfigParseOptions (com.typesafe.config.ConfigParseOptions)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 ArrayList (java.util.ArrayList)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)1 AbstractSpecExecutor (org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor)1 DataNode (org.apache.gobblin.service.modules.flowgraph.DataNode)1 FlowEdge (org.apache.gobblin.service.modules.flowgraph.FlowEdge)1 FlowEdgeFactory (org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory)1 AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileStatus (org.apache.hadoop.fs.FileStatus)1