Search in sources :

Example 1 with FSFlowTemplateCatalog

Use of org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog in the Apache incubator-gobblin project.

In the class MultiHopFlowCompilerTest, method setUp:

/**
 * Builds the fixtures used by the tests in this class: a {@link BaseFlowGraph} populated with the
 * data nodes and flow edges described by the properties files under the {@code flowgraph/datanodes}
 * and {@code flowgraph/flowedges} classpath resources, and a {@link MultiHopFlowCompiler} over that graph.
 *
 * @throws URISyntaxException if a classpath resource URL or a spec executor name is not a valid URI
 * @throws IOException if a node or edge properties file cannot be listed or read
 * @throws ReflectiveOperationException if a configured data node or flow edge factory class cannot be
 *         loaded or instantiated
 * @throws FlowEdgeFactory.FlowEdgeCreationException if a flow edge cannot be created from its config
 * @throws IllegalStateException if a flow edge references a spec executor that is absent from the
 *         topology spec map
 */
@BeforeClass
public void setUp() throws URISyntaxException, IOException, ReflectiveOperationException, FlowEdgeFactory.FlowEdgeCreationException {
    // Create a FlowGraph
    this.flowGraph = new BaseFlowGraph();
    // Add DataNodes to the graph from the node properties files
    URI dataNodesUri = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/datanodes").toURI();
    FileSystem fs = FileSystem.get(dataNodesUri, new Configuration());
    Path dataNodesPath = new Path(dataNodesUri);
    // Every node/edge file is parsed as Java properties; a missing file is an error rather than an empty config.
    ConfigParseOptions options = ConfigParseOptions.defaults().setSyntax(ConfigSyntax.PROPERTIES).setAllowMissing(false);
    for (FileStatus fileStatus : fs.listStatus(dataNodesPath)) {
        try (InputStream is = fs.open(fileStatus.getPath())) {
            Config nodeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
            // The node implementation is chosen per file, falling back to the default data node class.
            Class<?> dataNodeClass = Class.forName(ConfigUtils.getString(nodeConfig, FlowGraphConfigurationKeys.DATA_NODE_CLASS, FlowGraphConfigurationKeys.DEFAULT_DATA_NODE_CLASS));
            DataNode dataNode = (DataNode) GobblinConstructorUtils.invokeLongestConstructor(dataNodeClass, nodeConfig);
            this.flowGraph.addDataNode(dataNode);
        }
    }
    URI specExecutorCatalogUri = this.getClass().getClassLoader().getResource("topologyspec_catalog").toURI();
    Map<URI, TopologySpec> topologySpecMap = buildTopologySpecMap(specExecutorCatalogUri);
    // Create a FSFlowTemplateCatalog instance
    URI flowTemplateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    Properties properties = new Properties();
    properties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, flowTemplateCatalogUri.toString());
    Config config = ConfigFactory.parseProperties(properties);
    // The catalog config carries the template catalog path under the general job-config-file path key as well.
    Config templateCatalogCfg = config.withValue(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY, config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog flowCatalog = new FSFlowTemplateCatalog(templateCatalogCfg);
    // Add FlowEdges from the edge properties files
    URI flowEdgesURI = MultiHopFlowCompilerTest.class.getClassLoader().getResource("flowgraph/flowedges").toURI();
    fs = FileSystem.get(flowEdgesURI, new Configuration());
    Path flowEdgesPath = new Path(flowEdgesURI);
    for (FileStatus fileStatus : fs.listStatus(flowEdgesPath)) {
        log.warn(fileStatus.getPath().toString());
        try (InputStream is = fs.open(fileStatus.getPath())) {
            Config flowEdgeConfig = ConfigFactory.parseReader(new InputStreamReader(is, Charsets.UTF_8), options);
            // The edge factory is chosen per file, falling back to the default flow edge factory class.
            Class<?> flowEdgeFactoryClass = Class.forName(ConfigUtils.getString(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_FACTORY_CLASS, FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS));
            FlowEdgeFactory flowEdgeFactory = (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(flowEdgeFactoryClass, config);
            List<String> specExecutorNames = ConfigUtils.getStringList(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY);
            List<SpecExecutor> specExecutors = new ArrayList<>();
            for (String specExecutorName : specExecutorNames) {
                TopologySpec topologySpec = topologySpecMap.get(new URI(specExecutorName));
                // Fail with the offending name and file instead of an anonymous NullPointerException.
                if (topologySpec == null) {
                    throw new IllegalStateException("No TopologySpec found for spec executor '" + specExecutorName + "' referenced by flow edge file " + fileStatus.getPath());
                }
                specExecutors.add(topologySpec.getSpecExecutor());
            }
            FlowEdge edge = flowEdgeFactory.createFlowEdge(flowEdgeConfig, flowCatalog, specExecutors);
            this.flowGraph.addFlowEdge(edge);
        }
    }
    this.specCompiler = new MultiHopFlowCompiler(config, this.flowGraph);
}
Also used : Path(org.apache.hadoop.fs.Path) FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) FlowEdge(org.apache.gobblin.service.modules.flowgraph.FlowEdge) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) AzkabanProjectConfig(org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) Properties(java.util.Properties) URI(java.net.URI) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) FlowEdgeFactory(org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory) DataNode(org.apache.gobblin.service.modules.flowgraph.DataNode) FileSystem(org.apache.hadoop.fs.FileSystem) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) AbstractSpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor) BeforeClass(org.testng.annotations.BeforeClass) AfterClass(org.testng.annotations.AfterClass) ConfigParseOptions(com.typesafe.config.ConfigParseOptions) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with FSFlowTemplateCatalog

Use of org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog in the Apache incubator-gobblin project.

In the class BaseFlowEdgeFactoryTest, method testCreateFlowEdge:

/**
 * Creates a flow edge through {@code BaseFlowEdge.Factory} from an in-memory edge definition and two
 * {@code InMemorySpecExecutor}s, then checks that the edge reports the configured source, destination
 * and executors.
 */
@Test
public void testCreateFlowEdge() throws Exception {
    // Edge definition: node1 -> node2, named "edge1".
    Properties edgeProperties = new Properties();
    edgeProperties.put(FlowGraphConfigurationKeys.FLOW_EDGE_SOURCE_KEY, "node1");
    edgeProperties.put(FlowGraphConfigurationKeys.FLOW_EDGE_DESTINATION_KEY, "node2");
    edgeProperties.put(FlowGraphConfigurationKeys.FLOW_EDGE_NAME_KEY, "edge1");
    edgeProperties.put(FlowGraphConfigurationKeys.FLOW_EDGE_ID_KEY, "node1:node2:edge1");
    edgeProperties.put(FlowGraphConfigurationKeys.FLOW_EDGE_TEMPLATE_DIR_URI_KEY, "FS:///flowEdgeTemplate");

    // Two executors with distinguishable store directories and capabilities.
    List<SpecExecutor> executors = new ArrayList<>();
    Config firstExecutorConfig = ConfigFactory.empty()
        .withValue("specStore.fs.dir", ConfigValueFactory.fromAnyRef("/tmp1"))
        .withValue("specExecInstance.capabilities", ConfigValueFactory.fromAnyRef("s1:d1"));
    executors.add(new InMemorySpecExecutor(firstExecutorConfig));
    Config secondExecutorConfig = ConfigFactory.empty()
        .withValue("specStore.fs.dir", ConfigValueFactory.fromAnyRef("/tmp2"))
        .withValue("specExecInstance.capabilities", ConfigValueFactory.fromAnyRef("s2:d2"));
    executors.add(new InMemorySpecExecutor(secondExecutorConfig));

    FlowEdgeFactory edgeFactory = new BaseFlowEdge.Factory();

    // Template catalog rooted at the "template_catalog" classpath resource.
    Properties catalogProperties = new Properties();
    URI catalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    catalogProperties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, catalogUri.toString());
    Config catalogBase = ConfigFactory.parseProperties(catalogProperties);
    Config catalogConfig = catalogBase.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        catalogBase.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog templateCatalog = new FSFlowTemplateCatalog(catalogConfig);

    Config edgeConfig = ConfigUtils.propertiesToConfig(edgeProperties);
    FlowEdge createdEdge = edgeFactory.createFlowEdge(edgeConfig, templateCatalog, executors);

    // Endpoints come straight from the edge definition.
    Assert.assertEquals(createdEdge.getSrc(), "node1");
    Assert.assertEquals(createdEdge.getDest(), "node2");

    // Executors are exposed in the order they were supplied, with their configs intact.
    Config actualFirstConfig = createdEdge.getExecutors().get(0).getConfig().get();
    Assert.assertEquals(actualFirstConfig.getString("specStore.fs.dir"), "/tmp1");
    Assert.assertEquals(actualFirstConfig.getString("specExecInstance.capabilities"), "s1:d1");
    Config actualSecondConfig = createdEdge.getExecutors().get(1).getConfig().get();
    Assert.assertEquals(actualSecondConfig.getString("specStore.fs.dir"), "/tmp2");
    Assert.assertEquals(actualSecondConfig.getString("specExecInstance.capabilities"), "s2:d2");

    String expectedExecutorType = "InMemorySpecExecutor";
    Assert.assertEquals(createdEdge.getExecutors().get(0).getClass().getSimpleName(), expectedExecutorType);
    Assert.assertEquals(createdEdge.getExecutors().get(1).getClass().getSimpleName(), expectedExecutorType);
}
Also used : FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) ConfigValueFactory(com.typesafe.config.ConfigValueFactory) ConfigFactory(com.typesafe.config.ConfigFactory) Properties(java.util.Properties) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) URI(java.net.URI) Test(org.testng.annotations.Test)

Example 3 with FSFlowTemplateCatalog

Use of org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog in the Apache incubator-gobblin project.

In the class JobExecutionPlanDagFactoryTest, method setUp:

/**
 * Prepares the shared fixtures: the job templates of the flow template identified by
 * {@code TEST_TEMPLATE_URI}, loaded from the {@code template_catalog} classpath resource, and an
 * in-memory spec executor.
 */
@BeforeClass
public void setUp() throws URISyntaxException, IOException, SpecNotFoundException, JobTemplate.TemplateException {
    // Template catalog rooted at the "template_catalog" classpath resource.
    URI catalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    Properties catalogProperties = new Properties();
    catalogProperties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, catalogUri.toString());
    Config catalogBase = ConfigFactory.parseProperties(catalogProperties);
    Config catalogConfig = catalogBase.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        catalogBase.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog templateCatalog = new FSFlowTemplateCatalog(catalogConfig);

    // Keep only the job templates of the test flow template.
    this.jobTemplates = templateCatalog.getFlowTemplate(new URI(TEST_TEMPLATE_URI)).getJobTemplates();

    // Spec executor backed by its own, separate set of properties.
    Properties executorProperties = new Properties();
    executorProperties.put("specStore.fs.dir", "/tmp/testSpecStoreDir");
    executorProperties.put("specExecInstance.capabilities", "source:destination");
    this.specExecutor = new InMemorySpecExecutor(ConfigUtils.propertiesToConfig(executorProperties));
}
Also used : FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) Config(com.typesafe.config.Config) Properties(java.util.Properties) URI(java.net.URI) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) BeforeClass(org.testng.annotations.BeforeClass)

Example 4 with FSFlowTemplateCatalog

Use of org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog in the Apache incubator-gobblin project.

In the class GitFlowGraphMonitorTest, method setUp:

/**
 * Sets up a bare remote git repository with one pushed base commit, a template catalog loaded from the
 * {@code template_catalog} classpath resource, an empty {@code BaseFlowGraph}, and an activated
 * {@code GitFlowGraphMonitor} wired to all of them.
 */
@BeforeClass
public void setUp() throws Exception {
    cleanUpDir(TEST_DIR);

    // Create a bare repository to act as the remote
    RepositoryCache.FileKey remoteKey = RepositoryCache.FileKey.exact(remoteDir, FS.DETECTED);
    this.remoteRepo = remoteKey.open(false);
    this.remoteRepo.create(true);
    String remoteRepoPath = this.remoteRepo.getDirectory().getAbsolutePath();

    // Clone it, then push an empty commit as a base for detecting changes
    this.gitForPush = Git.cloneRepository()
        .setURI(remoteRepoPath)
        .setDirectory(cloneDir)
        .call();
    this.gitForPush.commit().setMessage("First commit").call();
    this.gitForPush.push().setRemote("origin").setRefSpecs(this.masterRefSpec).call();

    URI topologyCatalogUri = this.getClass().getClassLoader().getResource("topologyspec_catalog").toURI();
    Map<URI, TopologySpec> topologySpecMap = MultiHopFlowCompilerTest.buildTopologySpecMap(topologyCatalogUri);

    // Monitor settings: remote repo location, local checkout directory and polling interval.
    String monitorKeyPrefix = GitFlowGraphMonitor.GIT_FLOWGRAPH_MONITOR_PREFIX + ".";
    this.config = ConfigBuilder.create()
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_REPO_URI, remoteRepoPath)
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_REPO_DIR, TEST_DIR + "/git-flowgraph")
        .addPrimitive(monitorKeyPrefix + ConfigurationKeys.GIT_MONITOR_POLLING_INTERVAL, 5)
        .build();

    // Create a FSFlowTemplateCatalog instance rooted at the "template_catalog" classpath resource
    URI catalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    Properties catalogProperties = new Properties();
    catalogProperties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, catalogUri.toString());
    Config catalogBase = ConfigFactory.parseProperties(catalogProperties);
    Config catalogConfig = catalogBase.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        catalogBase.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    this.flowCatalog = Optional.of(new FSFlowTemplateCatalog(catalogConfig));

    // Create a FlowGraph instance with defaults and hand everything to the monitor
    this.flowGraph = new BaseFlowGraph();
    this.gitFlowGraphMonitor = new GitFlowGraphMonitor(this.config, this.flowCatalog, this.flowGraph, topologySpecMap, new CountDownLatch(1));
    this.gitFlowGraphMonitor.setActive(true);
}
Also used : FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) Config(com.typesafe.config.Config) RepositoryCache(org.eclipse.jgit.lib.RepositoryCache) BaseFlowGraph(org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) URI(java.net.URI) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

Config (com.typesafe.config.Config)4 URI (java.net.URI)4 Properties (java.util.Properties)4 FSFlowTemplateCatalog (org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog)4 BeforeClass (org.testng.annotations.BeforeClass)3 ArrayList (java.util.ArrayList)2 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)2 TopologySpec (org.apache.gobblin.runtime.api.TopologySpec)2 InMemorySpecExecutor (org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor)2 BaseFlowGraph (org.apache.gobblin.service.modules.flowgraph.BaseFlowGraph)2 ConfigFactory (com.typesafe.config.ConfigFactory)1 ConfigParseOptions (com.typesafe.config.ConfigParseOptions)1 ConfigValueFactory (com.typesafe.config.ConfigValueFactory)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 AbstractSpecExecutor (org.apache.gobblin.runtime.spec_executorInstance.AbstractSpecExecutor)1 DataNode (org.apache.gobblin.service.modules.flowgraph.DataNode)1 FlowEdge (org.apache.gobblin.service.modules.flowgraph.FlowEdge)1 FlowEdgeFactory (org.apache.gobblin.service.modules.flowgraph.FlowEdgeFactory)1