Search in sources :

Example 1 with FlowTemplate

use of org.apache.gobblin.service.modules.template.FlowTemplate in project incubator-gobblin by apache.

the class BaseFlowGraphTest method setUp.

/**
 * Builds the shared fixture for this test class: three data nodes (plus a second instance built from
 * node3's config), three flow edges (plus a second instance with edge3's arguments), and a
 * {@link BaseFlowGraph} to which the three nodes and three edges are added.
 *
 * @throws URISyntaxException if one of the flow template URIs cannot be parsed
 * @throws DataNode.DataNodeCreationException declared by this method; presumably raised by the
 *         {@link BaseDataNode} constructor - confirm against that class
 */
@BeforeClass
public void setUp() throws URISyntaxException, DataNode.DataNodeCreationException {
    // node1: key1=val1 plus its node id
    Properties node1Props = new Properties();
    node1Props.put("key1", "val1");
    node1 = new BaseDataNode(ConfigUtils.propertiesToConfig(node1Props)
        .withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, ConfigValueFactory.fromAnyRef("node1")));

    // node2: key2=val2 plus its node id
    Properties node2Props = new Properties();
    node2Props.put("key2", "val2");
    node2 = new BaseDataNode(ConfigUtils.propertiesToConfig(node2Props)
        .withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, ConfigValueFactory.fromAnyRef("node2")));

    // node3: key3=val3 plus its node id; the config is kept so that a second instance can be built from it
    Properties node3Props = new Properties();
    node3Props.put("key3", "val3");
    Config node3Config = ConfigUtils.propertiesToConfig(node3Props)
        .withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, ConfigValueFactory.fromAnyRef("node3"));
    node3 = new BaseDataNode(node3Config);
    // Second node instance built from exactly the same config as node3
    node3c = new BaseDataNode(node3Config);

    // One flow template per edge, each with empty name/description and an empty config
    Config emptyConfig = ConfigFactory.empty();
    FlowTemplate templateForEdge1 = new StaticFlowTemplate(new URI("FS:///uri1"), "", "", emptyConfig, null, null);
    FlowTemplate templateForEdge2 = new StaticFlowTemplate(new URI("FS:///uri2"), "", "", emptyConfig, null, null);
    FlowTemplate templateForEdge3 = new StaticFlowTemplate(new URI("FS:///uri3"), "", "", emptyConfig, null, null);

    // Edge ids
    edgeId1 = "node1:node2:edge1";
    edgeId2 = "node2:node3:edge2";
    edgeId3 = "node3:node1:edge3";

    // Edge instances connecting node1/node2, node2/node3 and node3/node1
    edge1 = new BaseFlowEdge(Lists.newArrayList("node1", "node2"), edgeId1, templateForEdge1, null, emptyConfig, true);
    edge2 = new BaseFlowEdge(Lists.newArrayList("node2", "node3"), edgeId2, templateForEdge2, null, emptyConfig, true);
    edge3 = new BaseFlowEdge(Lists.newArrayList("node3", "node1"), edgeId3, templateForEdge3, null, emptyConfig, true);
    // Second edge instance built with the same arguments as edge3
    edge3c = new BaseFlowEdge(Lists.newArrayList("node3", "node1"), edgeId3, templateForEdge3, null, emptyConfig, true);

    graph = new BaseFlowGraph();

    // Every node must be accepted by the empty graph ...
    Assert.assertTrue(graph.addDataNode(node1));
    Assert.assertTrue(graph.addDataNode(node2));
    Assert.assertTrue(graph.addDataNode(node3));

    // ... and none of them has any edge yet
    Assert.assertEquals(graph.getEdges(node1).size(), 0);
    Assert.assertEquals(graph.getEdges(node2).size(), 0);
    Assert.assertEquals(graph.getEdges(node3).size(), 0);

    // Every edge must be accepted as well
    Assert.assertTrue(graph.addFlowEdge(edge1));
    Assert.assertTrue(graph.addFlowEdge(edge2));
    Assert.assertTrue(graph.addFlowEdge(edge3));
}
Also used : StaticFlowTemplate(org.apache.gobblin.service.modules.template.StaticFlowTemplate) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) StaticFlowTemplate(org.apache.gobblin.service.modules.template.StaticFlowTemplate) Config(com.typesafe.config.Config) Properties(java.util.Properties) URI(java.net.URI) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with FlowTemplate

use of org.apache.gobblin.service.modules.template.FlowTemplate in project incubator-gobblin by apache.

the class JobExecutionPlanDagFactoryTest method setUp.

/**
 * Loads the job templates of the flow template at {@code TEST_TEMPLATE_URI} from the
 * {@code template_catalog} classpath resource and creates an {@link InMemorySpecExecutor};
 * both are stored in fields of this test class.
 */
@BeforeClass
public void setUp() throws URISyntaxException, IOException, SpecNotFoundException, JobTemplate.TemplateException {
    // Location of the "template_catalog" test resource, as a URI string
    String catalogLocation = this.getClass().getClassLoader().getResource("template_catalog").toURI().toString();

    // The catalog location is registered under the template-catalog key and mirrored
    // under the general job-config-path key
    Properties catalogProps = new Properties();
    catalogProps.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, catalogLocation);
    Config baseConfig = ConfigFactory.parseProperties(catalogProps);
    Config templateCatalogCfg = baseConfig.withValue(
        ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY,
        baseConfig.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));

    // Resolve the flow template through a FSFlowTemplateCatalog and keep its job templates
    FSFlowTemplateCatalog templateCatalog = new FSFlowTemplateCatalog(templateCatalogCfg);
    FlowTemplate template = templateCatalog.getFlowTemplate(new URI(TEST_TEMPLATE_URI));
    this.jobTemplates = template.getJobTemplates();

    // Spec executor instance configured with a spec store dir and a single capability
    Properties executorProps = new Properties();
    executorProps.put("specStore.fs.dir", "/tmp/testSpecStoreDir");
    executorProps.put("specExecInstance.capabilities", "source:destination");
    this.specExecutor = new InMemorySpecExecutor(ConfigUtils.propertiesToConfig(executorProps));
}
Also used : FSFlowTemplateCatalog(org.apache.gobblin.service.modules.template_catalog.FSFlowTemplateCatalog) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) Config(com.typesafe.config.Config) Properties(java.util.Properties) URI(java.net.URI) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) BeforeClass(org.testng.annotations.BeforeClass)

Example 3 with FlowTemplate

use of org.apache.gobblin.service.modules.template.FlowTemplate in project incubator-gobblin by apache.

the class ObservingFSFlowEdgeTemplateCatalogTest method testModifyFlowTemplate.

/**
 * Checks that the observing catalog returns the same {@link FlowTemplate} instance for repeated lookups
 * of one URI, and that after {@code flow.conf} is rewritten on disk the catalog eventually reports the
 * new value of the input dataset descriptor format.
 */
@Test
public void testModifyFlowTemplate() throws Exception {
    ObservingFSFlowEdgeTemplateCatalog catalog = new ObservingFSFlowEdgeTemplateCatalog(this.templateCatalogCfg, new ReentrantReadWriteLock());
    ServiceManager serviceManager = new ServiceManager(Lists.newArrayList(catalog));
    serviceManager.startAsync().awaitHealthy(5, TimeUnit.SECONDS);

    // Two lookups of the same URI must yield the very same (cached) template instance
    URI templateUri = new URI(FSFlowTemplateCatalogTest.TEST_TEMPLATE_DIR_URI);
    FlowTemplate firstLookup = catalog.getFlowTemplate(templateUri);
    FlowTemplate secondLookup = catalog.getFlowTemplate(new URI(FSFlowTemplateCatalogTest.TEST_TEMPLATE_DIR_URI));
    Assert.assertSame(firstLookup, secondLookup);

    // Rewrite flow.conf: switch the first "format=avro" line of input descriptor 0 to "format=any"
    File templateFolder = new File(this.templateDir, FSFlowTemplateCatalogTest.TEST_TEMPLATE_NAME);
    Path flowConfPath = new File(templateFolder, "flow.conf").toPath();
    List<String> confLines = java.nio.file.Files.readAllLines(flowConfPath);
    int formatLineIdx = confLines.indexOf("gobblin.flow.edge.input.dataset.descriptor.0.format=avro");
    if (formatLineIdx >= 0) {
        confLines.set(formatLineIdx, "gobblin.flow.edge.input.dataset.descriptor.0.format=any");
    }
    java.nio.file.Files.write(flowConfPath, confLines);

    // Poll (up to 10 s) until the catalog reports the updated format value
    Function formatLookup = new GetFlowTemplateConfigFunction(new URI(FSFlowTemplateCatalogTest.TEST_TEMPLATE_DIR_URI), catalog, "gobblin.flow.edge.input.dataset.descriptor.0.format");
    AssertWithBackoff.create().timeoutMs(10000).assertEquals(formatLookup, "any", "flow template updated");
}
Also used : Path(java.nio.file.Path) Function(com.google.common.base.Function) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) ServiceManager(com.google.common.util.concurrent.ServiceManager) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) URI(java.net.URI) File(java.io.File) Test(org.testng.annotations.Test)

Example 4 with FlowTemplate

use of org.apache.gobblin.service.modules.template.FlowTemplate in project incubator-gobblin by apache.

the class FSFlowTemplateCatalogTest method testGetFlowTemplate.

/**
 * Loads the flow template at {@code TEST_TEMPLATE_DIR_URI} from the {@code template_catalog} classpath
 * resource through a {@link FSFlowTemplateCatalog} and sanity-checks its job templates, its resolved
 * input/output dataset descriptors and its raw template config.
 */
@Test
public void testGetFlowTemplate() throws Exception {
    URI flowTemplateCatalogUri = this.getClass().getClassLoader().getResource("template_catalog").toURI();
    // Create a FSFlowTemplateCatalog instance
    Properties properties = new Properties();
    properties.put(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY, flowTemplateCatalogUri.toString());
    Config config = ConfigFactory.parseProperties(properties);
    Config templateCatalogCfg = config.withValue(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY, config.getValue(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY));
    FSFlowTemplateCatalog catalog = new FSFlowTemplateCatalog(templateCatalogCfg);
    FlowTemplate flowTemplate = catalog.getFlowTemplate(new URI(TEST_TEMPLATE_DIR_URI));

    // Basic sanity check for the FlowTemplate: exactly four job templates are expected
    List<JobTemplate> jobTemplates = flowTemplate.getJobTemplates();
    Assert.assertEquals(jobTemplates.size(), 4);
    for (JobTemplate jobTemplate : jobTemplates) {
        // The template id is the last character of the file name with everything from the first '.' removed
        String baseName = new Path(jobTemplate.getUri()).getName().split("\\.")[0];
        String templateId = baseName.substring(baseName.length() - 1);
        // Fetched once per template; the config does not depend on the inner loop variable
        Config jobTemplateConfig = jobTemplate.getRawTemplateConfig();
        // Each template must define key<id>1=val<id>1 and key<id>2=val<id>2
        for (int j = 1; j <= 2; j++) {
            String suffix = templateId + Integer.toString(j);
            Assert.assertEquals(jobTemplateConfig.getString("key" + suffix), "val" + suffix);
        }
    }

    // Resolve the dataset descriptors against a flow config providing the team and dataset names
    Config flowConfig = ConfigFactory.empty().withValue("team.name", ConfigValueFactory.fromAnyRef("test-team")).withValue("dataset.name", ConfigValueFactory.fromAnyRef("test-dataset"));
    List<Pair<DatasetDescriptor, DatasetDescriptor>> inputOutputDescriptors = flowTemplate.getDatasetDescriptors(flowConfig, true);
    // assertEquals (rather than assertTrue on a comparison) so a failure reports the actual size
    Assert.assertEquals(inputOutputDescriptors.size(), 2);
    List<String> dirs = Lists.newArrayList("inbound", "outbound");
    for (int i = 0; i < 2; i++) {
        Pair<DatasetDescriptor, DatasetDescriptor> descriptorPair = inputOutputDescriptors.get(i);
        // Check the left descriptor first, then the right one; both must satisfy the same expectations
        for (int j = 0; j < 2; j++) {
            FSDatasetDescriptor datasetDescriptor = (FSDatasetDescriptor) (j == 0 ? descriptorPair.getLeft() : descriptorPair.getRight());
            Assert.assertEquals(datasetDescriptor.getPlatform(), "hdfs");
            Assert.assertEquals(datasetDescriptor.getFormatConfig().getFormat(), "avro");
            Assert.assertEquals(datasetDescriptor.getPath(), "/data/" + dirs.get(i) + "/test-team/test-dataset");
        }
    }

    // The raw template config must name FSDatasetDescriptor as the class of input and output descriptor 0
    Config flowTemplateConfig = flowTemplate.getRawTemplateConfig();
    Assert.assertEquals(flowTemplateConfig.getString(DatasetDescriptorConfigKeys.FLOW_EDGE_INPUT_DATASET_DESCRIPTOR_PREFIX + ".0." + DatasetDescriptorConfigKeys.CLASS_KEY), FSDatasetDescriptor.class.getCanonicalName());
    Assert.assertEquals(flowTemplateConfig.getString(DatasetDescriptorConfigKeys.FLOW_EDGE_OUTPUT_DATASET_DESCRIPTOR_PREFIX + ".0." + DatasetDescriptorConfigKeys.CLASS_KEY), FSDatasetDescriptor.class.getCanonicalName());
}
Also used : Path(org.apache.hadoop.fs.Path) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) Config(com.typesafe.config.Config) Properties(java.util.Properties) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) URI(java.net.URI) FSDatasetDescriptor(org.apache.gobblin.service.modules.dataset.FSDatasetDescriptor) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.testng.annotations.Test)

Example 5 with FlowTemplate

use of org.apache.gobblin.service.modules.template.FlowTemplate in project incubator-gobblin by apache.

the class FSFlowTemplateCatalog method getJobTemplatesForFlow.

/**
 * Loads every job file found in the jobs directory of a flow template and turns each into a
 * {@link JobTemplate}. When a job file names an underlying job template (via
 * {@code GOBBLIN_JOB_TEMPLATE_KEY}), that template's config is merged in as a fallback.
 *
 * @param flowTemplateDirURI Relative URI of the flow template directory
 * @return a list of {@link JobTemplate}s for a given flow identified by its {@link URI}.
 * @throws IOException
 * @throws SpecNotFoundException
 * @throws JobTemplate.TemplateException if {@code flowTemplateDirURI} fails validation
 * @throws URISyntaxException if a job file's template reference is not a valid URI
 * @throws RuntimeException if a job file's template reference has no scheme or a scheme other than
 *         {@code FS_SCHEME}
 */
public List<JobTemplate> getJobTemplatesForFlow(URI flowTemplateDirURI) throws IOException, SpecNotFoundException, JobTemplate.TemplateException, URISyntaxException {
    // Accept only files whose name ends with one of the known job file extensions
    PathFilter extensionFilter = file -> {
        for (String extension : JOB_FILE_EXTENSIONS) {
            if (file.getName().endsWith(extension)) {
                return true;
            }
        }
        return false;
    };
    if (!validateTemplateURI(flowTemplateDirURI)) {
        throw new JobTemplate.TemplateException(String.format("The FlowTemplate %s is not valid", flowTemplateDirURI));
    }
    List<JobTemplate> jobTemplates = new ArrayList<>();
    String templateCatalogDir = this.sysConfig.getString(ServiceConfigKeys.TEMPLATE_CATALOGS_FULLY_QUALIFIED_PATH_KEY);
    // Flow templates are located under templateCatalogDir/flowEdgeTemplates
    Path flowTemplateDirPath = PathUtils.mergePaths(new Path(templateCatalogDir), new Path(flowTemplateDirURI));
    // Job files (with extension .job) are located under templateCatalogDir/flowEdgeTemplates/jobs directory.
    Path jobFilePath = new Path(flowTemplateDirPath, JOBS_DIR_NAME);
    FileSystem fs = FileSystem.get(jobFilePath.toUri(), new Configuration());
    for (FileStatus fileStatus : fs.listStatus(jobFilePath, extensionFilter)) {
        Config jobConfig = loadHoconFileAtPath(fileStatus.getPath());
        // Check if the .job file has an underlying job template
        if (jobConfig.hasPath(GOBBLIN_JOB_TEMPLATE_KEY)) {
            URI jobTemplateRelativeUri = new URI(jobConfig.getString(GOBBLIN_JOB_TEMPLATE_KEY));
            // URI.getScheme() returns null for a scheme-less URI; treat that as unsupported instead of
            // letting it surface as a bare NullPointerException.
            String jobTemplateScheme = jobTemplateRelativeUri.getScheme();
            if (jobTemplateScheme == null || !jobTemplateScheme.equals(FS_SCHEME)) {
                // Report the scheme of the job template URI that was actually rejected
                throw new RuntimeException("Expected scheme " + FS_SCHEME + " got unsupported scheme " + jobTemplateScheme);
            }
            Path fullJobTemplatePath = PathUtils.mergePaths(new Path(templateCatalogDir), new Path(jobTemplateRelativeUri));
            // Values from the .job file take precedence; the job template only fills in missing keys
            jobConfig = jobConfig.withFallback(loadHoconFileAtPath(fullJobTemplatePath));
        }
        jobTemplates.add(new HOCONInputStreamJobTemplate(jobConfig, fileStatus.getPath().toUri(), this));
    }
    return jobTemplates;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) FlowTemplate(org.apache.gobblin.service.modules.template.FlowTemplate) URISyntaxException(java.net.URISyntaxException) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ConfigFactory(com.typesafe.config.ConfigFactory) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) PathUtils(org.apache.gobblin.util.PathUtils) URI(java.net.URI) ServiceConfigKeys(org.apache.gobblin.service.ServiceConfigKeys) HOCONInputStreamJobTemplate(org.apache.gobblin.runtime.template.HOCONInputStreamJobTemplate) Charsets(com.google.common.base.Charsets) FSJobCatalog(org.apache.gobblin.runtime.job_catalog.FSJobCatalog) Config(com.typesafe.config.Config) IOException(java.io.IOException) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) InputStreamReader(java.io.InputStreamReader) Alpha(org.apache.gobblin.annotation.Alpha) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) HOCONInputStreamFlowTemplate(org.apache.gobblin.service.modules.template.HOCONInputStreamFlowTemplate) List(java.util.List) GOBBLIN_JOB_TEMPLATE_KEY(org.apache.gobblin.runtime.AbstractJobLauncher.GOBBLIN_JOB_TEMPLATE_KEY) InputStream(java.io.InputStream) Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) HOCONInputStreamJobTemplate(org.apache.gobblin.runtime.template.HOCONInputStreamJobTemplate) JobTemplate(org.apache.gobblin.runtime.api.JobTemplate) HOCONInputStreamJobTemplate(org.apache.gobblin.runtime.template.HOCONInputStreamJobTemplate) URI(java.net.URI) FileSystem(org.apache.hadoop.fs.FileSystem)

Aggregations

FlowTemplate (org.apache.gobblin.service.modules.template.FlowTemplate)7 Config (com.typesafe.config.Config)5 URI (java.net.URI)5 Properties (java.util.Properties)3 ArrayList (java.util.ArrayList)2 JobTemplate (org.apache.gobblin.runtime.api.JobTemplate)2 Path (org.apache.hadoop.fs.Path)2 Test (org.testng.annotations.Test)2 Charsets (com.google.common.base.Charsets)1 Function (com.google.common.base.Function)1 ServiceManager (com.google.common.util.concurrent.ServiceManager)1 ConfigFactory (com.typesafe.config.ConfigFactory)1 File (java.io.File)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 URISyntaxException (java.net.URISyntaxException)1 Path (java.nio.file.Path)1 Arrays (java.util.Arrays)1 List (java.util.List)1