Search in sources :

Example 1 with PullFileLoader

Use of org.apache.gobblin.util.PullFileLoader in the incubator-gobblin project by Apache.

From the class EmbeddedGobblin, method runAsync.

/**
 * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started.
 *
 * <p>The {@link JobSpec} is built from the job file when one is present (user configuration takes
 * precedence over the pull file contents); otherwise it is built from the user configuration with the
 * system configuration as fallback, resolved through the template if one is set. A Gobblin instance
 * driver is then started with an immediate job scheduler, and is stopped again once the job finishes,
 * whether it succeeds or fails.
 *
 * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes.
 * @throws TimeoutException if the Gobblin job does not start within the launch timeout, or if the
 *     instance driver does not terminate within the shutdown timeout while aborting such a launch.
 * @throws InterruptedException presumably if the calling thread is interrupted while waiting for the
 *     job to start (propagated from the blocking calls below).
 */
@NotOnCli
public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException {
    // Run function to distribute jars to workers in distributed mode
    this.distributeJarsFunction.run();
    Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig);
    Config userConfig = ConfigFactory.parseMap(this.userConfigMap);
    JobSpec jobSpec;
    if (this.jobFile.isPresent()) {
        try {
            Path jobFilePath = this.jobFile.get();
            PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(), jobFilePath.getFileSystem(new Configuration()), PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS, PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
            // Explicit user configuration wins over whatever the pull file declares.
            Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false));
            ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter(jobFilePath.getParent(), Optional.<String>absent());
            jobSpec = converter.apply(jobConfig);
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to run embedded Gobblin.", ioe);
        }
    } else {
        Config finalConfig = userConfig.withFallback(sysProps);
        if (this.template != null) {
            try {
                finalConfig = this.template.getResolvedConfig(finalConfig);
            } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
                throw new RuntimeException(exc);
            }
        }
        jobSpec = this.specBuilder.withConfig(finalConfig).build();
    }
    ResolvedJobSpec resolvedJobSpec;
    try {
        resolvedJobSpec = new ResolvedJobSpec(jobSpec);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new RuntimeException("Failed to resolved template.", exc);
    }
    final JobCatalog jobCatalog = new StaticJobCatalog(Optional.of(this.useLog), Lists.<JobSpec>newArrayList(resolvedJobSpec));
    SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment("EmbeddedGobblinInstance", this.useLog, getSysConfig());
    StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder(Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog).withJobCatalog(jobCatalog).withImmediateJobScheduler();
    for (GobblinInstancePluginFactory plugin : this.plugins) {
        builder.addPlugin(plugin);
    }
    final GobblinInstanceDriver driver = builder.build();
    // The listener must be registered before the driver starts so the job start is not missed.
    EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog);
    driver.registerJobLifecycleListener(listener);
    driver.startAsync();
    boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit());
    if (!started) {
        log.warn("Timeout waiting for job to start. Aborting.");
        driver.stopAsync();
        driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit());
        throw new TimeoutException("Timeout waiting for job to start.");
    }
    final JobExecutionDriver jobDriver = listener.getJobDriver();
    // Stop the Gobblin instance driver when the job finishes.
    Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() {

        @Override
        public void onSuccess(@Nullable JobExecutionResult result) {
            stopGobblinInstanceDriver();
        }

        @Override
        public void onFailure(Throwable t) {
            stopGobblinInstanceDriver();
        }

        private void stopGobblinInstanceDriver() {
            try {
                driver.stopAsync();
                driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(), EmbeddedGobblin.this.shutdownTimeout.getTimeUnit());
            } catch (TimeoutException te) {
                // Keep the cause in the log so a hung shutdown can be diagnosed.
                log.error("Failed to shutdown Gobblin instance driver.", te);
            }
        }
    });
    // Return the same driver instance the shutdown callback was attached to.
    return jobDriver;
}
Also used : ImmutableFSJobCatalog(org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog) Configuration(org.apache.hadoop.conf.Configuration) Config(com.typesafe.config.Config) StandardGobblinInstanceDriver(org.apache.gobblin.runtime.instance.StandardGobblinInstanceDriver) GobblinInstanceDriver(org.apache.gobblin.runtime.api.GobblinInstanceDriver) StandardGobblinInstanceDriver(org.apache.gobblin.runtime.instance.StandardGobblinInstanceDriver) StaticJobCatalog(org.apache.gobblin.runtime.job_catalog.StaticJobCatalog) JobCatalog(org.apache.gobblin.runtime.api.JobCatalog) ImmutableFSJobCatalog(org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog) GobblinInstanceEnvironment(org.apache.gobblin.runtime.api.GobblinInstanceEnvironment) SimpleGobblinInstanceEnvironment(org.apache.gobblin.runtime.instance.SimpleGobblinInstanceEnvironment) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) StaticJobCatalog(org.apache.gobblin.runtime.job_catalog.StaticJobCatalog) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec) GobblinInstancePluginFactory(org.apache.gobblin.runtime.api.GobblinInstancePluginFactory) JobExecutionDriver(org.apache.gobblin.runtime.api.JobExecutionDriver) TimeoutException(java.util.concurrent.TimeoutException) Path(org.apache.hadoop.fs.Path) PullFileLoader(org.apache.gobblin.util.PullFileLoader) IOException(java.io.IOException) SimpleGobblinInstanceEnvironment(org.apache.gobblin.runtime.instance.SimpleGobblinInstanceEnvironment) JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) NotOnCli(org.apache.gobblin.runtime.cli.NotOnCli)

Example 2 with PullFileLoader

Use of org.apache.gobblin.util.PullFileLoader in the incubator-gobblin project by Apache.

From the class FSJobCatalogHelperTest, method setUp.

/**
 * Builds a temporary job configuration directory tree used by the tests in this class and
 * initializes the {@link PullFileLoader} and job spec converter that read from it.
 *
 * <p>Layout created under the temporary root:
 * <pre>
 *   root.properties
 *   test1/test11.pull
 *   test1/test12.PULL
 *   test1/test11/test111.pull
 *   test2/test.PROPERTIES
 *   test2/test21.PULL
 * </pre>
 *
 * @throws IOException if the temporary directory or any of the files cannot be created or written.
 */
@BeforeClass
public void setUp() throws IOException {
    this.jobConfigDir = java.nio.file.Files.createTempDirectory(String.format("gobblin-test_%s_job-conf", this.getClass().getSimpleName())).toFile();
    FileUtils.forceDeleteOnExit(this.jobConfigDir);
    this.subDir1 = new File(this.jobConfigDir, "test1");
    this.subDir11 = new File(this.subDir1, "test11");
    this.subDir2 = new File(this.jobConfigDir, "test2");
    this.subDir1.mkdirs();
    this.subDir11.mkdirs();
    this.subDir2.mkdirs();
    this.sysConfig = ConfigFactory.parseMap(ImmutableMap.<String, Object>builder().put(ConfigurationKeys.JOB_CONFIG_FILE_GENERAL_PATH_KEY, this.jobConfigDir.getAbsolutePath()).build());
    ImmutableFSJobCatalog.ConfigAccessor cfgAccess = new ImmutableFSJobCatalog.ConfigAccessor(this.sysConfig);
    this.loader = new PullFileLoader(new Path(jobConfigDir.toURI()), FileSystem.get(new Configuration()), cfgAccess.getJobConfigurationFileExtensions(), PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
    this.converter = new ImmutableFSJobCatalog.JobSpecConverter(new Path(this.jobConfigDir.toURI()), Optional.of(FSJobCatalog.CONF_EXTENSION));

    // Properties.store flushes but does not close the writer, so every writer below is
    // closed explicitly with try-with-resources to avoid leaking file handles.

    Properties rootProps = new Properties();
    rootProps.setProperty("k1", "a1");
    rootProps.setProperty("k2", "a2");
    // test-job-conf-dir/root.properties
    try (FileWriter writer = new FileWriter(new File(this.jobConfigDir, "root.properties"))) {
        rootProps.store(writer, "");
    }

    Properties jobProps1 = new Properties();
    jobProps1.setProperty("k1", "c1");
    jobProps1.setProperty("k3", "b3");
    jobProps1.setProperty("k6", "a6");
    // test-job-conf-dir/test1/test11.pull
    try (FileWriter writer = new FileWriter(new File(this.subDir1, "test11.pull"))) {
        jobProps1.store(writer, "");
    }

    Properties jobProps2 = new Properties();
    jobProps2.setProperty("k7", "a7");
    // test-job-conf-dir/test1/test12.PULL
    try (FileWriter writer = new FileWriter(new File(this.subDir1, "test12.PULL"))) {
        jobProps2.store(writer, "");
    }

    Properties jobProps3 = new Properties();
    jobProps3.setProperty("k1", "d1");
    jobProps3.setProperty("k8", "a8");
    jobProps3.setProperty("k9", "${k8}");
    // test-job-conf-dir/test1/test11/test111.pull
    try (FileWriter writer = new FileWriter(new File(this.subDir11, "test111.pull"))) {
        jobProps3.store(writer, "");
    }

    Properties props2 = new Properties();
    props2.setProperty("k2", "b2");
    props2.setProperty("k5", "a5");
    // test-job-conf-dir/test2/test.PROPERTIES
    try (FileWriter writer = new FileWriter(new File(this.subDir2, "test.PROPERTIES"))) {
        props2.store(writer, "");
    }

    Properties jobProps4 = new Properties();
    jobProps4.setProperty("k5", "b5");
    // test-job-conf-dir/test2/test21.PULL
    try (FileWriter writer = new FileWriter(new File(this.subDir2, "test21.PULL"))) {
        jobProps4.store(writer, "");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutableFSJobCatalog(org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog) Configuration(org.apache.hadoop.conf.Configuration) PullFileLoader(org.apache.gobblin.util.PullFileLoader) FileWriter(java.io.FileWriter) Properties(java.util.Properties) File(java.io.File) BeforeClass(org.testng.annotations.BeforeClass)

Example 3 with PullFileLoader

Use of org.apache.gobblin.util.PullFileLoader in the incubator-gobblin project by Apache.

From the class PullFileToConfigConverter, method convert.

/**
 * Converts every pull file matching {@code fileGlobToConvert} into a config file that references
 * the job template at {@code templateURI}.
 *
 * <p>For each pull file, the output starts with only the template path and then accumulates the
 * pull-file keys whose values are missing from, or differ from, the resolved template configuration.
 * Keys listed under {@code DO_NOT_OVERRIDE_KEY} are not overridden when the template already defines
 * them, and keys in {@code FILTER_KEYS} are never emitted. The result is rendered as formatted,
 * non-JSON config text and written to {@code outputPath} with a {@code conf} extension.
 *
 * @throws IOException if the template cannot be loaded, a file cannot be read or written, or the
 *     final configuration for a pull file still has unresolved keys.
 */
public void convert() throws IOException {
    Config baseConfig = ConfigFactory.parseString(DO_NOT_OVERRIDE_KEY + ": []");
    FileSystem pullFileFs = pullFileRootPath.getFileSystem(new Configuration());
    FileSystem outputFs = this.outputPath.getFileSystem(new Configuration());
    Config sysConfig = ConfigFactory.parseFile(this.sysConfigPath);
    PullFileLoader pullFileLoader = new PullFileLoader(this.pullFileRootPath, pullFileFs, PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS, PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
    PackagedTemplatesJobCatalogDecorator catalog = new PackagedTemplatesJobCatalogDecorator();
    // Allow unresolved substitutions while the output is being built up incrementally.
    ConfigResolveOptions configResolveOptions = ConfigResolveOptions.defaults();
    configResolveOptions = configResolveOptions.setAllowUnresolved(true);
    ResourceBasedJobTemplate template;
    Config templateConfig;
    try {
        template = (ResourceBasedJobTemplate) catalog.getTemplate(templateURI.toUri());
        templateConfig = sysConfig.withFallback(template.getRawTemplateConfig()).withFallback(baseConfig).resolve(configResolveOptions);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new IOException(exc);
    }
    Set<String> doNotOverride = templateConfig.hasPath(DO_NOT_OVERRIDE_KEY) ? Sets.newHashSet(templateConfig.getStringList(DO_NOT_OVERRIDE_KEY)) : Sets.<String>newHashSet();
    ConfigRenderOptions configRenderOptions = ConfigRenderOptions.defaults();
    configRenderOptions = configRenderOptions.setComments(false);
    configRenderOptions = configRenderOptions.setOriginComments(false);
    configRenderOptions = configRenderOptions.setFormatted(true);
    configRenderOptions = configRenderOptions.setJson(false);
    for (FileStatus pullFile : pullFileFs.globStatus(this.fileGlobToConvert)) {
        Config pullFileConfig = pullFileLoader.loadPullFile(pullFile.getPath(), ConfigFactory.empty(), true).resolve();
        Map<String, String> outputConfigMap = Maps.newHashMap();
        outputConfigMap.put(ConfigurationKeys.JOB_TEMPLATE_PATH, this.templateURI.toString());
        // Iterate to a fixed point: adding one key can make another key resolvable (or different)
        // on the next pass, so repeat until a full pass adds nothing.
        boolean somethingChanged;
        do {
            somethingChanged = false;
            Config currentOutputConfig = ConfigFactory.parseMap(outputConfigMap);
            Config currentResolvedConfig = currentOutputConfig.withFallback(templateConfig).resolve(configResolveOptions);
            for (Map.Entry<Object, Object> entry : ConfigUtils.configToProperties(pullFileConfig).entrySet()) {
                String key = (String) entry.getKey();
                String value = (String) entry.getValue();
                try {
                    if ((!currentResolvedConfig.hasPath(key)) || (!currentResolvedConfig.getString(key).equals(value) && !doNotOverride.contains(key))) {
                        if (!FILTER_KEYS.contains(key)) {
                            somethingChanged = true;
                            outputConfigMap.put(key, value);
                        }
                    }
                } catch (ConfigException.NotResolved nre) {
                // path is unresolved in config, will try again next iteration
                }
            }
        } while (somethingChanged);
        try {
            Config outputConfig = ConfigFactory.parseMap(outputConfigMap);
            // Validation only: a strict resolve against the template; the resolved value itself is not needed.
            outputConfig.withFallback(templateConfig).resolve();
            String rendered = outputConfig.root().render(configRenderOptions);
            Path newPath = PathUtils.removeExtension(pullFile.getPath(), PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.toArray(new String[] {}));
            newPath = PathUtils.addExtension(newPath, "conf");
            newPath = new Path(this.outputPath, newPath.getName());
            // try-with-resources closes the stream even when the write fails.
            try (FSDataOutputStream os = outputFs.create(newPath)) {
                os.write(rendered.getBytes(Charsets.UTF_8));
            }
        } catch (ConfigException.NotResolved nre) {
            throw new IOException("Not all configuration keys were resolved in pull file " + pullFile.getPath(), nre);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) PackagedTemplatesJobCatalogDecorator(org.apache.gobblin.runtime.job_catalog.PackagedTemplatesJobCatalogDecorator) Config(com.typesafe.config.Config) PullFileLoader(org.apache.gobblin.util.PullFileLoader) ConfigException(com.typesafe.config.ConfigException) IOException(java.io.IOException) ConfigResolveOptions(com.typesafe.config.ConfigResolveOptions) ConfigRenderOptions(com.typesafe.config.ConfigRenderOptions) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Map(java.util.Map)

Aggregations

PullFileLoader (org.apache.gobblin.util.PullFileLoader)3 Configuration (org.apache.hadoop.conf.Configuration)3 Path (org.apache.hadoop.fs.Path)3 Config (com.typesafe.config.Config)2 IOException (java.io.IOException)2 SpecNotFoundException (org.apache.gobblin.runtime.api.SpecNotFoundException)2 ImmutableFSJobCatalog (org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog)2 ConfigException (com.typesafe.config.ConfigException)1 ConfigRenderOptions (com.typesafe.config.ConfigRenderOptions)1 ConfigResolveOptions (com.typesafe.config.ConfigResolveOptions)1 File (java.io.File)1 FileWriter (java.io.FileWriter)1 Map (java.util.Map)1 Properties (java.util.Properties)1 TimeoutException (java.util.concurrent.TimeoutException)1 GobblinInstanceDriver (org.apache.gobblin.runtime.api.GobblinInstanceDriver)1 GobblinInstanceEnvironment (org.apache.gobblin.runtime.api.GobblinInstanceEnvironment)1 GobblinInstancePluginFactory (org.apache.gobblin.runtime.api.GobblinInstancePluginFactory)1 JobCatalog (org.apache.gobblin.runtime.api.JobCatalog)1 JobExecutionDriver (org.apache.gobblin.runtime.api.JobExecutionDriver)1