Search in sources:

Example 11 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class AvroJobSpecKafkaJobMonitor method parseJobSpec.

/**
 * Creates a {@link JobSpec} or {@link URI} from the {@link AvroJobSpec} record.
 *
 * <p>The spec is built from the record's URI, version, description and properties. A non-empty
 * template URI is attached when it parses; one that does not parse is logged (with the cause)
 * and the spec is built without a template. The verb is looked up in the record metadata under
 * {@code VERB_KEY}: {@code ADD} and {@code UPDATE} yield the full {@link JobSpec}, any other verb
 * yields only the spec's {@link URI}.
 *
 * @param record the record as an {@link AvroJobSpec}
 * @return a {@link JobSpec} or {@link URI} wrapped in a {@link Collection} of {@link Either}
 */
@Override
public Collection<Either<JobSpec, URI>> parseJobSpec(AvroJobSpec record) {
    JobSpec.Builder jobSpecBuilder = JobSpec.builder(record.getUri());
    Properties props = new Properties();
    props.putAll(record.getProperties());
    jobSpecBuilder.withJobCatalogURI(record.getUri())
        .withVersion(record.getVersion())
        .withDescription(record.getDescription())
        .withConfigAsProperties(props);
    if (!record.getTemplateUri().isEmpty()) {
        try {
            jobSpecBuilder.withTemplate(new URI(record.getTemplateUri()));
        } catch (URISyntaxException e) {
            // Best effort: continue without a template, but keep the parse failure in the log.
            log.error("could not parse template URI " + record.getTemplateUri(), e);
        }
    }
    // NOTE(review): Verb.valueOf presumably throws when the metadata holds no verb or an
    // unrecognised one; that behaviour is left unchanged here.
    String verbName = record.getMetadata().get(VERB_KEY);
    Verb verb = Verb.valueOf(verbName);
    JobSpec jobSpec = jobSpecBuilder.build();
    log.info("Parsed job spec " + jobSpec.toString());
    if (verb == Verb.ADD || verb == Verb.UPDATE) {
        return Lists.newArrayList(Either.<JobSpec, URI>left(jobSpec));
    } else {
        // Every other verb only needs to identify the spec, so return its URI.
        return Lists.newArrayList(Either.<JobSpec, URI>right(jobSpec.getUri()));
    }
}
Also used : Verb(org.apache.gobblin.runtime.api.SpecExecutor.Verb) AvroJobSpec(org.apache.gobblin.runtime.job_spec.AvroJobSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) URI(java.net.URI)

Example 12 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class SLAEventKafkaJobMonitor method parseJobSpec.

/**
 * Builds a {@link JobSpec} from a tracking event.
 *
 * <p>Events for which {@code acceptEvent} returns false increment {@code rejectedEvents} and
 * produce an empty collection. Otherwise the spec URI is the configured base URI merged with the
 * event's dataset URN, and the job config contains one entry per {@code extractKeys} mapping
 * whose source key is present in the event metadata (stored under the mapped target key).
 *
 * @param event the tracking event to convert
 * @return an empty collection for a rejected event, otherwise a single left {@link Either}
 *         holding the built {@link JobSpec}
 */
@Override
public Collection<Either<JobSpec, URI>> parseJobSpec(GobblinTrackingEvent event) {
    if (!acceptEvent(event)) {
        this.rejectedEvents.inc();
        return Lists.newArrayList();
    }

    // Spec URI = base URI + dataset URN taken from the event metadata.
    String urn = event.getMetadata().get(SlaEventKeys.DATASET_URN_KEY);
    Path basePath = new Path(this.baseURI);
    Path datasetPath = new Path(urn);
    URI specUri = PathUtils.mergePaths(basePath, datasetPath).toUri();

    // Copy the selected metadata values into the job config under their mapped names.
    Map<String, String> extracted = Maps.newHashMap();
    for (Map.Entry<String, String> mapping : this.extractKeys.entrySet()) {
        String sourceKey = mapping.getKey();
        if (!event.getMetadata().containsKey(sourceKey)) {
            continue;
        }
        extracted.put(mapping.getValue(), event.getMetadata().get(sourceKey));
    }

    Config parsedConfig = ConfigFactory.parseMap(extracted);
    JobSpec builtSpec = JobSpec.builder(specUri)
        .withTemplate(this.template)
        .withConfig(parsedConfig)
        .build();
    return Lists.newArrayList(Either.<JobSpec, URI>left(builtSpec));
}
Also used : Path(org.apache.hadoop.fs.Path) Config(com.typesafe.config.Config) JobSpec(org.apache.gobblin.runtime.api.JobSpec) URI(java.net.URI) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 13 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class EmbeddedGobblin method runAsync.

/**
 * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started.
 *
 * <p>The job spec is loaded from the job file when one is set; otherwise it is built from the
 * user configuration (with system properties as fallback), resolved through the template when
 * one is set. The spec is then run on a freshly built Gobblin instance driver, which is stopped
 * again once the job finishes, whether it succeeds or fails.
 *
 * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes.
 * @throws TimeoutException if the Gobblin job does not start within the launch timeout.
 * @throws InterruptedException if the calling thread is interrupted while this method is blocked waiting.
 */
@NotOnCli
public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException {
    // Run function to distribute jars to workers in distributed mode
    this.distributeJarsFunction.run();
    Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig);
    Config userConfig = ConfigFactory.parseMap(this.userConfigMap);
    JobSpec jobSpec;
    if (this.jobFile.isPresent()) {
        // A job file was supplied: load it, letting user-provided configuration take precedence.
        try {
            Path jobFilePath = this.jobFile.get();
            PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(), jobFilePath.getFileSystem(new Configuration()), PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS, PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
            Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false));
            ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter(jobFilePath.getParent(), Optional.<String>absent());
            jobSpec = converter.apply(jobConfig);
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to run embedded Gobblin.", ioe);
        }
    } else {
        // No job file: build the spec from user configuration, falling back to system properties.
        Config finalConfig = userConfig.withFallback(sysProps);
        if (this.template != null) {
            try {
                finalConfig = this.template.getResolvedConfig(finalConfig);
            } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
                throw new RuntimeException(exc);
            }
        }
        jobSpec = this.specBuilder.withConfig(finalConfig).build();
    }
    ResolvedJobSpec resolvedJobSpec;
    try {
        resolvedJobSpec = new ResolvedJobSpec(jobSpec);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new RuntimeException("Failed to resolved template.", exc);
    }
    // The instance runs exactly this one job: a static catalog plus an immediate scheduler.
    final JobCatalog jobCatalog = new StaticJobCatalog(Optional.of(this.useLog), Lists.<JobSpec>newArrayList(resolvedJobSpec));
    SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment("EmbeddedGobblinInstance", this.useLog, getSysConfig());
    StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder(Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog).withJobCatalog(jobCatalog).withImmediateJobScheduler();
    for (GobblinInstancePluginFactory plugin : this.plugins) {
        builder.addPlugin(plugin);
    }
    final GobblinInstanceDriver driver = builder.build();
    EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog);
    driver.registerJobLifecycleListener(listener);
    driver.startAsync();
    boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit());
    if (!started) {
        // The job never started within the launch timeout: tear the instance down before failing.
        log.warn("Timeout waiting for job to start. Aborting.");
        driver.stopAsync();
        driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit());
        throw new TimeoutException("Timeout waiting for job to start.");
    }
    final JobExecutionDriver jobDriver = listener.getJobDriver();
    // Stop the Gobblin instance driver when the job finishes.
    Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() {

        @Override
        public void onSuccess(@Nullable JobExecutionResult result) {
            stopGobblinInstanceDriver();
        }

        @Override
        public void onFailure(Throwable t) {
            stopGobblinInstanceDriver();
        }

        private void stopGobblinInstanceDriver() {
            try {
                driver.stopAsync();
                driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(), EmbeddedGobblin.this.shutdownTimeout.getTimeUnit());
            } catch (TimeoutException te) {
                // Nothing more can be done from a callback; log with the cause so the failure is diagnosable.
                log.error("Failed to shutdown Gobblin instance driver.", te);
            }
        }
    });
    return listener.getJobDriver();
}
Also used : ImmutableFSJobCatalog(org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog) Configuration(org.apache.hadoop.conf.Configuration) Config(com.typesafe.config.Config) StandardGobblinInstanceDriver(org.apache.gobblin.runtime.instance.StandardGobblinInstanceDriver) GobblinInstanceDriver(org.apache.gobblin.runtime.api.GobblinInstanceDriver) StandardGobblinInstanceDriver(org.apache.gobblin.runtime.instance.StandardGobblinInstanceDriver) StaticJobCatalog(org.apache.gobblin.runtime.job_catalog.StaticJobCatalog) JobCatalog(org.apache.gobblin.runtime.api.JobCatalog) ImmutableFSJobCatalog(org.apache.gobblin.runtime.job_catalog.ImmutableFSJobCatalog) GobblinInstanceEnvironment(org.apache.gobblin.runtime.api.GobblinInstanceEnvironment) SimpleGobblinInstanceEnvironment(org.apache.gobblin.runtime.instance.SimpleGobblinInstanceEnvironment) SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) StaticJobCatalog(org.apache.gobblin.runtime.job_catalog.StaticJobCatalog) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec) GobblinInstancePluginFactory(org.apache.gobblin.runtime.api.GobblinInstancePluginFactory) JobExecutionDriver(org.apache.gobblin.runtime.api.JobExecutionDriver) TimeoutException(java.util.concurrent.TimeoutException) Path(org.apache.hadoop.fs.Path) PullFileLoader(org.apache.gobblin.util.PullFileLoader) IOException(java.io.IOException) SimpleGobblinInstanceEnvironment(org.apache.gobblin.runtime.instance.SimpleGobblinInstanceEnvironment) JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) NotOnCli(org.apache.gobblin.runtime.cli.NotOnCli)

Example 14 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class FSPathAlterationListenerAdaptor method onFileChange.

/**
 * Handles a change to a job configuration file: reloads the pull file at {@code rawPath},
 * converts it to a {@link JobSpec} and passes it to {@code listeners.onUpdateJob}.
 *
 * @param rawPath path of the changed configuration file
 * @throws RuntimeException if the pull file cannot be loaded; the original {@link IOException}
 *         is attached as the cause
 */
@Override
public void onFileChange(Path rawPath) {
    try {
        JobSpec updatedJobSpec = this.converter.apply(loader.loadPullFile(rawPath, sysConfig, false));
        listeners.onUpdateJob(updatedJobSpec);
    } catch (IOException e) {
        // Keep the same message as before, but chain the cause so the stack trace is not lost.
        throw new RuntimeException(e.getMessage(), e);
    }
}
Also used : JobSpec(org.apache.gobblin.runtime.api.JobSpec) IOException(java.io.IOException)

Example 15 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class FSPathAlterationListenerAdaptor method onFileCreate.

/**
 * Handles the creation of a job configuration file: loads the pull file at {@code rawPath},
 * converts it to a new {@link JobSpec} and passes it to {@code listeners.onAddJob}.
 *
 * @param rawPath This could be the complete path to the newly created configuration file.
 * @throws RuntimeException if the pull file cannot be loaded; the original {@link IOException}
 *         is attached as the cause
 */
@Override
public void onFileCreate(Path rawPath) {
    try {
        JobSpec newJobSpec = this.converter.apply(loader.loadPullFile(rawPath, sysConfig, false));
        listeners.onAddJob(newJobSpec);
    } catch (IOException e) {
        // Keep the same message as before, but chain the cause so the stack trace is not lost.
        throw new RuntimeException(e.getMessage(), e);
    }
}
Also used : JobSpec(org.apache.gobblin.runtime.api.JobSpec) IOException(java.io.IOException)

Aggregations

JobSpec (org.apache.gobblin.runtime.api.JobSpec)52 Test (org.testng.annotations.Test)34 URI (java.net.URI)18 Properties (java.util.Properties)14 Spec (org.apache.gobblin.runtime.api.Spec)11 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)11 ResolvedJobSpec (org.apache.gobblin.runtime.job_spec.ResolvedJobSpec)9 Map (java.util.Map)8 Pair (org.apache.commons.lang3.tuple.Pair)8 Config (com.typesafe.config.Config)7 Logger (org.slf4j.Logger)7 JobCatalogListener (org.apache.gobblin.runtime.api.JobCatalogListener)6 WriteResponse (org.apache.gobblin.writer.WriteResponse)6 IOException (java.io.IOException)5 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)5 JobExecutionDriver (org.apache.gobblin.runtime.api.JobExecutionDriver)5 JobExecutionResult (org.apache.gobblin.runtime.api.JobExecutionResult)5 JobLifecycleListener (org.apache.gobblin.runtime.api.JobLifecycleListener)5 Path (org.apache.hadoop.fs.Path)5 SpecNotFoundException (org.apache.gobblin.runtime.api.SpecNotFoundException)4