Search in sources :

Example 31 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class IdentityFlowToJobSpecCompiler method compileFlow.

/**
 * Compiles a {@link FlowSpec} into a single {@link JobSpec} and pairs it with the first
 * topology that advertises a capability matching the flow's source and destination.
 *
 * <p>The search stops at the first match (intended limitation of this compiler), so the
 * returned map holds at most one entry. The success meter and the compilation timer are
 * updated on every normal return, whether or not a candidate executor was found.
 *
 * @param spec the spec to compile; must be non-null and an instance of {@link FlowSpec}
 * @return an insertion-ordered map from the generated JobSpec to its candidate executor;
 *         empty when no topology capability matches
 * @throws RuntimeException if the capabilities of a topology cannot be obtained; when the
 *         cause is an interruption, the thread's interrupt flag is restored first
 */
@Override
public Map<Spec, SpecExecutor> compileFlow(Spec spec) {
    Preconditions.checkNotNull(spec);
    Preconditions.checkArgument(spec instanceof FlowSpec, "IdentityFlowToJobSpecCompiler only converts FlowSpec to JobSpec");
    long startTime = System.nanoTime();
    Map<Spec, SpecExecutor> specExecutorMap = Maps.newLinkedHashMap();
    FlowSpec flowSpec = (FlowSpec) spec;
    String source = flowSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY);
    String destination = flowSpec.getConfig().getString(ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
    log.info(String.format("Compiling flow for source: %s and destination: %s", source, destination));
    JobSpec jobSpec = jobSpecGenerator(flowSpec);
    topologySearch:
    for (TopologySpec topologySpec : topologySpecMap.values()) {
        try {
            Map<ServiceNode, ServiceNode> capabilities = (Map<ServiceNode, ServiceNode>) topologySpec.getSpecExecutor().getCapabilities().get();
            for (Map.Entry<ServiceNode, ServiceNode> capability : capabilities.entrySet()) {
                log.info(String.format("Evaluating current JobSpec: %s against TopologySpec: %s with " + "capability of source: %s and destination: %s ", jobSpec.getUri(), topologySpec.getUri(), capability.getKey(), capability.getValue()));
                if (source.equals(capability.getKey().getNodeName()) && destination.equals(capability.getValue().getNodeName())) {
                    specExecutorMap.put(jobSpec, topologySpec.getSpecExecutor());
                    log.info(String.format("Current JobSpec: %s is executable on TopologySpec: %s. Added TopologySpec as candidate.", jobSpec.getUri(), topologySpec.getUri()));
                    log.info("Since we found a candidate executor, we will not try to compute more. " + "(Intended limitation for IdentityFlowToJobSpecCompiler)");
                    // Leave both loops but fall through to the metric updates below;
                    // returning here would skip the success meter and the timer.
                    break topologySearch;
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can observe it.
            Thread.currentThread().interrupt();
            Instrumented.markMeter(this.flowCompilationFailedMeter);
            throw new RuntimeException("Cannot determine topology capabilities", e);
        } catch (ExecutionException e) {
            Instrumented.markMeter(this.flowCompilationFailedMeter);
            throw new RuntimeException("Cannot determine topology capabilities", e);
        }
    }
    Instrumented.markMeter(this.flowCompilationSuccessFulMeter);
    Instrumented.updateTimer(this.flowCompilationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
    return specExecutorMap;
}
Also used : ServiceNode(org.apache.gobblin.runtime.api.ServiceNode) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Spec(org.apache.gobblin.runtime.api.Spec) ExecutionException(java.util.concurrent.ExecutionException) Map(java.util.Map)

Example 32 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class BaseFlowToJobSpecCompiler method jobSpecGenerator.

/**
 * Builds the {@link JobSpec} for a flow in this single-hop compiler.
 *
 * <p>The spec is created from the flow's config, description and version. When the flow
 * declares template URIs and a template catalog is available, only the first template URI
 * is applied and the spec is resolved against the catalog; otherwise the spec is built
 * unresolved. Afterwards the job schedule is removed, the flow name and group (when present)
 * are copied to the job name and group, a flow execution id taken from the current time in
 * milliseconds is added, and the spec's properties are regenerated from the final config.
 *
 * @param flowSpec the flow to derive the job from
 * @return the generated job spec
 * @throws RuntimeException if the template cannot be resolved from the template catalog
 */
protected JobSpec jobSpecGenerator(FlowSpec flowSpec) {
    JobSpec.Builder builder = JobSpec.builder(jobSpecURIGenerator(flowSpec))
        .withConfig(flowSpec.getConfig())
        .withDescription(flowSpec.getDescription())
        .withVersion(flowSpec.getVersion());
    JobSpec generated;
    if (!(flowSpec.getTemplateURIs().isPresent() && templateCatalog.isPresent())) {
        // No template to apply: use the spec exactly as built.
        generated = builder.build();
        log.info("Unresolved JobSpec properties are: " + generated.getConfigAsProperties());
    } else {
        // Only the first template URI is honored by the identity compiler.
        builder = builder.withTemplate(flowSpec.getTemplateURIs().get().iterator().next());
        try {
            generated = new ResolvedJobSpec(builder.build(), templateCatalog.get());
            log.info("Resolved JobSpec properties are: " + generated.getConfigAsProperties());
        } catch (SpecNotFoundException | JobTemplate.TemplateException e) {
            throw new RuntimeException("Could not resolve template in JobSpec from TemplateCatalog", e);
        }
    }

    // Drop the schedule from the job config.
    generated.setConfig(generated.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY));

    // Carry the flow name over as job.name.
    if (flowSpec.getConfig().hasPath(ConfigurationKeys.FLOW_NAME_KEY)) {
        generated.setConfig(
            generated.getConfig().withValue(
                ConfigurationKeys.JOB_NAME_KEY,
                flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY)));
    }

    // Carry the flow group over as job.group.
    if (flowSpec.getConfig().hasPath(ConfigurationKeys.FLOW_GROUP_KEY)) {
        generated.setConfig(
            generated.getConfig().withValue(
                ConfigurationKeys.JOB_GROUP_KEY,
                flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY)));
    }

    // Stamp this compilation with a flow execution id.
    long executionId = System.currentTimeMillis();
    generated.setConfig(
        generated.getConfig().withValue(
            ConfigurationKeys.FLOW_EXECUTION_ID_KEY,
            ConfigValueFactory.fromAnyRef(executionId)));

    // Keep the Properties view of the spec in sync with the final config.
    generated.setConfigAsProperties(ConfigUtils.configToProperties(generated.getConfig()));
    return generated;
}
Also used : SpecNotFoundException(org.apache.gobblin.runtime.api.SpecNotFoundException) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) ResolvedJobSpec(org.apache.gobblin.runtime.job_spec.ResolvedJobSpec)

Example 33 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class IdentityFlowToJobSpecCompilerTest method testCompilerWithTemplateCatalog.

/**
 * Compiles the test FlowSpec with the template-catalog-backed compiler and checks that
 * exactly one JobSpec is produced, that it carries the expected test properties and
 * flow/job identifiers, that the job schedule is absent, and that a flow execution id
 * is present.
 */
@Test
public void testCompilerWithTemplateCatalog() {
    FlowSpec flowSpec = initFlowSpec();
    // Run compiler on flowSpec
    Map<Spec, SpecExecutor> specExecutorMapping = this.compilerWithTemplateCalague.compileFlow(flowSpec);
    // Assert pre-requisites
    Assert.assertNotNull(specExecutorMapping, "Expected non null mapping.");
    // assertEquals reports the actual size on failure, unlike assertTrue(size == 1)
    Assert.assertEquals(specExecutorMapping.size(), 1, "Expected 1 executor for FlowSpec.");
    // Assert FlowSpec compilation
    Spec spec = specExecutorMapping.keySet().iterator().next();
    Assert.assertTrue(spec instanceof JobSpec, "Expected JobSpec compiled from FlowSpec.");
    // Assert JobSpec properties
    JobSpec jobSpec = (JobSpec) spec;
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty1"), "testValue1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty2"), "test.Value1");
    Assert.assertEquals(jobSpec.getConfig().getString("testProperty3"), "100");
    Assert.assertEquals(jobSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY), TEST_SOURCE_NAME);
    // The schedule must not be present in the compiled job
    Assert.assertFalse(jobSpec.getConfig().hasPath(ConfigurationKeys.JOB_SCHEDULE_KEY));
    // Job name/group are expected to equal the flow name/group, which are also still present
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_NAME_KEY), TEST_FLOW_NAME);
    Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_GROUP_KEY), TEST_FLOW_GROUP);
    Assert.assertTrue(jobSpec.getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
}
Also used : FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) InMemorySpecExecutor(org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Spec(org.apache.gobblin.runtime.api.Spec) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Test(org.testng.annotations.Test)

Example 34 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class TestQuartzJobSpecScheduler method testSchedule.

/**
 * End-to-end check of QuartzJobSpecScheduler: schedules two jobs, replaces the first with a
 * newer version, unschedules it, and verifies through a shared queue which job runnables
 * actually fire. The test relies on wall-clock timing (polls and sleeps), so it takes a
 * noticeable amount of real time to run.
 */
@Test
public void testSchedule() throws Exception {
    final Logger log = LoggerFactory.getLogger(getClass().getName() + ".testSchedule");
    // In-memory Quartz configuration (RAMJobStore) with a dedicated scheduler instance name
    Config quartzCfg = ConfigFactory.parseMap(ImmutableMap.<String, Object>builder().put("org.quartz.scheduler.instanceName", "TestQuartzJobSpecScheduler.testSchedule").put("org.quartz.threadPool.threadCount", "10").put("org.quartz.jobStore.class", "org.quartz.simpl.RAMJobStore").build());
    QuartzJobSpecScheduler scheduler = new QuartzJobSpecScheduler(log, quartzCfg);
    scheduler.startAsync();
    scheduler.awaitRunning(10, TimeUnit.SECONDS);
    Assert.assertTrue(scheduler._scheduler.getScheduler().isStarted());
    // Every job runnable below records its JobSpec here so the test can observe firings in order
    final ArrayBlockingQueue<JobSpec> expectedCalls = new ArrayBlockingQueue<>(100);
    try {
        // NOTE(review): in Quartz cron syntax these presumably fire every 5 seconds,
        // starting at second 0 and second 3 respectively -- confirm
        Config jobCfg1 = ConfigFactory.parseMap(ImmutableMap.<String, Object>builder().put(ConfigurationKeys.JOB_SCHEDULE_KEY, "0/5 * * * * ?").build());
        Config jobCfg2 = ConfigFactory.parseMap(ImmutableMap.<String, Object>builder().put(ConfigurationKeys.JOB_SCHEDULE_KEY, "3/5 * * * * ?").build());
        final JobSpec js1 = JobSpec.builder("test.job1").withConfig(jobCfg1).build();
        final JobSpec js2 = JobSpec.builder("test.job2").withConfig(jobCfg2).build();
        // Same URI and config as js1 but version "2"; used later to update the first job
        final JobSpec js1_2 = JobSpec.builder("test.job1").withConfig(jobCfg1).withVersion("2").build();
        JobSpecSchedule jss1 = scheduler.scheduleJob(js1, new Runnable() {

            @Override
            public void run() {
                expectedCalls.offer(js1);
            }
        });
        Assert.assertEquals(scheduler.getSchedules().size(), 1);
        Assert.assertEquals(jss1.getJobSpec(), js1);
        Assert.assertTrue(jss1 instanceof QuartzJobSchedule);
        QuartzJobSchedule qjss1 = (QuartzJobSchedule) jss1;
        // The underlying Quartz scheduler must know both the job detail and its trigger
        Assert.assertNotNull(scheduler._scheduler.getScheduler().getJobDetail(qjss1.getQuartzTrigger().getJobKey()));
        Assert.assertNotNull(scheduler._scheduler.getScheduler().getTrigger(qjss1.getQuartzTrigger().getKey()));
        Assert.assertTrue(qjss1.getQuartzTrigger().mayFireAgain());
        // Wait for the next run
        JobSpec expJs1 = expectedCalls.poll(6000, TimeUnit.MILLISECONDS);
        Assert.assertEquals(expJs1, js1);
        // Wait for the next run
        expJs1 = expectedCalls.poll(6000, TimeUnit.MILLISECONDS);
        Assert.assertEquals(expJs1, js1);
        // Schedule another job
        JobSpecSchedule jss2 = scheduler.scheduleJob(js2, new Runnable() {

            @Override
            public void run() {
                expectedCalls.offer(js2);
            }
        });
        Assert.assertEquals(scheduler.getSchedules().size(), 2);
        Assert.assertEquals(jss2.getJobSpec(), js2);
        // Wait for the next run -- we should get js2
        JobSpec expJs2 = expectedCalls.poll(6000, TimeUnit.MILLISECONDS);
        Assert.assertEquals(expJs2, js2);
        // Wait for the next run -- we should get js1
        expJs1 = expectedCalls.poll(6000, TimeUnit.MILLISECONDS);
        Assert.assertEquals(expJs1, js1);
        // Wait for the next run -- we should get js2
        expJs2 = expectedCalls.poll(6000, TimeUnit.MILLISECONDS);
        log.info("Found call: " + expJs2);
        Assert.assertEquals(expJs2, js2);
        // Update the first job
        QuartzJobSchedule qjss1_2 = (QuartzJobSchedule) scheduler.scheduleJob(js1_2, new Runnable() {

            @Override
            public void run() {
                expectedCalls.offer(js1_2);
            }
        });
        // Scheduling js1_2 is expected to replace js1's schedule rather than add a third one
        Assert.assertEquals(scheduler.getSchedules().size(), 2);
        // Sleep for 15 seconds -- we should see at least 2 runs of js1_2 and js2
        Thread.sleep(15000);
        int js1_2_cnt = 0;
        int js2_cnt = 0;
        // Iterating does not remove entries; the queue is emptied explicitly further below
        for (JobSpec nextJs : expectedCalls) {
            log.info("Found call: " + nextJs);
            if (js1_2.equals(nextJs)) {
                ++js1_2_cnt;
            } else if (js2.equals(nextJs)) {
                ++js2_cnt;
            } else {
                Assert.fail("Unexpected job spec: " + nextJs);
            }
        }
        Assert.assertTrue(js1_2_cnt >= 2, "js1_2_cnt=" + js1_2_cnt);
        Assert.assertTrue(js2_cnt >= 2, "js2_cnt=" + js2_cnt);
        scheduler.unscheduleJob(js1_2.getUri());
        Assert.assertEquals(scheduler.getSchedules().size(), 1);
        Assert.assertFalse(scheduler._scheduler.getScheduler().checkExists(qjss1_2.getQuartzTrigger().getJobKey()));
        // Flush calls
        Thread.sleep(1000);
        expectedCalls.clear();
        // All subsequent calls should be for js2
        for (int i = 0; i < 2; ++i) {
            JobSpec nextJs = expectedCalls.poll(12000, TimeUnit.MILLISECONDS);
            Assert.assertEquals(nextJs, js2);
        }
    } finally {
        // Always stop the scheduler, even when an assertion above fails
        scheduler.stopAsync();
        scheduler.awaitTerminated(10, TimeUnit.SECONDS);
    }
    // we may have to drain at most one call due to race conditions
    if (null != expectedCalls.poll(2100, TimeUnit.MILLISECONDS)) {
        Assert.assertNull(expectedCalls.poll(3000, TimeUnit.MILLISECONDS));
    }
}
Also used : ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Config(com.typesafe.config.Config) QuartzJobSchedule(org.apache.gobblin.runtime.scheduler.QuartzJobSpecScheduler.QuartzJobSchedule) JobSpecSchedule(org.apache.gobblin.runtime.api.JobSpecSchedule) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Logger(org.slf4j.Logger) Test(org.testng.annotations.Test)

Example 35 with JobSpec

use of org.apache.gobblin.runtime.api.JobSpec in project incubator-gobblin by apache.

the class TestFilteredJobLifecycleListener method testSimple.

/**
 * Checks that a FilteredJobLifecycleListener built with a filter on URI
 * "gobblin:/testSimple/job1" and version "2" passes callbacks for the version-2 job spec
 * to the wrapped listener and passes none for the version-1 job spec.
 */
@Test
public void testSimple() {
    Config jobConfig = ConfigFactory.empty()
        .withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef("myJob"));
    JobSpec versionOneSpec = JobSpec.builder("gobblin:/testSimple/job1")
        .withVersion("1")
        .withConfig(jobConfig)
        .build();
    JobSpec versionTwoSpec = JobSpec.builder("gobblin:/testSimple/job1")
        .withVersion("2")
        .withConfig(jobConfig)
        .build();

    JobLifecycleListener delegate = mock(JobLifecycleListener.class);
    FilteredJobLifecycleListener filtered = new FilteredJobLifecycleListener(
        JobSpecFilter.builder().eqURI("gobblin:/testSimple/job1").eqVersion("2").build(),
        delegate);

    JobExecutionState versionOneState = new JobExecutionState(
        versionOneSpec,
        JobExecutionUpdatable.createFromJobSpec(versionOneSpec),
        Optional.<JobExecutionStateListener>absent());
    JobExecutionState versionTwoState = new JobExecutionState(
        versionTwoSpec,
        JobExecutionUpdatable.createFromJobSpec(versionTwoSpec),
        Optional.<JobExecutionStateListener>absent());

    // Events for version 1: expected to be filtered out.
    filtered.onAddJob(versionOneSpec);
    filtered.onDeleteJob(versionOneSpec.getUri(), versionOneSpec.getVersion());
    filtered.onUpdateJob(versionOneSpec);
    filtered.onStatusChange(versionOneState, RunningState.PENDING, RunningState.RUNNING);
    filtered.onStageTransition(versionOneState, "Stage1", "Stage2");
    filtered.onMetadataChange(versionOneState, "metaKey", "value1", "value2");

    // Events for version 2: expected to reach the delegate.
    filtered.onAddJob(versionTwoSpec);
    filtered.onDeleteJob(versionTwoSpec.getUri(), versionTwoSpec.getVersion());
    filtered.onUpdateJob(versionTwoSpec);
    filtered.onStatusChange(versionTwoState, RunningState.RUNNING, RunningState.SUCCESSFUL);
    filtered.onStageTransition(versionTwoState, "Stage1", "Stage2");
    filtered.onMetadataChange(versionTwoState, "metaKey", "value1", "value2");

    // Each version-2 callback was forwarded.
    verify(delegate).onAddJob(eq(versionTwoSpec));
    verify(delegate).onDeleteJob(eq(versionTwoSpec.getUri()), eq(versionTwoSpec.getVersion()));
    verify(delegate).onUpdateJob(eq(versionTwoSpec));
    verify(delegate).onStatusChange(eq(versionTwoState), eq(RunningState.RUNNING), eq(RunningState.SUCCESSFUL));
    verify(delegate).onStageTransition(eq(versionTwoState), eq("Stage1"), eq("Stage2"));
    verify(delegate).onMetadataChange(eq(versionTwoState), eq("metaKey"), eq("value1"), eq("value2"));

    // No version-1 callback was forwarded.
    verify(delegate, never()).onAddJob(eq(versionOneSpec));
    verify(delegate, never()).onDeleteJob(eq(versionOneSpec.getUri()), eq(versionOneSpec.getVersion()));
    verify(delegate, never()).onUpdateJob(eq(versionOneSpec));
    verify(delegate, never()).onStatusChange(eq(versionOneState), eq(RunningState.RUNNING), eq(RunningState.SUCCESSFUL));
    verify(delegate, never()).onStatusChange(eq(versionOneState), eq(RunningState.PENDING), eq(RunningState.RUNNING));
    verify(delegate, never()).onStageTransition(eq(versionOneState), eq("Stage1"), eq("Stage2"));
    verify(delegate, never()).onMetadataChange(eq(versionOneState), eq("metaKey"), eq("value1"), eq("value2"));
}
Also used : Config(com.typesafe.config.Config) JobExecutionState(org.apache.gobblin.runtime.api.JobExecutionState) JobLifecycleListener(org.apache.gobblin.runtime.api.JobLifecycleListener) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Test(org.testng.annotations.Test)

Aggregations

JobSpec (org.apache.gobblin.runtime.api.JobSpec): 52, Test (org.testng.annotations.Test): 34, URI (java.net.URI): 18, Properties (java.util.Properties): 14, Spec (org.apache.gobblin.runtime.api.Spec): 11, SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor): 11, ResolvedJobSpec (org.apache.gobblin.runtime.job_spec.ResolvedJobSpec): 9, Map (java.util.Map): 8, Pair (org.apache.commons.lang3.tuple.Pair): 8, Config (com.typesafe.config.Config): 7, Logger (org.slf4j.Logger): 7, JobCatalogListener (org.apache.gobblin.runtime.api.JobCatalogListener): 6, WriteResponse (org.apache.gobblin.writer.WriteResponse): 6, IOException (java.io.IOException): 5, ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 5, JobExecutionDriver (org.apache.gobblin.runtime.api.JobExecutionDriver): 5, JobExecutionResult (org.apache.gobblin.runtime.api.JobExecutionResult): 5, JobLifecycleListener (org.apache.gobblin.runtime.api.JobLifecycleListener): 5, Path (org.apache.hadoop.fs.Path): 5, SpecNotFoundException (org.apache.gobblin.runtime.api.SpecNotFoundException): 4