
Example 31 with JobExecutionPlan

Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.

In class DagManager, method submitEventsAndSetStatus:

private void submitEventsAndSetStatus(Dag<JobExecutionPlan> dag) {
    if (this.eventSubmitter.isPresent()) {
        for (DagNode<JobExecutionPlan> dagNode : dag.getNodes()) {
            JobExecutionPlan jobExecutionPlan = DagManagerUtils.getJobExecutionPlan(dagNode);
            Map<String, String> jobMetadata = TimingEventUtils.getJobMetadata(Maps.newHashMap(), jobExecutionPlan);
            // Emit a JOB_PENDING timing event for the job, then record the PENDING status on its plan.
            this.eventSubmitter.get().getTimingEvent(TimingEvent.LauncherTimings.JOB_PENDING).stop(jobMetadata);
            jobExecutionPlan.setExecutionStatus(PENDING);
        }
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan)
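As additional context, the snippet below is a minimal sketch of how a Dag<JobExecutionPlan> like the one consumed above could be assembled, for example in a test. The use of JobSpec.builder, InMemorySpecExecutor, the JobExecutionPlan(JobSpec, SpecExecutor) constructor, and JobExecutionPlanDagFactory.createDag is an assumption based on how these types appear elsewhere in the project, not code taken from DagManager.

import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import com.typesafe.config.ConfigFactory;

import org.apache.gobblin.runtime.api.JobSpec;
import org.apache.gobblin.runtime.api.SpecExecutor;
import org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor;
import org.apache.gobblin.service.modules.flowgraph.Dag;
import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
import org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory;

public class JobExecutionPlanDagSketch {

    // Builds a single-node Dag<JobExecutionPlan>; the types and constructors used here are assumptions for illustration.
    public static Dag<JobExecutionPlan> singleNodeDag() throws Exception {
        JobSpec jobSpec = JobSpec.builder(new URI("testFlowGroup/testFlowName/testJob"))
            .withConfig(ConfigFactory.empty())
            .withVersion("1")
            .build();
        // Assumed: an in-memory SpecExecutor is enough for tests; real deployments resolve one per topology.
        SpecExecutor specExecutor = new InMemorySpecExecutor(ConfigFactory.empty());
        List<JobExecutionPlan> plans = new ArrayList<>();
        plans.add(new JobExecutionPlan(jobSpec, specExecutor));
        // Assumed factory that links the plans into a Dag based on their declared dependencies.
        return new JobExecutionPlanDagFactory().createDag(plans);
    }
}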

Example 32 with JobExecutionPlan

Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.

In class DagManager, method setActive:

/**
 * When a {@link DagManager} becomes active, it loads the serialized representations of the currently running {@link Dag}s
 * from the checkpoint directory, deserializes the {@link Dag}s and adds them to a queue to be consumed by
 * the {@link DagManagerThread}s.
 * @param active a boolean to indicate if the {@link DagManager} is the leader.
 */
public synchronized void setActive(boolean active) {
    if (this.isActive == active) {
        log.info("DagManager already {}, skipping further actions.", (!active) ? "inactive" : "active");
        return;
    }
    this.isActive = active;
    try {
        if (this.isActive) {
            log.info("Activating DagManager.");
            log.info("Scheduling {} DagManager threads", numThreads);
            // Initializing state store for persisting Dags.
            this.dagStateStore = createDagStateStore(config, topologySpecMap);
            DagStateStore failedDagStateStore = createDagStateStore(ConfigUtils.getConfigOrEmpty(config, FAILED_DAG_STATESTORE_PREFIX).withFallback(config), topologySpecMap);
            Set<String> failedDagIds = Collections.synchronizedSet(failedDagStateStore.getDagIds());
            ContextAwareMeter allSuccessfulMeter = null;
            ContextAwareMeter allFailedMeter = null;
            if (instrumentationEnabled) {
                MetricContext metricContext = Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());
                allSuccessfulMeter = metricContext.contextAwareMeter(MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, ServiceMetricNames.SUCCESSFUL_FLOW_METER));
                allFailedMeter = metricContext.contextAwareMeter(MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, ServiceMetricNames.FAILED_FLOW_METER));
            }
            // On startup, the service creates DagManagerThreads that are scheduled at a fixed rate.
            this.dagManagerThreads = new DagManagerThread[numThreads];
            for (int i = 0; i < numThreads; i++) {
                DagManagerThread dagManagerThread = new DagManagerThread(jobStatusRetriever, dagStateStore, failedDagStateStore, runQueue[i], cancelQueue[i], resumeQueue[i], instrumentationEnabled, defaultQuota, perUserQuota, failedDagIds, allSuccessfulMeter, allFailedMeter, this.defaultJobStartSlaTimeMillis);
                this.dagManagerThreads[i] = dagManagerThread;
                this.scheduledExecutorPool.scheduleAtFixedRate(dagManagerThread, 0, this.pollingInterval, TimeUnit.SECONDS);
            }
            FailedDagRetentionThread failedDagRetentionThread = new FailedDagRetentionThread(failedDagStateStore, failedDagIds, failedDagRetentionTime);
            this.scheduledExecutorPool.scheduleAtFixedRate(failedDagRetentionThread, 0, retentionPollingInterval, TimeUnit.MINUTES);
            List<Dag<JobExecutionPlan>> dags = dagStateStore.getDags();
            log.info("Loading " + dags.size() + " dags from dag state store");
            for (Dag<JobExecutionPlan> dag : dags) {
                addDag(dag, false, false);
            }
        } else {
            // Mark the DagManager inactive.
            log.info("Inactivating the DagManager. Shutting down all DagManager threads");
            this.scheduledExecutorPool.shutdown();
            // The DagManagerThreads' metric mappings share the lifecycle of the threads themselves, so they are
            // lost across a deactivate/reactivate cycle, whereas the RootMetricContext is a persistent singleton.
            // To avoid an IllegalArgumentException when a new DagManagerThread re-registers an already-known metric
            // (in DagManagerThread::initialize) after re-enablement, remove all metrics a new thread would attempt to add.
            RootMetricContext.get().removeMatching(getMetricsFilterForDagManager());
            try {
                this.scheduledExecutorPool.awaitTermination(TERMINATION_TIMEOUT, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                log.error("Exception encountered when shutting down DagManager threads.", e);
            }
        }
    } catch (IOException e) {
        log.error("Exception encountered when activating the new DagManager", e);
        throw new RuntimeException(e);
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) IOException(java.io.IOException) RootMetricContext(org.apache.gobblin.metrics.RootMetricContext) MetricContext(org.apache.gobblin.metrics.MetricContext) ContextAwareMeter(org.apache.gobblin.metrics.ContextAwareMeter)
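As a usage note for the method above: setActive is designed to be driven by leader election, and because it is synchronized and returns early when the requested state matches the current one, callers can invoke it on every leadership notification without extra bookkeeping. The handler class below is hypothetical and only illustrates the call site; DagManager's package is assumed to be org.apache.gobblin.service.modules.orchestration.

import org.apache.gobblin.service.modules.orchestration.DagManager;

// Hypothetical leadership hook; only the setActive(...) call reflects the method shown above.
public class DagManagerLeadershipHandler {

    private final DagManager dagManager;

    public DagManagerLeadershipHandler(DagManager dagManager) {
        this.dagManager = dagManager;
    }

    // Invoked by whatever leader-election mechanism the deployment uses.
    public void onLeadershipChange(boolean isLeader) {
        // Idempotent: repeated calls with an unchanged value are a no-op inside setActive.
        dagManager.setActive(isLeader);
    }
}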

Example 33 with JobExecutionPlan

Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.

In class MysqlDagStateStore, method convertDagIntoState:

/**
 * For a {@link Dag} to work with {@link MysqlStateStore}, it needs to be packaged into a {@link State} object.
 * This is done by serializing the {@link Dag} first and pairing the result with the key {@link #DAG_KEY_IN_STATE}.
 *
 * The serialization step is required for readability and portability of the serde library.
 * @param dag The dag to be converted.
 * @return A {@link State} object that contains a single k-v pair for the {@link Dag}.
 */
private State convertDagIntoState(Dag<JobExecutionPlan> dag) {
    State outputState = new State();
    // Extract the JobExecutionPlans from the dag nodes and serialize the list under DAG_KEY_IN_STATE.
    List<JobExecutionPlan> jobExecutionPlanList = dag.getNodes().stream().map(Dag.DagNode::getValue).collect(Collectors.toList());
    outputState.setProp(DAG_KEY_IN_STATE, serDe.serialize(jobExecutionPlanList));
    return outputState;
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) State(org.apache.gobblin.configuration.State) Dag(org.apache.gobblin.service.modules.flowgraph.Dag)
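Reading the Dag back presumably mirrors this conversion. The sketch below is hypothetical: it assumes the enclosing class's serDe exposes a deserialize counterpart that returns the original List<JobExecutionPlan>, and that JobExecutionPlanDagFactory can reassemble the Dag; the method name and wiring are illustrative, not the actual MysqlDagStateStore code.

/**
 * Hypothetical inverse of {@code convertDagIntoState}: rebuilds the {@link Dag} from the single
 * k-v pair stored under {@link #DAG_KEY_IN_STATE}. Both {@code serDe.deserialize} and the
 * {@link JobExecutionPlanDagFactory} usage are assumptions for illustration.
 */
private Dag<JobExecutionPlan> convertStateIntoDag(State state) {
    List<JobExecutionPlan> jobExecutionPlanList = serDe.deserialize(state.getProp(DAG_KEY_IN_STATE));
    return new JobExecutionPlanDagFactory().createDag(jobExecutionPlanList);
}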

Example 34 with JobExecutionPlan

Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.

In class Orchestrator, method deleteFromExecutor:

private void deleteFromExecutor(Spec spec, Properties headers) {
    Dag<JobExecutionPlan> jobExecutionPlanDag = specCompiler.compileFlow(spec);
    if (jobExecutionPlanDag.isEmpty()) {
        _log.warn("Cannot determine an executor to delete Spec: " + spec);
        return;
    }
    // Delete all compiled JobSpecs on their respective Executor
    for (Dag.DagNode<JobExecutionPlan> dagNode : jobExecutionPlanDag.getNodes()) {
        JobExecutionPlan jobExecutionPlan = dagNode.getValue();
        Spec jobSpec = jobExecutionPlan.getJobSpec();
        try {
            SpecProducer<Spec> producer = jobExecutionPlan.getSpecExecutor().getProducer().get();
            _log.info(String.format("Going to delete JobSpec: %s on Executor: %s", jobSpec, producer));
            producer.deleteSpec(jobSpec.getUri(), headers);
        } catch (Exception e) {
            _log.error(String.format("Could not delete JobSpec: %s for flow: %s", jobSpec, spec), e);
        }
    }
}
Also used : JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) Dag(org.apache.gobblin.service.modules.flowgraph.Dag) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) TopologySpec(org.apache.gobblin.runtime.api.TopologySpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Spec(org.apache.gobblin.runtime.api.Spec) InvocationTargetException(java.lang.reflect.InvocationTargetException) IOException(java.io.IOException)
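The submit path that pushes, rather than deletes, the compiled JobSpecs would follow the same shape. The sketch below is illustrative only: it assumes SpecProducer exposes an addSpec counterpart to deleteSpec, and it reuses the enclosing Orchestrator's specCompiler and _log fields.

// Sketch only: mirrors deleteFromExecutor above; SpecProducer#addSpec is assumed to exist.
private void submitToExecutor(Spec spec) {
    Dag<JobExecutionPlan> jobExecutionPlanDag = specCompiler.compileFlow(spec);
    if (jobExecutionPlanDag.isEmpty()) {
        _log.warn("Cannot determine an executor to submit Spec: " + spec);
        return;
    }
    // Submit each compiled JobSpec to the SpecExecutor it was compiled for.
    for (Dag.DagNode<JobExecutionPlan> dagNode : jobExecutionPlanDag.getNodes()) {
        JobExecutionPlan jobExecutionPlan = dagNode.getValue();
        Spec jobSpec = jobExecutionPlan.getJobSpec();
        try {
            SpecProducer<Spec> producer = jobExecutionPlan.getSpecExecutor().getProducer().get();
            _log.info(String.format("Going to submit JobSpec: %s on Executor: %s", jobSpec, producer));
            producer.addSpec(jobSpec);
        } catch (Exception e) {
            _log.error(String.format("Could not submit JobSpec: %s for flow: %s", jobSpec, spec), e);
        }
    }
}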

Example 35 with JobExecutionPlan

Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.

In class GobblinServiceJobScheduler, method onAddSpec:

/**
 * @param addedSpec spec to be added
 * @return add spec response, which contains <code>null</code> if there is an error
 */
@Override
public AddSpecResponse onAddSpec(Spec addedSpec) {
    if (this.helixManager.isPresent() && !this.helixManager.get().isConnected()) {
        // Specs in store will be notified when Scheduler is added as listener to FlowCatalog, so ignore
        // .. Specs if in cluster mode and Helix is not yet initialized
        _log.info("System not yet initialized. Skipping Spec Addition: " + addedSpec);
        return null;
    }
    _log.info("New Flow Spec detected: " + addedSpec);
    if (!(addedSpec instanceof FlowSpec)) {
        return null;
    }
    FlowSpec flowSpec = (FlowSpec) addedSpec;
    URI flowSpecUri = flowSpec.getUri();
    Properties jobConfig = createJobConfig(flowSpec);
    boolean isExplain = flowSpec.isExplain();
    String response = null;
    // always try to compile the flow to verify if it is compilable
    Dag<JobExecutionPlan> dag = this.orchestrator.getSpecCompiler().compileFlow(flowSpec);
    // If dag is null then a compilation error has occurred
    if (dag != null && !dag.isEmpty()) {
        response = dag.toString();
    }
    boolean compileSuccess = FlowCatalog.isCompileSuccessful(response);
    if (isExplain || !compileSuccess || !this.isActive) {
        // todo: in case of a scheduled job, we should also check if the job schedule is a valid cron schedule
        // so it can be scheduled
        _log.info("Ignoring the spec {}. isExplain: {}, compileSuccess: {}, master: {}", addedSpec, isExplain, compileSuccess, this.isActive);
        return new AddSpecResponse<>(response);
    }
    // todo : we should probably not schedule a flow if it is a runOnce flow
    this.scheduledFlowSpecs.put(flowSpecUri.toString(), addedSpec);
    if (jobConfig.containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY)) {
        _log.info("{} Scheduling flow spec: {} ", this.serviceName, addedSpec);
        try {
            scheduleJob(jobConfig, null);
        } catch (JobException je) {
            _log.error("{} Failed to schedule or run FlowSpec {}", serviceName, addedSpec, je);
            this.scheduledFlowSpecs.remove(addedSpec.getUri().toString());
            return null;
        }
        if (PropertiesUtils.getPropAsBoolean(jobConfig, ConfigurationKeys.FLOW_RUN_IMMEDIATELY, "false")) {
            _log.info("RunImmediately requested, hence executing FlowSpec: " + addedSpec);
            this.jobExecutor.execute(new NonScheduledJobRunner(flowSpecUri, false, jobConfig, null));
        }
    } else {
        _log.info("No FlowSpec schedule found, so running FlowSpec: " + addedSpec);
        this.jobExecutor.execute(new NonScheduledJobRunner(flowSpecUri, true, jobConfig, null));
    }
    return new AddSpecResponse<>(response);
}
Also used : UnableToInterruptJobException(org.quartz.UnableToInterruptJobException) JobException(org.apache.gobblin.runtime.JobException) JobExecutionPlan(org.apache.gobblin.service.modules.spec.JobExecutionPlan) FlowSpec(org.apache.gobblin.runtime.api.FlowSpec) Properties(java.util.Properties) URI(java.net.URI) AddSpecResponse(org.apache.gobblin.runtime.spec_catalog.AddSpecResponse)
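To exercise onAddSpec, a FlowSpec needs at least a URI and a config carrying the flow group and name (plus a cron schedule if the flow is not run-once). The sketch below is a test-style illustration; the FlowSpec.Builder methods, the ConfigurationKeys constants, and the GobblinServiceJobScheduler package are assumptions based on their use elsewhere in the project.

import java.net.URI;

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigValueFactory;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.runtime.api.FlowSpec;
import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse;
import org.apache.gobblin.service.modules.scheduler.GobblinServiceJobScheduler;

public class OnAddSpecSketch {

    // Builds a minimal run-once FlowSpec and hands it to the scheduler; builder methods are assumptions.
    public static AddSpecResponse addFlow(GobblinServiceJobScheduler scheduler) throws Exception {
        Config flowConfig = ConfigFactory.empty()
            .withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef("testGroup"))
            .withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef("testFlow"));
        // No JOB_SCHEDULE_KEY is set, so onAddSpec should execute the flow immediately via NonScheduledJobRunner.
        FlowSpec flowSpec = FlowSpec.builder(new URI("/flows/testGroup/testFlow"))
            .withConfig(flowConfig)
            .withVersion("1")
            .withDescription("sketch flow")
            .build();
        return scheduler.onAddSpec(flowSpec);
    }
}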

Aggregations

JobExecutionPlan (org.apache.gobblin.service.modules.spec.JobExecutionPlan): 39 usages
Config (com.typesafe.config.Config): 22 usages
FlowSpec (org.apache.gobblin.runtime.api.FlowSpec): 21 usages
Test (org.testng.annotations.Test): 21 usages
JobSpec (org.apache.gobblin.runtime.api.JobSpec): 15 usages
ArrayList (java.util.ArrayList): 12 usages
Dag (org.apache.gobblin.service.modules.flowgraph.Dag): 12 usages
SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor): 10 usages
AzkabanProjectConfig (org.apache.gobblin.service.modules.orchestration.AzkabanProjectConfig): 8 usages
JobExecutionPlanDagFactory (org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory): 8 usages
URI (java.net.URI): 7 usages
Spec (org.apache.gobblin.runtime.api.Spec): 6 usages
TopologySpec (org.apache.gobblin.runtime.api.TopologySpec): 6 usages
IOException (java.io.IOException): 5 usages
DagNode (org.apache.gobblin.service.modules.flowgraph.Dag.DagNode): 5 usages
File (java.io.File): 4 usages
HashSet (java.util.HashSet): 4 usages
Path (org.apache.hadoop.fs.Path): 4 usages
Joiner (com.google.common.base.Joiner): 3 usages
Optional (com.google.common.base.Optional): 3 usages