Use of org.apache.gobblin.service.modules.flowgraph.Dag in the Apache project incubator-gobblin — class DagManager, method setActive.
/**
 * When a {@link DagManager} becomes active, it loads the serialized representations of the currently running {@link Dag}s
 * from the checkpoint directory, deserializes the {@link Dag}s and adds them to a queue to be consumed by
 * the {@link DagManagerThread}s. When it becomes inactive, it shuts down the scheduled thread pool and
 * clears the DagManager-owned metrics from the {@link RootMetricContext}.
 * @param active a boolean to indicate if the {@link DagManager} is the leader.
 * @throws RuntimeException wrapping any {@link IOException} raised while creating the state stores or loading dags.
 */
public synchronized void setActive(boolean active) {
  if (this.isActive == active) {
    log.info("DagManager already {}, skipping further actions.", active ? "active" : "inactive");
    return;
  }
  this.isActive = active;
  try {
    if (this.isActive) {
      log.info("Activating DagManager.");
      log.info("Scheduling {} DagManager threads", numThreads);
      // Initializing state store for persisting Dags.
      this.dagStateStore = createDagStateStore(config, topologySpecMap);
      // The failed-dag store may be configured separately; fall back to the main config for unset keys.
      DagStateStore failedDagStateStore =
          createDagStateStore(ConfigUtils.getConfigOrEmpty(config, FAILED_DAG_STATESTORE_PREFIX).withFallback(config), topologySpecMap);
      // Synchronized because the set is shared across all DagManagerThreads and the retention thread.
      Set<String> failedDagIds = Collections.synchronizedSet(failedDagStateStore.getDagIds());
      ContextAwareMeter allSuccessfulMeter = null;
      ContextAwareMeter allFailedMeter = null;
      if (instrumentationEnabled) {
        MetricContext metricContext = Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()), getClass());
        allSuccessfulMeter = metricContext.contextAwareMeter(
            MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, ServiceMetricNames.SUCCESSFUL_FLOW_METER));
        allFailedMeter = metricContext.contextAwareMeter(
            MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, ServiceMetricNames.FAILED_FLOW_METER));
      }
      // On startup, the service creates DagManagerThreads that are scheduled at a fixed rate.
      this.dagManagerThreads = new DagManagerThread[numThreads];
      for (int i = 0; i < numThreads; i++) {
        DagManagerThread dagManagerThread = new DagManagerThread(jobStatusRetriever, dagStateStore, failedDagStateStore,
            runQueue[i], cancelQueue[i], resumeQueue[i], instrumentationEnabled, defaultQuota, perUserQuota,
            failedDagIds, allSuccessfulMeter, allFailedMeter, this.defaultJobStartSlaTimeMillis);
        this.dagManagerThreads[i] = dagManagerThread;
        this.scheduledExecutorPool.scheduleAtFixedRate(dagManagerThread, 0, this.pollingInterval, TimeUnit.SECONDS);
      }
      FailedDagRetentionThread failedDagRetentionThread =
          new FailedDagRetentionThread(failedDagStateStore, failedDagIds, failedDagRetentionTime);
      this.scheduledExecutorPool.scheduleAtFixedRate(failedDagRetentionThread, 0, retentionPollingInterval, TimeUnit.MINUTES);
      // Resume any dags that were in flight when the previous leader checkpointed them.
      List<Dag<JobExecutionPlan>> dags = dagStateStore.getDags();
      log.info("Loading " + dags.size() + " dags from dag state store");
      for (Dag<JobExecutionPlan> dag : dags) {
        addDag(dag, false, false);
      }
    } else {
      // Mark the DagManager inactive.
      log.info("Inactivating the DagManager. Shutting down all DagManager threads");
      this.scheduledExecutorPool.shutdown();
      // The DMThread's metrics mappings follow the lifecycle of the DMThread itself and so are lost by DM deactivation-reactivation but the RootMetricContext is a (persistent) singleton.
      // To avoid IllegalArgumentException by the RMC preventing (re-)add of a metric already known, remove all metrics that a new DMThread thread would attempt to add (in DagManagerThread::initialize) whenever running post-re-enablement
      RootMetricContext.get().removeMatching(getMetricsFilterForDagManager());
      try {
        // Surface a timed-out shutdown instead of silently dropping awaitTermination's result.
        if (!this.scheduledExecutorPool.awaitTermination(TERMINATION_TIMEOUT, TimeUnit.SECONDS)) {
          log.warn("DagManager threads did not terminate within {} seconds of shutdown request.", TERMINATION_TIMEOUT);
        }
      } catch (InterruptedException e) {
        log.error("Exception encountered when shutting down DagManager threads.", e);
        // Restore the interrupt status so callers up the stack can observe the interruption.
        Thread.currentThread().interrupt();
      }
    }
  } catch (IOException e) {
    log.error("Exception encountered when activating the new DagManager", e);
    throw new RuntimeException(e);
  }
}
Use of org.apache.gobblin.service.modules.flowgraph.Dag in the Apache project incubator-gobblin — class MysqlDagStateStore, method convertDagIntoState.
/**
 * Packages a {@link Dag} into a {@link State} object so it can be persisted through {@link MysqlStateStore}.
 * The dag's {@link JobExecutionPlan}s are extracted, serialized, and stored under the single key
 * {@link #DAG_KEY_IN_STATE}.
 *
 * The serialization step is required for readability and portability of the serde lib.
 * @param dag The dag to be converted.
 * @return A {@link State} holding one k-v pair representing the {@link Dag}.
 */
private State convertDagIntoState(Dag<JobExecutionPlan> dag) {
  // Collect the execution plans carried by each node of the dag.
  List<JobExecutionPlan> plans = dag.getNodes().stream()
      .map(node -> node.getValue())
      .collect(Collectors.toList());
  State result = new State();
  // Serialize the plan list up front so the state store only ever sees a flat string value.
  result.setProp(DAG_KEY_IN_STATE, serDe.serialize(plans));
  return result;
}
Use of org.apache.gobblin.service.modules.flowgraph.Dag in the Apache project incubator-gobblin — class Orchestrator, method deleteFromExecutor.
/**
 * Compiles the given {@link Spec} into its {@link JobExecutionPlan} dag and asks each plan's
 * {@link SpecProducer} to delete the corresponding job spec. Failures for individual job specs
 * are logged and do not abort deletion of the remaining ones.
 * @param spec the flow spec whose compiled job specs should be deleted.
 * @param headers headers forwarded to each producer's delete call.
 */
private void deleteFromExecutor(Spec spec, Properties headers) {
  Dag<JobExecutionPlan> compiledDag = specCompiler.compileFlow(spec);
  if (compiledDag.isEmpty()) {
    _log.warn("Cannot determine an executor to delete Spec: " + spec);
    return;
  }
  // Delete all compiled JobSpecs on their respective Executor
  for (Dag.DagNode<JobExecutionPlan> node : compiledDag.getNodes()) {
    JobExecutionPlan plan = node.getValue();
    Spec jobSpec = plan.getJobSpec();
    try {
      SpecProducer<Spec> specProducer = plan.getSpecExecutor().getProducer().get();
      _log.info(String.format("Going to delete JobSpec: %s on Executor: %s", jobSpec, specProducer));
      specProducer.deleteSpec(jobSpec.getUri(), headers);
    } catch (Exception e) {
      // Best-effort: keep deleting the remaining job specs even if one executor call fails.
      _log.error(String.format("Could not delete JobSpec: %s for flow: %s", jobSpec, spec), e);
    }
  }
}
Use of org.apache.gobblin.service.modules.flowgraph.Dag in the Apache project incubator-gobblin — class FlowGraphPath, method concatenate.
/**
 * Concatenate two {@link Dag}s. The {@link ConfigurationKeys#JOB_DEPENDENCIES} of the {@link JobSpec}s at the
 * start of the child {@link Dag} are rewritten to depend on the forkable end nodes of the parent {@link Dag},
 * so the concatenated dag preserves execution order.
 * @param dagLeft The parent dag.
 * @param dagRight The child dag.
 * @return The concatenated dag with modified {@link ConfigurationKeys#JOB_DEPENDENCIES}.
 */
@VisibleForTesting
static Dag<JobExecutionPlan> concatenate(Dag<JobExecutionPlan> dagLeft, Dag<JobExecutionPlan> dagRight) {
  // Fork nodes: end nodes of the parent dag that may be forked, i.e. nodes with no dependents after concatenation.
  Set<DagNode<JobExecutionPlan>> forkNodes = dagLeft.getEndNodes().stream()
      .filter(node -> isNodeForkable(node))
      .collect(Collectors.toSet());
  Set<DagNode<JobExecutionPlan>> upstreamNodes = dagLeft.getDependencyNodes(forkNodes);
  if (!upstreamNodes.isEmpty()) {
    // Comma-joined job names of the parent nodes the child's start jobs must wait on.
    List<String> jobNames = upstreamNodes.stream()
        .map(node -> node.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY))
        .collect(Collectors.toList());
    String dependencyCsv = Joiner.on(",").join(jobNames);
    for (DagNode<JobExecutionPlan> startNode : dagRight.getStartNodes()) {
      JobSpec startSpec = startNode.getValue().getJobSpec();
      startSpec.setConfig(startSpec.getConfig()
          .withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(dependencyCsv)));
    }
  }
  return dagLeft.concatenate(dagRight, forkNodes);
}
Aggregations