use of com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent in project titus-control-plane by Netflix.
the class JobReconciliationFrameworkFactory method newInstance.
/**
 * Builds a reconciliation framework pre-populated with one restored engine per
 * job/task group returned by {@code loadJobsAndTasksFromStore}.
 * <p>
 * Every task of every loaded job is passed through {@code validateTask}; tasks for
 * which no validated instance is returned are reported to the error collector. The
 * engine itself is created and registered before its tasks are validated. After all
 * jobs are processed, {@code errorCollector.failIfTooManyBadRecords()} is invoked.
 *
 * @return a new framework instance wrapping the restored engines
 */
ReconciliationFramework<JobManagerReconcilerEvent> newInstance() {
    List<Pair<Job, List<Task>>> storedJobsWithTasks = loadJobsAndTasksFromStore(errorCollector);

    List<InternalReconciliationEngine<JobManagerReconcilerEvent>> restoredEngines = new ArrayList<>();
    for (Pair<Job, List<Task>> jobWithTasks : storedJobsWithTasks) {
        Job restoredJob = jobWithTasks.getLeft();
        List<Task> restoredTasks = jobWithTasks.getRight();

        restoredEngines.add(newRestoredEngine(restoredJob, restoredTasks));

        // Report each task record that does not pass validation.
        for (Task restoredTask : restoredTasks) {
            if (validateTask(restoredTask).isPresent()) {
                continue;
            }
            errorCollector.invalidTaskRecord(restoredTask.getId());
        }
    }

    errorCollector.failIfTooManyBadRecords();

    return new DefaultReconciliationFramework<>(
            restoredEngines,
            bootstrapModel -> newEngine(bootstrapModel, true),
            jobManagerConfiguration.getReconcilerIdleTimeoutMs(),
            jobManagerConfiguration.getReconcilerActiveTimeoutMs(),
            jobManagerConfiguration.getCheckpointIntervalMs(),
            INDEX_COMPARATORS,
            JOB_EVENT_FACTORY,
            registry,
            optionalScheduler
    );
}
use of com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method moveServiceTask.
/**
 * Moves a task from one service job to another.
 * <p>
 * All lookups and validation happen lazily, when the returned observable is subscribed to:
 * <ul>
 * <li>the task must belong to a known engine, otherwise {@code taskNotFound} is thrown;</li>
 * <li>the owning job must be a service job and its id must equal {@code sourceJobId};</li>
 * <li>source and target job must differ;</li>
 * <li>the target job must exist and be a service job;</li>
 * <li>when move-task validation is enabled, the two jobs must be compatible.</li>
 * </ul>
 *
 * @param sourceJobId  id of the job that is expected to own the task
 * @param targetJobId  id of the job the task is moved to
 * @param taskId       id of the task to move
 * @param callMetadata caller information attached to the resulting change actions
 * @return the observable returned by the framework's {@code changeReferenceModel} call
 */
@Override
public Observable<Void> moveServiceTask(String sourceJobId, String targetJobId, String taskId, CallMetadata callMetadata) {
    return Observable.defer(() -> {
        // Resolve and validate the source side.
        Pair<ReconciliationEngine<JobManagerReconcilerEvent>, EntityHolder> sourceEngineAndTask = reconciliationFramework
                .findEngineByChildId(taskId)
                .orElseThrow(() -> JobManagerException.taskNotFound(taskId));
        ReconciliationEngine<JobManagerReconcilerEvent> engineFrom = sourceEngineAndTask.getLeft();
        Job<ServiceJobExt> jobFrom = engineFrom.getReferenceView().getEntity();
        String jobFromId = jobFrom.getId();

        if (!JobFunctions.isServiceJob(jobFrom)) {
            throw JobManagerException.notServiceJob(jobFrom.getId());
        }
        if (!jobFromId.equals(sourceJobId)) {
            throw JobManagerException.taskJobMismatch(taskId, sourceJobId);
        }
        if (jobFromId.equals(targetJobId)) {
            throw JobManagerException.sameJobs(jobFrom.getId());
        }

        // Resolve and validate the target side.
        ReconciliationEngine<JobManagerReconcilerEvent> engineTo = reconciliationFramework
                .findEngineByRootId(targetJobId)
                .orElseThrow(() -> JobManagerException.jobNotFound(targetJobId));
        Job<ServiceJobExt> jobTo = engineTo.getReferenceView().getEntity();
        if (!JobFunctions.isServiceJob(jobTo)) {
            throw JobManagerException.notServiceJob(jobTo.getId());
        }

        // Compatibility is always computed, but only enforced when the feature flag is on.
        JobCompatibility compatibility = JobCompatibility.of(jobFrom, jobTo);
        boolean validationEnabled = featureActivationConfiguration.isMoveTaskValidationEnabled();
        if (validationEnabled && !compatibility.isCompatible()) {
            Optional<String> diffReport = ProtobufExt.diffReport(
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorFrom()),
                    GrpcJobManagementModelConverters.toGrpcJobDescriptor(compatibility.getNormalizedDescriptorTo())
            );
            throw JobManagerException.notCompatible(jobFrom, jobTo, diffReport.orElse(""));
        }

        MoveTaskBetweenJobsAction moveAction = new MoveTaskBetweenJobsAction(engineFrom, engineTo, taskId, store, callMetadata, versionSupplier);
        return reconciliationFramework.changeReferenceModel(
                moveAction,
                (rootId, modelUpdatesObservable) -> {
                    // The same factory is invoked per root id; label the action by which side it is for.
                    boolean forTargetJob = targetJobId.equals(rootId);
                    String name = forTargetJob ? "moveTask(to)" : "moveTask(from)";
                    String summary = forTargetJob
                            ? "Moving a task to this job from job " + jobFrom.getId()
                            : "Moving a task out of this job to job " + jobTo.getId();
                    return new TitusChangeAction(Trigger.API, rootId, null, name, summary, callMetadata) {
                        @Override
                        public Observable<List<ModelActionHolder>> apply() {
                            return modelUpdatesObservable;
                        }
                    };
                },
                jobFrom.getId(),
                jobTo.getId()
        );
    });
}
use of com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent in project titus-control-plane by Netflix.
the class JobTransactionLoggerTest method testLogFormatting.
/**
 * This test exists only so that the generated log line can be inspected visually;
 * the single assertion merely checks that the formatted line is not empty.
 */
@Test
public void testLogFormatting() throws Exception {
    // A job and a copy of it transitioned to the Finished state.
    Job previousJob = createJob();
    JobStatus finishedStatus = JobStatus.newBuilder().withState(JobState.Finished).build();
    Job currentJob = previousJob.toBuilder().withStatus(finishedStatus).build();

    // Model update that swaps the entity in the job holder for the finished job.
    ModelActionHolder modelActionHolder = ModelActionHolder.reference(
            TitusModelAction.newModelUpdate("testModelAction")
                    .job(previousJob)
                    .trigger(Trigger.API)
                    .summary("Job model update")
                    .jobUpdate(jobHolder -> jobHolder.setEntity(currentJob))
    );

    CallMetadata callMetadata = CallMetadata.newBuilder()
            .withCallerId("LoggerTest")
            .withCallReason("Testing logger transaction")
            .build();
    TitusChangeAction changeAction = TitusChangeAction.newAction("testChangeAction")
            .job(previousJob)
            .trigger(Trigger.API)
            .summary("Job update")
            .callMetadata(callMetadata)
            .applyModelUpdate(self -> modelActionHolder);

    EntityHolder currentHolder = EntityHolder.newRoot(currentJob.getId(), currentJob);
    Optional<EntityHolder> previousHolder = Optional.of(EntityHolder.newRoot(previousJob.getId(), previousJob));
    JobManagerReconcilerEvent jobReconcilerEvent = new JobModelUpdateReconcilerEvent(
            previousJob,
            changeAction,
            modelActionHolder,
            currentHolder,
            previousHolder,
            "1"
    );

    String logLine = JobTransactionLogger.doFormat(jobReconcilerEvent);
    assertThat(logLine).isNotEmpty();
    logger.info("Job event: {}", logLine);
}
use of com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method enterActiveMode.
/**
 * Activation hook: creates the reconciliation framework, attaches the transaction logger
 * and the event-stream error gauge, subscribes the job-completion handler to the
 * framework's event stream, and finally starts the framework.
 */
@Activator
public void enterActiveMode() {
    this.reconciliationFramework = jobReconciliationFrameworkFactory.newInstance();

    // BUG: event stream breaks permanently, and cannot be retried.
    // Until the underlying issue is fixed, a gauge exposes the time elapsed since the last
    // recorded error (0 while no error timestamp is set), so the failure can be detected.
    AtomicLong eventStreamLastError = new AtomicLong();
    Clock clock = titusRuntime.getClock();
    this.transactionLoggerSubscription = JobTransactionLogger.logEvents(reconciliationFramework, eventStreamLastError, clock);
    PolledMeter.using(titusRuntime.getRegistry())
            .withName(METRIC_EVENT_STREAM_LAST_ERROR)
            .monitorValue(eventStreamLastError, lastError -> lastError.get() <= 0 ? 0 : clock.wallTime() - lastError.get());

    // Remove finished jobs from the reconciliation framework.
    Observable<JobManagerReconcilerEvent> bufferedEvents = reconciliationFramework.events().onBackpressureBuffer(
            OBSERVE_JOBS_BACKPRESSURE_BUFFER_SIZE,
            () -> logger.warn("Overflowed the buffer size: " + OBSERVE_JOBS_BACKPRESSURE_BUFFER_SIZE),
            BackpressureOverflow.ON_OVERFLOW_ERROR
    );
    // On each subscription, first run the completion handler over every holder currently in the view.
    Observable<JobManagerReconcilerEvent> reconciliationEventsObservable = bufferedEvents.doOnSubscribe(() -> {
        List<EntityHolder> currentHolders = reconciliationFramework.orderedView(IndexKind.StatusCreationTime);
        currentHolders.forEach(holder -> handleJobCompletedEvent(holder));
    });

    this.reconcilerEventSubscription = titusRuntime.persistentStream(reconciliationEventsObservable).subscribe(
            event -> {
                // Only job model updates are relevant here.
                if (!(event instanceof JobModelUpdateReconcilerEvent)) {
                    return;
                }
                JobModelUpdateReconcilerEvent modelUpdate = (JobModelUpdateReconcilerEvent) event;
                handleJobCompletedEvent(modelUpdate.getChangedEntityHolder());
            },
            e -> logger.error("Event stream terminated with an error", e),
            () -> logger.info("Event stream completed")
    );

    // Start only after the subscriptions above are in place.
    reconciliationFramework.start();
}
use of com.netflix.titus.master.jobmanager.service.event.JobManagerReconcilerEvent in project titus-control-plane by Netflix.
the class DefaultV3JobOperations method updateTask.
/**
 * Applies {@code changeFunction} to the task with the given id through the reconciliation
 * engine that owns it.
 *
 * @param taskId         id of the task to update
 * @param changeFunction function handed to {@code BasicTaskActions.updateTaskInRunningModel}
 * @param trigger        trigger recorded on the change action
 * @param reason         reason passed to the change action
 * @param callMetadata   caller information attached to the change action
 * @return a completable for the model change, or a completable failing with
 *         {@code JobManagerException.taskNotFound} when no engine owns the task
 */
@Override
public Completable updateTask(String taskId, Function<Task, Optional<Task>> changeFunction, Trigger trigger, String reason, CallMetadata callMetadata) {
    Optional<ReconciliationEngine<JobManagerReconcilerEvent>> owningEngine = reconciliationFramework
            .findEngineByChildId(taskId)
            .map(Pair::getLeft);

    return owningEngine
            .map(engine -> {
                TitusChangeAction changeAction = BasicTaskActions.updateTaskInRunningModel(
                        taskId, trigger, jobManagerConfiguration, engine, changeFunction, reason, versionSupplier, titusRuntime, callMetadata
                );
                return engine.changeReferenceModel(changeAction, taskId).toCompletable();
            })
            .orElseGet(() -> Completable.error(JobManagerException.taskNotFound(taskId)));
}
Aggregations