use of org.bf2.srs.fleetmanager.execution.manager.Task in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.
the class JobWrapper method execute.
/**
 * Quartz entry point: loads the task stored in the job context and runs it through every
 * worker that supports it and is not listed in {@code workerExclusions}.
 *
 * <p>For each selected worker the method executes the task, then decides between rescheduling
 * (when a next execution instant was computed) and finishing (calling
 * {@code finallyExecute} and removing the task). The worker context and the task are saved
 * back to the job context after every attempt, whether it succeeded or not.
 *
 * @param quartzJobContext the Quartz context the task and the per-worker contexts are loaded
 *                         from and saved to
 */
@Override
@SneakyThrows
@ActivateRequestContext
public void execute(JobExecutionContext quartzJobContext) {
Task task = loadTask(quartzJobContext);
// Only workers that declare support for this task and are not explicitly excluded take part.
List<Worker> selectedWorkers = workers.stream().filter(w -> w.supports(task) && !workerExclusions.contains(w.getClass())).collect(toList());
for (Worker worker : selectedWorkers) {
WorkerContextImpl wCtx = loadWorkerContext(quartzJobContext, worker, task);
// Instant of the next execution; null means "do not reschedule, finish the task".
Instant next = null;
// Exception to hand over to finallyExecute; cleared for retry/stop control exceptions.
Exception lastException = null;
try {
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Executing task.", task, worker, wCtx);
worker.execute(task, wCtx);
// Actions the worker deferred are run only after execute returned normally.
wCtx.getDelayedActions().forEach(Runnable::run);
// OK vvv
// Reset retry counter
wCtx.setRetryAttempts(0);
// Reset min retry counter
wCtx.setMinRetries(task.getSchedule().getMinRetries());
// Normal rescheduling
next = nextExecution(task);
} catch (Exception anEx) {
// TODO Throwable?
lastException = anEx;
if (anEx instanceof RetryExecutionControlException) {
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Task requested a retry.", task, worker, wCtx, anEx);
RetryExecutionControlException ex = (RetryExecutionControlException) anEx;
if (ex.isForce() && wCtx.getMinRetries() < Integer.MAX_VALUE) {
// Make space for forced retry, no more than Integer.MAX_VALUE
wCtx.setMinRetries(wCtx.getMinRetries() + 1);
// A forced retry is scheduled one second from now instead of using the backoff.
next = Instant.now().plus(Duration.ofSeconds(1));
}
// The retry request may raise the minimum number of retries, never lower it.
if (ex.getMinRetries() > wCtx.getMinRetries()) {
wCtx.setMinRetries(ex.getMinRetries());
}
// A retry request is a control signal, not an error to report.
lastException = null;
}
if (wCtx.getRetryAttempts() < wCtx.getMinRetries() && (next == null)) {
// Reschedule if the minRetries is not reached
next = Instant.now().plus(backoff(wCtx.getRetryAttempts()));
}
if (anEx instanceof StopExecutionControlException) {
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Task requested a stop.", task, worker, wCtx, anEx);
// Unschedule (this also discards a backoff reschedule computed just above)
next = null;
lastException = null;
}
// Only exceptions that are neither a retry nor a stop request are reported as warnings.
if (lastException != null) {
log.warn("Task Manager (task = {}, worker = {}, workerContext = {}, nextExecution = {}): Task threw an exception during execution: {}", task, worker, wCtx, next, anEx);
}
// Every failed attempt (including retry and stop requests) counts as a retry attempt.
wCtx.setRetryAttempts(wCtx.getRetryAttempts() + 1);
} finally {
// Replace the delayed-action list with an empty one: on success the actions have already
// been run above, on failure they are dropped without being run.
wCtx.setDelayedActions(new ArrayList<>(0));
// Persist the updated worker context and task before rescheduling or finishing.
saveWorkerContext(quartzJobContext, wCtx, worker);
saveTask(quartzJobContext, task);
// Scheduling
if (next != null) {
// The attempt counter having reached minRetries is reported as the last rescheduling.
if (wCtx.getRetryAttempts() == wCtx.getMinRetries()) {
log.info("Task Manager (task = {}, worker = {}, workerContext = {}): Last rescheduling at {}.", task, worker, wCtx, next);
} else {
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Rescheduling task at {}.", task, worker, wCtx, next);
}
taskManager.rerigger(task, next);
} else {
// No next execution: give the worker its final callback, then remove the task.
try {
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Executing finallyExecute. Last exception = {}", task, worker, wCtx, lastException);
worker.finallyExecute(task, wCtx, ofNullable(lastException));
// Run the actions that finallyExecute itself deferred.
wCtx.getDelayedActions().forEach(Runnable::run);
} catch (Exception ex) {
// Failures in the final callback are logged and otherwise ignored.
log.warn("Task Manager (task = {}, worker = {}, workerContext = {}): Ignoring an exception thrown in finallyExecute: {}", task, worker, wCtx, ex);
} finally {
// The task is removed even when finallyExecute failed.
log.debug("Task Manager (task = {}, worker = {}, workerContext = {}): Removing task.", task, worker, wCtx);
taskManager.remove(task);
}
}
}
}
}
use of org.bf2.srs.fleetmanager.execution.manager.Task in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.
the class ProvisionRegistryTenantWorker method execute.
/**
 * Provisions the tenant for the registry referenced by a {@code ProvisionRegistryTenantTask}.
 *
 * <p>Steps: look up the registry and its deployment (requesting a retry through {@code ctl}
 * when either is missing), derive and set the registry URL, create the tenant unless the task
 * already records a tenant ID, schedule an expiration task for eval instances, and finally
 * store the registry with status {@code READY}.
 *
 * <p>NOTE(review): the code after each {@code ctl.retry()} call dereferences the missing value,
 * so {@code retry()} is presumably expected to abort execution by throwing - confirm against
 * the {@code WorkerContext} implementation.
 *
 * @param aTask the task to run; cast to {@code ProvisionRegistryTenantTask}
 * @param ctl   worker context used to request retries and to register delayed actions
 * @throws RegistryStorageConflictException declared by this worker; see the storage calls
 * @throws TenantManagerServiceException    declared by this worker; see the tenant manager call
 */
@Transactional
@Override
public void execute(Task aTask, WorkerContext ctl) throws RegistryStorageConflictException, TenantManagerServiceException {
    // TODO Split along failure points?
    final ProvisionRegistryTenantTask provisionTask = (ProvisionRegistryTenantTask) aTask;

    // NOTE: Failure point 1
    final Optional<RegistryData> lookup = storage.getRegistryById(provisionTask.getRegistryId());
    if (!lookup.isPresent()) {
        ctl.retry();
    }
    final RegistryData registry = lookup.get();

    // NOTE: Failure point 2
    final RegistryDeploymentData deployment = registry.getRegistryDeployment();
    if (null == deployment) {
        // Either the schedule task didn't run yet, or we are in trouble
        ctl.retry();
    }

    final String configuredUrl = deployment.getRegistryDeploymentUrl();
    final String tenantUrl;
    if (!configuredUrl.contains("TENANT_ID")) {
        // Old approach: the deployment URL has no placeholder, so "/t/TENANT_ID" is appended to it.
        final String withSlash = configuredUrl.endsWith("/") ? configuredUrl : configuredUrl + "/";
        tenantUrl = withSlash + "t/" + registry.getId();
    } else {
        // New approach: the deployment URL carries a placeholder,
        // e.g. https://TENANT_ID.shrd.sr.openshift.com
        tenantUrl = configuredUrl.replace("TENANT_ID", registry.getId());
    }
    registry.setRegistryUrl(tenantUrl);

    // Avoid accidentally creating orphan tenants
    if (provisionTask.getRegistryTenantId() == null) {
        CreateTenantRequest createRequest = CreateTenantRequest.builder()
                .tenantId(registry.getId())
                .createdBy(registry.getOwner())
                .organizationId(registry.getOrgId())
                .resources(plansService.determineQuotaPlan(registry.getOrgId()).getResources())
                .build();
        TenantManagerConfig tmConfig = Utils.createTenantManagerConfig(deployment);
        // NOTE: Failure point 4
        tmClient.createTenant(tmConfig, createRequest);
        provisionTask.setRegistryTenantId(registry.getId());
    }

    // Add expiration task if this is an eval instance
    if (isEvalInstance(registry.getInstanceType())) {
        final Instant expiresAt = Instant.now().plus(Duration.ofSeconds(evalLifetimeSeconds));
        log.debug("Scheduling an expiration task for the eval instance {} to be executed at {}", registry, expiresAt);
        // The submission is deferred through the worker context rather than performed here.
        ctl.delay(() -> tasks.submit(
                EvalInstanceExpirationRegistryTask.builder()
                        .registryId(registry.getId())
                        .schedule(TaskSchedule.builder().firstExecuteAt(expiresAt).build())
                        .build()));
    }

    // NOTE: Failure point 5
    registry.setStatus(RegistryStatusValueDto.READY.value());
    storage.createOrUpdateRegistry(registry);

    // TODO This task is (temporarily) not used. Enable when needed.
    // Update status to available in the heartbeat task, which should run ASAP
    // ctl.delay(() -> tasks.submit(RegistryHeartbeatTask.builder().registryId(registry.getId()).build()));
}
use of org.bf2.srs.fleetmanager.execution.manager.Task in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.
the class ProvisionRegistryTenantWorker method finallyExecute.
/**
 * Final callback for a {@code ProvisionRegistryTenantTask}: cleans up after a provisioning
 * that did not complete.
 *
 * <p>Nothing is done when the registry no longer exists or when it already has a registry URL
 * (treated as the success state). Otherwise the subscription and the tenant are deleted (both
 * only when the registry has a deployment), and the registry entity itself is removed.
 *
 * @param aTask the task being finished; cast to {@code ProvisionRegistryTenantTask}
 * @param ctl   worker context (not used by this method)
 * @param error the last exception reported for the task, if any (not used by this method)
 */
@Transactional
@Override
public void finallyExecute(Task aTask, WorkerContext ctl, Optional<Exception> error) throws RegistryNotFoundException, RegistryStorageConflictException, SubscriptionNotFoundServiceException, AccountManagementServiceException, TenantManagerServiceException {
    final ProvisionRegistryTenantTask provisionTask = (ProvisionRegistryTenantTask) aTask;

    final Optional<RegistryData> lookup = storage.getRegistryById(provisionTask.getRegistryId());
    if (lookup.isEmpty()) {
        // No registry entity, so there is nothing to clean up.
        return;
    }
    final RegistryData registry = lookup.get();
    final RegistryDeploymentData deployment = registry.getRegistryDeployment();

    // SUCCESS STATE
    if (registry.getRegistryUrl() != null) {
        return;
    }

    if (deployment != null) {
        // Cleanup orphan subscription, if it's null, it's not needed since it will likely be an eval instance
        if (registry.getSubscriptionId() != null) {
            accountManagementService.deleteSubscription(registry.getSubscriptionId());
        }

        // Cleanup orphan tenant
        if (provisionTask.getRegistryTenantId() != null) {
            try {
                TenantManagerConfig tmConfig = Utils.createTenantManagerConfig(deployment);
                tmClient.deleteTenant(tmConfig, registry.getId());
            } catch (TenantNotFoundServiceException e) {
                log.warn("Could not delete tenant '{}'. Tenant does not exist and may have been already deleted.", registry.getId());
            }
        }
    }

    // Remove registry entity
    storage.deleteRegistry(registry.getId());
}
use of org.bf2.srs.fleetmanager.execution.manager.Task in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.
the class ScheduleRegistryWorker method execute.
/**
 * Assigns the registry referenced by a {@code ScheduleRegistryTask} to a randomly chosen
 * registry deployment whose status is {@code AVAILABLE}, marks it {@code PROVISIONING},
 * stores it, and registers a delayed submission of a {@code ProvisionRegistryTenantTask}.
 *
 * <p>A retry is requested through {@code ctl} when the registry cannot be found or when no
 * deployment is available.
 *
 * <p>NOTE(review): the code following each retry request dereferences the missing value, so
 * {@code retry()} is presumably expected to abort execution by throwing - confirm against the
 * {@code WorkerContext} implementation.
 *
 * @param aTask the task to run; cast to {@code ScheduleRegistryTask}
 * @param ctl   worker context used to request retries and to register delayed actions
 * @throws RegistryStorageConflictException declared by this worker; see the storage calls
 */
@Transactional
@Override
public void execute(Task aTask, WorkerContext ctl) throws RegistryStorageConflictException {
    final ScheduleRegistryTask scheduleTask = (ScheduleRegistryTask) aTask;

    final Optional<RegistryData> lookup = storage.getRegistryById(scheduleTask.getRegistryId());
    if (!lookup.isPresent()) {
        // NOTE: Failure point 1
        ctl.retry();
    }
    final RegistryData registry = lookup.get();

    // Keep only the deployments that currently report the AVAILABLE status.
    final List<RegistryDeploymentData> candidates = storage.getAllRegistryDeployments()
            .stream()
            .filter(rd -> RegistryDeploymentStatusValue.AVAILABLE == RegistryDeploymentStatusValue.of(rd.getStatus().getValue()))
            .collect(toList());
    if (candidates.isEmpty()) {
        // NOTE: Failure point 2
        // TODO How to report it better?
        log.warn("Could not schedule registry with ID {}. No deployments are available.", registry.getId());
        // We can wait here longer, somebody needs to create a deployment
        ctl.retry(100);
    }

    // Schedule to a random registry deployment
    // TODO Improve & use a specific scheduling strategy
    final int pick = ThreadLocalRandom.current().nextInt(candidates.size());
    final RegistryDeploymentData chosen = candidates.get(pick);

    // TODO only available
    log.info("Scheduling {} to {}.", registry, chosen);
    registry.setRegistryDeployment(chosen);
    registry.setStatus(RegistryStatusValueDto.PROVISIONING.value());

    // NOTE: Failure point 3
    storage.createOrUpdateRegistry(registry);

    // The follow-up provisioning task is submitted as a delayed action of the worker context.
    ctl.delay(() -> tasks.submit(
            ProvisionRegistryTenantTask.builder()
                    .registryId(registry.getId())
                    .build()));
}
use of org.bf2.srs.fleetmanager.execution.manager.Task in project srs-fleet-manager by bf2fc6cc711aee1a0c2a.
the class DataCollectingWorker method execute.
/**
 * Test worker that runs the head command of a {@code TestTask} and records the outcome in
 * {@code data}.
 *
 * <p>If the head command reports {@code done()}, it is removed and the next one becomes
 * current. When no command is left, a stop is requested through {@code ctl}; that stop and
 * the execution itself are deliberately not recorded. Retry, stop and other exceptions are
 * recorded and then rethrown unchanged.
 *
 * @param aTask the task to run; cast to {@code TestTask}
 * @param ctl   worker context passed to the command and used to request the final stop
 */
@Override
public void execute(Task aTask, WorkerContext ctl) {
    // Set when the command queue ran dry; suppresses the stop/execution bookkeeping.
    boolean commandsExhausted = false;
    try {
        final TestTask testTask = (TestTask) aTask;
        Command current = testTask.getCommands().peekFirst();
        if (current != null && current.done()) {
            // The head command is finished: drop it and move on to the next one.
            testTask.getCommands().removeFirst();
            current = testTask.getCommands().peekFirst();
        }
        if (current == null) {
            // Do not record stop caused by end of commands
            commandsExhausted = true;
            ctl.stop();
        } else {
            current.execute(ctl, testTask);
        }
        data.recordSuccess();
    } catch (RetryExecutionControlException retryRequest) {
        if (!retryRequest.isForce()) {
            data.recordRetry();
        } else {
            data.recordForceRetry();
        }
        throw retryRequest;
    } catch (StopExecutionControlException stopRequest) {
        if (!commandsExhausted) {
            data.recordStop();
        }
        throw stopRequest;
    } catch (Exception failure) {
        data.recordException();
        throw failure;
    } finally {
        if (!commandsExhausted) {
            data.recordExecution();
        }
    }
}
Aggregations