use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.
the class JooqTaskRelocationStoreTest method testRelocationPlanStoreCrud.
/**
 * Walks a single relocation plan through create, read, update and delete against the store,
 * swapping in a new store instance (via {@code newStore()}) before the read and after the delete.
 */
@Test
public void testRelocationPlanStoreCrud() {
    List<TaskRelocationPlan> initialPlans = newRelocationPlans(1);
    TaskRelocationPlan originalPlan = initialPlans.get(0);
    String taskId = originalPlan.getTaskId();

    // Create: one result entry is expected, carrying no error for our task.
    Map<String, Optional<Throwable>> createResult = store.createOrUpdateTaskRelocationPlans(initialPlans).block();
    assertThat(createResult).hasSize(1);
    assertThat(createResult.get(taskId)).isEmpty();

    // Reboot (to force reload from the database).
    this.store = newStore();

    // Read: the new store instance must return exactly the plan written above.
    assertThat(store.getAllTaskRelocationPlans().block()).hasSize(1);
    assertThat(store.getAllTaskRelocationPlans().block().get(taskId)).isEqualTo(originalPlan);

    // Update: same task id, different reason message.
    TaskRelocationPlan modifiedPlan = originalPlan.toBuilder()
            .withReasonMessage("Updated...")
            .build();
    Map<String, Optional<Throwable>> updateResult = store
            .createOrUpdateTaskRelocationPlans(Collections.singletonList(modifiedPlan))
            .block();
    assertThat(updateResult).hasSize(1);
    assertThat(store.getAllTaskRelocationPlans().block().get(taskId)).isEqualTo(modifiedPlan);

    // Delete
    Map<String, Optional<Throwable>> removeResult = store
            .removeTaskRelocationPlans(Collections.singleton(taskId))
            .block();
    assertThat(removeResult).hasSize(1);

    // Reboot again; nothing should be left to load.
    this.store = newStore();
    assertThat(store.getAllTaskRelocationPlans().block()).hasSize(0);
}
use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.
the class DefaultDeschedulerServiceTest method verifyRelocationPlan.
/**
 * Builds a descheduler service around mocked dependencies, asks it for the relocation plan of a
 * service task placed on a node flagged as requiring relocation, and checks the plan's task id,
 * timing and reason message.
 *
 * @param relocationDelay value passed to the job's self managed disruption budget policy; the
 *                        plan's relocation time is expected to be the decision time plus this value
 * @param reasonMessage   reason message the resulting plan is expected to carry
 */
private void verifyRelocationPlan(long relocationDelay, String reasonMessage) {
    ReadOnlyJobOperations jobOperations = mock(ReadOnlyJobOperations.class);
    ReadOnlyEvictionOperations evictionOperations = mock(ReadOnlyEvictionOperations.class);
    KubernetesNodeDataResolver nodeDataResolver = new KubernetesNodeDataResolver(
            configuration,
            TestDataFactory.mockFabric8IOConnector(),
            anyNode -> true
    );
    DefaultDeschedulerService dds = new DefaultDeschedulerService(
            jobOperations,
            evictionOperations,
            nodeDataResolver,
            () -> "foo|bar",
            titusRuntime
    );

    Job<ServiceJobExt> job = JobGenerator.serviceJobs(
            oneTaskServiceJobDescriptor().but(
                    ofServiceSize(2),
                    withDisruptionBudget(budget(selfManagedPolicy(relocationDelay), unlimitedRate(), Collections.emptyList()))
            )
    ).getValue();
    ServiceJobTask task = JobGenerator.serviceTasks(job).getValue();
    when(jobOperations.getJob(job.getId())).thenReturn(Optional.of(job));

    TitusNode node = TitusNode.newBuilder()
            .withId("node1")
            .withServerGroupId("asg1")
            .withRelocationRequired(true)
            .withBadCondition(false)
            .build();

    // Advance test clock
    long clockAdvancedMs = 5_000;
    ((TestClock) titusRuntime.getClock()).advanceTime(Duration.ofMillis(clockAdvancedMs));

    Optional<TaskRelocationPlan> relocationPlanForTask = dds.getRelocationPlanForTask(node, task, Collections.emptyMap());
    assertThat(relocationPlanForTask).isPresent();

    TaskRelocationPlan plan = relocationPlanForTask.get();
    assertThat(plan.getTaskId()).isEqualTo(task.getId());
    // Relocation time is expected to be the decision time (the advanced clock) plus relocationDelay.
    assertThat(plan.getRelocationTime()).isEqualTo(relocationDelay + clockAdvancedMs);
    assertThat(plan.getDecisionTime()).isEqualTo(clockAdvancedMs);
    assertThat(plan.getReasonMessage()).isEqualTo(reasonMessage);
}
use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.
the class TaskEvictionStepTest method testFailedEviction.
/**
 * Evicts a plan whose task id is "nonExistingTaskId" and expects a single status for that task
 * reporting {@code STATUS_EVICTION_ERROR} and referencing the submitted plan.
 */
@Test
public void testFailedEviction() {
    String missingTaskId = "nonExistingTaskId";
    TaskRelocationPlan planForMissingTask = oneMigrationPlan().toBuilder()
            .withTaskId(missingTaskId)
            .build();

    Map<String, TaskRelocationStatus> evictionStatuses = step.evict(Collections.singletonMap(missingTaskId, planForMissingTask));
    assertThat(evictionStatuses).hasSize(1);

    TaskRelocationStatus status = evictionStatuses.get(missingTaskId);
    assertThat(status.getTaskId()).isEqualTo(missingTaskId);
    assertThat(status.getStatusCode()).isEqualTo(TaskRelocationStatus.STATUS_EVICTION_ERROR);
    assertThat(status.getTaskRelocationPlan()).isEqualTo(planForMissingTask);
}
use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.
the class MustBeRelocatedSelfManagedTaskCollectorStep method buildRelocationPlans.
/**
 * Resolves the current nodes, enumerates the (job, task, node) triples found for them and, for
 * every triple for which {@code checkIfNeedsRelocationPlan} yields a reason, records a self managed
 * relocation plan under the task id. The resulting map is also stored in {@code lastResult}.
 *
 * @return relocation plans keyed by task id (empty when no triple produced a reason)
 */
private Map<String, TaskRelocationPlan> buildRelocationPlans() {
    Map<String, TitusNode> resolvedNodes = nodeDataResolver.resolve();
    List<Triple<Job<?>, Task, TitusNode>> candidates = findAllJobTaskAgentTriples(resolvedNodes);

    Map<String, TaskRelocationPlan> plans = new HashMap<>();
    logger.debug("Number of triplets to check: {}", candidates.size());

    for (Triple<Job<?>, Task, TitusNode> candidate : candidates) {
        Job<?> job = candidate.getFirst();
        Task task = candidate.getSecond();
        TitusNode instance = candidate.getThird();
        checkIfNeedsRelocationPlan(job, task, instance).ifPresent(
                reason -> plans.put(task.getId(), buildSelfManagedRelocationPlan(job, task, reason))
        );
    }

    this.lastResult = plans;
    return plans;
}
use of com.netflix.titus.api.relocation.model.TaskRelocationPlan in project titus-control-plane by Netflix.
the class TaskEvictionStep method execute.
/**
 * Requests termination of every task in {@code taskToEvict} through the eviction service client and
 * converts the per-task outcome into a {@link TaskRelocationStatus}.
 * <p>
 * Each termination call is bounded by {@code EVICTION_TIMEOUT}; the calls are merged with
 * {@code CONCURRENCY_LIMIT} on {@code scheduler} and the merged result is awaited with a blocking call.
 * Every status produced on the normal path is also written to the transaction log.
 *
 * @param taskToEvict relocation plans keyed by task id
 * @return one status per task: {@code Success} when the termination reported no error,
 * {@code Failure} with {@code STATUS_EVICTION_ERROR} when it reported one, and {@code Failure}
 * with {@code STATUS_SYSTEM_ERROR} when the result is missing or the merged call itself threw
 */
private Map<String, TaskRelocationStatus> execute(Map<String, TaskRelocationPlan> taskToEvict) {
    Map<String, Mono<Void>> actions = taskToEvict.values().stream().collect(Collectors.toMap(
            TaskRelocationPlan::getTaskId,
            p -> evictionServiceClient.terminateTask(p.getTaskId(), buildEvictionMessage(p)).timeout(EVICTION_TIMEOUT)
    ));

    Map<String, Optional<Throwable>> evictionResults;
    try {
        evictionResults = ReactorExt.merge(actions, CONCURRENCY_LIMIT, scheduler).block();
    } catch (Exception e) {
        logger.warn("Unexpected error when calling the eviction service", e);
        // Each failure status must carry its task id and plan: the collector below keys the result
        // by TaskRelocationStatus::getTaskId, so statuses without an id cannot be told apart.
        return taskToEvict.values().stream()
                .map(p -> TaskRelocationStatus.newBuilder()
                        .withTaskId(p.getTaskId())
                        .withState(TaskRelocationState.Failure)
                        .withStatusCode(TaskRelocationStatus.STATUS_SYSTEM_ERROR)
                        .withStatusMessage("Unexpected error: " + ExceptionExt.toMessageChain(e))
                        .withTaskRelocationPlan(p)
                        .withTimestamp(clock.wallTime())
                        .build()
                )
                .collect(Collectors.toMap(TaskRelocationStatus::getTaskId, s -> s));
    }

    Map<String, TaskRelocationStatus> results = new HashMap<>();
    taskToEvict.forEach((taskId, plan) -> {
        Optional<Throwable> evictionResult = evictionResults.get(plan.getTaskId());
        TaskRelocationStatus status;
        if (evictionResult != null) {
            if (!evictionResult.isPresent()) {
                // An empty optional means the termination call finished without an error.
                status = TaskRelocationStatus.newBuilder()
                        .withTaskId(taskId)
                        .withState(TaskRelocationState.Success)
                        .withStatusCode(TaskRelocationStatus.STATUS_CODE_TERMINATED)
                        .withStatusMessage("Task terminated successfully")
                        .withTaskRelocationPlan(plan)
                        .withTimestamp(clock.wallTime())
                        .build();
            } else {
                status = TaskRelocationStatus.newBuilder()
                        .withTaskId(taskId)
                        .withState(TaskRelocationState.Failure)
                        .withStatusCode(TaskRelocationStatus.STATUS_EVICTION_ERROR)
                        .withStatusMessage(evictionResult.get().getMessage())
                        .withTaskRelocationPlan(plan)
                        .withTimestamp(clock.wallTime())
                        .build();
            }
        } else {
            // This should never happen
            invariants.inconsistent("Eviction result missing: taskId=%s", plan.getTaskId());
            status = TaskRelocationStatus.newBuilder()
                    .withTaskId(taskId)
                    .withState(TaskRelocationState.Failure)
                    .withStatusCode(TaskRelocationStatus.STATUS_SYSTEM_ERROR)
                    .withStatusMessage("Eviction result missing")
                    .withTaskRelocationPlan(plan)
                    .withTimestamp(clock.wallTime())
                    .build();
        }
        results.put(taskId, status);
        transactionLog.logTaskRelocationStatus(STEP_NAME, "eviction", status);
    });
    return results;
}

/**
 * Builds the message sent along with a task termination request, based on the plan's relocation reason.
 *
 * @param plan relocation plan of the task about to be terminated
 * @return the reason-specific message; for a reason not handled explicitly, a message naming that
 * reason followed by the plan's reason message
 */
private String buildEvictionMessage(TaskRelocationPlan plan) {
    switch (plan.getReason()) {
        case AgentEvacuation:
            return String.format("Agent evacuation: %s", plan.getReasonMessage());
        case SelfManagedMigration:
            return String.format("Self managed migration requested on %s: %s", DateTimeExt.toUtcDateTimeString(plan.getDecisionTime()), plan.getReasonMessage());
        case TaskMigration:
            return plan.getReasonMessage();
        default:
            // The reason is a format argument; appending it to the format string left the second
            // %s without a value and made String.format throw MissingFormatArgumentException.
            return String.format("[unrecognized relocation reason %s]: %s", plan.getReason(), plan.getReasonMessage());
    }
}
Aggregations