use of com.mesosphere.sdk.scheduler.plan.PodInstanceRequirement in project dcos-commons by mesosphere.
the class TaskUtils method getPodRequirements.
/**
 * Builds the recovery requirements for pods that contain failed tasks.
 *
 * <p>Failed tasks are grouped by their pod instance. For each such pod, if any failed task is
 * marked essential then every task in the pod is a relaunch candidate; otherwise only the failed
 * tasks are candidates. Candidates are then narrowed to tasks whose goal state is
 * {@link GoalState#RUNNING} and whose instance name appears in {@code allLaunchedTasks}. Pods left
 * with no candidates are omitted from the result.
 *
 * @param configStore store handed to {@code getPodInstance} to resolve each failed task's pod instance
 * @param failedTasks tasks marked as needing recovery
 * @param allLaunchedTasks all launched tasks in the service
 * @return one requirement per pod with something to relaunch, ordered by pod instance name; the
 *     tasks to relaunch are exposed via {@link PodInstanceRequirement#getTasksToLaunch()}
 */
public static List<PodInstanceRequirement> getPodRequirements(
        ConfigStore<ServiceSpec> configStore,
        Collection<Protos.TaskInfo> failedTasks,
        Collection<Protos.TaskInfo> allLaunchedTasks) {
    // Failed task specs grouped by owning pod. Sorting by pod instance name
    // (e.g. "otherpodtype-0", "podtype-0", "podtype-1") gives an arbitrary but consistent ordering.
    Map<PodInstance, Collection<TaskSpec>> failedSpecsByPod =
            new TreeMap<>(Comparator.comparing(PodInstance::getName));
    for (Protos.TaskInfo failedTask : failedTasks) {
        try {
            PodInstance pod = getPodInstance(configStore, failedTask);
            Optional<TaskSpec> spec = getTaskSpec(pod, failedTask.getName());
            if (spec.isPresent()) {
                failedSpecsByPod.computeIfAbsent(pod, ignored -> new ArrayList<>()).add(spec.get());
            } else {
                LOGGER.error("No TaskSpec found for failed task: {}", failedTask.getName());
            }
        } catch (TaskException e) {
            LOGGER.error(String.format("Failed to get pod instance for task: %s", failedTask.getName()), e);
        }
    }

    // Nothing resolvable failed: no recovery work to describe.
    if (failedSpecsByPod.isEmpty()) {
        return Collections.emptyList();
    }

    // Report which pods failed, and with which tasks.
    failedSpecsByPod.forEach((pod, failedSpecs) -> {
        List<String> failedNames = failedSpecs.stream()
                .map(TaskSpec::getName)
                .collect(Collectors.toList());
        LOGGER.info("Failed pod: {} with tasks: {}", pod.getName(), failedNames);
    });

    Set<String> launchedNames = allLaunchedTasks.stream()
            .map(Protos.TaskInfo::getName)
            .collect(Collectors.toSet());

    List<PodInstanceRequirement> requirements = new ArrayList<>();
    for (Map.Entry<PodInstance, Collection<TaskSpec>> failedPod : failedSpecsByPod.entrySet()) {
        PodInstance pod = failedPod.getKey();
        Collection<TaskSpec> failedSpecs = failedPod.getValue();

        // An essential failure means all applicable tasks in the pod are relaunched;
        // otherwise only the failed task(s) are recovered and the others are left as-is.
        boolean essentialFailure = failedSpecs.stream().anyMatch(TaskSpec::isEssential);
        Collection<TaskSpec> candidates = essentialFailure ? pod.getPod().getTasks() : failedSpecs;

        // Additional filtering:
        // - Only relaunch tasks with a RUNNING goal state; FINISHED tasks are not of concern.
        // - Skip tasks that were never launched (i.e. absent from allLaunchedTasks).
        List<TaskSpec> relaunchSpecs = candidates.stream()
                .filter(spec -> spec.getGoal() == GoalState.RUNNING
                        && launchedNames.contains(TaskSpec.getInstanceName(pod, spec.getName())))
                .collect(Collectors.toList());
        if (relaunchSpecs.isEmpty()) {
            LOGGER.info("No tasks to recover for pod: {}", pod.getName());
            continue;
        }

        List<String> relaunchSummary = relaunchSpecs.stream()
                .map(spec -> String.format("%s=%s", spec.getName(), spec.isEssential() ? "essential" : "nonessential"))
                .collect(Collectors.toList());
        LOGGER.info("Tasks to relaunch in pod {}: {}", pod.getName(), relaunchSummary);

        List<String> relaunchNames = relaunchSpecs.stream()
                .map(TaskSpec::getName)
                .collect(Collectors.toList());
        requirements.add(PodInstanceRequirement.newBuilder(pod, relaunchNames).build());
    }
    return requirements;
}
use of com.mesosphere.sdk.scheduler.plan.PodInstanceRequirement in project dcos-commons by mesosphere.
the class TLSEvaluationStageTest method getRequirementWithTransportEncryption.
/**
 * Creates a requirement for a single-task pod whose task carries the given transport
 * encryption specs.
 *
 * @param resourceSet resource set assigned to the task
 * @param type pod type set on the pod spec
 * @param index index of the pod instance to create
 * @param transportEncryptionSpecs transport encryption specs set on the task
 * @return a requirement covering every task of the created pod instance
 */
private static PodInstanceRequirement getRequirementWithTransportEncryption(
        ResourceSet resourceSet,
        String type,
        int index,
        Collection<TransportEncryptionSpec> transportEncryptionSpecs) {
    // One task with a RUNNING goal, using the shared test name and command.
    TaskSpec encryptedTask = DefaultTaskSpec.newBuilder()
            .name(TestConstants.TASK_NAME)
            .commandSpec(DefaultCommandSpec.newBuilder(Collections.emptyMap())
                    .value(TestConstants.TASK_CMD)
                    .build())
            .goalState(GoalState.RUNNING)
            .resourceSet(resourceSet)
            .setTransportEncryption(transportEncryptionSpecs)
            .build();

    // A pod spec with a count of one that holds only that task.
    PodSpec singleTaskPod = DefaultPodSpec.newBuilder("executor-uri")
            .type(type)
            .count(1)
            .tasks(Arrays.asList(encryptedTask))
            .preReservedRole(Constants.ANY_ROLE)
            .build();

    PodInstance instance = new DefaultPodInstance(singleTaskPod, index);
    List<String> namesToLaunch = instance.getPod().getTasks().stream()
            .map(TaskSpec::getName)
            .collect(Collectors.toList());
    return PodInstanceRequirement.newBuilder(instance, namesToLaunch).build();
}
use of com.mesosphere.sdk.scheduler.plan.PodInstanceRequirement in project dcos-commons by mesosphere.
the class PortEvaluationStageTest method testDynamicPortNotStickyAfterReplacement.
/**
 * Checks how a dynamically assigned port is treated when the same task is evaluated again.
 *
 * <p>Three evaluations are performed with the same stage:
 * <ol>
 *   <li>an initial launch against ports 10000-10050, expected to pass and to advertise 10000;</li>
 *   <li>a re-evaluation that is given the previously built task, against an offer lacking
 *       10000, expected to fail;</li>
 *   <li>the same re-evaluation after the previous task is labelled permanently failed,
 *       expected to pass.</li>
 * </ol>
 */
@Test
public void testDynamicPortNotStickyAfterReplacement() throws Exception {
    // The initial dynamic port should be the min of the available range.
    Protos.Resource offeredPorts = ResourceTestUtils.getUnreservedPorts(10000, 10050);
    Protos.Offer offer = OfferTestUtils.getOffer(offeredPorts);
    PortSpec portSpec = new PortSpec(
            getPort(0),
            TestConstants.ROLE,
            Constants.ANY_ROLE,
            TestConstants.PRINCIPAL,
            "PORT_TEST",
            "TEST",
            TestConstants.PORT_VISIBILITY,
            Collections.emptyList());
    PodInstanceRequirement podInstanceRequirement = getPodInstanceRequirement(portSpec);
    PodInfoBuilder podInfoBuilder = new PodInfoBuilder(
            podInstanceRequirement,
            TestConstants.SERVICE_NAME,
            UUID.randomUUID(),
            ArtifactResource.getUrlFactory(TestConstants.SERVICE_NAME),
            SchedulerConfigTestUtils.getTestSchedulerConfig(),
            Collections.emptyList(),
            TestConstants.FRAMEWORK_ID,
            true,
            Collections.emptyMap());
    PortEvaluationStage portEvaluationStage =
            new PortEvaluationStage(portSpec, TestConstants.TASK_NAME, Optional.empty(), Optional.empty());
    MesosResourcePool mesosResourcePool = new MesosResourcePool(offer, Optional.of(Constants.ANY_ROLE));
    EvaluationOutcome outcome = portEvaluationStage.evaluate(mesosResourcePool, podInfoBuilder);
    Assert.assertTrue(outcome.isPassing());
    Protos.TaskInfo.Builder taskBuilder = podInfoBuilder.getTaskBuilder(TestConstants.TASK_NAME);
    checkDiscoveryInfo(taskBuilder.getDiscovery(), "TEST", 10000);

    // In a restart, we want port stickiness. It should fail if the original dynamic port is not
    // available in the offer.
    Protos.TaskInfo.Builder currentTaskBuilder = podInfoBuilder.getTaskBuilders().stream().findFirst().get();
    podInfoBuilder = new PodInfoBuilder(
            podInstanceRequirement,
            TestConstants.SERVICE_NAME,
            UUID.randomUUID(),
            ArtifactResource.getUrlFactory(TestConstants.SERVICE_NAME),
            SchedulerConfigTestUtils.getTestSchedulerConfig(),
            Collections.singleton(currentTaskBuilder.build()),
            TestConstants.FRAMEWORK_ID,
            true,
            Collections.emptyMap());
    // Offer a range that omits 10000, the previously assigned port.
    offer = OfferTestUtils.getOffer(ResourceTestUtils.getUnreservedPorts(10001, 10050));
    mesosResourcePool = new MesosResourcePool(offer, Optional.of(Constants.ANY_ROLE));
    outcome = portEvaluationStage.evaluate(mesosResourcePool, podInfoBuilder);
    Assert.assertFalse(outcome.isPassing());

    // In permanent replacement, the previous dynamic port should be discarded, so an offer
    // without that port should be valid.
    currentTaskBuilder.setLabels(new TaskLabelWriter(currentTaskBuilder).setPermanentlyFailed().toProto());
    podInfoBuilder = new PodInfoBuilder(
            podInstanceRequirement,
            TestConstants.SERVICE_NAME,
            UUID.randomUUID(),
            ArtifactResource.getUrlFactory(TestConstants.SERVICE_NAME),
            SchedulerConfigTestUtils.getTestSchedulerConfig(),
            Collections.singleton(currentTaskBuilder.build()),
            TestConstants.FRAMEWORK_ID,
            true,
            Collections.emptyMap());
    mesosResourcePool = new MesosResourcePool(offer, Optional.of(Constants.ANY_ROLE));
    outcome = portEvaluationStage.evaluate(mesosResourcePool, podInfoBuilder);
    Assert.assertTrue(outcome.isPassing());
}
use of com.mesosphere.sdk.scheduler.plan.PodInstanceRequirement in project dcos-commons by mesosphere.
the class PortEvaluationStageTest method testHealthCheckPortEnvvarIsCorrectOnOverlay.
/**
 * Evaluates the port stage for the pod defined in {@code valid-port-healthcheck-overlay.yml}
 * against an offer containing only port 10000.
 *
 * <p>Expects the evaluation to pass with no offer recommendations, and expects exactly one
 * {@code PORT_TEST_PORT=10000} variable in both the task command environment and the health
 * check command environment.
 */
@Test
public void testHealthCheckPortEnvvarIsCorrectOnOverlay() throws Exception {
    DefaultPodInstance podInstance = getPodInstance("valid-port-healthcheck-overlay.yml");
    PodInstanceRequirement requirement =
            PodInstanceRequirement.newBuilder(podInstance, TaskUtils.getTaskNames(podInstance)).build();
    PodInfoBuilder podInfoBuilder = getPodInfoBuilder(requirement, true);

    // Offer exactly one port: 10000.
    Protos.Offer offer = OfferTestUtils.getOffer(ResourceTestUtils.getUnreservedPorts(10000, 10000));
    PortEvaluationStage stage = new PortEvaluationStage(
            getPortSpec(podInstance), TestConstants.TASK_NAME, Optional.empty(), Optional.empty());
    MesosResourcePool pool = new MesosResourcePool(offer, Optional.of(Constants.ANY_ROLE));

    EvaluationOutcome outcome = stage.evaluate(pool, podInfoBuilder);
    Assert.assertTrue(outcome.isPassing());
    Assert.assertEquals(0, outcome.getOfferRecommendations().size());

    Protos.TaskInfo.Builder firstTask = podInfoBuilder.getTaskBuilders().stream().findFirst().get();

    // The task's own command environment must carry the port exactly once.
    long taskEnvMatches = firstTask.getCommand().getEnvironment().getVariablesList().stream()
            .filter(variable -> variable.getName().equals("PORT_TEST_PORT") && variable.getValue().equals("10000"))
            .count();
    Assert.assertTrue(taskEnvMatches == 1);

    // So must the environment of the health check command.
    long healthCheckEnvMatches = firstTask.getHealthCheck().getCommand().getEnvironment().getVariablesList().stream()
            .filter(variable -> variable.getName().equals("PORT_TEST_PORT") && variable.getValue().equals("10000"))
            .count();
    Assert.assertTrue(healthCheckEnvMatches == 1);
}
use of com.mesosphere.sdk.scheduler.plan.PodInstanceRequirement in project dcos-commons by mesosphere.
the class PortEvaluationStageTest method testPortEnvvarOnReadinessCheck.
/**
 * Evaluates the port stage for the pod defined in {@code valid-port-readinesscheck.yml}
 * against an offer containing only port 10000.
 *
 * <p>Expects the evaluation to pass with a single RESERVE recommendation whose first resource
 * covers exactly port 10000, and expects {@code PORT_TEST_PORT=10000} to be present in both the
 * task command environment and the readiness check command environment.
 */
@Test
public void testPortEnvvarOnReadinessCheck() throws Exception {
    DefaultPodInstance podInstance = getPodInstance("valid-port-readinesscheck.yml");
    PodInstanceRequirement podInstanceRequirement =
            PodInstanceRequirement.newBuilder(podInstance, TaskUtils.getTaskNames(podInstance)).build();
    PodInfoBuilder podInfoBuilder = getPodInfoBuilder(podInstanceRequirement, true);
    Protos.Resource offeredPorts = ResourceTestUtils.getUnreservedPorts(10000, 10000);
    Protos.Offer offer = OfferTestUtils.getOffer(offeredPorts);
    PortEvaluationStage portEvaluationStage = new PortEvaluationStage(
            getPortSpec(podInstance), TestConstants.TASK_NAME, Optional.empty(), Optional.empty());
    EvaluationOutcome outcome = portEvaluationStage.evaluate(
            new MesosResourcePool(offer, Optional.of(Constants.ANY_ROLE)), podInfoBuilder);
    Assert.assertTrue(outcome.isPassing());

    Assert.assertEquals(1, outcome.getOfferRecommendations().size());
    OfferRecommendation recommendation = outcome.getOfferRecommendations().iterator().next();
    Assert.assertEquals(Protos.Offer.Operation.Type.RESERVE, recommendation.getOperation().getType());
    Protos.Resource resource = recommendation.getOperation().getReserve().getResources(0);
    // Check both ends of the reserved range separately. A single three-argument
    // assertEquals(10000, begin, end) binds to the (expected, actual, delta) overload,
    // which would treat the range end as a tolerance instead of verifying it.
    Protos.Value.Range reservedRange = resource.getRanges().getRange(0);
    Assert.assertEquals(10000, reservedRange.getBegin());
    Assert.assertEquals(10000, reservedRange.getEnd());

    Protos.TaskInfo.Builder taskBuilder = podInfoBuilder.getTaskBuilders().stream().findFirst().get();

    // The port must be exported in the task's own command environment.
    boolean portInTaskEnv = false;
    for (Protos.Environment.Variable variable : taskBuilder.getCommand().getEnvironment().getVariablesList()) {
        if (Objects.equals(variable.getName(), "PORT_TEST_PORT")) {
            Assert.assertEquals("10000", variable.getValue());
            portInTaskEnv = true;
        }
    }
    Assert.assertTrue(portInTaskEnv);

    // ...and in the environment of the readiness check's command.
    boolean portInReadinessEnv = false;
    Protos.CheckInfo readinessCheck = taskBuilder.getCheck();
    for (Protos.Environment.Variable variable
            : readinessCheck.getCommand().getCommand().getEnvironment().getVariablesList()) {
        if (Objects.equals(variable.getName(), "PORT_TEST_PORT")) {
            Assert.assertEquals("10000", variable.getValue());
            portInReadinessEnv = true;
        }
    }
    Assert.assertTrue(portInReadinessEnv);
}
Aggregations