Use of com.mesosphere.sdk.state.ConfigStore in project dcos-commons by mesosphere.
From class TaskUtils, method getPodRequirements.
/**
 * Given all launched tasks in the service and the subset that have failed, returns the pods whose
 * tasks should be relaunched (the tasks themselves are named via the returned
 * {@link PodInstanceRequirement#getTasksToLaunch()}).
 *
 * @param configStore config store used to resolve the pod instance for each failed task
 * @param failedTasks tasks marked as needing recovery
 * @param allLaunchedTasks all launched tasks in the service
 * @return list of pods, each with contained named tasks to be relaunched
 */
public static List<PodInstanceRequirement> getPodRequirements(
        ConfigStore<ServiceSpec> configStore,
        Collection<Protos.TaskInfo> failedTasks,
        Collection<Protos.TaskInfo> allLaunchedTasks) {
    // Mapping of pods, to failed tasks within those pods.
    // Arbitrary consistent ordering: by pod instance name (e.g. "otherpodtype-0", "podtype-0", "podtype-1")
    Map<PodInstance, Collection<TaskSpec>> podsToFailedTasks =
            new TreeMap<>(Comparator.comparing(PodInstance::getName));
    for (Protos.TaskInfo taskInfo : failedTasks) {
        try {
            PodInstance podInstance = getPodInstance(configStore, taskInfo);
            Optional<TaskSpec> taskSpec = getTaskSpec(podInstance, taskInfo.getName());
            if (!taskSpec.isPresent()) {
                LOGGER.error("No TaskSpec found for failed task: {}", taskInfo.getName());
                continue;
            }
            Collection<TaskSpec> failedTaskSpecs = podsToFailedTasks.get(podInstance);
            if (failedTaskSpecs == null) {
                failedTaskSpecs = new ArrayList<>();
                podsToFailedTasks.put(podInstance, failedTaskSpecs);
            }
            failedTaskSpecs.add(taskSpec.get());
        } catch (TaskException e) {
            LOGGER.error(String.format("Failed to get pod instance for task: %s", taskInfo.getName()), e);
        }
    }
    if (podsToFailedTasks.isEmpty()) {
        // short circuit
        return Collections.emptyList();
    }
    // Log failed pod map
    for (Map.Entry<PodInstance, Collection<TaskSpec>> entry : podsToFailedTasks.entrySet()) {
        List<String> taskNames = entry.getValue().stream()
                .map(taskSpec -> taskSpec.getName())
                .collect(Collectors.toList());
        LOGGER.info("Failed pod: {} with tasks: {}", entry.getKey().getName(), taskNames);
    }
    Set<String> allLaunchedTaskNames = allLaunchedTasks.stream()
            .map(taskInfo -> taskInfo.getName())
            .collect(Collectors.toSet());
    List<PodInstanceRequirement> podInstanceRequirements = new ArrayList<>();
    for (Map.Entry<PodInstance, Collection<TaskSpec>> entry : podsToFailedTasks.entrySet()) {
        boolean anyFailedTasksAreEssential = entry.getValue().stream().anyMatch(taskSpec -> taskSpec.isEssential());
        Collection<TaskSpec> taskSpecsToLaunch;
        if (anyFailedTasksAreEssential) {
            // One or more of the failed tasks in this pod are marked as 'essential'.
            // Relaunch all applicable tasks in the pod.
            taskSpecsToLaunch = entry.getKey().getPod().getTasks();
        } else {
            // None of the failed tasks in this pod are 'essential'.
            // Only recover the failed task(s), leave others in the pod as-is.
            taskSpecsToLaunch = entry.getValue();
        }
        // Additional filtering:
        // - Only relaunch tasks that have a RUNNING goal state. Don't worry about FINISHED tasks.
        // - Don't relaunch tasks that haven't been launched yet (as indicated by presence in allLaunchedTasks)
        taskSpecsToLaunch = taskSpecsToLaunch.stream()
                .filter(taskSpec -> taskSpec.getGoal() == GoalState.RUNNING
                        && allLaunchedTaskNames.contains(TaskSpec.getInstanceName(entry.getKey(), taskSpec.getName())))
                .collect(Collectors.toList());
        if (taskSpecsToLaunch.isEmpty()) {
            LOGGER.info("No tasks to recover for pod: {}", entry.getKey().getName());
            continue;
        }
        LOGGER.info("Tasks to relaunch in pod {}: {}",
                entry.getKey().getName(),
                taskSpecsToLaunch.stream()
                        .map(taskSpec -> String.format(
                                "%s=%s", taskSpec.getName(), taskSpec.isEssential() ? "essential" : "nonessential"))
                        .collect(Collectors.toList()));
        podInstanceRequirements.add(PodInstanceRequirement.newBuilder(
                entry.getKey(),
                taskSpecsToLaunch.stream().map(taskSpec -> taskSpec.getName()).collect(Collectors.toList()))
                .build());
    }
    return podInstanceRequirements;
}
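For orientation, here is a minimal usage sketch (not taken from dcos-commons itself) showing how a caller holding a StateStore and ConfigStore might feed getPodRequirements. The helper name and the caller-supplied failure predicate are assumptions for illustration, and imports from java.util and the SDK are elided as in the snippets above.

static List<PodInstanceRequirement> recoveryRequirements(
        StateStore stateStore,
        ConfigStore<ServiceSpec> configStore,
        Predicate<Protos.TaskInfo> isFailed) { // failure detection is the caller's concern; assumed here
    // All tasks that have been launched, as recorded in the state store.
    Collection<Protos.TaskInfo> allLaunchedTasks = stateStore.fetchTasks();
    // The subset the caller considers failed and in need of recovery.
    Collection<Protos.TaskInfo> failedTasks = allLaunchedTasks.stream()
            .filter(isFailed)
            .collect(Collectors.toList());
    return TaskUtils.getPodRequirements(configStore, failedTasks, allLaunchedTasks);
}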
Use of com.mesosphere.sdk.state.ConfigStore in project dcos-commons by mesosphere.
From class TaskUtilsTest, method buildPodLayout.
private static ConfigStore<ServiceSpec> buildPodLayout(int essentialTasks, int nonessentialTasks) {
    DefaultPodSpec.Builder podBuilder = DefaultPodSpec.newBuilder("executor-uri")
            .type("server")
            .count(3);
    for (int i = 0; i < essentialTasks; ++i) {
        podBuilder.addTask(buildTaskTemplate(String.format("essential%d", i))
                .goalState(GoalState.RUNNING)
                .build());
    }
    for (int i = 0; i < nonessentialTasks; ++i) {
        podBuilder.addTask(buildTaskTemplate(String.format("nonessential%d", i))
                .goalState(GoalState.RUNNING)
                .essential(false)
                .build());
    }
    // should be ignored for recovery purposes:
    podBuilder.addTask(buildTaskTemplate("once").goalState(GoalState.ONCE).build());

    ServiceSpec serviceSpec = DefaultServiceSpec.newBuilder()
            .name("svc")
            .addPod(podBuilder.build())
            .build();
    ConfigStore<ServiceSpec> configStore = new ConfigStore<>(
            DefaultServiceSpec.getConfigurationFactory(serviceSpec), new MemPersister());
    try {
        configStore.setTargetConfig(configStore.store(serviceSpec));
    } catch (ConfigStoreException e) {
        throw new IllegalStateException(e);
    }
    return configStore;
}
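As an illustration of how this fixture pairs with getPodRequirements above, a hypothetical JUnit test (not quoted from TaskUtilsTest): with no failed tasks supplied, no requirements should be produced.

@Test
public void testNoFailedTasksYieldsNoRequirements() {
    // Pod layout with two essential tasks, one nonessential task, and the ignored 'once' task.
    ConfigStore<ServiceSpec> configStore = buildPodLayout(2, 1);
    List<PodInstanceRequirement> requirements = TaskUtils.getPodRequirements(
            configStore, Collections.emptyList(), Collections.emptyList());
    Assert.assertTrue(requirements.isEmpty());
}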
Use of com.mesosphere.sdk.state.ConfigStore in project dcos-commons by mesosphere.
From class CassandraRecoveryPlanOverriderTest, method beforeEach.
@Before
public void beforeEach() throws Exception {
    stateStore = new StateStore(new MemPersister());
    ConfigStore<ServiceSpec> configStore = new ConfigStore<>(
            DefaultServiceSpec.getConfigurationFactory(serviceSpec), new MemPersister());
    UUID targetConfig = configStore.store(serviceSpec);
    configStore.setTargetConfig(targetConfig);
    planOverrider = new CassandraRecoveryPlanOverrider(stateStore, getReplacePlan(configStore));
}
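For reference, a small sketch of reading the stored target configuration back out of the ConfigStore created above, e.g. inside a test body that propagates ConfigStoreException. getTargetConfig() and fetch() are the counterpart accessors to setTargetConfig() and store(); treat the exact signatures as an assumption here.

// Sketch: resolve the target UUID, then fetch the ServiceSpec it points at.
UUID target = configStore.getTargetConfig();
ServiceSpec storedSpec = configStore.fetch(target);
Assert.assertEquals(serviceSpec.getName(), storedSpec.getName());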
Use of com.mesosphere.sdk.state.ConfigStore in project dcos-commons by mesosphere.
From class SchedulerBuilder, method build.
/**
 * Creates a new Mesos scheduler instance with the provided values or their defaults.
 *
 * @return a new Mesos scheduler instance to be registered for this run
 * @throws IllegalArgumentException if validating the provided configuration failed
 */
public AbstractScheduler build() {
    // If region awareness is enabled (via java bit or via env) and the cluster supports it, update the ServiceSpec
    // to include region constraints.
    final ServiceSpec serviceSpec;
    if (Capabilities.getInstance().supportsDomains()) {
        // This cluster supports domains. We need to update pod placement with region configuration, for any pods
        // that weren't already configured by the developer (expected to be rare, but possible).

        // Whether region awareness is enabled for the service (via env or via java).
        boolean regionAwarenessEnabled = isRegionAwarenessEnabled();
        // A region to target, as specified in env, if any.
        Optional<String> schedulerRegion = schedulerConfig.getSchedulerRegion();

        // Target the specified region, or use the local region.
        // Local region is determined at framework registration, see IsLocalRegionRule.setLocalDomain().
        final PlacementRule placementRuleToAdd;
        if (regionAwarenessEnabled && schedulerRegion.isPresent()) {
            logger.info("Updating pods with placement rule for region={}", schedulerRegion.get());
            placementRuleToAdd = RegionRuleFactory.getInstance().require(ExactMatcher.create(schedulerRegion.get()));
        } else {
            logger.info("Updating pods with local region placement rule: region awareness={}, scheduler region={}",
                    regionAwarenessEnabled, schedulerRegion);
            placementRuleToAdd = new IsLocalRegionRule();
        }

        List<PodSpec> updatedPodSpecs = new ArrayList<>();
        for (PodSpec podSpec : originalServiceSpec.getPods()) {
            if (PlacementUtils.placementRuleReferencesRegion(podSpec)) {
                // Pod already has a region constraint (specified by developer?). Leave it as-is.
                logger.info("Pod {} already has a region rule defined, leaving as-is", podSpec.getType());
                updatedPodSpecs.add(podSpec);
            } else {
                // Combine the new rule with any existing rules:
                PlacementRule mergedRule = podSpec.getPlacementRule().isPresent()
                        ? new AndRule(placementRuleToAdd, podSpec.getPlacementRule().get())
                        : placementRuleToAdd;
                updatedPodSpecs.add(DefaultPodSpec.newBuilder(podSpec).placementRule(mergedRule).build());
            }
        }

        DefaultServiceSpec.Builder builder = DefaultServiceSpec.newBuilder(originalServiceSpec).pods(updatedPodSpecs);
        if (schedulerRegion.isPresent()) {
            builder.region(schedulerRegion.get());
        }
        serviceSpec = builder.build();
    } else {
        serviceSpec = originalServiceSpec;
    }

    // NOTE: we specifically avoid accessing the provided persister before build() is called.
    // This is to ensure that upstream has a chance to e.g. lock it via CuratorLocker.

    // When multi-service is enabled, state/configs are stored within a namespace matching the service name.
    // Otherwise use an empty namespace, which indicates single-service mode.
    String namespaceStr = namespace.orElse("");
    FrameworkStore frameworkStore = new FrameworkStore(persister);
    StateStore stateStore = new StateStore(persister, namespaceStr);
    ConfigStore<ServiceSpec> configStore = new ConfigStore<>(
            DefaultServiceSpec.getConfigurationFactory(serviceSpec), persister, namespaceStr);

    if (schedulerConfig.isUninstallEnabled()) {
        // Uninstall mode. UninstallScheduler will internally flag the stateStore with an uninstall bit if needed.
        return new UninstallScheduler(serviceSpec, frameworkStore, stateStore, configStore,
                FrameworkConfig.fromServiceSpec(serviceSpec), schedulerConfig, Optional.ofNullable(planCustomizer));
    }

    if (StateStoreUtils.isUninstalling(stateStore)) {
        // SERVICE UNINSTALL: The service has an uninstall bit set in its (potentially namespaced) state store.
        if (namespace.isPresent()) {
            // Launch the service in uninstall mode so that it can continue with whatever may be left.
            return new UninstallScheduler(serviceSpec, frameworkStore, stateStore, configStore,
                    FrameworkConfig.fromServiceSpec(serviceSpec), schedulerConfig, Optional.ofNullable(planCustomizer));
        } else {
            // This is an illegal state for a single-service scheduler. SchedulerConfig's uninstall bit should have
            // also been enabled. If we got here, it means that the user likely tampered with the scheduler env
            // after having previously triggered an uninstall, which had set the bit in stateStore. Just exit,
            // because the service is likely now in an inconsistent state resulting from the incomplete uninstall.
            logger.error("Service has been previously told to uninstall, this cannot be reversed. "
                    + "Reenable the uninstall flag to complete the process.");
            SchedulerUtils.hardExit(SchedulerErrorCode.SCHEDULER_ALREADY_UNINSTALLING);
        }
    }

    try {
        return getDefaultScheduler(serviceSpec, frameworkStore, stateStore, configStore);
    } catch (ConfigStoreException e) {
        logger.error("Failed to construct scheduler.", e);
        SchedulerUtils.hardExit(SchedulerErrorCode.INITIALIZATION_FAILURE);
        // This is so the compiler doesn't complain. The scheduler is going down anyway.
        return null;
    }
}
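To place build() in context, a hypothetical sketch of the wiring a service's entry point performs. The factory names (SchedulerConfig.fromEnv, DefaultScheduler.newBuilder) are assumptions that vary across SDK versions, so treat this as an outline rather than the project's actual main().

// Hypothetical wiring: derive SchedulerConfig from the environment, obtain a SchedulerBuilder
// for the service's ServiceSpec, and call build() to get the scheduler to register with Mesos.
static AbstractScheduler buildScheduler(ServiceSpec serviceSpec) throws Exception {
    SchedulerConfig schedulerConfig = SchedulerConfig.fromEnv();
    return DefaultScheduler.newBuilder(serviceSpec, schedulerConfig).build();
}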