Use of com.mesosphere.sdk.offer.TaskException in project dcos-commons by mesosphere.
The class ClusterState, method getLastLaunchedPod.
/**
 * Returns the last pod to be launched with the specified name.
 *
 * @param podName name+index of the pod, of the form "podtype-#"
 * @return the most recently launched pod with that name, including its tasks
 * @throws IllegalStateException if no such pod was found
 * @see #getLastLaunchedPod()
 */
public LaunchedPod getLastLaunchedPod(String podName) {
    Set<String> allPodNames = new TreeSet<>();
    LaunchedPod foundPod = null;
    for (LaunchedPod pod : createdPods) {
        // Sample pod info from the first task. All tasks should share the same pod info:
        final Protos.TaskInfo task = pod.getTasks().iterator().next();
        final TaskLabelReader reader = new TaskLabelReader(task);
        final String thisPod;
        try {
            thisPod = PodInstance.getName(reader.getType(), reader.getIndex());
        } catch (TaskException e) {
            throw new IllegalStateException("Unable to extract pod from task " + task.getName(), e);
        }
        allPodNames.add(thisPod);
        if (thisPod.equals(podName)) {
            foundPod = pod;
            // Don't break: want to collect the most recent version
        }
    }
    if (foundPod == null) {
        throw new IllegalStateException(String.format(
                "Unable to find pod named %s. Available pods were: %s", podName, allPodNames));
    }
    return foundPod;
}
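The pattern above — reading the pod type and index from a task's labels via TaskLabelReader and converting them into a "podtype-#" name, with TaskException signaling missing or malformed labels — recurs throughout these examples. A minimal sketch of that step in isolation (import paths assumed from the SDK's usual package layout; the helper name is hypothetical):

import java.util.Optional;
import org.apache.mesos.Protos;
import com.mesosphere.sdk.offer.TaskException;
import com.mesosphere.sdk.offer.taskdata.TaskLabelReader;
import com.mesosphere.sdk.specification.PodInstance;

/** Hypothetical helper: derives the "podtype-#" name for a task, or empty if its labels are unusable. */
static Optional<String> tryGetPodName(Protos.TaskInfo task) {
    try {
        TaskLabelReader reader = new TaskLabelReader(task);
        return Optional.of(PodInstance.getName(reader.getType(), reader.getIndex()));
    } catch (TaskException e) {
        // The task's labels did not contain a valid pod type/index.
        return Optional.empty();
    }
}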
Use of com.mesosphere.sdk.offer.TaskException in project dcos-commons by mesosphere.
The class DecommissionPlanFactory, method getPodsToDecommission.
/**
 * Returns a mapping of pods to be decommissioned with affected tasks within those pods. The returned mapping will
 * be in the order that the pods should be decommissioned.
 */
@VisibleForTesting
static SortedMap<PodKey, Collection<Protos.TaskInfo>> getPodsToDecommission(
        ServiceSpec serviceSpec, Collection<Protos.TaskInfo> tasks) {
    // If multiple pod types are being decommissioned, they should be decommissioned in the reverse of the order
    // that they're declared in the ServiceSpec (opposite direction of default deployment)
    List<String> orderedPodTypes =
            serviceSpec.getPods().stream().map(PodSpec::getType).collect(Collectors.toList());
    Collections.reverse(orderedPodTypes);
    Map<String, Integer> expectedPodCounts =
            serviceSpec.getPods().stream().collect(Collectors.toMap(PodSpec::getType, PodSpec::getCount));
    LOGGER.info("Expected pod counts: {}", expectedPodCounts);
    SortedMap<PodKey, Collection<Protos.TaskInfo>> podsToDecommission = new TreeMap<>();
    for (Protos.TaskInfo task : tasks) {
        final PodKey podKey;
        try {
            TaskLabelReader labelReader = new TaskLabelReader(task);
            podKey = new PodKey(labelReader.getType(), labelReader.getIndex(), orderedPodTypes);
        } catch (TaskException e) {
            LOGGER.error(String.format(
                    "Failed to retrieve task metadata. Omitting task from decommission: %s", task.getName()), e);
            continue;
        }
        Integer expectedPodCount = expectedPodCounts.get(podKey.podType);
        if (expectedPodCount == null) {
            LOGGER.info("Scheduling '{}' for decommission: '{}' is not present in service spec: {}",
                    task.getName(), podKey.podType, expectedPodCounts.keySet());
        } else if (podKey.podIndex >= expectedPodCount) {
            LOGGER.info("Scheduling '{}' for decommission: '{}' exceeds desired pod count {}",
                    task.getName(), podKey.getPodName(), expectedPodCount);
        } else {
            // Pod is still within the expected count for its type: leave it alone.
            continue;
        }
        Collection<Protos.TaskInfo> podTasks = podsToDecommission.get(podKey);
        if (podTasks == null) {
            podTasks = new ArrayList<>();
            podsToDecommission.put(podKey, podTasks);
        }
        podTasks.add(task);
    }
    LOGGER.info("Pods scheduled for decommission: {}", podsToDecommission.keySet());
    return podsToDecommission;
}
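One small note on the accumulation step at the end of the loop: the explicit get-or-create of the per-pod task list could, with the same behavior, be condensed using Map.computeIfAbsent (Java 8+):

// Equivalent to the null-check/put/add sequence above, reusing the same
// podsToDecommission map, podKey, and task variables:
podsToDecommission.computeIfAbsent(podKey, k -> new ArrayList<>()).add(task);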
Use of com.mesosphere.sdk.offer.TaskException in project dcos-commons by mesosphere.
The class DefaultStepFactory, method hasReachedGoalState.
@VisibleForTesting
protected boolean hasReachedGoalState(PodInstance podInstance, Protos.TaskInfo taskInfo) throws TaskException {
    GoalState goalState = TaskUtils.getGoalState(podInstance, taskInfo.getName());
    Optional<Protos.TaskStatus> status = stateStore.fetchStatus(taskInfo.getName());
    if (!status.isPresent()) {
        return false;
    }
    if (goalState.equals(GoalState.RUNNING)) {
        switch (status.get().getState()) {
            case TASK_RUNNING:
                if (Capabilities.getInstance().supportsDefaultExecutor()) {
                    return new TaskLabelReader(taskInfo).isReadinessCheckSucceeded(status.get());
                }
                // Custom (non-default) executor: readiness checks on restart aren't supported here,
                // so a running task is considered to have reached its goal state.
                return true;
            default:
                return false;
        }
    } else if (goalState.equals(GoalState.ONCE) || goalState.equals(GoalState.FINISH) || goalState.equals(GoalState.FINISHED)) {
        switch (status.get().getState()) {
            case TASK_FINISHED:
                return true;
            default:
                return false;
        }
    } else {
        throw new TaskException("Unexpected goal state encountered: " + goalState);
    }
}
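hasReachedGoalState propagates TaskException to its caller when the goal state is unrecognized or task metadata cannot be read. A hedged sketch of how a caller might fold that into a simple boolean (podInstance, taskInfo, and logger are hypothetical placeholders, not names from the SDK):

boolean atGoal;
try {
    atGoal = hasReachedGoalState(podInstance, taskInfo);
} catch (TaskException e) {
    // Treat an unreadable or unexpected goal state as "not yet at goal".
    logger.error("Failed to determine goal state for task " + taskInfo.getName(), e);
    atGoal = false;
}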
Use of com.mesosphere.sdk.offer.TaskException in project dcos-commons by mesosphere.
The class TimedFailureMonitor, method hasFailed.
/**
 * Determines whether the given task has failed, by tracking the time delta between the first observed failure and
 * the current time.
 * <p>
 * The first time a task is noticed to be failed, we record that time into a map, keyed by the task's {@link
 * TaskID}. Then, we return true if at least the configured amount of time has passed since then.
 *
 * @param terminatedTask The task that stopped and might be failed
 * @return true if the task has been stopped for at least the configured interval
 */
@Override
public boolean hasFailed(TaskInfo terminatedTask) {
    if (super.hasFailed(terminatedTask)) {
        return true;
    }
    Date taskLaunchedTime;
    synchronized (firstFailureDetected) {
        if (!firstFailureDetected.containsKey(terminatedTask.getTaskId())) {
            firstFailureDetected.put(terminatedTask.getTaskId(), new Date());
        }
        taskLaunchedTime = firstFailureDetected.get(terminatedTask.getTaskId());
    }
    Date taskExpiredTime = new Date(taskLaunchedTime.getTime() + durationUntilFailed.toMillis());
    Date now = new Date();
    log.info("Looking at " + terminatedTask.getName() + " launchHappened at " + taskLaunchedTime
            + ", expires at " + taskExpiredTime + " which is " + now.after(taskExpiredTime));
    if (now.after(taskExpiredTime)) {
        try {
            PodInstance podInstance = TaskUtils.getPodInstance(configStore, terminatedTask);
            FailureUtils.setPermanentlyFailed(stateStore, podInstance);
        } catch (TaskException e) {
            log.error("Failed to get pod instance to mark as failed.", e);
        }
    }
    return super.hasFailed(terminatedTask);
}
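The core of TimedFailureMonitor.hasFailed is a time-window check: record when a terminated task was first observed, then report failure only once the configured grace period has elapsed. The following standalone illustration expresses just that check with java.time types; it is a sketch of the idea, not the SDK's implementation:

import java.time.Duration;
import java.time.Instant;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class FailureWindow {
    private final Map<String, Instant> firstFailureDetected = new ConcurrentHashMap<>();
    private final Duration durationUntilFailed;

    FailureWindow(Duration durationUntilFailed) {
        this.durationUntilFailed = durationUntilFailed;
    }

    /** Returns true once taskId has been observed as terminated for at least the configured duration. */
    boolean hasExpired(String taskId) {
        Instant firstSeen = firstFailureDetected.computeIfAbsent(taskId, id -> Instant.now());
        return Instant.now().isAfter(firstSeen.plus(durationUntilFailed));
    }
}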
Use of com.mesosphere.sdk.offer.TaskException in project dcos-commons by mesosphere.
The class DefaultConfigurationUpdater, method cleanupDuplicateAndUnusedConfigs.
/**
 * Searches for any task configurations which are already identical to the target configuration
 * and updates the embedded config version label in those tasks to point to the current target
 * configuration.
 */
private void cleanupDuplicateAndUnusedConfigs(ServiceSpec targetConfig, UUID targetConfigId) throws ConfigStoreException {
    List<Protos.TaskInfo> taskInfosToUpdate = new ArrayList<>();
    Set<UUID> neededConfigs = new HashSet<>();
    neededConfigs.add(targetConfigId);
    // Search task labels for configs which need to be cleaned up.
    for (Protos.TaskInfo taskInfo : stateStore.fetchTasks()) {
        final UUID taskConfigId;
        try {
            taskConfigId = new TaskLabelReader(taskInfo).getTargetConfiguration();
        } catch (TaskException e) {
            LOGGER.warn(String.format("Unable to extract configuration ID from task %s: %s",
                    taskInfo.getName(), TextFormat.shortDebugString(taskInfo)), e);
            continue;
        }
        if (taskConfigId.equals(targetConfigId)) {
            LOGGER.info("Task {} configuration ID matches target: {}", taskInfo.getName(), taskConfigId);
        } else {
            try {
                final ServiceSpec taskConfig = configStore.fetch(taskConfigId);
                if (!needsConfigUpdate(taskInfo, targetConfig, taskConfig)) {
                    // Task is effectively already on the target config. Update task's config ID to match target,
                    // and allow the duplicate config to be dropped from configStore.
                    TaskInfo.Builder taskBuilder = taskInfo.toBuilder();
                    taskBuilder.setLabels(new TaskLabelWriter(taskInfo).setTargetConfiguration(targetConfigId).toProto());
                    taskInfosToUpdate.add(taskBuilder.build());
                } else {
                    // Config isn't the same as the target. Refrain from updating task, mark config as 'needed'.
                    neededConfigs.add(taskConfigId);
                }
            } catch (Exception e) {
                LOGGER.error(String.format("Failed to fetch configuration %s for task %s",
                        taskConfigId, taskInfo.getName()), e);
                // Cannot read this task's config. Do not delete the config.
                neededConfigs.add(taskConfigId);
            }
        }
    }
    if (!taskInfosToUpdate.isEmpty()) {
        LOGGER.info("Updating {} tasks in StateStore with target configuration ID {}",
                taskInfosToUpdate.size(), targetConfigId);
        stateStore.storeTasks(taskInfosToUpdate);
    }
    Collection<UUID> configIds = configStore.list();
    LOGGER.info("Testing deserialization of {} listed configurations before cleanup:", configIds.size());
    for (UUID configId : configIds) {
        try {
            configStore.fetch(configId);
            LOGGER.info("- {}: OK", configId);
        } catch (Exception e) {
            LOGGER.info("- {}: FAILED, leaving as-is: {}", configId, e.getMessage());
            neededConfigs.add(configId);
        }
    }
    clearConfigsNotListed(neededConfigs);
}
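The key step above is rewriting a task's target-configuration label so that an effectively identical duplicate config can be dropped. In isolation, and assuming the TaskLabelWriter usage shown in the snippet, that rewrite looks like:

// taskInfo and targetConfigId as in the loop above:
Protos.TaskInfo retargeted = taskInfo.toBuilder()
        .setLabels(new TaskLabelWriter(taskInfo).setTargetConfiguration(targetConfigId).toProto())
        .build();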