use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.
the class AutoscaleManager method getOrderedPipelines.
/**
 * Selects the pods that still belong to an active pipeline run and orders them for scheduling.
 *
 * <p>A pod is dropped from the result when its run is in a final status or is paused. When
 * processing a pod raises an {@link IllegalArgumentException} (treated here as a failure to
 * load the matching run), the pod is deleted and the node-up task of that run is removed.
 *
 * <p>Unless {@code CLUSTER_RANDOM_SCHEDULING} is enabled, the remaining pods are ordered by the
 * parent id recorded by {@code getParentId} (ascending). Ties, and all pods when random
 * scheduling is enabled, are ordered by the {@code "priority-score"} run parameter
 * (default {@code 0}), highest score first.
 *
 * @param items  pods to examine; each is expected to carry the run id label
 * @param client Kubernetes client used to delete pods that have no matching run
 * @return the ordered pods, or an empty list when no pod qualified
 */
private List<Pod> getOrderedPipelines(List<Pod> items, KubernetesClient client) {
    // Read the preference exactly once. The comparator below dereferences parentIds, which is
    // only populated when grouping is enabled, so the loop and the sort must agree on the value;
    // re-reading it per comparison could also make the ordering inconsistent mid-sort.
    final boolean groupByParent =
            !preferenceManager.getPreference(SystemPreferences.CLUSTER_RANDOM_SCHEDULING);

    Map<Pod, Long> parentIds = new HashMap<>();
    Map<Pod, Long> priorityScore = new HashMap<>();
    List<Pod> checkedPods = new ArrayList<>();
    for (Pod pod : items) {
        // NOTE(review): a missing or non-numeric run id label makes parseLong throw outside the
        // try block below, which aborts the whole pass — confirm this is intended.
        Long runId = Long.parseLong(pod.getMetadata().getLabels().get(KubernetesConstants.RUN_ID_LABEL));
        try {
            PipelineRun run = pipelineRunManager.loadPipelineRun(runId);
            if (run.getStatus().isFinal()) {
                LOGGER.debug("Pipeline run {} is already in final status", runId);
                continue;
            }
            if (run.getStatus() == TaskStatus.PAUSED) {
                LOGGER.debug("Pipeline run {} is paused", runId);
                continue;
            }
            List<PipelineRunParameter> runParameters = run.getPipelineRunParameters();
            if (groupByParent) {
                getParentId(parentIds, pod, runParameters);
            }
            checkedPods.add(pod);
            priorityScore.put(pod, getParameterValue(runParameters, "priority-score", 0L));
        } catch (IllegalArgumentException e) {
            LOGGER.error("Failed to load pipeline run {}.", runId);
            LOGGER.error(e.getMessage(), e);
            // If we failed to load a matching pipeline run for a pod, we delete it here, since
            // PodMonitor won't process it either
            deletePod(pod, client);
            removeNodeUpTask(runId);
        }
    }

    if (checkedPods.isEmpty()) {
        return Collections.emptyList();
    }
    checkedPods.sort((p1, p2) -> {
        if (groupByParent) {
            int byParent = Long.compare(parentIds.get(p1), parentIds.get(p2));
            if (byParent != 0) {
                return byParent;
            }
        }
        // Arguments are swapped on purpose: higher priority score sorts first.
        return Long.compare(priorityScore.get(p2), priorityScore.get(p1));
    });
    return checkedPods;
}
use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.
the class NodesManager method terminateNode.
/**
 * Terminates the node with the given name and returns its description.
 *
 * <p>The call fails the protection assertion when {@code isNodeProtected} reports the node as
 * protected. If the node has an associated pipeline run, that run is asked to move to
 * {@code STOPPED} unless it is already final. If the node exposes an address whose type is
 * {@code internalip} (case-insensitive), the terminate command is built and executed for it;
 * otherwise no command is run.
 *
 * @param name name of the node to terminate
 * @return the node instance that was looked up by {@code name}
 */
public NodeInstance terminateNode(String name) {
    final NodeInstance node = getNode(name);
    final boolean terminationAllowed = !isNodeProtected(node);
    Assert.isTrue(terminationAllowed,
            messageHelper.getMessage(MessageConstants.ERROR_NODE_IS_PROTECTED, name));

    final PipelineRun attachedRun = node.getPipelineRun();
    if (attachedRun != null) {
        pipelineRunManager.updatePipelineStatusIfNotFinal(attachedRun.getId(), TaskStatus.STOPPED, null);
    }

    node.getAddresses().stream()
            // constant-first comparison also rejects addresses with a null type
            .filter(address -> "internalip".equalsIgnoreCase(address.getType()))
            .findAny()
            .ifPresent(address -> {
                String command = buildCommand(address.getAddress(), node.getName(), nodeTerminateScript);
                LOGGER.debug("Terminating node. Command: {}.", command);
                cmdExecutor.executeCommand(command);
            });
    return node;
}
use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.
the class ResourceMonitoringManager method processRuns.
/**
 * Compares the CPU usage of each running pipeline with the idle threshold and collects the
 * runs whose state changed as a result.
 *
 * <p>Runs flagged as non-pause and runs with no CPU metric are ignored. For the others the
 * metric is divided by {@code MILLIS} and by the vCPU count of the run's instance type (a
 * single-vCPU type is used when the node type is not in {@code instanceTypeMap}). A usage
 * below {@code idleCpuLevel} is delegated to {@code processIdleRun}; otherwise a previously
 * recorded idle notification time is cleared. Collected idle notifications are sent once,
 * after all runs were examined.
 *
 * @param running       runs to examine; keys are looked up in {@code cpuMetrics}
 * @param cpuMetrics    CPU metric per key of {@code running}
 * @param idleCpuLevel  usage threshold below which a run is treated as idle
 * @param actionTimeout passed through to {@code processIdleRun}
 * @param action        passed through to {@code processIdleRun}
 * @return runs that have to be updated by the caller
 */
private List<PipelineRun> processRuns(Map<String, PipelineRun> running, Map<String, Double> cpuMetrics, double idleCpuLevel, int actionTimeout, IdleRunAction action) {
    final List<PipelineRun> changedRuns = new ArrayList<>(running.size());
    final List<Pair<PipelineRun, Double>> idleNotifications = new ArrayList<>(running.size());

    for (Map.Entry<String, PipelineRun> candidate : running.entrySet()) {
        final PipelineRun pipelineRun = candidate.getValue();
        if (pipelineRun.isNonPause()) {
            continue;
        }
        final Double rawCpu = cpuMetrics.get(candidate.getKey());
        if (rawCpu == null) {
            continue;
        }

        final InstanceType instanceType = instanceTypeMap.getOrDefault(
                pipelineRun.getInstance().getNodeType(),
                InstanceType.builder().vCPU(1).build());
        final double usagePerCpu = rawCpu / MILLIS / instanceType.getVCPU();
        final boolean belowIdleLevel = Precision.compareTo(usagePerCpu, idleCpuLevel, ONE_THOUSANDTH) < 0;

        if (belowIdleLevel) {
            processIdleRun(pipelineRun, actionTimeout, action, idleNotifications, changedRuns, usagePerCpu);
            continue;
        }
        if (pipelineRun.getLastIdleNotificationTime() != null) {
            // The run is busy again, so the pending idle timeout is reset.
            pipelineRun.setLastIdleNotificationTime(null);
            changedRuns.add(pipelineRun);
        }
    }

    notificationManager.notifyIdleRuns(idleNotifications, NotificationType.IDLE_RUN);
    return changedRuns;
}
use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.
the class PodMonitor method savePodStatus.
/**
 * Builds a textual pod status for the run and stores it through the run manager.
 *
 * <p>The text starts with the run's current pod status (if any). When {@code pod} is
 * {@code null}, the node-lost marker is appended and stored. Otherwise the reason and exit
 * code of every container that terminated with a non-zero exit code are appended, joined by
 * commas. If the text is still empty nothing is stored. If a node can be resolved (by the
 * run's node name, or else by the run id label) the text is passed through
 * {@code updateStatusWithNodeConditions} before being stored.
 *
 * @param run    run whose pod status is updated
 * @param pod    pod of the run, or {@code null} if it no longer exists
 * @param client Kubernetes client used to look up the node
 */
private void savePodStatus(PipelineRun run, Pod pod, KubernetesClient client) {
    final String previousStatus = run.getPodStatus();
    final StringBuilder status = new StringBuilder(previousStatus == null ? "" : previousStatus);

    if (pod == null) {
        status.append(KubernetesConstants.NODE_LOST);
        pipelineRunManager.updatePodStatus(run.getId(), status.toString());
        return;
    }

    final List<ContainerStatus> containers = pod.getStatus().getContainerStatuses();
    if (!CollectionUtils.isEmpty(containers)) {
        // Only containers that terminated with a non-zero exit code contribute to the status.
        final String failures = containers.stream()
                .map(container -> container.getState())
                .filter(state -> state != null && state.getTerminated() != null)
                .map(state -> state.getTerminated())
                .filter(terminated -> terminated.getExitCode() != 0)
                .map(terminated -> String.format("%s (%s)", terminated.getReason(), terminated.getExitCode()))
                .collect(Collectors.joining(","));
        status.append(failures);
    }
    if (status.length() == 0) {
        return;
    }

    final String nodeName = run.getInstance().getNodeName();
    Node node = null;
    if (!StringUtils.isBlank(nodeName)) {
        node = client.nodes().withName(nodeName).get();
    }
    if (node == null) {
        node = findAvailableNodeByRunIdLabel(client, run.getId().toString());
    }

    if (node == null) {
        pipelineRunManager.updatePodStatus(run.getId(), status.toString());
    } else {
        pipelineRunManager.updatePodStatus(run.getId(), kubernetesManager.updateStatusWithNodeConditions(status, node));
    }
}
use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.
the class PodMonitor method updateStatus.
/**
 * Queries the pods of the runs returned by
 * {@code pipelineRunManager.loadRunningAndTerminatedPipelineRuns()} and adjusts the run
 * statuses according to the pod statuses.
 *
 * <p>Runs whose execution environment is not monitored are skipped (a final-status run only
 * has its terminating flag cleared and saved). Every other run is processed with its own
 * Kubernetes client; any exception raised while processing one run is logged and the loop
 * continues with the next run.
 */
public void updateStatus() {
LOGGER.debug(messageHelper.getMessage(MessageConstants.DEBUG_MONITOR_CHECK_RUNNING));
List<PipelineRun> running = pipelineRunManager.loadRunningAndTerminatedPipelineRuns();
for (PipelineRun run : running) {
// Runs in a non-monitored environment are never checked against Kubernetes here.
if (!run.getExecutionPreferences().getEnvironment().isMonitored()) {
if (run.getStatus().isFinal()) {
run.setTerminating(false);
pipelineRunManager.updatePipelineStatus(run);
}
LOGGER.debug("Skipping run {} in exec environment {}", run.getId(), run.getExecutionPreferences().getEnvironment());
continue;
}
LOGGER.debug("RUN ID {} status {} terminating {}", run.getId(), run.getStatus(), run.isTerminating());
// The client is obtained per run and closed by try-with-resources.
try (KubernetesClient client = kubernetesManager.getKubernetesClient()) {
Pod pod = client.pods().inNamespace(kubeNamespace).withName(run.getPodId()).get();
// check maybe run was already processed with master node
PipelineRun currentRunState = pipelineRunManager.loadPipelineRun(run.getId());
// Pod is gone and the freshly loaded run is already final: finish using the fresh state
// and skip the updatePipelineStatus call at the end of the loop body.
if (pod == null && currentRunState.getStatus().isFinal()) {
LOGGER.debug("Run ID {} is already in final status {}", run.getId(), currentRunState.getStatus());
setRunFinished(currentRunState, pod, client);
continue;
}
// Pod is gone, or the run is flagged as terminating: finish the run.
if (pod == null || run.isTerminating()) {
setRunFinished(run, pod, client);
} else {
PodStatus status = pod.getStatus();
// update pod IP, if it is not set yet
if (StringUtils.isEmpty(run.getPodIP())) {
if (StringUtils.isEmpty(status.getPodIP())) {
// Pod still has no IP: possibly a long initialization, let the notifier decide.
notifyIfExceedsThreshold(run, pod, NotificationType.LONG_INIT);
} else {
run.setPodIP(status.getPodIP());
pipelineRunManager.updatePodIP(run);
}
}
if (status.getPhase().equals(KubernetesConstants.POD_SUCCEEDED_PHASE)) {
// These changes are made on the in-memory run only; they are saved by the
// updatePipelineStatus call below unless checkChildrenPods returns false.
run.setStatus(TaskStatus.SUCCESS);
run.setEndDate(DateUtils.now());
run.setTerminating(false);
// check that all tasks managed to report their statuses
if (!checkChildrenPods(run, client, pod)) {
continue;
}
} else if (status.getPhase().equals(KubernetesConstants.POD_FAILED_PHASE) || (status.getReason() != null && status.getReason().equals(KubernetesConstants.NODE_LOST))) {
// Pod failed or its node was reported lost: finish the run.
setRunFinished(run, pod, client);
} else {
// Pod is in neither the succeeded nor the failed phase: nothing to save, only check
// whether the long-running notification threshold is exceeded.
notifyIfExceedsThreshold(run, pod, NotificationType.LONG_RUNNING);
continue;
}
}
// Reached only by the branches above that did not 'continue'.
pipelineRunManager.updatePipelineStatus(run);
} catch (Exception e) {
// A failure for one run must not stop processing of the remaining runs.
LOGGER.error(e.getMessage(), e);
}
}
LOGGER.debug(messageHelper.getMessage(MessageConstants.DEBUG_MONITOR_CHECK_FINISHED));
}
Aggregations