Search in sources :

Example 41 with PipelineRun

use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.

the class AutoscaleManager method getOrderedPipelines.

/**
 * Selects the pods whose pipeline runs still need scheduling and orders them.
 *
 * <p>A pod is skipped when its run is already in a final status or is paused. A pod whose run
 * cannot be loaded (the load throws {@link IllegalArgumentException}) is deleted and its
 * node-up task is removed.
 *
 * <p>Unless {@code SystemPreferences.CLUSTER_RANDOM_SCHEDULING} is enabled, the remaining pods
 * are ordered by parent id ascending; ties (and all pods when the preference is enabled) are
 * ordered by the {@code priority-score} run parameter descending.
 *
 * @param items  candidate pods; each is expected to carry a numeric
 *               {@code KubernetesConstants.RUN_ID_LABEL} label
 * @param client Kubernetes client used to delete pods that have no matching run
 * @return the ordered pods, or an empty list when nothing is left to schedule
 */
private List<Pod> getOrderedPipelines(List<Pod> items, KubernetesClient client) {
    if (CollectionUtils.isEmpty(items)) {
        return Collections.emptyList();
    }
    // Read the preference exactly once per call: the loop below fills parentIds only when
    // random scheduling is off, and the comparator must make the very same decision for every
    // comparison. Re-reading it could otherwise hit missing parent ids or sort inconsistently
    // if the preference changes in between.
    final boolean randomScheduling =
            preferenceManager.getPreference(SystemPreferences.CLUSTER_RANDOM_SCHEDULING);
    Map<Pod, Long> parentIds = new HashMap<>();
    Map<Pod, Long> priorityScore = new HashMap<>();
    List<Pod> checkedPods = new ArrayList<>();
    for (Pod pod : items) {
        Long runId = Long.parseLong(pod.getMetadata().getLabels().get(KubernetesConstants.RUN_ID_LABEL));
        try {
            PipelineRun run = pipelineRunManager.loadPipelineRun(runId);
            if (run.getStatus().isFinal()) {
                LOGGER.debug("Pipeline run {} is already in final status", runId);
                continue;
            }
            if (run.getStatus() == TaskStatus.PAUSED) {
                LOGGER.debug("Pipeline run {} is paused", runId);
                continue;
            }
            List<PipelineRunParameter> runParameters = run.getPipelineRunParameters();
            if (!randomScheduling) {
                getParentId(parentIds, pod, runParameters);
            }
            checkedPods.add(pod);
            priorityScore.put(pod, getParameterValue(runParameters, "priority-score", 0L));
        } catch (IllegalArgumentException e) {
            LOGGER.error("Failed to load pipeline run {}.", runId);
            LOGGER.error(e.getMessage(), e);
            // If we failed to load a matching pipeline run for a pod, we delete it here, since
            // PodMonitor won't process it either
            deletePod(pod, client);
            removeNodeUpTask(runId);
        }
    }
    if (checkedPods.isEmpty()) {
        return Collections.emptyList();
    }
    checkedPods.sort((p1, p2) -> {
        if (!randomScheduling) {
            // Group by parent first; fall through to the priority score only on a tie.
            int byParent = Long.compare(parentIds.get(p1), parentIds.get(p2));
            if (byParent != 0) {
                return byParent;
            }
        }
        // Higher priority score comes first, hence the swapped arguments.
        return Long.compare(priorityScore.get(p2), priorityScore.get(p1));
    });
    return checkedPods;
}
Also used : PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) Pod(io.fabric8.kubernetes.api.model.Pod) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) PipelineRunParameter(com.epam.pipeline.entity.pipeline.run.parameter.PipelineRunParameter)

Example 42 with PipelineRun

use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.

the class NodesManager method terminateNode.

/**
 * Terminates the node with the given name.
 *
 * <p>Fails the assertion when the node is protected. If a pipeline run is attached to the node,
 * that run is moved to {@code TaskStatus.STOPPED} unless it is already final. When the node
 * exposes an address of type {@code internalip} (case-insensitive), the node terminate script
 * is executed for it.
 *
 * @param name name of the node to terminate
 * @return the node instance that was looked up by {@code name}
 */
public NodeInstance terminateNode(String name) {
    final NodeInstance node = getNode(name);
    Assert.isTrue(!isNodeProtected(node),
            messageHelper.getMessage(MessageConstants.ERROR_NODE_IS_PROTECTED, name));

    final PipelineRun attachedRun = node.getPipelineRun();
    if (attachedRun != null) {
        pipelineRunManager.updatePipelineStatusIfNotFinal(attachedRun.getId(), TaskStatus.STOPPED, null);
    }

    node.getAddresses().stream()
            // literal-first comparison yields false for a null type, same as an explicit null check
            .filter(address -> "internalip".equalsIgnoreCase(address.getType()))
            .findAny()
            .ifPresent(address -> {
                String command = buildCommand(address.getAddress(), node.getName(), nodeTerminateScript);
                LOGGER.debug("Terminating node. Command: {}.", command);
                cmdExecutor.executeCommand(command);
            });
    return node;
}
Also used : PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) Arrays(java.util.Arrays) MessageConstants(com.epam.pipeline.common.MessageConstants) PipelineRunManager(com.epam.pipeline.manager.pipeline.PipelineRunManager) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) FilterPodsRequest(com.epam.pipeline.entity.cluster.FilterPodsRequest) Resource(io.fabric8.kubernetes.client.dsl.Resource) MessageHelper(com.epam.pipeline.common.MessageHelper) Propagation(org.springframework.transaction.annotation.Propagation) Service(org.springframework.stereotype.Service) NodeInstanceAddress(com.epam.pipeline.entity.cluster.NodeInstanceAddress) Map(java.util.Map) ClusterDao(com.epam.pipeline.dao.cluster.ClusterDao) DefaultKubernetesClient(io.fabric8.kubernetes.client.DefaultKubernetesClient) Node(io.fabric8.kubernetes.api.model.Node) MapUtils(org.apache.commons.collections4.MapUtils) Logger(org.slf4j.Logger) DoneableNode(io.fabric8.kubernetes.api.model.DoneableNode) PodInstance(com.epam.pipeline.entity.cluster.PodInstance) Predicate(java.util.function.Predicate) TaskStatus(com.epam.pipeline.entity.pipeline.TaskStatus) HashedMap(org.apache.commons.collections4.map.HashedMap) Collectors(java.util.stream.Collectors) Config(io.fabric8.kubernetes.client.Config) NodeInstance(com.epam.pipeline.entity.cluster.NodeInstance) List(java.util.List) StringUtils(com.amazonaws.util.StringUtils) KubernetesClient(io.fabric8.kubernetes.client.KubernetesClient) PostConstruct(javax.annotation.PostConstruct) CmdExecutor(com.epam.pipeline.manager.CmdExecutor) Optional(java.util.Optional) FilterNodesVO(com.epam.pipeline.controller.vo.FilterNodesVO) Collections(java.util.Collections) Transactional(org.springframework.transaction.annotation.Transactional) 
Assert(org.springframework.util.Assert) NodeInstanceAddress(com.epam.pipeline.entity.cluster.NodeInstanceAddress) NodeInstance(com.epam.pipeline.entity.cluster.NodeInstance)

Example 43 with PipelineRun

use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.

the class ResourceMonitoringManager method processRuns.

/**
 * Checks the CPU usage of the given runs against the idle threshold.
 *
 * <p>Runs flagged as non-pause and runs without a CPU metric are ignored. A run whose usage
 * rate is below {@code idleCpuLevel} is handed to {@code processIdleRun}; a run above the
 * threshold that still carries a last-idle-notification time has that time cleared. Collected
 * idle runs are passed to the notification manager before returning.
 *
 * @param running       runs to inspect, keyed by the same keys as {@code cpuMetrics}
 * @param cpuMetrics    CPU metric per key
 * @param idleCpuLevel  usage rate below which a run is treated as idle
 * @param actionTimeout timeout forwarded to {@code processIdleRun}
 * @param action        idle action forwarded to {@code processIdleRun}
 * @return runs whose state was changed and must be persisted by the caller
 */
private List<PipelineRun> processRuns(Map<String, PipelineRun> running, Map<String, Double> cpuMetrics, double idleCpuLevel, int actionTimeout, IdleRunAction action) {
    final int capacity = running.size();
    final List<PipelineRun> runsToUpdate = new ArrayList<>(capacity);
    final List<Pair<PipelineRun, Double>> runsToNotify = new ArrayList<>(capacity);

    for (Map.Entry<String, PipelineRun> candidate : running.entrySet()) {
        final PipelineRun run = candidate.getValue();
        if (run.isNonPause()) {
            continue;
        }
        final Double metric = cpuMetrics.get(candidate.getKey());
        if (metric == null) {
            continue;
        }

        final String nodeType = run.getInstance().getNodeType();
        // Unknown node types fall back to a single-vCPU instance type.
        final InstanceType type = instanceTypeMap.getOrDefault(nodeType, InstanceType.builder().vCPU(1).build());
        final double cpuUsageRate = metric / MILLIS / type.getVCPU();
        final boolean belowIdleLevel = Precision.compareTo(cpuUsageRate, idleCpuLevel, ONE_THOUSANDTH) < 0;

        if (belowIdleLevel) {
            processIdleRun(run, actionTimeout, action, runsToNotify, runsToUpdate, cpuUsageRate);
        } else if (run.getLastIdleNotificationTime() != null) {
            // The run is no longer idle, so the pending idle timeout is cleared.
            run.setLastIdleNotificationTime(null);
            runsToUpdate.add(run);
        }
    }

    notificationManager.notifyIdleRuns(runsToNotify, NotificationType.IDLE_RUN);
    return runsToUpdate;
}
Also used : PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) ArrayList(java.util.ArrayList) InstanceType(com.epam.pipeline.entity.cluster.InstanceType) HashMap(java.util.HashMap) Map(java.util.Map) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 44 with PipelineRun

use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.

the class PodMonitor method savePodStatus.

/**
 * Builds and persists the pod status text for a run.
 *
 * <p>The text starts from the run's current pod status. When the pod is missing,
 * {@code KubernetesConstants.NODE_LOST} is appended and saved. Otherwise the reason and exit
 * code of every container that terminated with a non-zero exit code are appended; if the
 * result is empty nothing is saved. When a node can be resolved (by the run's node name, or
 * else by run id label), the saved status is the one produced by
 * {@code kubernetesManager.updateStatusWithNodeConditions}.
 *
 * @param run    run whose pod status is updated
 * @param pod    pod of the run, or {@code null} if it no longer exists
 * @param client Kubernetes client used to look up the node
 */
private void savePodStatus(PipelineRun run, Pod pod, KubernetesClient client) {
    final String previousStatus = run.getPodStatus();
    final StringBuilder status = new StringBuilder(previousStatus == null ? "" : previousStatus);

    if (pod == null) {
        status.append(KubernetesConstants.NODE_LOST);
        pipelineRunManager.updatePodStatus(run.getId(), status.toString());
        return;
    }

    final List<ContainerStatus> containerStatuses = pod.getStatus().getContainerStatuses();
    if (!CollectionUtils.isEmpty(containerStatuses)) {
        // Collect "<reason> (<exit code>)" for every container terminated with a non-zero code.
        final String failedContainers = containerStatuses.stream()
                .map(containerStatus -> containerStatus.getState())
                .filter(state -> state != null && state.getTerminated() != null)
                .map(state -> state.getTerminated())
                .filter(terminated -> terminated.getExitCode() != 0)
                .map(terminated -> String.format("%s (%s)", terminated.getReason(), terminated.getExitCode()))
                .collect(Collectors.joining(","));
        status.append(failedContainers);
    }
    if (StringUtils.isEmpty(status.toString())) {
        return;
    }

    final String nodeName = run.getInstance().getNodeName();
    Node node = null;
    if (!StringUtils.isBlank(nodeName)) {
        node = client.nodes().withName(nodeName).get();
    }
    if (node == null) {
        node = findAvailableNodeByRunIdLabel(client, run.getId().toString());
    }

    if (node != null) {
        pipelineRunManager.updatePodStatus(run.getId(), kubernetesManager.updateStatusWithNodeConditions(status, node));
    } else {
        pipelineRunManager.updatePodStatus(run.getId(), status.toString());
    }
}
Also used : RunLogManager(com.epam.pipeline.manager.pipeline.RunLogManager) MessageConstants(com.epam.pipeline.common.MessageConstants) RestartRunManager(com.epam.pipeline.manager.pipeline.RestartRunManager) Date(java.util.Date) PipelineRunManager(com.epam.pipeline.manager.pipeline.PipelineRunManager) LoggerFactory(org.slf4j.LoggerFactory) ToolManager(com.epam.pipeline.manager.pipeline.ToolManager) SystemPreferences(com.epam.pipeline.manager.preference.SystemPreferences) Autowired(org.springframework.beans.factory.annotation.Autowired) StringUtils(org.apache.commons.lang3.StringUtils) CollectionUtils(org.apache.commons.collections4.CollectionUtils) ExecutionEnvironment(com.epam.pipeline.entity.configuration.ExecutionEnvironment) RunLog(com.epam.pipeline.entity.pipeline.RunLog) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) MessageHelper(com.epam.pipeline.common.MessageHelper) Service(org.springframework.stereotype.Service) Duration(java.time.Duration) NotificationType(com.epam.pipeline.entity.notification.NotificationSettings.NotificationType) ContainerStatus(io.fabric8.kubernetes.api.model.ContainerStatus) ConditionalOnProperty(org.springframework.boot.autoconfigure.condition.ConditionalOnProperty) Node(io.fabric8.kubernetes.api.model.Node) KubernetesClientException(io.fabric8.kubernetes.client.KubernetesClientException) DateUtils(com.epam.pipeline.entity.utils.DateUtils) PodStatus(io.fabric8.kubernetes.api.model.PodStatus) Logger(org.slf4j.Logger) NotificationSettingsManager(com.epam.pipeline.manager.notification.NotificationSettingsManager) RunInstance(com.epam.pipeline.entity.pipeline.RunInstance) TaskStatus(com.epam.pipeline.entity.pipeline.TaskStatus) AbstractSchedulingManager(com.epam.pipeline.manager.scheduling.AbstractSchedulingManager) Pod(io.fabric8.kubernetes.api.model.Pod) PipelineTask(com.epam.pipeline.entity.pipeline.PipelineTask) 
BlockingQueue(java.util.concurrent.BlockingQueue) StateReason(com.amazonaws.services.ec2.model.StateReason) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) List(java.util.List) PodList(io.fabric8.kubernetes.api.model.PodList) NotificationSettings(com.epam.pipeline.entity.notification.NotificationSettings) KubernetesClient(io.fabric8.kubernetes.client.KubernetesClient) PostConstruct(javax.annotation.PostConstruct) NotificationManager(com.epam.pipeline.manager.notification.NotificationManager) Optional(java.util.Optional) ContainerStatus(io.fabric8.kubernetes.api.model.ContainerStatus) Node(io.fabric8.kubernetes.api.model.Node)

Example 45 with PipelineRun

use of com.epam.pipeline.entity.pipeline.PipelineRun in project cloud-pipeline by epam.

the class PodMonitor method updateStatus.

/**
 * Queries the pods of running and terminated pipeline runs and adjusts each run's status to
 * match the state of its pod.
 *
 * <p>For every run returned by {@code loadRunningAndTerminatedPipelineRuns()}:
 * <ul>
 *   <li>runs whose execution environment is not monitored are skipped; if such a run is already
 *       final, its terminating flag is cleared and persisted first;</li>
 *   <li>if the pod is gone and the freshly reloaded run is already final, the run is finished
 *       via {@code setRunFinished} and not persisted again here;</li>
 *   <li>if the pod is gone or the run is terminating, the run is finished and persisted;</li>
 *   <li>otherwise the pod IP is stored when available, and the run is updated according to
 *       the pod phase (succeeded, failed / node lost, or still in progress).</li>
 * </ul>
 *
 * <p>Any exception raised while handling a single run is logged and does not interrupt the
 * processing of the remaining runs.
 */
public void updateStatus() {
    LOGGER.debug(messageHelper.getMessage(MessageConstants.DEBUG_MONITOR_CHECK_RUNNING));
    List<PipelineRun> running = pipelineRunManager.loadRunningAndTerminatedPipelineRuns();
    for (PipelineRun run : running) {
        // Runs in a non-monitored environment are not tracked through pods; only clear the
        // terminating flag of those that have already reached a final status.
        if (!run.getExecutionPreferences().getEnvironment().isMonitored()) {
            if (run.getStatus().isFinal()) {
                run.setTerminating(false);
                pipelineRunManager.updatePipelineStatus(run);
            }
            LOGGER.debug("Skipping run {} in exec environment {}", run.getId(), run.getExecutionPreferences().getEnvironment());
            continue;
        }
        LOGGER.debug("RUN ID {} status {} terminating {}", run.getId(), run.getStatus(), run.isTerminating());
        // A client is obtained per run and closed automatically at the end of the iteration.
        try (KubernetesClient client = kubernetesManager.getKubernetesClient()) {
            Pod pod = client.pods().inNamespace(kubeNamespace).withName(run.getPodId()).get();
            // Reload the run: it may already have been processed (e.g. together with its master
            // node) since the list above was loaded
            PipelineRun currentRunState = pipelineRunManager.loadPipelineRun(run.getId());
            if (pod == null && currentRunState.getStatus().isFinal()) {
                LOGGER.debug("Run ID {} is already in final status {}", run.getId(), currentRunState.getStatus());
                setRunFinished(currentRunState, pod, client);
                continue;
            }
            if (pod == null || run.isTerminating()) {
                setRunFinished(run, pod, client);
            } else {
                PodStatus status = pod.getStatus();
                // update pod IP, if it is not set yet
                if (StringUtils.isEmpty(run.getPodIP())) {
                    if (StringUtils.isEmpty(status.getPodIP())) {
                        // The pod still has no IP: notify if initialization exceeds the threshold
                        notifyIfExceedsThreshold(run, pod, NotificationType.LONG_INIT);
                    } else {
                        run.setPodIP(status.getPodIP());
                        pipelineRunManager.updatePodIP(run);
                    }
                }
                if (status.getPhase().equals(KubernetesConstants.POD_SUCCEEDED_PHASE)) {
                    run.setStatus(TaskStatus.SUCCESS);
                    run.setEndDate(DateUtils.now());
                    run.setTerminating(false);
                    // check that all tasks managed to report their statuses; if not, skip
                    // persisting the run in this pass
                    if (!checkChildrenPods(run, client, pod)) {
                        continue;
                    }
                } else if (status.getPhase().equals(KubernetesConstants.POD_FAILED_PHASE) || (status.getReason() != null && status.getReason().equals(KubernetesConstants.NODE_LOST))) {
                    setRunFinished(run, pod, client);
                } else {
                    // The pod is still in progress: nothing to persist, only check for a long run
                    notifyIfExceedsThreshold(run, pod, NotificationType.LONG_RUNNING);
                    continue;
                }
            }
            pipelineRunManager.updatePipelineStatus(run);
        } catch (Exception e) {
            // Log and move on so that a failure on one run does not block the others.
            LOGGER.error(e.getMessage(), e);
        }
    }
    LOGGER.debug(messageHelper.getMessage(MessageConstants.DEBUG_MONITOR_CHECK_FINISHED));
}
Also used : PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) PodStatus(io.fabric8.kubernetes.api.model.PodStatus) KubernetesClient(io.fabric8.kubernetes.client.KubernetesClient) Pod(io.fabric8.kubernetes.api.model.Pod) KubernetesClientException(io.fabric8.kubernetes.client.KubernetesClientException)

Aggregations

PipelineRun (com.epam.pipeline.entity.pipeline.PipelineRun)88 Test (org.junit.Test)31 Transactional (org.springframework.transaction.annotation.Transactional)29 AbstractSpringTest (com.epam.pipeline.AbstractSpringTest)23 EnvVarsBuilderTest (com.epam.pipeline.manager.execution.EnvVarsBuilderTest)22 ArrayList (java.util.ArrayList)18 RunInstance (com.epam.pipeline.entity.pipeline.RunInstance)17 Date (java.util.Date)15 Pipeline (com.epam.pipeline.entity.pipeline.Pipeline)14 List (java.util.List)14 PipelineConfiguration (com.epam.pipeline.entity.configuration.PipelineConfiguration)13 Map (java.util.Map)11 LocalDateTime (java.time.LocalDateTime)10 Arrays (java.util.Arrays)10 Collectors (java.util.stream.Collectors)10 PipelineUser (com.epam.pipeline.entity.user.PipelineUser)9 Collections (java.util.Collections)9 HashMap (java.util.HashMap)9 MessageHelper (com.epam.pipeline.common.MessageHelper)8 RunSid (com.epam.pipeline.entity.pipeline.run.parameter.RunSid)8