Search in sources:

Example 1 with CmdExecutionException

use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.

the class AutoscaleManagerTest method testAutoChangeToSpot.

/**
 * Stubs every spot scale-up request for the test run to fail with a
 * {@link CmdExecutionException}, runs the autoscaler twice and verifies the
 * scale-up requests it issued: a spot request after the first pass and
 * non-spot requests after the second.
 */
@Test
public void testAutoChangeToSpot() {
    final String runId = TEST_RUN_ID.toString();

    // Any scale-up request whose instance has spot == true is rejected.
    // The Mockito matchers stay inline: they must be created inside the stubbed call.
    when(clusterManager.scaleUp(
            Mockito.eq(runId),
            argThat(Matchers.hasProperty("spot", Matchers.is(true)))))
        .thenThrow(new CmdExecutionException("", 5, ""));

    // First pass: the spot request is issued and fails.
    autoscaleManager.runAutoscaling();
    verify(clusterManager).scaleUp(
            Mockito.eq(runId),
            argThat(Matchers.hasProperty("spot", Matchers.is(true))));

    // Second pass: the request is expected to be an on-demand one.
    autoscaleManager.runAutoscaling();
    verify(clusterManager, times(2)).scaleUp(
            Mockito.eq(runId),
            argThat(Matchers.hasProperty("spot", Matchers.is(false))));
}
Also used : CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException) Test(org.junit.Test)

Example 2 with CmdExecutionException

use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.

the class CmdExecutor method executeCommand.

/**
 * Runs {@code command} as a child process and waits for it to finish.
 *
 * <p>The process's standard output and standard error are each drained on a
 * dedicated thread via {@code readOutputStream}, so the child cannot block on
 * a full pipe while this method waits for it.
 *
 * @param command the command line passed to {@link Runtime#exec(String, String[], File)}
 * @param envVars environment passed to {@code exec}; see its contract for {@code null}
 * @param context working directory passed to {@code exec}; see its contract for {@code null}
 * @param silent  when {@code true}, the captured error output is not logged on a
 *                non-zero exit code (the exception is still thrown)
 * @return the contents of the standard-output buffer once the process has exited
 *         with code {@code 0}
 * @throws CmdExecutionException if the process cannot be started, exits with a
 *         non-zero code, or the waiting thread is interrupted
 */
public String executeCommand(String command, String[] envVars, File context, boolean silent) {
    final StringBuilder output = new StringBuilder();
    final StringBuilder errors = new StringBuilder();

    // Start the process on its own so the handle is available for clean-up below.
    final Process process;
    try {
        process = Runtime.getRuntime().exec(command, envVars, context);
    } catch (IOException e) {
        throw new CmdExecutionException(command, e);
    }

    // NOTE(review): the readers decode with the platform default charset - confirm that is intended.
    final Thread stdReader = new Thread(
        () -> readOutputStream(command, output, new InputStreamReader(process.getInputStream())));
    final Thread errReader = new Thread(
        () -> readOutputStream(command, errors, new InputStreamReader(process.getErrorStream())));
    stdReader.start();
    errReader.start();

    final int exitCode;
    try {
        exitCode = process.waitFor();
        // Make sure both buffers are complete before they are read.
        stdReader.join();
        errReader.join();
    } catch (InterruptedException e) {
        // Do not leave an orphaned child process (and reader threads blocked on its
        // pipes) behind when the caller gives up waiting.
        process.destroy();
        Thread.currentThread().interrupt();
        throw new CmdExecutionException(command, e);
    }

    if (exitCode != 0) {
        if (!silent) {
            LOGGER.error("Command '{}' err output: {}.", command, errors.toString());
        }
        throw new CmdExecutionException(command, exitCode, errors.toString());
    }
    return output.toString();
}
Also used : InputStreamReader(java.io.InputStreamReader) CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException) IOException(java.io.IOException)

Example 3 with CmdExecutionException

use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.

the class AutoscaleManager method processPod.

/**
 * Handles one unscheduled pod: decides whether the run it belongs to should wait,
 * be marked as failed, get an existing free node reassigned, or get a new node created.
 *
 * <p>The steps are, in order:
 * <ol>
 *   <li>skip if a node-up task for the run is in progress or a node with the run ID exists;</li>
 *   <li>fail the run if the node-up retry limit has been reached;</li>
 *   <li>if {@code clusterManager.describeInstance} reports an instance with a node ID,
 *       create the node for the run from it;</li>
 *   <li>try to reassign a free node whose instance matches the requirements;</li>
 *   <li>enforce the maximum cluster size, optionally scaling down one free node;</li>
 *   <li>enforce the maximum number of concurrent node-up tasks;</li>
 *   <li>otherwise register the run as scheduled and create a node for it.</li>
 * </ol>
 *
 * <p>{@link GitClientException}, {@link CmdExecutionException} and
 * {@link IllegalArgumentException} raised from step 3 onwards are logged and not rethrown.
 *
 * @param pod             the pod to process; its {@code RUN_ID_LABEL} label supplies the run ID
 * @param client          Kubernetes client used for node availability and cluster size queries
 * @param scheduledRuns   receives the run ID when a node is reassigned or a new node is requested
 * @param tasks           passed through to {@code createNodeForRun}
 * @param allPods         IDs compared against node IDs to find free nodes
 *                        (presumably run IDs of existing pods - verify against caller)
 * @param nodes           node IDs (compared against run IDs) currently present
 * @param reassignedNodes node IDs already taken during this pass; updated when a node is
 *                        reassigned or selected for scale-down
 */
private void processPod(Pod pod, KubernetesClient client, Set<String> scheduledRuns, List<CompletableFuture<Void>> tasks, Set<String> allPods, Set<String> nodes, Set<String> reassignedNodes) {
    LOGGER.debug("Found an unscheduled pod: {}.", pod.getMetadata().getName());
    Map<String, String> labels = pod.getMetadata().getLabels();
    String runId = labels.get(KubernetesConstants.RUN_ID_LABEL);
    // NOTE(review): this parse is outside the try block below, so a missing or non-numeric
    // label surfaces as a NumberFormatException to the caller.
    long longId = Long.parseLong(runId);
    if (nodeUpTaskInProgress.contains(longId)) {
        LOGGER.debug("Nodeup task for ID {} is already in progress.", runId);
        return;
    }
    // Check whether node with required RunID is available
    if (nodes.contains(runId)) {
        LOGGER.debug("Node with required ID {} already exists.", runId);
        return;
    }
    // check max nodeup retry count
    // TODO: should we lock here?
    int retryCount = nodeUpAttempts.getOrDefault(longId, 0);
    int nodeUpRetryCount = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_RETRY_COUNT);
    if (retryCount >= nodeUpRetryCount) {
        // Retry budget exhausted: mark the run as failed (unless already final) and drop its node-up task.
        LOGGER.debug("Exceeded max nodeup attempts ({}) for run ID {}. Setting run status 'FAILURE'.", retryCount, runId);
        pipelineRunManager.updatePipelineStatusIfNotFinal(longId, TaskStatus.FAILURE, new Date());
        removeNodeUpTask(longId);
        return;
    }
    try {
        RunInstance requiredInstance = getNewRunInstance(runId);
        // check whether aws instance already exists
        RunInstance awsInstance = clusterManager.describeInstance(runId, requiredInstance);
        if (awsInstance != null && awsInstance.getNodeId() != null) {
            LOGGER.debug("Found {} instance for run ID {}.", awsInstance.getNodeId(), runId);
            // Note: on this path the run ID is not added to scheduledRuns.
            createNodeForRun(tasks, runId, requiredInstance);
            return;
        }
        // A node is considered free when its ID is not among allPods, it has not been
        // reassigned already in this pass, and isNodeAvailable accepts it.
        List<String> freeNodes = nodes.stream().filter(nodeId -> !allPods.contains(nodeId) && !reassignedNodes.contains(nodeId) && isNodeAvailable(client, nodeId)).collect(Collectors.toList());
        LOGGER.debug("Found {} free nodes.", freeNodes.size());
        // Try to reassign one of idle nodes
        for (String previousId : freeNodes) {
            LOGGER.debug("Found free node ID {}.", previousId);
            RunInstance previousInstance = getPreviousRunInstance(previousId);
            if (clusterManager.requirementsMatch(requiredInstance, previousInstance)) {
                LOGGER.debug("Reassigning node ID {} to run {}.", previousId, runId);
                boolean successfullyReassigned = clusterManager.reassignNode(previousId, runId);
                if (successfullyReassigned) {
                    // The run takes over the previous instance; record it and stop processing this pod.
                    scheduledRuns.add(runId);
                    pipelineRunManager.updateRunInstance(longId, previousInstance);
                    reassignedNodes.add(previousId);
                    return;
                }
                // A failed reassignment falls through to the next free node.
            }
        }
        // Check max cluster capacity
        int currentClusterSize = getCurrentClusterSize(client);
        NodeList nodeList = getAvailableNodes(client);
        Integer maxClusterSize = preferenceManager.getPreference(SystemPreferences.CLUSTER_MAX_SIZE);
        if (currentClusterSize > maxClusterSize) {
            LOGGER.debug("Exceeded maximum cluster size {} - current size {}.", maxClusterSize, currentClusterSize);
            return;
        }
        // At exactly the limit: if the preference allows it, free a slot by scaling down
        // one free node; otherwise leave the run pending.
        if (currentClusterSize == maxClusterSize && preferenceManager.getPreference(SystemPreferences.CLUSTER_KILL_NOT_MATCHING_NODES)) {
            LOGGER.debug("Current cluster size {} has reached limit {}. Checking free nodes.", currentClusterSize, maxClusterSize);
            List<String> nonMatchingFreeNodes = freeNodes.stream().filter(id -> !reassignedNodes.contains(id)).collect(Collectors.toList());
            if (!CollectionUtils.isEmpty(nonMatchingFreeNodes)) {
                String nodeId = nonMatchingFreeNodes.get(0);
                // to remove node from free
                reassignedNodes.add(nodeId);
                LOGGER.debug("Scaling down unused node {}.", nodeId);
                clusterManager.scaleDown(nodeId);
                // No return here: execution continues to the node-up checks below.
            } else {
                LOGGER.debug("Exceeded maximum cluster size {}.", nodeList.getItems().size() + nodeUpTaskInProgress.size());
                LOGGER.debug("Leaving pending run {}.", runId);
                return;
            }
        }
        // Limit the number of node-up tasks that are in progress at the same time.
        int nodeUpTasksSize = nodeUpTaskInProgress.size();
        int maxNodeUpThreads = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_MAX_THREADS);
        if (nodeUpTasksSize >= maxNodeUpThreads) {
            LOGGER.debug("Exceeded maximum node up tasks queue size {}.", nodeUpTasksSize);
            return;
        }
        scheduledRuns.add(runId);
        createNodeForRun(tasks, runId, requiredInstance);
    } catch (GitClientException | CmdExecutionException | IllegalArgumentException e) {
        // Failures are logged only; the exception is not propagated to the caller.
        LOGGER.error("Failed to create node for run {}.", runId);
        LOGGER.error("Failed to get pipeline configuration: " + e.getMessage(), e);
    }
}
Also used : GitClientException(com.epam.pipeline.exception.git.GitClientException) Date(java.util.Date) PipelineRunManager(com.epam.pipeline.manager.pipeline.PipelineRunManager) LoggerFactory(org.slf4j.LoggerFactory) SystemPreferences(com.epam.pipeline.manager.preference.SystemPreferences) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CollectionUtils(org.apache.commons.collections4.CollectionUtils) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) HashSet(java.util.HashSet) PipelineRun(com.epam.pipeline.entity.pipeline.PipelineRun) ParallelExecutorService(com.epam.pipeline.manager.parallel.ParallelExecutorService) Service(org.springframework.stereotype.Service) Duration(java.time.Duration) Map(java.util.Map) ConditionalOnProperty(org.springframework.boot.autoconfigure.condition.ConditionalOnProperty) PipelineConfiguration(com.epam.pipeline.entity.configuration.PipelineConfiguration) Node(io.fabric8.kubernetes.api.model.Node) PodCondition(io.fabric8.kubernetes.api.model.PodCondition) KubernetesClientException(io.fabric8.kubernetes.client.KubernetesClientException) NodeCondition(io.fabric8.kubernetes.api.model.NodeCondition) PreferenceManager(com.epam.pipeline.manager.preference.PreferenceManager) Logger(org.slf4j.Logger) PipelineRunParameter(com.epam.pipeline.entity.pipeline.run.parameter.PipelineRunParameter) RunInstance(com.epam.pipeline.entity.pipeline.RunInstance) TaskStatus(com.epam.pipeline.entity.pipeline.TaskStatus) AbstractSchedulingManager(com.epam.pipeline.manager.scheduling.AbstractSchedulingManager) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pod(io.fabric8.kubernetes.api.model.Pod) Set(java.util.Set) Instant(java.time.Instant) HasMetadata(io.fabric8.kubernetes.api.model.HasMetadata) Collectors(java.util.stream.Collectors) NodeList(io.fabric8.kubernetes.api.model.NodeList) 
Objects(java.util.Objects) Config(io.fabric8.kubernetes.client.Config) List(java.util.List) CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException) PodList(io.fabric8.kubernetes.api.model.PodList) KubernetesClient(io.fabric8.kubernetes.client.KubernetesClient) NumberUtils(org.apache.commons.lang3.math.NumberUtils) PostConstruct(javax.annotation.PostConstruct) Optional(java.util.Optional) Collections(java.util.Collections) NodeList(io.fabric8.kubernetes.api.model.NodeList) GitClientException(com.epam.pipeline.exception.git.GitClientException) RunInstance(com.epam.pipeline.entity.pipeline.RunInstance) Date(java.util.Date) CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException)

Example 4 with CmdExecutionException

use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.

the class NFSStorageProvider method mount.

/**
 * Returns the local directory for {@code dataStorage}, mounting the NFS share first
 * if its mount directory does not exist yet.
 *
 * <p>The mount directory is resolved under {@code rootMountPoint}. When it is missing
 * it is created and the mount command is executed; if that command fails the
 * directory is deleted again before the error is reported.
 *
 * @param dataStorage the NFS storage whose path determines the mount directory,
 *                    the NFS root path and the returned sub-directory
 * @return the storage directory inside the mount directory
 * @throws DataStorageException if the mount command fails or an {@link IOException}
 *         occurs while cleaning up after a failed mount
 * @throws IllegalArgumentException if the mount directory cannot be created
 *         (presumably raised by {@code Assert.isTrue} - confirm against the Assert in use)
 */
private synchronized File mount(NFSDataStorage dataStorage) {
    File mntDir = Paths.get(rootMountPoint, getMountDirName(dataStorage.getPath())).toFile();
    try {
        if (!mntDir.exists()) {
            Assert.isTrue(mntDir.mkdirs(), messageHelper.getMessage(MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT_DIRECTORY_NOT_CREATED));
            String rootNfsPath = getNfsRootPath(dataStorage.getPath());
            String mountOptions = String.format(DEFAULT_NFS_OPTIONS_PATTERN, rsize, wsize);
            String mountCmd = String.format(NFS_MOUNT_CMD_PATTERN, mountOptions, rootNfsPath, mntDir.getAbsolutePath());
            try {
                cmdExecutor.executeCommand(mountCmd);
            } catch (CmdExecutionException e) {
                // Remove the freshly created directory so that the next call retries the mount
                // instead of treating the empty directory as an existing mount.
                FileUtils.deleteDirectory(mntDir);
                LOGGER.error(messageHelper.getMessage(MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT_2, mountCmd, e.getMessage()));
                throw new DataStorageException(messageHelper.getMessage(MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT, dataStorage.getName(), dataStorage.getPath()), e);
            }
        }
    } catch (IOException e) {
        // Resolve the message once: feeding an already resolved text back into getMessage
        // would use it as a message code.
        throw new DataStorageException(messageHelper.getMessage(MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT, dataStorage.getName(), dataStorage.getPath()), e);
    }
    String storageName = getStorageName(dataStorage.getPath());
    return new File(mntDir, storageName);
}
Also used : DataStorageException(com.epam.pipeline.entity.datastorage.DataStorageException) CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException) IOException(java.io.IOException) DataStorageFile(com.epam.pipeline.entity.datastorage.DataStorageFile) File(java.io.File)

Example 5 with CmdExecutionException

use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.

the class DockerContainerOperationManager method commitContainer.

/**
 * Commits the container of {@code run} as a new image by executing the commit command
 * on the run's node over SSH.
 *
 * <p>Registry credentials are the current user and a freshly issued API token when the
 * registry uses pipeline auth; otherwise the registry's own user name and password
 * ({@code EMPTY} when not set). The image name must contain a tool group, which is
 * loaded from the registry before the command is submitted.
 *
 * @param run            the run whose container is committed
 * @param registry       the target Docker registry
 * @param newImageName   image name, optionally prefixed with the registry path
 * @param clearContainer passed through to the commit command
 * @param stopPipeline   passed through to the commit command
 * @return {@code run}, after its commit status has been set to {@code COMMITTING}
 * @throws CmdExecutionException if the container ID is missing, the command cannot be
 *         submitted, does not finish within the commit timeout, exits with a non-zero
 *         code, or the waiting thread is interrupted; the commit status is set to
 *         {@code FAILURE} first
 */
public PipelineRun commitContainer(PipelineRun run, DockerRegistry registry, String newImageName, boolean clearContainer, boolean stopPipeline) {
    final String containerId = kubernetesManager.getContainerIdFromKubernetesPod(run.getPodId(), run.getDockerImage());
    final String apiToken = authManager.issueTokenForCurrentUser(null).getToken();
    String dockerLogin;
    String dockerPassword;
    // Let's use pipe auth if it's enabled for registry
    if (registry.isPipelineAuth()) {
        dockerLogin = authManager.getAuthorizedUser();
        dockerPassword = apiToken;
    } else {
        dockerLogin = registry.getUserName() == null ? EMPTY : registry.getUserName();
        dockerPassword = registry.getPassword() == null ? EMPTY : registry.getPassword();
    }
    // Strip only the leading "<registry path><delimiter>" prefix; a replace() would also
    // remove the same sequence if it appeared later in the image name.
    final String registryPrefix = registry.getPath() + DELIMITER;
    if (newImageName.startsWith(registryPrefix)) {
        newImageName = newImageName.substring(registryPrefix.length());
    }
    Matcher matcher = GROUP_AND_IMAGE.matcher(newImageName);
    Assert.isTrue(matcher.find(), messageHelper.getMessage(MessageConstants.ERROR_TOOL_GROUP_IS_NOT_PROVIDED, newImageName));
    String toolGroupName = matcher.group(1);
    ToolGroup toolGroup = toolGroupManager.loadByNameOrId(registry.getPath() + DELIMITER + toolGroupName);
    try {
        Assert.notNull(containerId, messageHelper.getMessage(MessageConstants.ERROR_CONTAINER_ID_FOR_RUN_NOT_FOUND, run.getId()));
        String commitContainerCommand = String.format(COMMIT_COMMAND_TEMPLATE, commitRunStarterScriptUrl, preferenceManager.getPreference(SystemPreferences.BASE_API_HOST), apiToken, commitScriptsDistributionsUrl, preferenceManager.getPreference(SystemPreferences.BASE_PIPE_DISTR_URL), run.getId(), containerId, clearContainer, stopPipeline, preferenceManager.getPreference(SystemPreferences.COMMIT_TIMEOUT), registry.getPath(), registry.getId(), toolGroup.getId(), newImageName, dockerLogin, dockerPassword, registry.isPipelineAuth());
        Process sshConnection = submitCommandViaSSH(run.getInstance().getNodeIP(), commitContainerCommand);
        boolean isFinished = sshConnection.waitFor(preferenceManager.getPreference(SystemPreferences.COMMIT_TIMEOUT), TimeUnit.SECONDS);
        // exitValue() is only consulted once the process is known to have finished.
        Assert.state(isFinished && sshConnection.exitValue() == 0, messageHelper.getMessage(MessageConstants.ERROR_RUN_PIPELINES_COMMIT_FAILED, run.getId()));
    } catch (IllegalStateException | IllegalArgumentException | IOException e) {
        // Log with the throwable so the stack trace is not lost.
        LOGGER.error(e.getMessage(), e);
        updatePipelineRunCommitStatus(run, CommitStatus.FAILURE);
        throw new CmdExecutionException(COMMIT_COMMAND_DESCRIPTION, e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        LOGGER.error(e.getMessage(), e);
        updatePipelineRunCommitStatus(run, CommitStatus.FAILURE);
        throw new CmdExecutionException(COMMIT_COMMAND_DESCRIPTION, e);
    }
    updatePipelineRunCommitStatus(run, CommitStatus.COMMITTING);
    return run;
}
Also used : ToolGroup(com.epam.pipeline.entity.pipeline.ToolGroup) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) CmdExecutionException(com.epam.pipeline.exception.CmdExecutionException)

Aggregations

CmdExecutionException (com.epam.pipeline.exception.CmdExecutionException)7 IOException (java.io.IOException)3 PipelineConfiguration (com.epam.pipeline.entity.configuration.PipelineConfiguration)2 PipelineRun (com.epam.pipeline.entity.pipeline.PipelineRun)2 RunInstance (com.epam.pipeline.entity.pipeline.RunInstance)2 TaskStatus (com.epam.pipeline.entity.pipeline.TaskStatus)2 PipelineRunParameter (com.epam.pipeline.entity.pipeline.run.parameter.PipelineRunParameter)2 GitClientException (com.epam.pipeline.exception.git.GitClientException)2 ParallelExecutorService (com.epam.pipeline.manager.parallel.ParallelExecutorService)2 PipelineRunManager (com.epam.pipeline.manager.pipeline.PipelineRunManager)2 PreferenceManager (com.epam.pipeline.manager.preference.PreferenceManager)2 SystemPreferences (com.epam.pipeline.manager.preference.SystemPreferences)2 AbstractSchedulingManager (com.epam.pipeline.manager.scheduling.AbstractSchedulingManager)2 HasMetadata (io.fabric8.kubernetes.api.model.HasMetadata)2 Node (io.fabric8.kubernetes.api.model.Node)2 NodeCondition (io.fabric8.kubernetes.api.model.NodeCondition)2 NodeList (io.fabric8.kubernetes.api.model.NodeList)2 Pod (io.fabric8.kubernetes.api.model.Pod)2 PodCondition (io.fabric8.kubernetes.api.model.PodCondition)2 PodList (io.fabric8.kubernetes.api.model.PodList)2