use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.
the class AutoscaleManagerTest method testAutoChangeToSpot.
@Test
public void testAutoChangeToSpot() {
    when(clusterManager.scaleUp(Mockito.eq(TEST_RUN_ID.toString()),
            argThat(Matchers.hasProperty("spot", Matchers.is(true)))))
            .thenThrow(new CmdExecutionException("", 5, ""));
    // this time spot scheduling should fail
    autoscaleManager.runAutoscaling();
    verify(clusterManager).scaleUp(Mockito.eq(TEST_RUN_ID.toString()),
            argThat(Matchers.hasProperty("spot", Matchers.is(true))));
    // this time it should be an on-demand request
    autoscaleManager.runAutoscaling();
    verify(clusterManager, times(2)).scaleUp(Mockito.eq(TEST_RUN_ID.toString()),
            argThat(Matchers.hasProperty("spot", Matchers.is(false))));
}
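The test relies on the autoscaler catching the CmdExecutionException thrown by the failed spot request and retrying the same run with an on-demand instance on the next cycle. A minimal sketch of that fallback pattern, assuming a hypothetical spotFailedRuns set and that RunInstance exposes its spot flag via a setter (the real AutoscaleManager bookkeeping differs):
// Hypothetical sketch: fall back to on-demand after a failed spot request.
private final Set<String> spotFailedRuns = ConcurrentHashMap.newKeySet();

private void scaleUpForRun(String runId, RunInstance instance) {
    // force an on-demand request if a spot request for this run already failed
    if (spotFailedRuns.contains(runId)) {
        instance.setSpot(false);
    }
    try {
        clusterManager.scaleUp(runId, instance);
    } catch (CmdExecutionException e) {
        // remember the failure so the next autoscaling cycle retries on-demand
        spotFailedRuns.add(runId);
    }
}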
use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.
the class CmdExecutor method executeCommand.
public String executeCommand(String command, String[] envVars, File context, boolean silent) {
    StringBuilder output = new StringBuilder();
    StringBuilder errors = new StringBuilder();
    Process p;
    try {
        p = Runtime.getRuntime().exec(command, envVars, context);
        // drain stdout and stderr on separate threads so the child process
        // cannot block on a full output buffer
        Thread stdReader = new Thread(() ->
                readOutputStream(command, output, new InputStreamReader(p.getInputStream())));
        Thread errReader = new Thread(() ->
                readOutputStream(command, errors, new InputStreamReader(p.getErrorStream())));
        stdReader.start();
        errReader.start();
        int exitCode = p.waitFor();
        stdReader.join();
        errReader.join();
        if (exitCode != 0) {
            if (!silent) {
                LOGGER.error("Command '{}' err output: {}.", command, errors.toString());
            }
            throw new CmdExecutionException(command, exitCode, errors.toString());
        }
    } catch (IOException e) {
        throw new CmdExecutionException(command, e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new CmdExecutionException(command, e);
    }
    return output.toString();
}
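A typical call site. The single-argument overload executeCommand(String) is the one used by NFSStorageProvider below; a minimal sketch, assuming CmdExecutor has a default constructor:
// Minimal usage sketch; the exit code and stderr travel inside the exception.
CmdExecutor cmdExecutor = new CmdExecutor();
try {
    String dockerVersion = cmdExecutor.executeCommand("docker --version");
    LOGGER.debug("Command output: {}", dockerVersion);
} catch (CmdExecutionException e) {
    // thrown on a non-zero exit code, an I/O failure, or interruption
    LOGGER.error("Command execution failed: {}", e.getMessage());
}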
use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.
the class AutoscaleManager method processPod.
private void processPod(Pod pod, KubernetesClient client, Set<String> scheduledRuns,
                        List<CompletableFuture<Void>> tasks, Set<String> allPods,
                        Set<String> nodes, Set<String> reassignedNodes) {
    LOGGER.debug("Found an unscheduled pod: {}.", pod.getMetadata().getName());
    Map<String, String> labels = pod.getMetadata().getLabels();
    String runId = labels.get(KubernetesConstants.RUN_ID_LABEL);
    long longId = Long.parseLong(runId);
    if (nodeUpTaskInProgress.contains(longId)) {
        LOGGER.debug("Nodeup task for ID {} is already in progress.", runId);
        return;
    }
    // Check whether a node with the required run ID is already available
    if (nodes.contains(runId)) {
        LOGGER.debug("Node with required ID {} already exists.", runId);
        return;
    }
    // Check the max nodeup retry count
    // TODO: should we lock here?
    int retryCount = nodeUpAttempts.getOrDefault(longId, 0);
    int nodeUpRetryCount = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_RETRY_COUNT);
    if (retryCount >= nodeUpRetryCount) {
        LOGGER.debug("Exceeded max nodeup attempts ({}) for run ID {}. Setting run status 'FAILURE'.",
                retryCount, runId);
        pipelineRunManager.updatePipelineStatusIfNotFinal(longId, TaskStatus.FAILURE, new Date());
        removeNodeUpTask(longId);
        return;
    }
    try {
        RunInstance requiredInstance = getNewRunInstance(runId);
        // Check whether an AWS instance already exists for this run
        RunInstance awsInstance = clusterManager.describeInstance(runId, requiredInstance);
        if (awsInstance != null && awsInstance.getNodeId() != null) {
            LOGGER.debug("Found {} instance for run ID {}.", awsInstance.getNodeId(), runId);
            createNodeForRun(tasks, runId, requiredInstance);
            return;
        }
        List<String> freeNodes = nodes.stream()
                .filter(nodeId -> !allPods.contains(nodeId)
                        && !reassignedNodes.contains(nodeId)
                        && isNodeAvailable(client, nodeId))
                .collect(Collectors.toList());
        LOGGER.debug("Found {} free nodes.", freeNodes.size());
        // Try to reassign one of the idle nodes
        for (String previousId : freeNodes) {
            LOGGER.debug("Found free node ID {}.", previousId);
            RunInstance previousInstance = getPreviousRunInstance(previousId);
            if (clusterManager.requirementsMatch(requiredInstance, previousInstance)) {
                LOGGER.debug("Reassigning node ID {} to run {}.", previousId, runId);
                boolean successfullyReassigned = clusterManager.reassignNode(previousId, runId);
                if (successfullyReassigned) {
                    scheduledRuns.add(runId);
                    pipelineRunManager.updateRunInstance(longId, previousInstance);
                    reassignedNodes.add(previousId);
                    return;
                }
            }
        }
        // Check the max cluster capacity
        int currentClusterSize = getCurrentClusterSize(client);
        NodeList nodeList = getAvailableNodes(client);
        Integer maxClusterSize = preferenceManager.getPreference(SystemPreferences.CLUSTER_MAX_SIZE);
        if (currentClusterSize > maxClusterSize) {
            LOGGER.debug("Exceeded maximum cluster size {} - current size {}.",
                    maxClusterSize, currentClusterSize);
            return;
        }
        if (currentClusterSize == maxClusterSize
                && preferenceManager.getPreference(SystemPreferences.CLUSTER_KILL_NOT_MATCHING_NODES)) {
            LOGGER.debug("Current cluster size {} has reached limit {}. Checking free nodes.",
                    currentClusterSize, maxClusterSize);
            List<String> nonMatchingFreeNodes = freeNodes.stream()
                    .filter(id -> !reassignedNodes.contains(id))
                    .collect(Collectors.toList());
            if (!CollectionUtils.isEmpty(nonMatchingFreeNodes)) {
                String nodeId = nonMatchingFreeNodes.get(0);
                // mark the node as reassigned to remove it from the free pool
                reassignedNodes.add(nodeId);
                LOGGER.debug("Scaling down unused node {}.", nodeId);
                clusterManager.scaleDown(nodeId);
            } else {
                LOGGER.debug("Exceeded maximum cluster size {}.",
                        nodeList.getItems().size() + nodeUpTaskInProgress.size());
                LOGGER.debug("Leaving pending run {}.", runId);
                return;
            }
        }
        int nodeUpTasksSize = nodeUpTaskInProgress.size();
        int maxNodeUpThreads = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_MAX_THREADS);
        if (nodeUpTasksSize >= maxNodeUpThreads) {
            LOGGER.debug("Exceeded maximum node up tasks queue size {}.", nodeUpTasksSize);
            return;
        }
        scheduledRuns.add(runId);
        createNodeForRun(tasks, runId, requiredInstance);
    } catch (GitClientException | CmdExecutionException | IllegalArgumentException e) {
        LOGGER.error("Failed to create node for run {}.", runId);
        LOGGER.error("Failed to get pipeline configuration: " + e.getMessage(), e);
    }
}
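A CmdExecutionException thrown by clusterManager (via the CmdExecutor shown above) lands in the catch block here, and the retry guard at the top of the method eventually fails the run once CLUSTER_NODEUP_RETRY_COUNT is exceeded. A minimal sketch of the attempt bookkeeping, with hypothetical names (the real counting happens around createNodeForRun and removeNodeUpTask):
// Hypothetical sketch of per-run nodeup attempt counting.
private final Map<Long, Integer> nodeUpAttempts = new ConcurrentHashMap<>();

private void registerNodeUpFailure(long runId) {
    // increment the counter that processPod checks against CLUSTER_NODEUP_RETRY_COUNT
    nodeUpAttempts.merge(runId, 1, Integer::sum);
}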
use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.
the class NFSStorageProvider method mount.
private synchronized File mount(NFSDataStorage dataStorage) {
    File mntDir = Paths.get(rootMountPoint, getMountDirName(dataStorage.getPath())).toFile();
    try {
        if (!mntDir.exists()) {
            Assert.isTrue(mntDir.mkdirs(), messageHelper.getMessage(
                    MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT_DIRECTORY_NOT_CREATED));
            String rootNfsPath = getNfsRootPath(dataStorage.getPath());
            String mountOptions = String.format(DEFAULT_NFS_OPTIONS_PATTERN, rsize, wsize);
            String mountCmd = String.format(NFS_MOUNT_CMD_PATTERN, mountOptions, rootNfsPath,
                    mntDir.getAbsolutePath());
            try {
                cmdExecutor.executeCommand(mountCmd);
            } catch (CmdExecutionException e) {
                // roll back the mount directory so a later attempt starts clean
                FileUtils.deleteDirectory(mntDir);
                LOGGER.error(messageHelper.getMessage(
                        MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT_2, mountCmd, e.getMessage()));
                throw new DataStorageException(messageHelper.getMessage(
                        MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT,
                        dataStorage.getName(), dataStorage.getPath()), e);
            }
        }
    } catch (IOException e) {
        throw new DataStorageException(messageHelper.getMessage(
                MessageConstants.ERROR_DATASTORAGE_NFS_MOUNT,
                dataStorage.getName(), dataStorage.getPath()), e);
    }
    String storageName = getStorageName(dataStorage.getPath());
    return new File(mntDir, storageName);
}
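The mount command is assembled from two format-string constants that are not shown in this snippet. Their exact values live elsewhere in NFSStorageProvider; a purely illustrative pair, consistent with the argument counts above, might look like:
// Hypothetical values for illustration only; see NFSStorageProvider for the real constants.
private static final String DEFAULT_NFS_OPTIONS_PATTERN = "rsize=%d,wsize=%d"; // rsize, wsize
private static final String NFS_MOUNT_CMD_PATTERN = "sudo mount -t nfs -o %s %s %s"; // options, remote path, local dir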
use of com.epam.pipeline.exception.CmdExecutionException in project cloud-pipeline by epam.
the class DockerContainerOperationManager method commitContainer.
public PipelineRun commitContainer(PipelineRun run, DockerRegistry registry, String newImageName,
                                   boolean clearContainer, boolean stopPipeline) {
    final String containerId = kubernetesManager.getContainerIdFromKubernetesPod(
            run.getPodId(), run.getDockerImage());
    final String apiToken = authManager.issueTokenForCurrentUser(null).getToken();
    String dockerLogin;
    String dockerPassword;
    // Use pipe auth if it is enabled for the registry
    if (registry.isPipelineAuth()) {
        dockerLogin = authManager.getAuthorizedUser();
        dockerPassword = apiToken;
    } else {
        dockerLogin = registry.getUserName() == null ? EMPTY : registry.getUserName();
        dockerPassword = registry.getPassword() == null ? EMPTY : registry.getPassword();
    }
    if (newImageName.startsWith(registry.getPath())) {
        newImageName = newImageName.replace(registry.getPath() + DELIMITER, EMPTY);
    }
    Matcher matcher = GROUP_AND_IMAGE.matcher(newImageName);
    Assert.isTrue(matcher.find(),
            messageHelper.getMessage(MessageConstants.ERROR_TOOL_GROUP_IS_NOT_PROVIDED, newImageName));
    String toolGroupName = matcher.group(1);
    ToolGroup toolGroup = toolGroupManager.loadByNameOrId(registry.getPath() + DELIMITER + toolGroupName);
    try {
        Assert.notNull(containerId,
                messageHelper.getMessage(MessageConstants.ERROR_CONTAINER_ID_FOR_RUN_NOT_FOUND, run.getId()));
        String commitContainerCommand = String.format(COMMIT_COMMAND_TEMPLATE,
                commitRunStarterScriptUrl,
                preferenceManager.getPreference(SystemPreferences.BASE_API_HOST),
                apiToken,
                commitScriptsDistributionsUrl,
                preferenceManager.getPreference(SystemPreferences.BASE_PIPE_DISTR_URL),
                run.getId(), containerId, clearContainer, stopPipeline,
                preferenceManager.getPreference(SystemPreferences.COMMIT_TIMEOUT),
                registry.getPath(), registry.getId(), toolGroup.getId(), newImageName,
                dockerLogin, dockerPassword, registry.isPipelineAuth());
        Process sshConnection = submitCommandViaSSH(run.getInstance().getNodeIP(), commitContainerCommand);
        boolean isFinished = sshConnection.waitFor(
                preferenceManager.getPreference(SystemPreferences.COMMIT_TIMEOUT), TimeUnit.SECONDS);
        Assert.state(isFinished && sshConnection.exitValue() == 0,
                messageHelper.getMessage(MessageConstants.ERROR_RUN_PIPELINES_COMMIT_FAILED, run.getId()));
    } catch (IllegalStateException | IllegalArgumentException | IOException e) {
        LOGGER.error(e.getMessage());
        updatePipelineRunCommitStatus(run, CommitStatus.FAILURE);
        throw new CmdExecutionException(COMMIT_COMMAND_DESCRIPTION, e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        updatePipelineRunCommitStatus(run, CommitStatus.FAILURE);
        throw new CmdExecutionException(COMMIT_COMMAND_DESCRIPTION, e);
    }
    updatePipelineRunCommitStatus(run, CommitStatus.COMMITTING);
    return run;
}
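Here CmdExecutionException is thrown rather than caught: SSH, timeout, and assertion failures are all wrapped so callers see a single exception type. A minimal sketch of a hypothetical call site (the image name is illustrative):
// Hypothetical caller; commitContainer sets the FAILURE status itself before rethrowing.
try {
    PipelineRun committed = dockerContainerOperationManager
            .commitContainer(run, registry, "library/my-tool:latest", true, false);
    LOGGER.debug("Commit started for run {}.", committed.getId());
} catch (CmdExecutionException e) {
    LOGGER.error("Commit failed for run {}: {}", run.getId(), e.getMessage());
}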