Usage of com.epam.pipeline.entity.pipeline.RunInstance in the cloud-pipeline project by EPAM.
The following snippet is the createRestartRun method of the PipelineRunManager class.
/**
 * Builds a fresh {@link PipelineRun} that re-launches the given run from scratch.
 * The new run gets its own id, start date, pod id and SSH password, while the
 * execution settings (parameters, docker image, command template, instance
 * configuration, owner, permissions, etc.) are copied from the original run.
 *
 * @param run the finished/failed run to restart
 * @return a new run entity ready to be persisted and launched
 */
private PipelineRun createRestartRun(final PipelineRun run) {
final PipelineRun restarted = new PipelineRun();
restarted.setId(pipelineRunDao.createRunId());
restarted.setStartDate(DateUtils.now());
// A run may be detached from a pipeline (e.g. launched from a tool); pipeline
// metadata is copied only when the original run references one.
final Optional<Pipeline> pipeline = Optional.ofNullable(run.getPipelineId())
.map(pipelineManager::load);
if (pipeline.isPresent()) {
final Pipeline loaded = pipeline.get();
restarted.setPipelineName(loaded.getName());
restarted.setRepository(loaded.getRepository());
restarted.setPipelineId(loaded.getId());
restarted.setVersion(run.getVersion());
restarted.setRevisionName(gitManager.getRevisionName(run.getVersion()));
} else {
fillMissingPipelineFields(restarted);
}
restarted.setStatus(TaskStatus.RUNNING);
restarted.setCommitStatus(CommitStatus.NOT_COMMITTED);
restarted.setLastChangeCommitTime(DateUtils.now());
restarted.setPodId(getRootPodIDFromPipeline(restarted));
restarted.setParams(run.getParams());
restarted.parseParameters();
restarted.setTimeout(run.getTimeout());
restarted.setDockerImage(run.getDockerImage());
restarted.setCmdTemplate(run.getCmdTemplate());
restarted.setNodeCount(run.getNodeCount());
final RunInstance instance = copyInstance(run.getInstance());
restarted.setInstance(instance);
setRunPrice(instance, restarted);
restarted.setSshPassword(PasswordGenerator.generatePassword());
restarted.setOwner(run.getOwner());
restarted.setEntitiesIds(run.getEntitiesIds());
restarted.setConfigurationId(run.getConfigurationId());
restarted.setExecutionPreferences(run.getExecutionPreferences());
restarted.setRunSids(run.getRunSids());
return restarted;
}
Usage of com.epam.pipeline.entity.pipeline.RunInstance in the cloud-pipeline project by EPAM.
The following snippet is the createPipelineRun method of the PipelineRunDaoTest class.
/**
 * Test helper: builds and persists a {@link PipelineRun} with the given
 * attributes and a minimal {@link RunInstance} (spot flag + fixed node id).
 *
 * @param pipelineId      pipeline the run belongs to
 * @param params          raw run parameters string
 * @param status          task status to store
 * @param parentRunId     id of the parent run, or {@code null}
 * @param entitiesId      single entity id wrapped into the entities list
 * @param isSpot          whether the instance is a spot instance
 * @param configurationId configuration the run was launched from
 * @param runSids         run-level permission entries
 * @return the persisted run entity
 */
private PipelineRun createPipelineRun(Long pipelineId, String params, TaskStatus status, Long parentRunId, Long entitiesId, Boolean isSpot, Long configurationId, List<RunSid> runSids) {
final PipelineRun pipelineRun = new PipelineRun();
pipelineRun.setPipelineId(pipelineId);
pipelineRun.setVersion(TEST_REVISION_1);
pipelineRun.setStartDate(new Date());
pipelineRun.setEndDate(new Date());
pipelineRun.setStatus(status);
pipelineRun.setCommitStatus(CommitStatus.NOT_COMMITTED);
pipelineRun.setLastChangeCommitTime(new Date());
pipelineRun.setPodId(TEST_POD_ID);
pipelineRun.setParams(params);
pipelineRun.setOwner(USER);
pipelineRun.setParentRunId(parentRunId);
pipelineRun.setRunSids(runSids);
pipelineRun.setServiceUrl(TEST_SERVICE_URL);
// Minimal instance description: only the spot flag and a stub node id matter here.
final RunInstance runInstance = new RunInstance();
runInstance.setSpot(isSpot);
runInstance.setNodeId("1");
pipelineRun.setInstance(runInstance);
pipelineRun.setEntitiesIds(Collections.singletonList(entitiesId));
pipelineRun.setConfigurationId(configurationId);
pipelineRunDao.createPipelineRun(pipelineRun);
return pipelineRun;
}
Usage of com.epam.pipeline.entity.pipeline.RunInstance in the cloud-pipeline project by EPAM.
The following snippet is the createPipelineRun method of the ObjectCreatorUtils class.
/**
 * Test-fixture factory: creates an in-memory RUNNING {@link PipelineRun}
 * with a spot {@link RunInstance} (node id "1") and standard test constants.
 * The run is NOT persisted.
 *
 * @param runId       id to assign to the run
 * @param pipelineId  pipeline the run belongs to
 * @param parentRunId id of the parent run, or {@code null}
 * @return the populated run entity
 */
public static PipelineRun createPipelineRun(Long runId, Long pipelineId, Long parentRunId) {
final PipelineRun pipelineRun = new PipelineRun();
pipelineRun.setId(runId);
pipelineRun.setPipelineId(pipelineId);
pipelineRun.setVersion(TEST_REVISION_1);
pipelineRun.setStartDate(new Date());
pipelineRun.setEndDate(new Date());
pipelineRun.setStatus(TaskStatus.RUNNING);
pipelineRun.setCommitStatus(CommitStatus.NOT_COMMITTED);
pipelineRun.setLastChangeCommitTime(new Date());
pipelineRun.setPodId(TEST_POD_ID);
pipelineRun.setOwner(TEST_NAME);
pipelineRun.setParentRunId(parentRunId);
pipelineRun.setServiceUrl(TEST_SERVICE_URL);
// Minimal instance description: spot instance with a stub node id.
final RunInstance runInstance = new RunInstance();
runInstance.setSpot(true);
runInstance.setNodeId("1");
pipelineRun.setInstance(runInstance);
return pipelineRun;
}
Usage of com.epam.pipeline.entity.pipeline.RunInstance in the cloud-pipeline project by EPAM.
The following snippet is the processPod method of the AutoscaleManager class.
/**
 * Decides how to satisfy an unscheduled pod's compute requirement: skip it
 * (node-up already in progress, node already exists, retries exhausted,
 * cluster full, or node-up queue full), reassign an idle node whose
 * configuration matches, or schedule a new node-up task.
 *
 * @param pod             the unscheduled pod (its RUN_ID label identifies the run)
 * @param client          Kubernetes client used to inspect cluster state
 * @param scheduledRuns   accumulator of run ids handled during this cycle (mutated)
 * @param tasks           accumulator of async node-up tasks (mutated)
 * @param allPods         node ids that currently host some pod
 * @param nodes           ids of all nodes currently in the cluster
 * @param reassignedNodes node ids already reassigned/consumed this cycle (mutated)
 */
private void processPod(Pod pod, KubernetesClient client, Set<String> scheduledRuns, List<CompletableFuture<Void>> tasks, Set<String> allPods, Set<String> nodes, Set<String> reassignedNodes) {
LOGGER.debug("Found an unscheduled pod: {}.", pod.getMetadata().getName());
Map<String, String> labels = pod.getMetadata().getLabels();
// The run id label is the key linking pods to runs and to node names.
String runId = labels.get(KubernetesConstants.RUN_ID_LABEL);
long longId = Long.parseLong(runId);
// Another cycle already launched a node-up task for this run — nothing to do.
if (nodeUpTaskInProgress.contains(longId)) {
LOGGER.debug("Nodeup task for ID {} is already in progress.", runId);
return;
}
// Check whether node with required RunID is available
if (nodes.contains(runId)) {
LOGGER.debug("Node with required ID {} already exists.", runId);
return;
}
// check max nodeup retry count
// TODO: should we lock here?
int retryCount = nodeUpAttempts.getOrDefault(longId, 0);
int nodeUpRetryCount = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_RETRY_COUNT);
// Too many failed node-up attempts: give up and fail the run.
if (retryCount >= nodeUpRetryCount) {
LOGGER.debug("Exceeded max nodeup attempts ({}) for run ID {}. Setting run status 'FAILURE'.", retryCount, runId);
pipelineRunManager.updatePipelineStatusIfNotFinal(longId, TaskStatus.FAILURE, new Date());
removeNodeUpTask(longId);
return;
}
try {
RunInstance requiredInstance = getNewRunInstance(runId);
// check whether aws instance already exists
RunInstance awsInstance = clusterManager.describeInstance(runId, requiredInstance);
if (awsInstance != null && awsInstance.getNodeId() != null) {
// The cloud instance is already up; just register a node for it.
LOGGER.debug("Found {} instance for run ID {}.", awsInstance.getNodeId(), runId);
createNodeForRun(tasks, runId, requiredInstance);
return;
}
// Free node = hosts no pod, not yet taken this cycle, and reported available.
List<String> freeNodes = nodes.stream().filter(nodeId -> !allPods.contains(nodeId) && !reassignedNodes.contains(nodeId) && isNodeAvailable(client, nodeId)).collect(Collectors.toList());
LOGGER.debug("Found {} free nodes.", freeNodes.size());
// Try to reassign one of idle nodes
for (String previousId : freeNodes) {
LOGGER.debug("Found free node ID {}.", previousId);
RunInstance previousInstance = getPreviousRunInstance(previousId);
if (clusterManager.requirementsMatch(requiredInstance, previousInstance)) {
LOGGER.debug("Reassigning node ID {} to run {}.", previousId, runId);
boolean successfullyReassigned = clusterManager.reassignNode(previousId, runId);
if (successfullyReassigned) {
// Record the reassignment so later pods in this cycle don't reuse the node.
scheduledRuns.add(runId);
pipelineRunManager.updateRunInstance(longId, previousInstance);
reassignedNodes.add(previousId);
return;
}
}
}
// Check max cluster capacity
int currentClusterSize = getCurrentClusterSize(client);
NodeList nodeList = getAvailableNodes(client);
Integer maxClusterSize = preferenceManager.getPreference(SystemPreferences.CLUSTER_MAX_SIZE);
if (currentClusterSize > maxClusterSize) {
LOGGER.debug("Exceeded maximum cluster size {} - current size {}.", maxClusterSize, currentClusterSize);
return;
}
// At the limit exactly: optionally free capacity by killing a non-matching idle node.
if (currentClusterSize == maxClusterSize && preferenceManager.getPreference(SystemPreferences.CLUSTER_KILL_NOT_MATCHING_NODES)) {
LOGGER.debug("Current cluster size {} has reached limit {}. Checking free nodes.", currentClusterSize, maxClusterSize);
List<String> nonMatchingFreeNodes = freeNodes.stream().filter(id -> !reassignedNodes.contains(id)).collect(Collectors.toList());
if (!CollectionUtils.isEmpty(nonMatchingFreeNodes)) {
String nodeId = nonMatchingFreeNodes.get(0);
// to remove node from free
reassignedNodes.add(nodeId);
LOGGER.debug("Scaling down unused node {}.", nodeId);
clusterManager.scaleDown(nodeId);
} else {
// No node can be freed: the run stays pending until a later cycle.
LOGGER.debug("Exceeded maximum cluster size {}.", nodeList.getItems().size() + nodeUpTaskInProgress.size());
LOGGER.debug("Leaving pending run {}.", runId);
return;
}
}
// Throttle concurrent node-up tasks to the configured thread limit.
int nodeUpTasksSize = nodeUpTaskInProgress.size();
int maxNodeUpThreads = preferenceManager.getPreference(SystemPreferences.CLUSTER_NODEUP_MAX_THREADS);
if (nodeUpTasksSize >= maxNodeUpThreads) {
LOGGER.debug("Exceeded maximum node up tasks queue size {}.", nodeUpTasksSize);
return;
}
scheduledRuns.add(runId);
createNodeForRun(tasks, runId, requiredInstance);
} catch (GitClientException | CmdExecutionException | IllegalArgumentException e) {
// Best-effort: log and leave the pod pending; a later cycle will retry.
LOGGER.error("Failed to create node for run {}.", runId);
LOGGER.error("Failed to get pipeline configuration: " + e.getMessage(), e);
}
}
Usage of com.epam.pipeline.entity.pipeline.RunInstance in the cloud-pipeline project by EPAM.
The following snippet is the configurationToInstance method of the ClusterManagerImpl class.
/**
 * Translates a {@link PipelineConfiguration} into a {@link RunInstance},
 * substituting cluster-wide preference defaults for any unset field
 * (instance type, disk size, machine image) and resolving the AWS region
 * name (falling back to the default region when none is configured).
 *
 * @param configuration pipeline configuration, possibly with unset fields
 * @return a fully populated instance description
 */
@Override
public RunInstance configurationToInstance(PipelineConfiguration configuration) {
final RunInstance instance = new RunInstance();
// For each field: take the configured value when present, otherwise the
// cluster-wide default from system preferences.
instance.setNodeType(configuration.getInstanceType() == null
? preferenceManager.getPreference(SystemPreferences.CLUSTER_INSTANCE_TYPE)
: configuration.getInstanceType());
instance.setNodeDisk(configuration.getInstanceDisk() == null
? preferenceManager.getPreference(SystemPreferences.CLUSTER_INSTANCE_HDD)
: Integer.parseInt(configuration.getInstanceDisk()));
instance.setEffectiveNodeDisk(instance.getNodeDisk());
instance.setNodeImage(configuration.getInstanceImage() == null
? preferenceManager.getPreference(SystemPreferences.CLUSTER_INSTANCE_IMAGE)
: configuration.getInstanceImage());
// Resolve the region entity (configured id or the default region) to its AWS name.
final String regionName = Optional.ofNullable(configuration.getAwsRegionId())
.map(regionId -> awsRegionManager.load(regionId))
.orElse(awsRegionManager.loadDefaultRegion())
.getAwsRegionName();
instance.setAwsRegionId(regionName);
return instance;
}
Aggregations