use of org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription in project airavata by apache.
the class BatchQueueValidator method validateUserConfiguration.
/**
 * Validates the scheduling request of an experiment against the batch queues of its compute resource.
 *
 * <p>When auto-scheduling is enabled, or the compute resource defines no batch queues, a single
 * passing result is returned. Otherwise the requested queue name is looked up among the resource's
 * queues; for every queue whose name matches, a wall time, node count and CPU count result is added,
 * and finally one result describing whether the queue name was found at all. A queue limit of zero
 * is treated as "not configured" and the corresponding check is skipped.
 *
 * @param experiment   experiment whose user configuration data is validated
 * @param processModel optional process; when non-null its resource schedule supplies the compute resource id,
 *                     otherwise the experiment's own scheduling data is used
 * @return the individual validation results; empty when queues exist but the experiment carries no
 *         scheduling data (only reachable when {@code processModel} is non-null)
 * @throws AppCatalogException if reading from the app catalog fails
 */
private List<ValidatorResult> validateUserConfiguration(ExperimentModel experiment, ProcessModel processModel) throws AppCatalogException {
    List<ValidatorResult> validatorResultList = new ArrayList<ValidatorResult>();
    try {
        UserConfigurationDataModel userConfigurationData = experiment.getUserConfigurationData();
        ComputationalResourceSchedulingModel computationalResourceScheduling = userConfigurationData.getComputationalResourceScheduling();
        if (userConfigurationData.isAiravataAutoSchedule()) {
            logger.info("User enabled Auto-Schedule. Hence we don't do validation..");
            ValidatorResult validatorResult = new ValidatorResult();
            validatorResult.setResult(true);
            validatorResultList.add(validatorResult);
            return validatorResultList;
        }

        // NOTE(review): without a process model the experiment's scheduling data must be present,
        // otherwise this dereference fails exactly as it did before - confirm callers guarantee it.
        String resourceHostId;
        if (processModel == null) {
            resourceHostId = computationalResourceScheduling.getResourceHostId();
        } else {
            resourceHostId = processModel.getProcessResourceSchedule().getResourceHostId();
        }
        ComputeResourceDescription computeResource = appCatalog.getComputeResource().getComputeResource(resourceHostId);

        List<BatchQueue> batchQueues = computeResource.getBatchQueues();
        if (batchQueues == null || batchQueues.isEmpty()) {
            // for some compute resources, you don't need to specify queue names
            ValidatorResult result = new ValidatorResult();
            logger.info("There are not queues defined under the compute resource. Airavata assumes this experiment " + "does not need a queue name...");
            result.setResult(true);
            validatorResultList.add(result);
            return validatorResultList;
        }
        if (computationalResourceScheduling == null) {
            return validatorResultList;
        }

        // A missing queue name must fail validation rather than blow up with a NullPointerException.
        String requestedQueueName = computationalResourceScheduling.getQueueName();
        String experimentQueueName = requestedQueueName == null ? null : requestedQueueName.trim();
        int experimentWallTimeLimit = computationalResourceScheduling.getWallTimeLimit();
        int experimentNodeCount = computationalResourceScheduling.getNodeCount();
        int experimentCPUCount = computationalResourceScheduling.getTotalCPUCount();

        // Start out as "queue not found"; the first queue with a matching name flips this to success.
        ValidatorResult queueNameResult = new ValidatorResult();
        queueNameResult.setResult(false);
        queueNameResult.setErrorDetails("The specified queue " + experimentQueueName + " does not exist. If you believe this is an error, contact the administrator to verify App-Catalog Configurations");

        for (BatchQueue queue : batchQueues) {
            String resourceQueueName = queue.getQueueName();
            if (resourceQueueName == null || !resourceQueueName.equals(experimentQueueName)) {
                continue;
            }
            int maxQueueRunTime = queue.getMaxRunTime();
            int maxNodeCount = queue.getMaxNodes();
            int maxcpuCount = queue.getMaxProcessors();

            queueNameResult.setResult(true);
            queueNameResult.setErrorDetails("");

            // Validate if the specified wall time is within allowable limit
            ValidatorResult wallTimeResult = new ValidatorResult();
            if (experimentWallTimeLimit == 0) {
                wallTimeResult.setResult(false);
                wallTimeResult.setErrorDetails("Walltime cannot be zero for queue " + resourceQueueName);
            } else if (maxQueueRunTime == 0) {
                wallTimeResult.setResult(true);
                wallTimeResult.setErrorDetails("Maximum wall time is not configured for the queue, Validation is being skipped");
                logger.info("Maximum wall time is not configured for the queue. Validation is being skipped");
            } else if (maxQueueRunTime < experimentWallTimeLimit) {
                wallTimeResult.setResult(false);
                wallTimeResult.setErrorDetails("Job Execution walltime " + experimentWallTimeLimit + " exceeds the allowable walltime " + maxQueueRunTime + " for queue " + resourceQueueName);
            } else {
                wallTimeResult.setResult(true);
                wallTimeResult.setErrorDetails("");
            }

            // validate max node count
            ValidatorResult nodeCountResult = new ValidatorResult();
            if (maxNodeCount == 0) {
                nodeCountResult.setResult(true);
                nodeCountResult.setErrorDetails("Max node count is not configured for the queue, Validation is being skipped");
                logger.info("Max node count is not configured for the queue. Validation is being skipped");
            } else if (experimentNodeCount == 0) {
                nodeCountResult.setResult(false);
                nodeCountResult.setErrorDetails("Job Execution node count cannot be zero for queue " + resourceQueueName);
            } else if (maxNodeCount < experimentNodeCount) {
                nodeCountResult.setResult(false);
                nodeCountResult.setErrorDetails("Job Execution node count " + experimentNodeCount + " exceeds the allowable node count " + maxNodeCount + " for queue " + resourceQueueName);
            } else {
                nodeCountResult.setResult(true);
                nodeCountResult.setErrorDetails("");
            }

            // validate cpu count
            ValidatorResult cpuCountResult = new ValidatorResult();
            if (maxcpuCount == 0) {
                cpuCountResult.setResult(true);
                cpuCountResult.setErrorDetails("Max cpu count is not configured for the queue, Validation is being skipped");
                logger.info("Max cpu count is not configured for the queue. Validation is being skipped");
            } else if (experimentCPUCount == 0) {
                cpuCountResult.setResult(false);
                cpuCountResult.setErrorDetails("Job Execution cpu count cannot be zero for queue " + resourceQueueName);
            } else if (maxcpuCount < experimentCPUCount) {
                cpuCountResult.setResult(false);
                cpuCountResult.setErrorDetails("Job Execution cpu count " + experimentCPUCount + " exceeds the allowable cpu count " + maxcpuCount + " for queue " + resourceQueueName);
            } else {
                cpuCountResult.setResult(true);
                cpuCountResult.setErrorDetails("");
            }

            validatorResultList.add(wallTimeResult);
            validatorResultList.add(nodeCountResult);
            validatorResultList.add(cpuCountResult);
        }
        validatorResultList.add(queueNameResult);
    } catch (AppCatalogException e) {
        logger.error("Error while getting information from App catalog", e);
        throw new AppCatalogException("Error while getting information from App catalog", e);
    }
    return validatorResultList;
}
use of org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription in project airavata by apache.
the class OrchestratorServerHandler method getAppDeploymentForModule.
/**
 * Selects the application deployment to use for a module.
 *
 * <p>Deployments are looked up by module id, narrowed to the process's resource host when one is
 * set. Each deployment is keyed by the compute resource it targets, the configured
 * {@link HostScheduler} implementation picks one of those compute resources, and the deployment
 * registered for the chosen resource is returned.
 *
 * @param processModel     process whose resource schedule may pin the compute host
 * @param selectedModuleId id of the application module to find a deployment for
 * @return the deployment mapped to the compute resource chosen by the host scheduler
 */
private ApplicationDeploymentDescription getAppDeploymentForModule(ProcessModel processModel, String selectedModuleId) throws AppCatalogException, ClassNotFoundException, ApplicationSettingsException, InstantiationException, IllegalAccessException {
    Map<String, String> filters = new HashMap<String, String>();
    filters.put(AppCatAbstractResource.ApplicationDeploymentConstants.APP_MODULE_ID, selectedModuleId);

    boolean hostIsPinned = processModel.getProcessResourceSchedule() != null
            && processModel.getProcessResourceSchedule().getResourceHostId() != null;
    if (hostIsPinned) {
        filters.put(
                AppCatAbstractResource.ApplicationDeploymentConstants.COMPUTE_HOST_ID,
                processModel.getProcessResourceSchedule().getResourceHostId());
    }

    List<ApplicationDeploymentDescription> candidates =
            appCatalog.getApplicationDeployment().getApplicationDeployements(filters);

    // Key every candidate deployment by the compute resource it is deployed on.
    Map<ComputeResourceDescription, ApplicationDeploymentDescription> deploymentsByHost =
            new HashMap<ComputeResourceDescription, ApplicationDeploymentDescription>();
    ComputeResource computeResourceCatalog = appCatalog.getComputeResource();
    for (ApplicationDeploymentDescription candidate : candidates) {
        ComputeResourceDescription host = computeResourceCatalog.getComputeResource(candidate.getComputeHostId());
        deploymentsByHost.put(host, candidate);
    }

    ComputeResourceDescription[] hostArray =
            deploymentsByHost.keySet().toArray(new ComputeResourceDescription[0]);
    List<ComputeResourceDescription> hosts = Arrays.asList(hostArray);

    // The scheduler implementation is named in the server settings and instantiated reflectively.
    String schedulerClassName = ServerSettings.getHostScheduler();
    Class<? extends HostScheduler> schedulerClass = Class.forName(schedulerClassName).asSubclass(HostScheduler.class);
    HostScheduler scheduler = schedulerClass.newInstance();

    ComputeResourceDescription selectedHost = scheduler.schedule(hosts);
    return deploymentsByHost.get(selectedHost);
}
use of org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription in project airavata by apache.
the class ClusterStatusMonitorJob method execute.
/**
 * Collects the status of every batch queue on the super tenant gateway's SSH compute resources
 * and registers the results with the registry.
 *
 * <p>For each compute resource preference of the super tenant gateway whose first job submission
 * interface uses SSH, a profile (host, login, port, credential token, queue names, resource
 * manager type) is built. Each profile's host is then contacted over SSH and one status command
 * per queue is run ({@code sinfo} for SLURM, {@code qstat} for PBS); the parsed output becomes a
 * {@link QueueStatusModel}. A failure on one host is logged and does not stop the other hosts.
 *
 * @param jobExecutionContext execution context supplied by the scheduler; not read by this method
 * @throws JobExecutionException if anything outside the per-host handling fails
 */
@Override
public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
    try {
        String superTenantGatewayId = ServerSettings.getSuperTenantGatewayId();
        RegistryService.Client registryClient = getRegistryClient();
        List<ComputeResourceProfile> computeResourceProfiles = new ArrayList<>();
        List<ComputeResourcePreference> computeResourcePreferences = null;
        try {
            computeResourcePreferences = registryClient.getAllGatewayComputeResourcePreferences(superTenantGatewayId);
        } catch (Exception ex) {
            logger.warn("Could not find super tenant compute resources preferences for cluster status monitoring...", ex);
        }
        if (computeResourcePreferences != null && computeResourcePreferences.size() > 0) {
            for (ComputeResourcePreference p : computeResourcePreferences) {
                try {
                    String computeResourceId = p.getComputeResourceId();
                    String credentialStoreToken = p.getResourceSpecificCredentialStoreToken();
                    String loginUserName = p.getLoginUserName();
                    if (credentialStoreToken == null || credentialStoreToken.equals("")) {
                        // Fall back to the gateway-wide credential when the resource has none of its own.
                        credentialStoreToken = registryClient.getGatewayResourceProfile(superTenantGatewayId).getCredentialStoreToken();
                    }
                    ArrayList<String> queueNames = new ArrayList<>();
                    ComputeResourceDescription computeResourceDescription = registryClient.getComputeResource(computeResourceId);
                    String hostName = computeResourceDescription.getHostName();
                    // FIXME This should come from compute resource description
                    int port = 22;
                    // A resource without any batch queues simply contributes no queue names.
                    if (computeResourceDescription.getBatchQueues() != null) {
                        computeResourceDescription.getBatchQueues().forEach(q -> queueNames.add(q.getQueueName()));
                    }
                    List<JobSubmissionInterface> jobSubmissionInterfaces = computeResourceDescription.getJobSubmissionInterfaces();
                    if (jobSubmissionInterfaces != null && jobSubmissionInterfaces.size() > 0) {
                        if (jobSubmissionInterfaces.get(0).getJobSubmissionProtocol().equals(JobSubmissionProtocol.SSH)) {
                            String resourceManagerType = registryClient.getSSHJobSubmission(jobSubmissionInterfaces.get(0).getJobSubmissionInterfaceId()).getResourceJobManager().getResourceJobManagerType().name();
                            ComputeResourceProfile computeResourceProfile = new ComputeResourceProfile(hostName, loginUserName, port, credentialStoreToken, queueNames, resourceManagerType);
                            computeResourceProfiles.add(computeResourceProfile);
                        }
                    }
                } catch (TException e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
        ArrayList<QueueStatusModel> queueStatuses = new ArrayList<>();
        for (ComputeResourceProfile computeResourceProfile : computeResourceProfiles) {
            if (Thread.currentThread().isInterrupted()) {
                // An earlier host was interrupted while polling; do not start new SSH sessions.
                logger.warn("Cluster status monitoring was interrupted, skipping remaining compute resources");
                break;
            }
            String userName = computeResourceProfile.getUserName();
            String hostName = computeResourceProfile.getHostName();
            int port = computeResourceProfile.getPort();
            Session session = null;
            try {
                JSch jsch = new JSch();
                CredentialStoreService.Client credentialClient = getCredentialStoreClient();
                SSHCredential sshCredential = credentialClient.getSSHCredential(computeResourceProfile.getCredentialStoreToken(), superTenantGatewayId);
                // Keys stored without a passphrase are passed through as a null passphrase.
                byte[] passphrase = sshCredential.getPassphrase() == null ? null : sshCredential.getPassphrase().getBytes();
                jsch.addIdentity(hostName, sshCredential.getPrivateKey().getBytes(), sshCredential.getPublicKey().getBytes(), passphrase);
                session = jsch.getSession(userName, hostName, port);
                java.util.Properties config = new java.util.Properties();
                // NOTE(review): host key verification is disabled here - confirm this is acceptable.
                config.put("StrictHostKeyChecking", "no");
                session.setConfig(config);
                session.connect();
                logger.debug("Connected to " + hostName);
                for (String queue : computeResourceProfile.getQueueNames()) {
                    String command = "";
                    if (computeResourceProfile.getResourceManagerType().equals("SLURM")) {
                        command = "sinfo -s -p " + queue + " -o \"%a %F\" | tail -1";
                    } else if (computeResourceProfile.getResourceManagerType().equals("PBS")) {
                        command = "qstat -Q " + queue + "| tail -1";
                    }
                    if (command.equals("")) {
                        logger.warn("No matching resource manager type found for " + computeResourceProfile.getResourceManagerType());
                        continue;
                    }
                    String result;
                    Channel channel = session.openChannel("exec");
                    try {
                        ((ChannelExec) channel).setCommand(command);
                        channel.setInputStream(null);
                        ((ChannelExec) channel).setErrStream(System.err);
                        InputStream in = channel.getInputStream();
                        channel.connect();
                        byte[] tmp = new byte[1024];
                        StringBuilder output = new StringBuilder();
                        // Poll until the remote command has exited and its output is fully drained.
                        while (true) {
                            while (in.available() > 0) {
                                int i = in.read(tmp, 0, 1024);
                                if (i < 0) {
                                    break;
                                }
                                output.append(new String(tmp, 0, i));
                            }
                            if (channel.isClosed()) {
                                if (in.available() > 0) {
                                    continue;
                                }
                                logger.debug(hostName + " " + queue + " " + "exit-status: " + channel.getExitStatus());
                                break;
                            }
                            // An InterruptedException propagates to the per-host handler below.
                            Thread.sleep(1000);
                        }
                        result = output.toString();
                    } finally {
                        // Always release the exec channel, even when reading its output failed.
                        channel.disconnect();
                    }
                    if (result.length() > 0) {
                        QueueStatusModel queueStatus = null;
                        if (computeResourceProfile.getResourceManagerType().equals("SLURM")) {
                            // Output is split as "<availability> <first count>/<second count>/...".
                            String[] sparts = result.split(" ");
                            boolean isUp = sparts[0].equalsIgnoreCase("up");
                            String knts = sparts[1];
                            sparts = knts.split("/");
                            int running = Integer.parseInt(sparts[0].trim());
                            int queued = Integer.parseInt(sparts[1].trim());
                            queueStatus = new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
                        } else if (computeResourceProfile.getResourceManagerType().equals("PBS")) {
                            // Collapse column padding so the fields can be addressed by index.
                            result = result.replaceAll("\\s+", " ");
                            String[] sparts = result.split(" ");
                            boolean isUp = sparts[3].equalsIgnoreCase("yes");
                            int running = Integer.parseInt(sparts[6].trim());
                            int queued = Integer.parseInt(sparts[5].trim());
                            queueStatus = new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
                        }
                        if (queueStatus != null) {
                            queueStatuses.add(queueStatus);
                        }
                    }
                }
            } catch (Exception ex) {
                if (ex instanceof InterruptedException) {
                    // Restore the flag so the host loop (and the caller) can see the interruption.
                    Thread.currentThread().interrupt();
                }
                logger.error("Failed to get cluster status from " + computeResourceProfile.getHostName());
                logger.error(ex.getMessage(), ex);
            } finally {
                // Close the SSH session on every path; previously it leaked whenever a queue failed.
                if (session != null) {
                    session.disconnect();
                }
            }
        }
        if (!queueStatuses.isEmpty()) {
            registryClient.registerQueueStatuses(queueStatuses);
        }
    } catch (Exception e) {
        throw new JobExecutionException(e);
    }
}
use of org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription in project airavata by apache.
the class AppDeploymentTest method testAppDeployment.
/**
 * Round-trips an application deployment through the app catalog: registers a compute host and a
 * module, saves a deployment with module-load commands and environment paths, updates it, and
 * queries it back by id, by module, by host and in bulk.
 */
@Test
public void testAppDeployment() throws Exception {
    ApplicationDeployment appDep = appcatalog.getApplicationDeployment();
    ApplicationInterface appInt = appcatalog.getApplicationInterface();
    ComputeResource computeRs = appcatalog.getComputeResource();

    // Prerequisites: a compute host and an application module to deploy onto it.
    ComputeResourceDescription cm = new ComputeResourceDescription();
    cm.setHostName("localhost");
    cm.setResourceDescription("test compute host");
    String hostId = computeRs.addComputeResource(cm);
    ApplicationModule module = new ApplicationModule();
    module.setAppModuleName("WRF");
    module.setAppModuleVersion("1.0.0");
    String wrfModuleId = appInt.addApplicationModule(module, ServerSettings.getDefaultUserGateway());

    ApplicationDeploymentDescription description = new ApplicationDeploymentDescription();
    description.setAppModuleId(wrfModuleId);
    description.setComputeHostId(hostId);
    description.setExecutablePath("/home/a/b/c");
    description.setAppDeploymentDescription("test app deployment");

    // Two distinct module-load commands; previously the second was an exact copy of the first.
    CommandObject cmd1 = new CommandObject();
    cmd1.setCommand("cmd1");
    cmd1.setCommandOrder(1);
    CommandObject cmd2 = new CommandObject();
    cmd2.setCommand("cmd2");
    cmd2.setCommandOrder(2);
    description.addToModuleLoadCmds(cmd1);
    description.addToModuleLoadCmds(cmd2);

    List<SetEnvPaths> libPrepandPaths = new ArrayList<SetEnvPaths>();
    libPrepandPaths.add(createSetEnvPath("name1", "val1", 1));
    libPrepandPaths.add(createSetEnvPath("name2", "val2", 2));
    description.setLibPrependPaths(libPrepandPaths);
    List<SetEnvPaths> libApendPaths = new ArrayList<SetEnvPaths>();
    libApendPaths.add(createSetEnvPath("name3", "val3", 1));
    libApendPaths.add(createSetEnvPath("name4", "val4", 2));
    description.setLibAppendPaths(libApendPaths);
    List<SetEnvPaths> appEvns = new ArrayList<SetEnvPaths>();
    appEvns.add(createSetEnvPath("name5", "val5", 1));
    appEvns.add(createSetEnvPath("name6", "val6", 2));
    description.setSetEnvironment(appEvns);

    // Save, then read the deployment back by id.
    String appDepId = appDep.addApplicationDeployment(description, ServerSettings.getDefaultUserGateway());
    ApplicationDeploymentDescription app = null;
    if (appDep.isAppDeploymentExists(appDepId)) {
        app = appDep.getApplicationDeployement(appDepId);
        System.out.println("*********** application deployment id ********* : " + app.getAppDeploymentId());
        System.out.println("*********** application deployment desc ********* : " + app.getAppDeploymentDescription());
    }

    // Update the description and read it back again.
    description.setAppDeploymentDescription("test app deployment2");
    appDep.updateApplicationDeployment(appDepId, description);
    if (appDep.isAppDeploymentExists(appDepId)) {
        app = appDep.getApplicationDeployement(appDepId);
        System.out.println("*********** application deployment desc ********* : " + app.getAppDeploymentDescription());
    }

    // Exercise the filtered and bulk lookups.
    Map<String, String> moduleIdFilter = new HashMap<String, String>();
    moduleIdFilter.put(AppCatAbstractResource.ApplicationDeploymentConstants.APP_MODULE_ID, wrfModuleId);
    List<ApplicationDeploymentDescription> applicationDeployements = appDep.getApplicationDeployements(moduleIdFilter);
    System.out.println("******** Size of App deployments for module *********** : " + applicationDeployements.size());
    Map<String, String> hostFilter = new HashMap<String, String>();
    hostFilter.put(AppCatAbstractResource.ApplicationDeploymentConstants.COMPUTE_HOST_ID, hostId);
    List<ApplicationDeploymentDescription> applicationDeployementsForHost = appDep.getApplicationDeployements(hostFilter);
    System.out.println("******** Size of App deployments for host *********** : " + applicationDeployementsForHost.size());
    List<String> allApplicationDeployementIds = appDep.getAllApplicationDeployementIds();
    System.out.println("******** Size of all App deployments ids *********** : " + allApplicationDeployementIds.size());
    List<ApplicationDeploymentDescription> allApplicationDeployements = appDep.getAllApplicationDeployements(ServerSettings.getDefaultUserGateway());
    System.out.println("******** Size of all App deployments *********** : " + allApplicationDeployements.size());

    // The message is shown only on failure, so it has to describe the failure.
    assertTrue("Application deployment could not be retrieved after it was saved", app != null);
}
use of org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription in project airavata by apache.
the class GatewayProfileTest method gatewayProfileTest.
/**
 * Round-trips a gateway resource profile through the app catalog: registers two compute hosts,
 * saves a profile holding one compute resource preference per host, then reads the profile and
 * its preferences back.
 */
@Test
public void gatewayProfileTest() throws Exception {
    GwyResourceProfile gatewayProfile = appcatalog.getGatewayProfile();
    GatewayResourceProfile gf = new GatewayResourceProfile();
    ComputeResource computeRs = appcatalog.getComputeResource();

    // Two compute hosts for the preferences to point at.
    ComputeResourceDescription cm1 = new ComputeResourceDescription();
    cm1.setHostName("localhost");
    cm1.setResourceDescription("test compute host");
    String hostId1 = computeRs.addComputeResource(cm1);
    ComputeResourceDescription cm2 = new ComputeResourceDescription();
    cm2.setHostName("localhost");
    cm2.setResourceDescription("test compute host");
    String hostId2 = computeRs.addComputeResource(cm2);

    ComputeResourcePreference preference1 = new ComputeResourcePreference();
    preference1.setComputeResourceId(hostId1);
    preference1.setOverridebyAiravata(true);
    preference1.setPreferredJobSubmissionProtocol(JobSubmissionProtocol.SSH);
    preference1.setPreferredDataMovementProtocol(DataMovementProtocol.SCP);
    preference1.setPreferredBatchQueue("queue1");
    preference1.setScratchLocation("/tmp");
    preference1.setAllocationProjectNumber("project1");
    ComputeResourcePreference preference2 = new ComputeResourcePreference();
    preference2.setComputeResourceId(hostId2);
    preference2.setOverridebyAiravata(true);
    preference2.setPreferredJobSubmissionProtocol(JobSubmissionProtocol.LOCAL);
    preference2.setPreferredDataMovementProtocol(DataMovementProtocol.GridFTP);
    preference2.setPreferredBatchQueue("queue2");
    preference2.setScratchLocation("/tmp");
    preference2.setAllocationProjectNumber("project2");

    List<ComputeResourcePreference> list = new ArrayList<ComputeResourcePreference>();
    list.add(preference1);
    list.add(preference2);
    gf.setComputeResourcePreferences(list);
    gf.setGatewayID("testGateway");
    String gwId = gatewayProfile.addGatewayResourceProfile(gf);

    GatewayResourceProfile retrievedProfile = null;
    if (gatewayProfile.isGatewayResourceProfileExists(gwId)) {
        retrievedProfile = gatewayProfile.getGatewayProfile(gwId);
        System.out.println("************ gateway id ************** :" + retrievedProfile.getGatewayID());
    }

    List<ComputeResourcePreference> preferences = gatewayProfile.getAllComputeResourcePreferences(gwId);
    // Report the size null-safely; the list was previously dereferenced before its null check.
    System.out.println("compute preferences size : " + (preferences == null ? 0 : preferences.size()));
    if (preferences != null && !preferences.isEmpty()) {
        for (ComputeResourcePreference cm : preferences) {
            System.out.println("******** host id ********* : " + cm.getComputeResourceId());
            System.out.println(cm.getPreferredBatchQueue());
            System.out.println(cm.getPreferredDataMovementProtocol());
            System.out.println(cm.getPreferredJobSubmissionProtocol());
        }
    }

    // The message is shown only on failure, so it has to describe the failure.
    assertTrue("Gateway resource profile could not be retrieved after it was saved", retrievedProfile != null);
}
Aggregations