Usage of org.apache.airavata.model.status.QueueStatusModel in the Apache Airavata project.
Class RegistryServerHandler, method getLatestQueueStatuses.
/**
 * Get queue statuses of all compute resources.
 *
 * <p>Reads every {@code QUEUE_STATUS} entry from the experiment catalog of the default user
 * gateway and returns them cast to {@link QueueStatusModel}.
 *
 * @return the queue status models read from the experiment catalog
 * @throws RegistryServiceException if the catalog lookup or the server settings lookup fails;
 *         the message carries the underlying failure's message
 */
@Override
public List<QueueStatusModel> getLatestQueueStatuses() throws RegistryServiceException, TException {
    try {
        experimentCatalog = RegistryFactory.getExperimentCatalog(ServerSettings.getDefaultUserGateway());
        List<Object> rawStatuses = experimentCatalog.get(
                ExperimentCatalogModelType.QUEUE_STATUS, null, null, -1, 0, null, null);

        // The catalog hands back untyped objects; narrow each one to the model type.
        List<QueueStatusModel> statuses = new ArrayList<>();
        for (Object rawStatus : rawStatuses) {
            statuses.add((QueueStatusModel) rawStatus);
        }
        return statuses;
    } catch (RegistryException | ApplicationSettingsException failure) {
        logger.error("Error while reading queue status models....", failure);
        RegistryServiceException serviceException = new RegistryServiceException();
        serviceException.setMessage("Error while reading queue status models.... : " + failure.getMessage());
        throw serviceException;
    }
}
Usage of org.apache.airavata.model.status.QueueStatusModel in the Apache Airavata project.
Class ClusterStatusMonitorJob, method execute.
/**
 * Polls the batch queues of the super tenant gateway's compute resources over SSH and
 * registers the collected queue statuses with the registry.
 *
 * <p>Only compute resources whose first job submission interface uses SSH are monitored.
 * A failure on one compute resource is logged and does not stop the others. If the thread
 * is interrupted while waiting for command output, the interrupt flag is restored, the
 * remaining compute resources are skipped and whatever was collected so far is registered.
 *
 * @param jobExecutionContext scheduler context; not used by this job
 * @throws JobExecutionException wrapping any failure that is not handled per compute resource
 */
@Override
public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
    try {
        String superTenantGatewayId = ServerSettings.getSuperTenantGatewayId();
        RegistryService.Client registryClient = getRegistryClient();

        List<ComputeResourcePreference> computeResourcePreferences = null;
        try {
            computeResourcePreferences = registryClient.getAllGatewayComputeResourcePreferences(superTenantGatewayId);
        } catch (Exception ex) {
            // Best effort: without preferences there is simply nothing to monitor.
            logger.warn("Could not find super tenant compute resources preferences for cluster status monitoring...");
        }

        List<ComputeResourceProfile> computeResourceProfiles = new ArrayList<>();
        if (computeResourcePreferences != null) {
            for (ComputeResourcePreference preference : computeResourcePreferences) {
                try {
                    ComputeResourceProfile profile =
                            buildComputeResourceProfile(registryClient, superTenantGatewayId, preference);
                    if (profile != null) {
                        computeResourceProfiles.add(profile);
                    }
                } catch (TException e) {
                    // Keep the stack trace; the message alone is often null or uninformative.
                    logger.error(e.getMessage(), e);
                }
            }
        }

        List<QueueStatusModel> queueStatuses = new ArrayList<>();
        for (ComputeResourceProfile computeResourceProfile : computeResourceProfiles) {
            try {
                collectQueueStatuses(computeResourceProfile, superTenantGatewayId, queueStatuses);
            } catch (InterruptedException ie) {
                // Restore the flag so the scheduler can observe the interruption, and stop polling.
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while getting cluster status from "
                        + computeResourceProfile.getHostName() + "; skipping remaining compute resources");
                break;
            } catch (Exception ex) {
                logger.error("Failed to get cluster status from " + computeResourceProfile.getHostName());
                logger.error(ex.getMessage(), ex);
            }
        }

        if (!queueStatuses.isEmpty()) {
            registryClient.registerQueueStatuses(queueStatuses);
        }
    } catch (Exception e) {
        throw new JobExecutionException(e);
    }
}

/**
 * Resolves the SSH connection details and queue names for one compute resource preference.
 *
 * @return the profile, or {@code null} when the resource's first job submission interface
 *         is missing or is not SSH
 * @throws TException if a registry lookup fails
 */
private ComputeResourceProfile buildComputeResourceProfile(RegistryService.Client registryClient,
        String superTenantGatewayId, ComputeResourcePreference preference) throws TException {
    String computeResourceId = preference.getComputeResourceId();
    String credentialStoreToken = preference.getResourceSpecificCredentialStoreToken();
    String loginUserName = preference.getLoginUserName();
    if (credentialStoreToken == null || credentialStoreToken.equals("")) {
        // No resource-specific credential: fall back to the gateway-wide token.
        credentialStoreToken = registryClient.getGatewayResourceProfile(superTenantGatewayId).getCredentialStoreToken();
    }

    ComputeResourceDescription computeResourceDescription = registryClient.getComputeResource(computeResourceId);
    String hostName = computeResourceDescription.getHostName();
    // FIXME This should come from compute resource description
    int port = 22;

    ArrayList<String> queueNames = new ArrayList<>();
    // A resource without configured batch queues simply yields no queue names.
    if (computeResourceDescription.getBatchQueues() != null) {
        computeResourceDescription.getBatchQueues().forEach(q -> queueNames.add(q.getQueueName()));
    }

    List<JobSubmissionInterface> jobSubmissionInterfaces = computeResourceDescription.getJobSubmissionInterfaces();
    if (jobSubmissionInterfaces == null || jobSubmissionInterfaces.isEmpty()) {
        return null;
    }
    // Only the first job submission interface is considered.
    JobSubmissionInterface primaryInterface = jobSubmissionInterfaces.get(0);
    if (!primaryInterface.getJobSubmissionProtocol().equals(JobSubmissionProtocol.SSH)) {
        return null;
    }
    String resourceManagerType = registryClient.getSSHJobSubmission(primaryInterface.getJobSubmissionInterfaceId())
            .getResourceJobManager().getResourceJobManagerType().name();
    return new ComputeResourceProfile(hostName, loginUserName, port, credentialStoreToken, queueNames, resourceManagerType);
}

/**
 * Opens one SSH session to the compute resource, queries every configured queue and appends
 * each successfully parsed status to {@code queueStatuses}. Statuses appended before a
 * failure are kept. The session is always disconnected, even on failure.
 *
 * @throws Exception on credential lookup, SSH or I/O failure, or if the thread is interrupted
 */
private void collectQueueStatuses(ComputeResourceProfile computeResourceProfile, String gatewayId,
        List<QueueStatusModel> queueStatuses) throws Exception {
    String userName = computeResourceProfile.getUserName();
    String hostName = computeResourceProfile.getHostName();
    int port = computeResourceProfile.getPort();
    String resourceManagerType = computeResourceProfile.getResourceManagerType();

    JSch jsch = new JSch();
    CredentialStoreService.Client credentialClient = getCredentialStoreClient();
    SSHCredential sshCredential = credentialClient.getSSHCredential(computeResourceProfile.getCredentialStoreToken(), gatewayId);
    jsch.addIdentity(hostName, sshCredential.getPrivateKey().getBytes(), sshCredential.getPublicKey().getBytes(), sshCredential.getPassphrase().getBytes());
    Session session = jsch.getSession(userName, hostName, port);
    java.util.Properties config = new java.util.Properties();
    // NOTE(review): host key verification is disabled for these connections.
    config.put("StrictHostKeyChecking", "no");
    session.setConfig(config);
    try {
        session.connect();
        logger.debug("Connected to " + hostName);
        for (String queue : computeResourceProfile.getQueueNames()) {
            String command = buildQueueStatusCommand(resourceManagerType, queue);
            if (command.equals("")) {
                logger.warn("No matching resource manager type found for " + resourceManagerType);
                continue;
            }
            String result = runRemoteCommand(session, command, hostName, queue);
            if (result.length() == 0) {
                continue;
            }
            try {
                QueueStatusModel queueStatus = parseQueueStatus(resourceManagerType, hostName, queue, result);
                if (queueStatus != null) {
                    queueStatuses.add(queueStatus);
                }
            } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
                // One malformed reply (e.g. an unknown queue) must not hide the host's other queues.
                logger.warn("Could not parse status of queue " + queue + " on " + hostName + ": " + result.trim(), e);
            }
        }
    } finally {
        session.disconnect();
    }
}

/**
 * Returns the shell command that prints a one-line status for the queue, or an empty
 * string when the resource manager type is neither SLURM nor PBS.
 *
 * NOTE(review): the queue name is placed into the command unescaped; this assumes queue
 * names come from trusted registry configuration.
 */
private String buildQueueStatusCommand(String resourceManagerType, String queue) {
    if ("SLURM".equals(resourceManagerType)) {
        return "sinfo -s -p " + queue + " -o \"%a %F\" | tail -1";
    }
    if ("PBS".equals(resourceManagerType)) {
        return "qstat -Q " + queue + "| tail -1";
    }
    return "";
}

/**
 * Runs the command on an exec channel of the session and returns everything it wrote to
 * standard output. The channel is always disconnected, even on failure.
 *
 * @throws Exception on SSH or I/O failure; {@link InterruptedException} if the thread is
 *         interrupted while waiting for the command to finish
 */
private String runRemoteCommand(Session session, String command, String hostName, String queue) throws Exception {
    ChannelExec channel = (ChannelExec) session.openChannel("exec");
    try {
        channel.setCommand(command);
        channel.setInputStream(null);
        channel.setErrStream(System.err);
        InputStream in = channel.getInputStream();
        channel.connect();

        byte[] buffer = new byte[1024];
        StringBuilder output = new StringBuilder();
        while (true) {
            while (in.available() > 0) {
                int read = in.read(buffer, 0, buffer.length);
                if (read < 0) {
                    break;
                }
                output.append(new String(buffer, 0, read));
            }
            if (channel.isClosed()) {
                // Data may still arrive between the drain above and the close check.
                if (in.available() > 0) {
                    continue;
                }
                logger.debug(hostName + " " + queue + " " + "exit-status: " + channel.getExitStatus());
                break;
            }
            // Let InterruptedException propagate instead of swallowing it.
            Thread.sleep(1000);
        }
        return output.toString();
    } finally {
        channel.disconnect();
    }
}

/**
 * Converts the one-line command output into a queue status stamped with the current time.
 *
 * @return the parsed status, or {@code null} for an unsupported resource manager type
 * @throws NumberFormatException if a count field is not an integer
 * @throws ArrayIndexOutOfBoundsException if the output has fewer fields than expected
 */
private QueueStatusModel parseQueueStatus(String resourceManagerType, String hostName, String queue, String result) {
    if ("SLURM".equals(resourceManagerType)) {
        // Expected shape: "<availability> <n>/<n>/...".
        // NOTE(review): sinfo's %F is documented as allocated/idle/other/total node counts,
        // so "queued" below would be the idle-node count — confirm this is intended.
        String[] fields = result.split(" ");
        boolean isUp = fields[0].equalsIgnoreCase("up");
        String[] counts = fields[1].split("/");
        int running = Integer.parseInt(counts[0].trim());
        int queued = Integer.parseInt(counts[1].trim());
        return new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
    }
    if ("PBS".equals(resourceManagerType)) {
        // Collapse column padding; presumably the columns are Queue Max Tot Ena Str Que Run ... — verify.
        String[] fields = result.replaceAll("\\s+", " ").split(" ");
        boolean isUp = fields[3].equalsIgnoreCase("yes");
        int running = Integer.parseInt(fields[6].trim());
        int queued = Integer.parseInt(fields[5].trim());
        return new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
    }
    return null;
}
Aggregations