Search in sources :

Example 1 with QueueStatusModel

Use of org.apache.airavata.model.status.QueueStatusModel in the Apache Airavata project.

In class RegistryServerHandler, method getLatestQueueStatuses:

/**
 * Reads the queue status records of all compute resources from the experiment catalog.
 *
 * <p>The catalog is resolved for the default user gateway and queried for the
 * {@code QUEUE_STATUS} model type; every returned entry is cast to
 * {@link QueueStatusModel}.
 *
 * @return the queue status models returned by the catalog, in catalog order
 * @throws RegistryServiceException if the catalog query or the server-settings lookup fails
 * @throws TException declared by the service interface
 */
@Override
public List<QueueStatusModel> getLatestQueueStatuses() throws RegistryServiceException, TException {
    try {
        experimentCatalog = RegistryFactory.getExperimentCatalog(ServerSettings.getDefaultUserGateway());
        List<Object> rawStatuses = experimentCatalog.get(ExperimentCatalogModelType.QUEUE_STATUS, null, null, -1, 0, null, null);
        List<QueueStatusModel> statuses = new ArrayList<>();
        for (Object rawStatus : rawStatuses) {
            statuses.add((QueueStatusModel) rawStatus);
        }
        return statuses;
    } catch (RegistryException | ApplicationSettingsException e) {
        logger.error("Error while reading queue status models....", e);
        // The service exception carries only a message; the full stack trace is in the log above.
        RegistryServiceException serviceFailure = new RegistryServiceException();
        serviceFailure.setMessage("Error while reading queue status models.... : " + e.getMessage());
        throw serviceFailure;
    }
}
Also used : ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException) QueueStatusModel(org.apache.airavata.model.status.QueueStatusModel) RegistryServiceException(org.apache.airavata.registry.api.exception.RegistryServiceException)

Example 2 with QueueStatusModel

Use of org.apache.airavata.model.status.QueueStatusModel in the Apache Airavata project.

In class ClusterStatusMonitorJob, method execute:

/**
 * Collects queue status from the compute resources configured for the super-tenant
 * gateway and registers the results with the registry.
 *
 * <p>Steps:
 * <ol>
 *   <li>Load the gateway's compute resource preferences and build a
 *       {@code ComputeResourceProfile} for each resource whose first job submission
 *       interface uses SSH.</li>
 *   <li>For each profile, open an SSH session, run one scheduler command per queue
 *       (SLURM {@code sinfo} or PBS {@code qstat}) and parse the last output line.</li>
 *   <li>Register all parsed {@link QueueStatusModel}s in a single registry call.</li>
 * </ol>
 *
 * <p>A failure on one compute resource is logged and does not stop the others. If the
 * thread is interrupted while waiting for command output, the interrupt flag is restored,
 * the remaining resources are skipped, and the statuses gathered so far are still registered.
 *
 * @param jobExecutionContext scheduler-supplied context; not read by this method
 * @throws JobExecutionException wrapping any failure that is not handled per resource
 */
@Override
public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
    try {
        String superTenantGatewayId = ServerSettings.getSuperTenantGatewayId();
        RegistryService.Client registryClient = getRegistryClient();
        List<ComputeResourceProfile> computeResourceProfiles = new ArrayList<>();
        List<ComputeResourcePreference> computeResourcePreferences = null;
        try {
            computeResourcePreferences = registryClient.getAllGatewayComputeResourcePreferences(superTenantGatewayId);
        } catch (Exception ex) {
            // Best effort: without preferences there is nothing to monitor, but keep the cause in the log.
            logger.warn("Could not find super tenant compute resources preferences for cluster status monitoring...", ex);
        }
        if (computeResourcePreferences != null) {
            for (ComputeResourcePreference preference : computeResourcePreferences) {
                try {
                    String loginUserName = preference.getLoginUserName();
                    String credentialStoreToken = preference.getResourceSpecificCredentialStoreToken();
                    if (credentialStoreToken == null || credentialStoreToken.isEmpty()) {
                        // Fall back to the gateway-wide credential when the resource has none of its own.
                        credentialStoreToken = registryClient.getGatewayResourceProfile(superTenantGatewayId).getCredentialStoreToken();
                    }
                    ComputeResourceDescription computeResourceDescription = registryClient.getComputeResource(preference.getComputeResourceId());
                    String hostName = computeResourceDescription.getHostName();
                    // FIXME This should come from compute resource description
                    int port = 22;
                    ArrayList<String> queueNames = new ArrayList<>();
                    if (computeResourceDescription.getBatchQueues() != null) {
                        computeResourceDescription.getBatchQueues().forEach(q -> queueNames.add(q.getQueueName()));
                    }
                    List<JobSubmissionInterface> jobSubmissionInterfaces = computeResourceDescription.getJobSubmissionInterfaces();
                    // Only the first job submission interface is considered, and only when it is SSH.
                    if (jobSubmissionInterfaces != null && !jobSubmissionInterfaces.isEmpty()
                            && jobSubmissionInterfaces.get(0).getJobSubmissionProtocol().equals(JobSubmissionProtocol.SSH)) {
                        String resourceManagerType = registryClient.getSSHJobSubmission(jobSubmissionInterfaces.get(0).getJobSubmissionInterfaceId()).getResourceJobManager().getResourceJobManagerType().name();
                        computeResourceProfiles.add(new ComputeResourceProfile(hostName, loginUserName, port, credentialStoreToken, queueNames, resourceManagerType));
                    }
                } catch (TException e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
        List<QueueStatusModel> queueStatuses = new ArrayList<>();
        for (ComputeResourceProfile computeResourceProfile : computeResourceProfiles) {
            String userName = computeResourceProfile.getUserName();
            String hostName = computeResourceProfile.getHostName();
            int port = computeResourceProfile.getPort();
            String resourceManagerType = computeResourceProfile.getResourceManagerType();
            Session session = null;
            try {
                JSch jsch = new JSch();
                CredentialStoreService.Client credentialClient = getCredentialStoreClient();
                SSHCredential sshCredential = credentialClient.getSSHCredential(computeResourceProfile.getCredentialStoreToken(), superTenantGatewayId);
                String publicKey = sshCredential.getPublicKey();
                String passphrase = sshCredential.getPassphrase();
                // JSch accepts a null public key / passphrase, so do not fail on keys that lack them.
                jsch.addIdentity(hostName, sshCredential.getPrivateKey().getBytes(),
                        publicKey == null ? null : publicKey.getBytes(),
                        passphrase == null ? null : passphrase.getBytes());
                session = jsch.getSession(userName, hostName, port);
                java.util.Properties config = new java.util.Properties();
                // NOTE(review): host key verification is disabled here; confirm this is acceptable for the deployment.
                config.put("StrictHostKeyChecking", "no");
                session.setConfig(config);
                session.connect();
                logger.debug("Connected to " + hostName);
                for (String queue : computeResourceProfile.getQueueNames()) {
                    // NOTE(review): the queue name is concatenated into a shell command unescaped;
                    // this is only safe while queue names in the registry are trusted.
                    String command;
                    if ("SLURM".equals(resourceManagerType)) {
                        command = "sinfo -s -p " + queue + " -o \"%a %F\" | tail -1";
                    } else if ("PBS".equals(resourceManagerType)) {
                        command = "qstat -Q " + queue + "| tail -1";
                    } else {
                        logger.warn("No matching resource manager type found for " + resourceManagerType);
                        continue;
                    }
                    String output = runRemoteCommand(session, command, hostName, queue);
                    QueueStatusModel queueStatus = parseQueueStatus(resourceManagerType, hostName, queue, output);
                    if (queueStatus != null) {
                        queueStatuses.add(queueStatus);
                    }
                }
            } catch (InterruptedException ie) {
                // Restore the flag for the caller and stop polling; statuses gathered so far are still registered.
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while getting cluster status from " + hostName + "; skipping remaining compute resources");
                break;
            } catch (Exception ex) {
                logger.error("Failed to get cluster status from " + computeResourceProfile.getHostName());
                logger.error(ex.getMessage(), ex);
            } finally {
                // Always release the SSH session, including on failure paths.
                if (session != null) {
                    session.disconnect();
                }
            }
        }
        if (!queueStatuses.isEmpty()) {
            registryClient.registerQueueStatuses(queueStatuses);
        }
    } catch (Exception e) {
        throw new JobExecutionException(e);
    }
}

/**
 * Runs {@code command} on an exec channel of {@code session} and returns everything the
 * command wrote to standard output. The remote standard error is forwarded to {@code System.err}.
 *
 * @param session   connected SSH session
 * @param command   shell command to execute remotely
 * @param hostName  host name, used only in the debug log line
 * @param queue     queue name, used only in the debug log line
 * @return the collected standard output (possibly empty)
 * @throws InterruptedException if the thread is interrupted while waiting for output
 */
private String runRemoteCommand(Session session, String command, String hostName, String queue)
        throws com.jcraft.jsch.JSchException, java.io.IOException, InterruptedException {
    ChannelExec channel = (ChannelExec) session.openChannel("exec");
    try {
        channel.setCommand(command);
        channel.setInputStream(null);
        channel.setErrStream(System.err);
        InputStream in = channel.getInputStream();
        channel.connect();
        byte[] buffer = new byte[1024];
        StringBuilder output = new StringBuilder();
        while (true) {
            while (in.available() > 0) {
                int read = in.read(buffer, 0, buffer.length);
                if (read < 0) {
                    break;
                }
                output.append(new String(buffer, 0, read));
            }
            if (channel.isClosed()) {
                // Drain anything that arrived between the last read and the close.
                if (in.available() > 0) {
                    continue;
                }
                logger.debug(hostName + " " + queue + " " + "exit-status: " + channel.getExitStatus());
                break;
            }
            Thread.sleep(1000);
        }
        return output.toString();
    } finally {
        channel.disconnect();
    }
}

/**
 * Converts one line of scheduler output into a {@link QueueStatusModel}.
 *
 * <p>SLURM output is expected as {@code "<avail> <n1>/<n2>/..."}: the queue is up when the
 * first field is {@code up}; {@code n1} is stored as running and {@code n2} as queued.
 * PBS output is expected as whitespace-separated columns: column 3 ({@code yes}) means up,
 * column 6 is stored as running and column 5 as queued (0-based).
 * NOTE(review): per the sinfo manual, {@code %F} reports node counts
 * (allocated/idle/other/total) rather than job counts; confirm this mapping is intended.
 *
 * @return the parsed status, or {@code null} when the output is empty, malformed, or the
 *         resource manager type is neither SLURM nor PBS
 */
private QueueStatusModel parseQueueStatus(String resourceManagerType, String hostName, String queue, String output) {
    String line = output == null ? "" : output.trim();
    if (line.isEmpty()) {
        return null;
    }
    String[] fields = line.split("\\s+");
    try {
        if ("SLURM".equals(resourceManagerType)) {
            String[] counts = fields.length > 1 ? fields[1].split("/") : new String[0];
            if (counts.length >= 2) {
                boolean isUp = fields[0].equalsIgnoreCase("up");
                int running = Integer.parseInt(counts[0].trim());
                int queued = Integer.parseInt(counts[1].trim());
                return new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
            }
        } else if ("PBS".equals(resourceManagerType)) {
            if (fields.length >= 7) {
                boolean isUp = fields[3].equalsIgnoreCase("yes");
                int running = Integer.parseInt(fields[6].trim());
                int queued = Integer.parseInt(fields[5].trim());
                return new QueueStatusModel(hostName, queue, isUp, running, queued, System.currentTimeMillis());
            }
        } else {
            return null;
        }
    } catch (NumberFormatException e) {
        // Fall through to the warning below; e.g. a header line was returned instead of data.
    }
    logger.warn("Unexpected " + resourceManagerType + " output for queue " + queue + " on " + hostName + ": " + line);
    return null;
}
Also used : TException(org.apache.thrift.TException) ComputeResourcePreference(org.apache.airavata.model.appcatalog.gatewayprofile.ComputeResourcePreference) JobSubmissionInterface(org.apache.airavata.model.appcatalog.computeresource.JobSubmissionInterface) ArrayList(java.util.ArrayList) JSch(com.jcraft.jsch.JSch) JobExecutionException(org.quartz.JobExecutionException) QueueStatusModel(org.apache.airavata.model.status.QueueStatusModel) RegistryService(org.apache.airavata.registry.api.RegistryService) SSHCredential(org.apache.airavata.model.credential.store.SSHCredential) ComputeResourceDescription(org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription) InputStream(java.io.InputStream) Channel(com.jcraft.jsch.Channel) TTransportException(org.apache.thrift.transport.TTransportException) TException(org.apache.thrift.TException) JobExecutionException(org.quartz.JobExecutionException) ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException) ChannelExec(com.jcraft.jsch.ChannelExec) CredentialStoreService(org.apache.airavata.credential.store.cpi.CredentialStoreService) Session(com.jcraft.jsch.Session)

Aggregations

ApplicationSettingsException (org.apache.airavata.common.exception.ApplicationSettingsException)2 QueueStatusModel (org.apache.airavata.model.status.QueueStatusModel)2 Channel (com.jcraft.jsch.Channel)1 ChannelExec (com.jcraft.jsch.ChannelExec)1 JSch (com.jcraft.jsch.JSch)1 Session (com.jcraft.jsch.Session)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 CredentialStoreService (org.apache.airavata.credential.store.cpi.CredentialStoreService)1 ComputeResourceDescription (org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription)1 JobSubmissionInterface (org.apache.airavata.model.appcatalog.computeresource.JobSubmissionInterface)1 ComputeResourcePreference (org.apache.airavata.model.appcatalog.gatewayprofile.ComputeResourcePreference)1 SSHCredential (org.apache.airavata.model.credential.store.SSHCredential)1 RegistryService (org.apache.airavata.registry.api.RegistryService)1 RegistryServiceException (org.apache.airavata.registry.api.exception.RegistryServiceException)1 TException (org.apache.thrift.TException)1 TTransportException (org.apache.thrift.transport.TTransportException)1 JobExecutionException (org.quartz.JobExecutionException)1