Search in sources :

Example 1 with CommandServiceSshNative

use of cbit.vcell.message.server.cmd.CommandServiceSshNative in project vcell by virtualcell.

In the class SlurmProxyTest, the method testSLURM.

/**
 * Manual integration check against a SLURM cluster reached over SSH.
 *
 * <p>Opens a {@link CommandServiceSshNative} connection, prints every job
 * reported by {@code SlurmProxy.getRunningJobs()}, then prints the partition
 * statistics for each partition in turn. Any exception fails the test; the
 * SSH command service is closed in all cases.
 *
 * <p>NOTE(review): the host name, key file and log directory are hard-coded,
 * so this presumably only runs on the original author's machine - confirm.
 */
@Test
public void testSLURM() throws IOException, ExecutableException {
    // Global configuration read by the code under test.
    System.setProperty("log4j2.trace", "true");
    System.setProperty(PropertyLoader.vcellServerIDProperty, "Test2");
    System.setProperty(PropertyLoader.htcLogDirExternal, "/Volumes/vcell/htclogs");
    VCMongoMessage.enabled = false;

    final String[] partitionNames = { "vcell", "vcell2" };
    System.setProperty(PropertyLoader.slurm_partition, partitionNames[0]);

    CommandServiceSshNative sshService = null;
    try {
        final String[] hosts = { "vcell-service.cam.uchc.edu" };
        final File keyFile = new File("/Users/schaff/.ssh/schaff_rsa");
        sshService = new CommandServiceSshNative(hosts, "vcell", keyFile);
        final SlurmProxy proxy = new SlurmProxy(sshService, "vcell");

        // Dump every running job together with its status.
        final Map<HtcJobInfo, HtcJobStatus> running = proxy.getRunningJobs();
        for (Map.Entry<HtcJobInfo, HtcJobStatus> entry : running.entrySet()) {
            final HtcJobInfo info = entry.getKey();
            final HtcJobStatus status = entry.getValue();
            System.out.println("job " + info.getHtcJobID() + " " + info.getJobName() + ", status=" + status.toString());
        }

        // The partition is selected through a system property, so switch it before each query.
        for (int i = 0; i < partitionNames.length; i++) {
            final String name = partitionNames[i];
            System.setProperty(PropertyLoader.slurm_partition, name);
            final PartitionStatistics stats = proxy.getPartitionStatistics();
            System.out.println("partition statistics for partition " + name + ": " + stats);
            System.out.println("number of cpus allocated = " + stats.numCpusAllocated);
            System.out.println("load = " + stats.load);
            System.out.println("number of cpus total = " + stats.numCpusTotal);
        }
    } catch (Exception failure) {
        failure.printStackTrace();
        fail(failure.getMessage());
    } finally {
        if (sshService != null) {
            sshService.close();
        }
    }
}
Also used : PartitionStatistics(cbit.vcell.message.server.htc.HtcProxy.PartitionStatistics) HtcJobStatus(cbit.vcell.message.server.htc.HtcJobStatus) HtcJobInfo(cbit.vcell.message.server.htc.HtcProxy.HtcJobInfo) File(java.io.File) CommandServiceSshNative(cbit.vcell.message.server.cmd.CommandServiceSshNative) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ExecutableException(org.vcell.util.exe.ExecutableException) Test(org.junit.Test)

Example 2 with CommandServiceSshNative

use of cbit.vcell.message.server.cmd.CommandServiceSshNative in project vcell by virtualcell.

In the class SlurmProxy, the method creatCommandService.

/**
 * Creates an {@link HtcProxy} on top of an SSH or a local command service.
 *
 * <p>When {@code sshHostUserKeyfile} is a three-element array it is read as
 * {@code [hosts, user, keyFilePath]}, where {@code hosts} may list several
 * host names separated by commas and/or spaces. An SSH command service is
 * created and probed with a short {@code ls}; if that fails, a second attempt
 * is made with the four-argument constructor, passing {@code /root} as the
 * extra directory. In any other case (including {@code null}) a
 * {@link CommandServiceLocal} is used.
 *
 * @param sshHostUserKeyfile {@code [hosts, user, keyFilePath]}, or anything
 *        else to run commands locally
 * @return a {@link SlurmProxy} using the chosen command service
 * @throws IOException declared by the signature
 * @throws RuntimeException if both SSH attempts fail; the cause is the first
 *         failure and the second failure is attached as a suppressed exception
 */
public static HtcProxy creatCommandService(java.lang.String[] sshHostUserKeyfile) throws IOException {
    CommandService commandService = null;
    if (sshHostUserKeyfile != null && sshHostUserKeyfile.length == 3) {
        // Element 0 may hold several dispatch hosts separated by ',' and/or ' '.
        ArrayList<String> htcDispatchHostNames = new ArrayList<String>();
        StringTokenizer st = new StringTokenizer(sshHostUserKeyfile[0], ", ");
        while (st.hasMoreTokens()) {
            htcDispatchHostNames.add(st.nextToken());
        }
        String sshUser = sshHostUserKeyfile[1];
        File sshKeyFile = new File(sshHostUserKeyfile[2]);
        try {
            // First attempt: connect with the key file as given and run a probe command.
            commandService = new CommandServiceSshNative(htcDispatchHostNames.toArray(new String[0]), sshUser, sshKeyFile);
            commandService.command(new String[] { "/usr/bin/env bash -c ls | head -5" });
        } catch (Exception e) {
            e.printStackTrace();
            try {
                // Second attempt: same probe through the constructor that also takes a directory.
                commandService = new CommandServiceSshNative(htcDispatchHostNames.toArray(new String[0]), sshUser, sshKeyFile, new File("/root"));
                commandService.command(new String[] { "/usr/bin/env bash -c ls | head -5" });
            } catch (Exception e2) {
                // Report the second failure too (previously the first one was printed twice
                // and e2 was dropped), and keep it reachable from the thrown exception.
                e2.printStackTrace();
                e.addSuppressed(e2);
                throw new RuntimeException("failed to establish an ssh command connection to " + sshHostUserKeyfile[0] + " as user '" + sshUser + "' using key '" + sshKeyFile + "'", e);
            }
        }
        // can't make user directories, they are remote.
        AbstractSolver.bMakeUserDirs = false;
    } else {
        commandService = new CommandServiceLocal();
    }
    BatchSystemType batchSystemType = BatchSystemType.SLURM;
    HtcProxy htcProxy = null;
    switch(batchSystemType) {
        case SLURM:
            {
                htcProxy = new SlurmProxy(commandService, PropertyLoader.getRequiredProperty(PropertyLoader.htcUser));
                break;
            }
        default:
            {
                throw new RuntimeException("unrecognized batch scheduling option :" + batchSystemType);
            }
    }
    return htcProxy;
}
Also used : HtcProxy(cbit.vcell.message.server.htc.HtcProxy) CommandService(cbit.vcell.message.server.cmd.CommandService) CommandOutput(cbit.vcell.message.server.cmd.CommandService.CommandOutput) ArrayList(java.util.ArrayList) ExecutableException(org.vcell.util.exe.ExecutableException) HtcException(cbit.vcell.message.server.htc.HtcException) IOException(java.io.IOException) HtcJobNotFoundException(cbit.vcell.message.server.htc.HtcJobNotFoundException) StringTokenizer(java.util.StringTokenizer) CommandServiceLocal(cbit.vcell.message.server.cmd.CommandServiceLocal) BatchSystemType(cbit.vcell.server.HtcJobID.BatchSystemType) File(java.io.File) CommandServiceSshNative(cbit.vcell.message.server.cmd.CommandServiceSshNative)

Example 3 with CommandServiceSshNative

use of cbit.vcell.message.server.cmd.CommandServiceSshNative in project vcell by virtualcell.

In the class SlurmProxyTest, the method testSingularitySupport.

/**
 * Manual integration check that submits a small SLURM job over SSH and polls
 * its status.
 *
 * <p>The test writes a submission script to a local path, submits the
 * corresponding remote path through {@code SlurmProxy.submitJobFile}, then
 * polls {@code getJobStatus} once per second for at most 80 attempts or until
 * the job reports done. On the sixth poll (attempts == 5) it asks the proxy to
 * kill the job by name. Any exception fails the test; the SSH command service
 * is closed in all cases.
 *
 * <p>NOTE(review): the local and remote script paths presumably refer to the
 * same shared directory mounted at different locations - confirm. Host name
 * and key file are hard-coded.
 */
@Test
public void testSingularitySupport() throws IOException, ExecutableException {
    CommandServiceSshNative cmd = null;
    try {
        Random r = new Random();
        System.setProperty("log4j2.trace", "true");
        System.setProperty(PropertyLoader.vcellServerIDProperty, "Test2");
        System.setProperty(PropertyLoader.htcLogDirExternal, "/Volumes/vcell/htclogs");
        VCMongoMessage.enabled = false;
        String[] partitions = new String[] { "vcell", "vcell2" };
        System.setProperty(PropertyLoader.slurm_partition, partitions[1]);
        cmd = new CommandServiceSshNative(new String[] { "vcell-service.cam.uchc.edu" }, "vcell", new File("/Users/schaff/.ssh/schaff_rsa"));
        SlurmProxy slurmProxy = new SlurmProxy(cmd, "vcell");
        // Random suffix keeps job names from colliding between runs.
        String jobName = "V_TEST2_999999999_0_" + r.nextInt(10000);
        System.out.println("job name is " + jobName);
        File sub_file_localpath = new File("/Volumes/vcell/htclogs/" + jobName + ".slurm.sub");
        File sub_file_remotepath = new File("/share/apps/vcell3/htclogs/" + jobName + ".slurm.sub");
        // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder subfileContent = new StringBuilder();
        subfileContent.append("#!/usr/bin/bash\n");
        String partition = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_partition);
        subfileContent.append("#SBATCH --partition=" + partition + "\n");
        subfileContent.append("#SBATCH -J " + jobName + "\n");
        subfileContent.append("#SBATCH -o /share/apps/vcell3/htclogs/" + jobName + ".slurm.log\n");
        subfileContent.append("#SBATCH -e /share/apps/vcell3/htclogs/" + jobName + ".slurm.log\n");
        subfileContent.append("#SBATCH --mem=1000M\n");
        subfileContent.append("#SBATCH --no-kill\n");
        subfileContent.append("#SBATCH --no-requeue\n");
        subfileContent.append("env\n");
        subfileContent.append("echo `hostname`\n");
        subfileContent.append("python -c \"some_str = ' ' * 51200000\"\n");
        subfileContent.append("retcode=$?\n");
        subfileContent.append("echo \"return code was $retcode\"\n");
        subfileContent.append("if [[ $retcode == 137 ]]; then\n");
        subfileContent.append("   echo \"job was killed via kill -9 (probably out of memory)\"\n");
        subfileContent.append("fi\n");
        subfileContent.append("sleep 20\n");
        subfileContent.append("exit $retcode\n");
        // Disabled script section that checked for the singularity command:
        // subfileContent.append("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles\n");
        // subfileContent.append("source /usr/share/Modules/init/bash\n");
        // subfileContent.append("module load singularity\n");
        // subfileContent.append("if command -v singularity >/dev/null 2>&1; then\n");
        // subfileContent.append("   echo 'singularity command exists'\n");
        // subfileContent.append("   exit 0\n");
        // subfileContent.append("else\n");
        // subfileContent.append("   echo 'singularity command not found'\n");
        // subfileContent.append("   exit 1\n");
        // subfileContent.append("fi\n");
        FileUtils.writeStringToFile(sub_file_localpath, subfileContent.toString());
        HtcJobID htcJobId = slurmProxy.submitJobFile(sub_file_remotepath);
        System.out.println("running job " + htcJobId);
        HtcJobInfo htcJobInfo = new HtcJobInfo(htcJobId, jobName);
        ArrayList<HtcJobInfo> jobInfos = new ArrayList<HtcJobInfo>();
        jobInfos.add(htcJobInfo);
        Map<HtcJobInfo, HtcJobStatus> jobStatusMap = slurmProxy.getJobStatus(jobInfos);
        int attempts = 0;
        // Poll once per second until the job is done or the attempt budget is used up.
        while (attempts < 80 && (jobStatusMap.get(htcJobInfo) == null || !jobStatusMap.get(htcJobInfo).isDone())) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Do not swallow the interrupt: restore the flag and stop polling.
                Thread.currentThread().interrupt();
                break;
            }
            jobStatusMap = slurmProxy.getJobStatus(jobInfos);
            System.out.println(jobStatusMap.get(htcJobInfo));
            // Ask for the job to be killed part-way through the polling.
            if (attempts == 5) {
                slurmProxy.killJobs(jobName);
            }
            attempts++;
        }
        System.out.println(jobStatusMap.get(htcJobInfo));
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cmd != null) {
            cmd.close();
        }
    }
}
Also used : HtcJobStatus(cbit.vcell.message.server.htc.HtcJobStatus) ArrayList(java.util.ArrayList) HtcJobInfo(cbit.vcell.message.server.htc.HtcProxy.HtcJobInfo) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) ExecutableException(org.vcell.util.exe.ExecutableException) Random(java.util.Random) HtcJobID(cbit.vcell.server.HtcJobID) File(java.io.File) CommandServiceSshNative(cbit.vcell.message.server.cmd.CommandServiceSshNative) Test(org.junit.Test)

Aggregations

CommandServiceSshNative (cbit.vcell.message.server.cmd.CommandServiceSshNative)3 File (java.io.File)3 IOException (java.io.IOException)3 ExecutableException (org.vcell.util.exe.ExecutableException)3 HtcJobStatus (cbit.vcell.message.server.htc.HtcJobStatus)2 HtcJobInfo (cbit.vcell.message.server.htc.HtcProxy.HtcJobInfo)2 MalformedURLException (java.net.MalformedURLException)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 CommandService (cbit.vcell.message.server.cmd.CommandService)1 CommandOutput (cbit.vcell.message.server.cmd.CommandService.CommandOutput)1 CommandServiceLocal (cbit.vcell.message.server.cmd.CommandServiceLocal)1 HtcException (cbit.vcell.message.server.htc.HtcException)1 HtcJobNotFoundException (cbit.vcell.message.server.htc.HtcJobNotFoundException)1 HtcProxy (cbit.vcell.message.server.htc.HtcProxy)1 PartitionStatistics (cbit.vcell.message.server.htc.HtcProxy.PartitionStatistics)1 HtcJobID (cbit.vcell.server.HtcJobID)1 BatchSystemType (cbit.vcell.server.HtcJobID.BatchSystemType)1 Random (java.util.Random)1 StringTokenizer (java.util.StringTokenizer)1