Search in sources :

Example 6 with RProgressListener

use of uk.ac.babraham.SeqMonk.R.RProgressListener in project SeqMonk by s-andrews.

the class EdgeRFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // We need to make a temporary directory, save the data into it, write out the R script
    // and then run it an collect the list of results, then clean up.
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();
    File tempDir;
    try {
        progressUpdated("Creating temp directory", 0, 1);
        tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());
        progressUpdated("Writing R script", 0, 1);
        // Get the template script
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_template.r"));
        // Substitute in the variables we need to change
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        // Say which p value column we're filtering on
        if (multiTest) {
            template.setValue("CORRECTED", "FDR");
        } else {
            template.setValue("CORRECTED", "PValue");
        }
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < fromStores.length; i++) {
            if (i > 0)
                sb.append(",");
            sb.append("1");
        }
        for (int i = 0; i < toStores.length; i++) {
            sb.append(",");
            sb.append("2");
        }
        template.setValue("CONDITIONS", sb.toString());
        template.setValue("PVALUE", "" + cutoff);
        // Write the script file
        File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
        PrintWriter pr = new PrintWriter(scriptFile);
        pr.print(template.toString());
        pr.close();
        // Write the count data
        File countFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
        pr = new PrintWriter(countFile);
        sb = new StringBuffer();
        sb.append("probe");
        for (int i = 0; i < fromStores.length; i++) {
            sb.append("\t");
            sb.append("from");
            sb.append(i);
        }
        for (int i = 0; i < toStores.length; i++) {
            sb.append("\t");
            sb.append("to");
            sb.append(i);
        }
        pr.println(sb.toString());
        progressUpdated("Writing count data", 0, 1);
        Probe[] probes = startingList.getAllProbes();
        float value;
        for (int p = 0; p < probes.length; p++) {
            if (p % 1000 == 0) {
                progressUpdated("Writing count data", p, probes.length);
            }
            sb = new StringBuffer();
            sb.append(p);
            for (int i = 0; i < fromStores.length; i++) {
                sb.append("\t");
                value = fromStores[i].getValueForProbe(probes[p]);
                if (value != (int) value) {
                    progressExceptionReceived(new IllegalArgumentException("Inputs to the EdgeR filter MUST be raw, incorrected counts, not things like " + value));
                    pr.close();
                    return;
                }
                sb.append(value);
            }
            for (int i = 0; i < toStores.length; i++) {
                sb.append("\t");
                value = toStores[i].getValueForProbe(probes[p]);
                if (value != (int) value) {
                    progressExceptionReceived(new IllegalArgumentException("Inputs to the EdgeR filter MUST be raw, incorrected counts, not things like " + value));
                    pr.close();
                    return;
                }
                sb.append(value);
            }
            pr.println(sb.toString());
        }
        pr.close();
        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                return;
            }
            if (listener.complete())
                break;
            Thread.sleep(500);
        }
        // We can now parse the results and put the hits into a new probe list
        ProbeList newList;
        newList = new ProbeList(startingList, "", "", "FDR");
        File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
        BufferedReader br = new BufferedReader(new FileReader(hitsFile));
        String line = br.readLine();
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            int probeIndex = Integer.parseInt(sections[0]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            newList.addProbe(probes[probeIndex], pValue);
        }
        br.close();
        runner.cleanUp();
        filterFinished(newList);
    } catch (Exception ioe) {
        progressExceptionReceived(ioe);
        return;
    }
// filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) RProgressListener(uk.ac.babraham.SeqMonk.R.RProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Template(uk.ac.babraham.SeqMonk.Utilities.Templates.Template) ProgressRecordDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File) RScriptRunner(uk.ac.babraham.SeqMonk.R.RScriptRunner) PrintWriter(java.io.PrintWriter)

Example 7 with RProgressListener

use of uk.ac.babraham.SeqMonk.R.RProgressListener in project SeqMonk by s-andrews.

the class EdgeRForRevFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // We need to make a temporary directory, save the data into it, write out the R script
    // and then run it an collect the list of results, then clean up.
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();
    File tempDir;
    try {
        Probe[] probes = startingList.getAllProbes();
        if (resample) {
            // We need to check that the data stores are quantitated
            for (int i = 0; i < fromStores.length; i++) {
                if (!fromStores[i].isQuantitated()) {
                    progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " wasn't quantitated"));
                    return;
                }
                for (int p = 0; p < probes.length; p++) {
                    float value = fromStores[i].getValueForProbe(probes[p]);
                    if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
                        progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
                        return;
                    }
                }
            }
            for (int i = 0; i < toStores.length; i++) {
                if (!toStores[i].isQuantitated()) {
                    progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " wasn't quantitated"));
                    return;
                }
                for (int p = 0; p < probes.length; p++) {
                    float value = toStores[i].getValueForProbe(probes[p]);
                    if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
                        progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
                        return;
                    }
                }
            }
        }
        progressUpdated("Creating temp directory", 0, 1);
        tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());
        progressUpdated("Writing R script", 0, 1);
        // Get the template script
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_for_rev_template.r"));
        // Substitute in the variables we need to change
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        template.setValue("DIFFERENCE", "" + absDiffCutoff);
        template.setValue("PVALUE", "" + pValueCutoff);
        if (multiTest) {
            template.setValue("MULTITEST", "TRUE");
            template.setValue("CORRECTED", "FDR");
        } else {
            template.setValue("MULTITEST", "FALSE");
            template.setValue("CORRECTED", "PValue");
        }
        // For the conditions we just repeat "from" and "to" the number of times they occur in the
        // samples (twice for each sample since we have both meth and unmeth)
        StringBuffer conditions = new StringBuffer();
        for (int i = 0; i < fromStores.length; i++) {
            if (i > 0)
                conditions.append(",");
            conditions.append("\"from\",\"from\"");
        }
        for (int i = 0; i < toStores.length; i++) {
            conditions.append(",\"to\",\"to\"");
        }
        template.setValue("CONDITIONS", conditions.toString());
        // Write the script file
        File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
        PrintWriter pr = new PrintWriter(scriptFile);
        pr.print(template.toString());
        pr.close();
        // Write the count data
        File outFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
        pr = new PrintWriter(outFile);
        pr.print("id");
        for (int i = 0; i < fromStores.length; i++) {
            pr.print("\tfrom_" + i + "_meth\tfrom_" + i + "_unmeth");
        }
        for (int i = 0; i < toStores.length; i++) {
            pr.print("\tto_" + i + "_meth\tto_" + i + "_unmeth");
        }
        pr.print("\n");
        PROBE: for (int p = 0; p < probes.length; p++) {
            if (p % 1000 == 0) {
                progressUpdated("Writing data for chr" + probes[p].chromosome().name(), p, probes.length);
            }
            int[] fromMethCounts = new int[fromStores.length];
            int[] fromUnmethCounts = new int[fromStores.length];
            int[] toMethCounts = new int[toStores.length];
            int[] toUnmethCounts = new int[toStores.length];
            for (int i = 0; i < fromStores.length; i++) {
                long[] reads = fromStores[i].getReadsForProbe(probes[p]);
                int totalCount = 0;
                int methCount = 0;
                if (resample) {
                    float value = fromStores[i].getValueForProbe(probes[p]);
                    if (Float.isNaN(value)) {
                        continue PROBE;
                    }
                    totalCount = reads.length;
                    methCount = Math.round((totalCount * value) / 100f);
                } else {
                    for (int r = 0; r < reads.length; r++) {
                        totalCount++;
                        if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                            ++methCount;
                        }
                    }
                }
                fromMethCounts[i] = methCount;
                fromUnmethCounts[i] = totalCount - methCount;
            }
            for (int i = 0; i < toStores.length; i++) {
                long[] reads = toStores[i].getReadsForProbe(probes[p]);
                int totalCount = 0;
                int methCount = 0;
                if (resample) {
                    float value = toStores[i].getValueForProbe(probes[p]);
                    if (Float.isNaN(value)) {
                        continue PROBE;
                    }
                    totalCount = reads.length;
                    methCount = Math.round((totalCount * value) / 100f);
                } else {
                    for (int r = 0; r < reads.length; r++) {
                        totalCount++;
                        if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                            ++methCount;
                        }
                    }
                }
                toMethCounts[i] = methCount;
                toUnmethCounts[i] = totalCount - methCount;
            }
            // Check to see we meet the requirements for the min amount of information
            // and the min diff.
            int totalFromMeth = 0;
            int totalFrom = 0;
            int totalToMeth = 0;
            int totalTo = 0;
            int validFrom = 0;
            for (int i = 0; i < fromStores.length; i++) {
                totalFromMeth += fromMethCounts[i];
                totalFrom += fromMethCounts[i];
                totalFrom += fromUnmethCounts[i];
                if (fromMethCounts[i] > 0 || fromUnmethCounts[i] > 0) {
                    ++validFrom;
                }
            }
            int validTo = 0;
            for (int i = 0; i < toStores.length; i++) {
                totalToMeth += toMethCounts[i];
                totalTo += toMethCounts[i];
                totalTo += toUnmethCounts[i];
                if (toMethCounts[i] > 0 || toUnmethCounts[i] > 0) {
                    ++validTo;
                }
            }
            // EdgeR only requires 2 valid observations
            if (validFrom < 2 || validTo < 2) {
                // We don't have enough data to measure this one
                continue;
            }
            if (Math.abs((totalFromMeth * 100f / totalFrom) - (totalToMeth * 100f / totalTo)) < absDiffCutoff) {
                continue;
            }
            float[] fromPercentages = new float[validFrom];
            float[] toPercentages = new float[validTo];
            int lastFromIndex = 0;
            int lastToIndex = 0;
            for (int i = 0; i < fromMethCounts.length; i++) {
                if (fromMethCounts[i] + fromUnmethCounts[i] == 0)
                    continue;
                fromPercentages[lastFromIndex] = fromMethCounts[i] * 100f / (fromMethCounts[i] + fromUnmethCounts[i]);
                ++lastFromIndex;
            }
            for (int i = 0; i < toMethCounts.length; i++) {
                if (toMethCounts[i] + toUnmethCounts[i] == 0)
                    continue;
                toPercentages[lastToIndex] = toMethCounts[i] * 100f / (toMethCounts[i] + toUnmethCounts[i]);
                ++lastToIndex;
            }
            if (Math.abs(SimpleStats.mean(fromPercentages) - SimpleStats.mean(toPercentages)) < absDiffCutoff) {
                continue;
            }
            // If we get here then we're OK to use this probe so we print out its data.  We put all of the
            // data for one probe onto a single line.  The first value is the index of the probe.  The
            // rest are pairs of meth/unmeth values for the from samples then the to samples.
            pr.print(p);
            for (int i = 0; i < fromMethCounts.length; i++) {
                pr.print("\t" + fromMethCounts[i] + "\t" + fromUnmethCounts[i]);
            }
            for (int i = 0; i < toMethCounts.length; i++) {
                pr.print("\t" + toMethCounts[i] + "\t" + toUnmethCounts[i]);
            }
            pr.print("\n");
        }
        pr.close();
        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                pr.close();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                pr.close();
                return;
            }
            if (listener.complete())
                break;
            Thread.sleep(500);
        }
        // We can now parse the results and put the hits into a new probe list
        ProbeList newList;
        if (multiTest) {
            newList = new ProbeList(startingList, "", "", "FDR");
        } else {
            newList = new ProbeList(startingList, "", "", "p-value");
        }
        File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
        BufferedReader br = new BufferedReader(new FileReader(hitsFile));
        String line = br.readLine();
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            String[] indexSections = sections[0].split("\\.");
            int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            newList.addProbe(probes[probeIndex], pValue);
        }
        br.close();
        runner.cleanUp();
        filterFinished(newList);
    } catch (Exception ioe) {
        progressExceptionReceived(ioe);
        return;
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) RProgressListener(uk.ac.babraham.SeqMonk.R.RProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Template(uk.ac.babraham.SeqMonk.Utilities.Templates.Template) ProgressRecordDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File) RScriptRunner(uk.ac.babraham.SeqMonk.R.RScriptRunner) PrintWriter(java.io.PrintWriter)

Example 8 with RProgressListener

use of uk.ac.babraham.SeqMonk.R.RProgressListener in project SeqMonk by s-andrews.

the class LogisticRegressionSplicingFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // We need to make a temporary directory, save the data into it, write out the R script
    // and then run it an collect the list of results, then clean up.
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();
    File tempDir;
    try {
        Probe[] probes = startingList.getAllProbes();
        probes = filterProbesByCount(probes, fromStores, toStores);
        progressUpdated("Pairing Probes", 0, 1);
        // Make pairs of probes to test
        ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
        System.err.println("Found " + pairs.length + " pairs to test");
        progressUpdated("Creating temp directory", 0, 1);
        tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());
        progressUpdated("Writing R script", 0, 1);
        // Get the template script
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
        // Substitute in the variables we need to change
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        template.setValue("PVALUE", "" + pValueCutoff);
        template.setValue("CORRECTCOUNT", "" + pairs.length);
        if (multiTest) {
            template.setValue("MULTITEST", "TRUE");
        } else {
            template.setValue("MULTITEST", "FALSE");
        }
        // Write the script file
        File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
        PrintWriter pr = new PrintWriter(scriptFile);
        pr.print(template.toString());
        pr.close();
        // Write the count data
        // Sort these so we can get probes from the same chromosome together
        Arrays.sort(probes);
        pr = null;
        String lastChr = "";
        for (int p = 0; p < pairs.length; p++) {
            if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
                if (pr != null)
                    pr.close();
                File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
                pr = new PrintWriter(outFile);
                lastChr = pairs[p].probe1.chromosome().name();
                pr.println("id\tgroup\treplicate\tstate\tcount");
            }
            if (p % 1000 == 0) {
                progressUpdated("Writing data for chr" + lastChr, p, probes.length);
            }
            int[] fromProbe1Counts = new int[fromStores.length];
            int[] fromProbe2Counts = new int[fromStores.length];
            int[] toProbe1Counts = new int[toStores.length];
            int[] toProbe2Counts = new int[toStores.length];
            for (int i = 0; i < fromStores.length; i++) {
                // TODO: For the moment we'll expect they've quantitated this themselves
                fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
                fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
            }
            for (int i = 0; i < toStores.length; i++) {
                // TODO: For the moment we'll expect they've quantitated this themselves
                toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
                toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
            }
            for (int i = 0; i < fromProbe1Counts.length; i++) {
                pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
                pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
            }
            for (int i = 0; i < toProbe1Counts.length; i++) {
                pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
                pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
            }
        }
        if (pr != null)
            pr.close();
        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                pr.close();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                pr.close();
                return;
            }
            if (listener.complete())
                break;
            Thread.sleep(500);
        }
        // We can now parse the results and put the hits into a new probe list
        ProbeList newList;
        if (multiTest) {
            newList = new ProbeList(startingList, "", "", "FDR");
        } else {
            newList = new ProbeList(startingList, "", "", "p-value");
        }
        File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
        BufferedReader br = new BufferedReader(new FileReader(hitsFile));
        String line = br.readLine();
        // In case the same probe is found multiple times we'll cache the p-values
        // we see and report the best one.
        HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            String[] indexSections = sections[0].split("\\.");
            int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
                cachedHits.put(pairs[probeIndex].probe1, pValue);
            }
            if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
                cachedHits.put(pairs[probeIndex].probe2, pValue);
            }
            // See if the pvalue we've got is better than the one we're storing
            if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
                cachedHits.put(pairs[probeIndex].probe1, pValue);
            }
            if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
                cachedHits.put(pairs[probeIndex].probe2, pValue);
            }
        }
        br.close();
        for (Probe probeHit : cachedHits.keySet()) {
            newList.addProbe(probeHit, cachedHits.get(probeHit));
        }
        runner.cleanUp();
        filterFinished(newList);
    } catch (Exception ioe) {
        progressExceptionReceived(ioe);
        return;
    }
}
Also used : HashMap(java.util.HashMap) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) RProgressListener(uk.ac.babraham.SeqMonk.R.RProgressListener) Template(uk.ac.babraham.SeqMonk.Utilities.Templates.Template) ProgressRecordDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) PrintWriter(java.io.PrintWriter) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) BufferedReader(java.io.BufferedReader) File(java.io.File) RScriptRunner(uk.ac.babraham.SeqMonk.R.RScriptRunner)

Example 9 with RProgressListener

use of uk.ac.babraham.SeqMonk.R.RProgressListener in project SeqMonk by s-andrews.

the class PCADataCalculator method runPCA.

private void runPCA() {
    File tempDir;
    try {
        pd.progressUpdated("Creating temp directory", 0, 1);
        tempDir = TempDirectory.createTempDirectory();
        // System.err.println("Temp dir is "+tempDir.getAbsolutePath());
        pd.progressUpdated("Writing R script", 0, 1);
        // Get the template script
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Displays/PCAPlot/pca_template.r"));
        // Substitute in the variables we need to change
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        // Write the script file
        File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
        PrintWriter pr = new PrintWriter(scriptFile);
        pr.print(template.toString());
        pr.close();
        // Write the count data
        File countFile = new File(tempDir.getAbsoluteFile() + "/data.txt");
        pr = new PrintWriter(countFile);
        StringBuffer sb;
        pd.progressUpdated("Writing count data", 0, 1);
        for (int p = 0; p < usedProbes.length; p++) {
            sb = new StringBuffer();
            for (int d = 0; d < stores.length; d++) {
                if (d > 0)
                    sb.append("\t");
                sb.append(stores[d].getValueForProbe(usedProbes[p]));
            }
            pr.println(sb.toString());
        }
        pr.close();
        pd.progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();
        while (true) {
            if (listener.cancelled()) {
                pd.progressCancelled();
                return;
            }
            if (listener.exceptionReceived()) {
                pd.progressExceptionReceived(listener.exception());
                return;
            }
            if (listener.complete())
                break;
            Thread.sleep(500);
        }
        // We can now parse and store the results
        File varianceFile = new File(tempDir.getAbsolutePath() + "/variance_data.txt");
        BufferedReader br = new BufferedReader(new FileReader(varianceFile));
        String line = br.readLine();
        FloatVector varianceVector = new FloatVector();
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            varianceVector.add(Float.parseFloat(sections[1]));
        }
        br.close();
        variances = varianceVector.toArray();
        // We want to convert the variance values into percentages
        // since that's what everyone wants for PCA.
        float varianceTotal = 0;
        for (int v = 0; v < variances.length; v++) {
            varianceTotal += variances[v];
        }
        // Now convert to percentages
        for (int v = 0; v < variances.length; v++) {
            variances[v] = (variances[v] / varianceTotal) * 100;
        }
        // We don't want to keep more than 10PCs
        if (variances.length > 10) {
            float[] shortVariances = new float[10];
            for (int i = 0; i < shortVariances.length; i++) {
                shortVariances[i] = variances[i];
            }
            variances = shortVariances;
        }
        File pcaFile = new File(tempDir.getAbsolutePath() + "/pca_data.txt");
        br = new BufferedReader(new FileReader(pcaFile));
        line = br.readLine();
        pcaResults = new float[stores.length][variances.length];
        int storeIndex = 0;
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            for (int i = 0; i < variances.length; i++) {
                pcaResults[storeIndex][i] = Float.parseFloat(sections[i + 1]);
            }
            ++storeIndex;
        }
        br.close();
        File weightsFile = new File(tempDir.getAbsolutePath() + "/pca_weights.txt");
        br = new BufferedReader(new FileReader(weightsFile));
        line = br.readLine();
        pcaRotations = new float[usedProbes.length][variances.length];
        int probeIndex = 0;
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            for (int i = 0; i < variances.length; i++) {
                // We multiply by 1000 to get the values on a vaguely
                // sane scale
                pcaRotations[probeIndex][i] = Float.parseFloat(sections[i + 1]) * 1000;
            }
            ++probeIndex;
        }
        br.close();
        runner.cleanUp();
    } catch (Exception e) {
        pd.progressExceptionReceived(e);
        return;
    }
    pd.progressComplete("pca_analysis", this);
    // TODO: Open the graph of variances and PCA results
    new PCAScatterPlotDialog(this);
    new PCAVarianceDialog(this);
}
Also used : FloatVector(uk.ac.babraham.SeqMonk.Utilities.FloatVector) RProgressListener(uk.ac.babraham.SeqMonk.R.RProgressListener) IOException(java.io.IOException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Template(uk.ac.babraham.SeqMonk.Utilities.Templates.Template) ProgressRecordDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File) RScriptRunner(uk.ac.babraham.SeqMonk.R.RScriptRunner) PrintWriter(java.io.PrintWriter)

Aggregations

File (java.io.File)9 PrintWriter (java.io.PrintWriter)9 ProgressRecordDialog (uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog)9 RProgressListener (uk.ac.babraham.SeqMonk.R.RProgressListener)9 RScriptRunner (uk.ac.babraham.SeqMonk.R.RScriptRunner)9 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)9 Template (uk.ac.babraham.SeqMonk.Utilities.Templates.Template)9 BufferedReader (java.io.BufferedReader)8 FileReader (java.io.FileReader)8 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)6 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)6 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)6 IOException (java.io.IOException)3 FileNotFoundException (java.io.FileNotFoundException)1 HashMap (java.util.HashMap)1 JFileChooser (javax.swing.JFileChooser)1 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)1 ProgressDialog (uk.ac.babraham.SeqMonk.Dialogs.ProgressDialog.ProgressDialog)1 PCAScatterPlotDialog (uk.ac.babraham.SeqMonk.Displays.PCAPlot.PCAScatterPlotDialog)1 GenomeUpgrader (uk.ac.babraham.SeqMonk.Network.GenomeUpgrader)1