Search in sources :

Example 56 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class IntensityReplicateFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    // We need to work out how many probes are going to be put into
    // each sub-distribution we calculate.  The rule is going to be that
    // we use 1% of the total, or 100 probes whichever is the higher
    probesPerSet = probes.length / 100;
    if (probesPerSet < 100)
        probesPerSet = 100;
    if (probesPerSet > probes.length)
        probesPerSet = probes.length;
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // These arrays are going to hold the real SDs for the replicate
    // sets we're going to analyse.
    double[] realSDFrom = new double[probes.length];
    double[] realSDTo = new double[probes.length];
    // These arrays are going to hold the averaged SDs for the replicate
    // sets we're going to analyse.
    double[] averageSDFrom = new double[probes.length];
    double[] averageSDTo = new double[probes.length];
    // This is going to be the temporary array we populate with the set of
    // differences we are going to analyse.
    double[] localSDset = new double[probesPerSet];
    // First work out the set of comparisons we're going to make
    Vector<SingleComparison> comparisons = new Vector<IntensityReplicateFilter.SingleComparison>();
    for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
        for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
            if (fromStores[fromIndex] == toStores[toIndex])
                continue;
            // If we can find the fromStore in the toStores we've already done and the
            // toStore anywhere in the fromStores then we can skip this.
            boolean canSkip = false;
            for (int i = 0; i < fromIndex; i++) {
                if (fromStores[i] == toStores[toIndex]) {
                    for (int j = 0; j < toStores.length; j++) {
                        if (toStores[j] == fromStores[fromIndex]) {
                            canSkip = true;
                            break;
                        }
                    }
                    break;
                }
            }
            if (canSkip)
                continue;
            comparisons.add(new SingleComparison(fromIndex, toIndex));
        }
    }
    for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
        int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
        int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
        // We need to generate a set of probe indices ordered by their average intensity
        Integer[] fromIndices = new Integer[probes.length];
        Integer[] toIndices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            fromIndices[i] = i;
            toIndices[i] = i;
        }
        // This isn't ideal.  We're sorting the probes by average intensity which puts together
        // probes which should probably have different standard deviations.  It would be better
        // to sort on the intensities in each data store separately, but we get such a problem
        // from very low values contaminating the set by giving artificially low average SDs that
        // I don't think we can get away with this.
        Comparator<Integer> fromComp = new AveragePairedIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
        Comparator<Integer> toComp = new AveragePairedIntensityComparator(toStores[toIndex], fromStores[fromIndex], probes);
        // Comparator<Integer> fromComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
        // Comparator<Integer> toComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
        Arrays.sort(fromIndices, fromComp);
        Arrays.sort(toIndices, toComp);
        // We also need to get the raw SDs for the two replicate sets
        double[] fromValues = new double[fromStores[fromIndex].dataStores().length];
        double[] toValues = new double[toStores[toIndex].dataStores().length];
        try {
            for (int p = 0; p < probes.length; p++) {
                for (int f = 0; f < fromValues.length; f++) {
                    fromValues[f] = fromStores[fromIndex].dataStores()[f].getValueForProbe(probes[p]);
                }
                for (int t = 0; t < toValues.length; t++) {
                    toValues[t] = toStores[toIndex].dataStores()[t].getValueForProbe(probes[p]);
                }
                realSDFrom[p] = SimpleStats.stdev(fromValues);
                realSDTo[p] = SimpleStats.stdev(toValues);
            }
        } catch (SeqMonkException sme) {
            progressExceptionReceived(sme);
        }
        progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
        IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
        for (int i = 0; i < probes.length; i++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (i % 1000 == 0) {
                int progress = (i * 100) / probes.length;
                progress += 100 * comparisonIndex;
                progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
            }
            // We need to make up the set of SDs to represent this probe
            int startingIndex = i - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + probesPerSet >= probes.length)
                startingIndex = probes.length - probesPerSet;
            for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
                localSDset[j - startingIndex] = realSDFrom[fromIndices[j]];
            }
            // averageSDFrom[fromIndices[i]] = SimpleStats.percentile(localSDset,90);
            averageSDFrom[fromIndices[i]] = SimpleStats.mean(localSDset);
            for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
                localSDset[j - startingIndex] = realSDTo[toIndices[j]];
            }
            // averageSDTo[toIndices[i]] = SimpleStats.percentile(localSDset,90);
            averageSDTo[toIndices[i]] = SimpleStats.mean(localSDset);
        }
        for (int p = 0; p < probes.length; p++) {
            double fromValue = 0;
            double toValue = 0;
            try {
                fromValue = fromStores[fromIndex].getValueForProbe(probes[p]);
                toValue = toStores[toIndex].getValueForProbe(probes[p]);
            } catch (SeqMonkException sme) {
            }
            double fromSD = Math.max(realSDFrom[p], averageSDFrom[p]);
            double toSD = Math.max(realSDTo[p], averageSDTo[p]);
            // double fromSD = averageSDFrom[p];
            // double toSD = averageSDTo[p];
            currentPValues[p] = new IndexTTestValue(p, TTest.calculatePValue(fromValue, toValue, fromSD, toSD, fromStores[fromIndex].dataStores().length, toStores[toIndex].dataStores().length));
        // System.err.println("P value was "+currentPValues[p].p+" from "+fromValue+" "+toValue+" "+fromSD+" "+toSD+" "+fromStores[fromIndex].dataStores().length+" "+toStores[toIndex].dataStores().length);
        }
        // We now need to correct the set of pValues
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }
        // the combined set
        if (applyMultipleTestingCorrection) {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            }
        } else {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }
    // pass the filter.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) IndexTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)

Example 57 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class LogisticRegressionSplicingFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // We need to make a temporary directory, save the data into it, write out the R script
    // and then run it an collect the list of results, then clean up.
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();
    File tempDir;
    try {
        Probe[] probes = startingList.getAllProbes();
        probes = filterProbesByCount(probes, fromStores, toStores);
        progressUpdated("Pairing Probes", 0, 1);
        // Make pairs of probes to test
        ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
        System.err.println("Found " + pairs.length + " pairs to test");
        progressUpdated("Creating temp directory", 0, 1);
        tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());
        progressUpdated("Writing R script", 0, 1);
        // Get the template script
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
        // Substitute in the variables we need to change
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        template.setValue("PVALUE", "" + pValueCutoff);
        template.setValue("CORRECTCOUNT", "" + pairs.length);
        if (multiTest) {
            template.setValue("MULTITEST", "TRUE");
        } else {
            template.setValue("MULTITEST", "FALSE");
        }
        // Write the script file
        File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
        PrintWriter pr = new PrintWriter(scriptFile);
        pr.print(template.toString());
        pr.close();
        // Write the count data
        // Sort these so we can get probes from the same chromosome together
        Arrays.sort(probes);
        pr = null;
        String lastChr = "";
        for (int p = 0; p < pairs.length; p++) {
            if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
                if (pr != null)
                    pr.close();
                File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
                pr = new PrintWriter(outFile);
                lastChr = pairs[p].probe1.chromosome().name();
                pr.println("id\tgroup\treplicate\tstate\tcount");
            }
            if (p % 1000 == 0) {
                progressUpdated("Writing data for chr" + lastChr, p, probes.length);
            }
            int[] fromProbe1Counts = new int[fromStores.length];
            int[] fromProbe2Counts = new int[fromStores.length];
            int[] toProbe1Counts = new int[toStores.length];
            int[] toProbe2Counts = new int[toStores.length];
            for (int i = 0; i < fromStores.length; i++) {
                // TODO: For the moment we'll expect they've quantitated this themselves
                fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
                fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
            }
            for (int i = 0; i < toStores.length; i++) {
                // TODO: For the moment we'll expect they've quantitated this themselves
                toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
                toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
            }
            for (int i = 0; i < fromProbe1Counts.length; i++) {
                pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
                pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
            }
            for (int i = 0; i < toProbe1Counts.length; i++) {
                pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
                pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
            }
        }
        if (pr != null)
            pr.close();
        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                pr.close();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                pr.close();
                return;
            }
            if (listener.complete())
                break;
            Thread.sleep(500);
        }
        // We can now parse the results and put the hits into a new probe list
        ProbeList newList;
        if (multiTest) {
            newList = new ProbeList(startingList, "", "", "FDR");
        } else {
            newList = new ProbeList(startingList, "", "", "p-value");
        }
        File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
        BufferedReader br = new BufferedReader(new FileReader(hitsFile));
        String line = br.readLine();
        // In case the same probe is found multiple times we'll cache the p-values
        // we see and report the best one.
        HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            String[] indexSections = sections[0].split("\\.");
            int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
                cachedHits.put(pairs[probeIndex].probe1, pValue);
            }
            if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
                cachedHits.put(pairs[probeIndex].probe2, pValue);
            }
            // See if the pvalue we've got is better than the one we're storing
            if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
                cachedHits.put(pairs[probeIndex].probe1, pValue);
            }
            if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
                cachedHits.put(pairs[probeIndex].probe2, pValue);
            }
        }
        br.close();
        for (Probe probeHit : cachedHits.keySet()) {
            newList.addProbe(probeHit, cachedHits.get(probeHit));
        }
        runner.cleanUp();
        filterFinished(newList);
    } catch (Exception ioe) {
        progressExceptionReceived(ioe);
        return;
    }
}
Also used : HashMap(java.util.HashMap) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) RProgressListener(uk.ac.babraham.SeqMonk.R.RProgressListener) Template(uk.ac.babraham.SeqMonk.Utilities.Templates.Template) ProgressRecordDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) PrintWriter(java.io.PrintWriter) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) BufferedReader(java.io.BufferedReader) File(java.io.File) RScriptRunner(uk.ac.babraham.SeqMonk.R.RScriptRunner)

Example 58 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class MonteCarloFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    Probe[] probes = toList.getAllProbes();
    ArrayList<Probe> allProbes = new ArrayList<Probe>();
    Probe[] fromProbes = startingList.getAllProbes();
    try {
        for (int i = 0; i < fromProbes.length; i++) {
            allProbes.add(fromProbes[i]);
        }
        int passedCount = 0;
        float targetValue = getValue(probes);
        for (int iteration = 0; iteration < iterationCount; iteration++) {
            if (iteration % 100 == 0) {
                progressUpdated("Performed " + iteration + " iterations", iteration, iterationCount);
            }
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe[] theseProbes = makeRandomProbes(allProbes, probes.length);
            float value = getValue(theseProbes);
            if (value >= targetValue) {
                ++passedCount;
            }
        }
        float pValue = ((float) passedCount) / iterationCount;
        ProbeList newList = new ProbeList(toList, "", "", "P-value");
        for (int i = 0; i < probes.length; i++) {
            newList.addProbe(probes[i], pValue);
        }
        filterFinished(newList);
    } catch (SeqMonkException sme) {
        progressExceptionReceived(sme);
        return;
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ArrayList(java.util.ArrayList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 59 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class PositionFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    ProbeList newList = new ProbeList(startingList, "", "", null);
    Probe[] probes = startingList.getAllProbes();
    // We only take probes which are completely enclosed in the selected region
    for (int p = 0; p < probes.length; p++) {
        if (p % 10000 == 0)
            progressUpdated(p, probes.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        if (chromosome != null) {
            if (probes[p].chromosome() != chromosome)
                continue;
        }
        if (!strandType.useProbe(probes[p]))
            continue;
        if (start != null && probes[p].start() < start)
            continue;
        if (end != null && probes[p].end() > end)
            continue;
        newList.addProbe(probes[p], null);
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 60 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class ProbeListProbeGenerator method getOptionsPanel.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.ProbeGenerators.ProbeGenerator#getOptionsPanel(uk.ac.babraham.SeqMonk.SeqMonkApplication)
	 */
public JPanel getOptionsPanel() {
    if (optionPanel != null) {
        // We've done this already
        return optionPanel;
    }
    ProbeList currentActiveList = null;
    if (collection.probeSet() != null) {
        currentLists = collection.probeSet().getAllProbeLists();
        currentActiveList = collection.probeSet().getActiveList();
    } else {
        currentLists = new ProbeList[0];
    }
    optionPanel = new JPanel();
    optionPanel.setLayout(new GridBagLayout());
    GridBagConstraints gbc = new GridBagConstraints();
    gbc.gridx = 1;
    gbc.gridy = 1;
    gbc.weightx = 0.1;
    gbc.weighty = 0.5;
    gbc.fill = GridBagConstraints.HORIZONTAL;
    optionPanel.add(new JLabel("Select List"), gbc);
    gbc.gridx++;
    gbc.weightx = 0.9;
    selectedListBox = new JComboBox(currentLists);
    selectedListBox.setPrototypeDisplayValue("No longer than this please");
    for (int i = 0; i < currentLists.length; i++) {
        if (currentLists[i] == currentActiveList) {
            selectedListBox.setSelectedIndex(i);
        }
    }
    selectedListBox.addItemListener(new ItemListener() {

        public void itemStateChanged(ItemEvent e) {
            isReady();
        }
    });
    optionPanel.add(selectedListBox, gbc);
    gbc.gridx = 1;
    gbc.gridy++;
    gbc.weightx = 0.1;
    optionPanel.add(new JLabel("Reverse Probe Direction"), gbc);
    gbc.gridx++;
    gbc.weightx = 0.9;
    reverseDirectionBox = new JCheckBox();
    optionPanel.add(reverseDirectionBox, gbc);
    return optionPanel;
}
Also used : JCheckBox(javax.swing.JCheckBox) JPanel(javax.swing.JPanel) GridBagConstraints(java.awt.GridBagConstraints) ItemEvent(java.awt.event.ItemEvent) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) GridBagLayout(java.awt.GridBagLayout) JComboBox(javax.swing.JComboBox) JLabel(javax.swing.JLabel) ItemListener(java.awt.event.ItemListener)

Aggregations

ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)79 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)32 Vector (java.util.Vector)16 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)15 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)12 HashSet (java.util.HashSet)10 JLabel (javax.swing.JLabel)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 GridBagConstraints (java.awt.GridBagConstraints)7 GridBagLayout (java.awt.GridBagLayout)7 File (java.io.File)7 JComboBox (javax.swing.JComboBox)7 JPanel (javax.swing.JPanel)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 PrintWriter (java.io.PrintWriter)6 JCheckBox (javax.swing.JCheckBox)6 ProgressRecordDialog (uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog)6 RProgressListener (uk.ac.babraham.SeqMonk.R.RProgressListener)6