Search in sources :

Example 61 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class ValuesFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    for (int p = 0; p < probes.length; p++) {
        progressUpdated(p, probes.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        int count = 0;
        for (int s = 0; s < stores.length; s++) {
            float d = 0;
            if (!stores[s].hasValueForProbe(probes[p]))
                continue;
            try {
                d = stores[s].getValueForProbe(probes[p]);
            } catch (SeqMonkException e) {
                e.printStackTrace();
                continue;
            }
            // NaN values always fail the filter.
            if (Float.isNaN(d))
                continue;
            // Now we have the value we need to know if it passes the test
            if (upperLimit != null)
                if (d > upperLimit)
                    continue;
            if (lowerLimit != null)
                if (d < lowerLimit)
                    continue;
            // This one passes, we can add it to the count
            ++count;
        }
        // probe to the probe set.
        switch(limitType) {
            case EXACTLY:
                if (count == chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
            case AT_LEAST:
                if (count >= chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
            case NO_MORE_THAN:
                if (count <= chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
        }
    }
    newList.setName("Value between " + lowerLimit + "-" + upperLimit);
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 62 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class WindowedReplicateStatsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<ProbeGroupTTestValue> newListProbesVector = new Vector<ProbeGroupTTestValue>();
    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        ProbeGroupGenerator gen = null;
        if (windowType == DISTANCE_WINDOW) {
            gen = new ProbeWindowGenerator(probes, windowSize);
        } else if (windowType == CONSECUTIVE_WINDOW) {
            gen = new ConsecutiveProbeGenerator(probes, windowSize);
        } else if (windowType == FEATURE_WINDOW) {
            gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
        }
        while (true) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            Probe[] theseProbes = gen.nextSet();
            if (theseProbes == null) {
                // System.err.println("List of probes was null");
                break;
            }
            // We need at least 3 probes in a set to calculate a p-value
            if (theseProbes.length < 3) {
                // System.err.println("Only "+theseProbes.length+" probes in the set");
                continue;
            }
            double[][] values = new double[stores.length][];
            for (int j = 0; j < stores.length; j++) {
                if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                    values[j] = new double[theseProbes.length * ((ReplicateSet) stores[j]).dataStores().length];
                } else {
                    values[j] = new double[theseProbes.length];
                }
            }
            for (int j = 0; j < stores.length; j++) {
                int index = 0;
                for (int i = 0; i < theseProbes.length; i++) {
                    try {
                        if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                            DataStore[] localStores = ((ReplicateSet) stores[j]).dataStores();
                            for (int l = 0; l < localStores.length; l++) {
                                values[j][index] = localStores[l].getValueForProbe(theseProbes[i]);
                                index++;
                            }
                        } else {
                            values[j][index] = stores[j].getValueForProbe(theseProbes[i]);
                            index++;
                        }
                    } catch (SeqMonkException e) {
                    }
                }
                if (index != values[j].length) {
                    throw new IllegalStateException("Didn't fill all values total=" + values[j].length + " index=" + index);
                }
            }
            double pValue = 0;
            try {
                if (stores.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (stores.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }
            newListProbesVector.add(new ProbeGroupTTestValue(theseProbes, pValue));
        }
    }
    ProbeGroupTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeGroupTTestValue[0]);
    // Do the multi-testing correction if necessary
    if (multiTest) {
        BenjHochFDR.calculateQValues(newListProbes);
    }
    ProbeList newList;
    // We need to handle duplicate hits internally since probe lists can't do
    // this themselves any more.
    Hashtable<Probe, Float> newListTemp = new Hashtable<Probe, Float>();
    if (multiTest) {
        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible q-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].q) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].q);
                }
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible p-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].p) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].p);
                }
            }
        }
    }
    // Add the cached hits to the new list
    Enumeration<Probe> en = newListTemp.keys();
    while (en.hasMoreElements()) {
        Probe p = en.nextElement();
        newList.addProbe(p, newListTemp.get(p));
    }
    filterFinished(newList);
}
Also used : ReplicateSet(uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet) ProbeWindowGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) ProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProbeGroupTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeGroupTTestValue) ConsecutiveProbeGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator)

Example 63 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class WindowedValuesFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    for (int c = 0; c < chromosomes.length; c++) {
        HashSet<Probe> passedProbes = new HashSet<Probe>();
        progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        ProbeGroupGenerator gen = null;
        if (windowType == DISTANCE_WINDOW) {
            gen = new ProbeWindowGenerator(probes, windowSize);
        } else if (windowType == CONSECUTIVE_WINDOW) {
            gen = new ConsecutiveProbeGenerator(probes, windowSize);
        } else if (windowType == FEATURE_WINDOW) {
            gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
        } else {
            progressExceptionReceived(new SeqMonkException("No window type known with number " + windowType));
            return;
        }
        while (true) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            Probe[] theseProbes = gen.nextSet();
            if (theseProbes == null) {
                break;
            }
            int count = 0;
            for (int s = 0; s < stores.length; s++) {
                double totalValue = 0;
                for (int i = 0; i < theseProbes.length; i++) {
                    // Get the values for the probes in this set
                    try {
                        totalValue += stores[s].getValueForProbe(theseProbes[i]);
                    } catch (SeqMonkException e) {
                    }
                }
                totalValue /= theseProbes.length;
                // Now we have the value we need to know if it passes the test
                if (upperLimit != null)
                    if (totalValue > upperLimit.doubleValue())
                        continue;
                if (lowerLimit != null)
                    if (totalValue < lowerLimit.doubleValue())
                        continue;
                // This one passes, we can add it to the count
                ++count;
            }
            // probe to the probe set.
            switch(limitType) {
                case EXACTLY:
                    if (count == storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
                case AT_LEAST:
                    if (count >= storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
                case NO_MORE_THAN:
                    if (count <= storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeWindowGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) ConsecutiveProbeGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) ProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) HashSet(java.util.HashSet)

Example 64 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class AntisenseTranscriptionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    long[] senseCounts = new long[data.length];
    long[] antisenseCounts = new long[data.length];
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Getting total antisense rate for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
            for (int d = 0; d < data.length; d++) {
                long[] reads = data[d].getReadsForProbe(p);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(p, reads[r])) {
                        senseCounts[d] += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCounts[d] += SequenceRead.length(reads[r]);
                    }
                }
            }
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we can work out the overall antisense rate
    double[] antisenseProbability = new double[data.length];
    for (int d = 0; d < data.length; d++) {
        System.err.println("Antisense counts are " + antisenseCounts[d] + " sense counts are " + senseCounts[d]);
        antisenseProbability[d] = antisenseCounts[d] / (double) (antisenseCounts[d] + senseCounts[d]);
        System.err.println("Antisense probability for " + data[d].name() + " is " + antisenseProbability[d]);
    }
    // Now we can quantitate each individual feature and test for whether it is significantly
    // showing antisense expression
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }
    int[] readLengths = new int[data.length];
    for (int d = 0; d < readLengths.length; d++) {
        readLengths[d] = data[d].getMaxReadLength();
        System.err.println("For " + data[d].name() + " max read len is " + readLengths[d]);
    }
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Probe[] thisChrProbes = collection().probeSet().getProbesForChromosome(chrs[c]);
        for (int p = 0; p < thisChrProbes.length; p++) {
            for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long senseCount = 0;
                long antisenseCount = 0;
                long[] reads = data[d].getReadsForProbe(thisChrProbes[p]);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(thisChrProbes[p], reads[r])) {
                        // TODO: Just count overlap?
                        senseCount += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCount += SequenceRead.length(reads[r]);
                    }
                }
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw base counts are sense="+senseCount+" anti="+antisenseCount+" from "+reads.length+" reads");
                // }
                int senseReads = (int) (senseCount / readLengths[d]);
                int antisenseReads = (int) (antisenseCount / readLengths[d]);
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw read counts are sense="+senseReads+" anti="+antisenseReads+" from "+reads.length+" reads");
                // }
                BinomialDistribution bd = new BinomialDistribution(senseReads + antisenseReads, antisenseProbability[d]);
                // Since the binomial distribution gives the probability of getting a value higher than
                // this we need to subtract one so we get the probability of this or higher.
                double thisPValue = 1 - bd.cumulativeProbability(antisenseReads - 1);
                if (antisenseReads == 0)
                    thisPValue = 1;
                // We have to add all results at this stage so we don't mess up the multiple
                // testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(thisChrProbes[p], thisPValue));
                double expected = ((senseReads + antisenseReads) * antisenseProbability[d]);
                if (expected < 1)
                    expected = 1;
                float obsExp = antisenseReads / (float) expected;
                data[d].setValueForProbe(thisChrProbes[p], obsExp);
            }
        }
    }
    // filtering those which pass our p-value cutoff
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);
        ProbeList newList = new ProbeList(collection().probeSet(), "Antisense < " + pValue + " in " + data[d].name(), "Probes showing significant antisense transcription from a basal level of " + antisenseProbability[d] + " with a cutoff of " + pValue, "FDR");
        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].probe.name().equals("RP4-798A10.2")) {
                System.err.println("Raw p=" + ttestResults[i].p + " q=" + ttestResults[i].q);
            }
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Antisense transcription pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    if (optionsPanel.ignoreOverlaps()) {
        quantitationDescription.append(". Ignoring existing overlaps");
    }
    quantitationDescription.append(". P-value cutoff was ");
    quantitationDescription.append(optionsPanel.pValue());
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : ArrayList(java.util.ArrayList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution)

Example 65 with ProbeList

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.

the class ProbeLengthFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    for (int p = 0; p < probes.length; p++) {
        progressUpdated(p, probes.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        if (upperLimit != null)
            if (probes[p].length() > upperLimit)
                continue;
        if (lowerLimit != null)
            if (probes[p].length() < lowerLimit)
                continue;
        newList.addProbe(probes[p], null);
    }
    newList.setName("Length between " + lowerLimit + "-" + upperLimit);
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)79 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)32 Vector (java.util.Vector)16 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)15 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)12 HashSet (java.util.HashSet)10 JLabel (javax.swing.JLabel)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 GridBagConstraints (java.awt.GridBagConstraints)7 GridBagLayout (java.awt.GridBagLayout)7 File (java.io.File)7 JComboBox (javax.swing.JComboBox)7 JPanel (javax.swing.JPanel)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 PrintWriter (java.io.PrintWriter)6 JCheckBox (javax.swing.JCheckBox)6 ProgressRecordDialog (uk.ac.babraham.SeqMonk.Dialogs.ProgressRecordDialog)6 RProgressListener (uk.ac.babraham.SeqMonk.R.RProgressListener)6