Search in sources :

Example 1 with ProbeTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in project SeqMonk by s-andrews.

the class CodonBiasPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    String libraryType = optionsPanel.libraryType();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making probes for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
        }
    }
    allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we can quantitate each individual feature and test for whether it is significantly
    // showing codon bias
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
    // data contains the data stores that this pipeline is going to use. We need to test each data store.
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }
    int probeCounter = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        for (int p = 0; p < features.length; p++) {
            // Get the corresponding feature and work out the mapping between genomic position and codon sub position.
            int[] mappingArray = createGenomeMappingArray(features[p]);
            DATASTORE_LOOP: for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long[] reads = data[d].getReadsForProbe(allProbes[probeCounter]);
                // TODO: make this configurable
                if (reads.length < 5) {
                    data[d].setValueForProbe(allProbes[probeCounter], Float.NaN);
                    continue DATASTORE_LOOP;
                }
                int pos1Count = 0;
                int pos2Count = 0;
                int pos3Count = 0;
                READ_LOOP: for (int r = 0; r < reads.length; r++) {
                    int genomicReadStart = SequenceRead.start(reads[r]);
                    int genomicReadEnd = SequenceRead.end(reads[r]);
                    int readStrand = SequenceRead.strand(reads[r]);
                    int relativeReadStart = -1;
                    // forward reads
                    if (readStrand == 1) {
                        if (libraryType == "Same strand specific") {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start pos in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        } else if (libraryType == "Opposing strand specific") {
                            if (features[p].location().strand() == Location.REVERSE) {
                                // The "start" of a reverse read/probe is actually the end
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        }
                    }
                    // reverse reads
                    if (readStrand == -1) {
                        if (libraryType == "Same strand specific") {
                            if (features[p].location().strand() == Location.REVERSE) {
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // System.out.println("features[p].location().end() is " + features[p].location().end() + ", genomicReadEnd is " + genomicReadEnd);
                                    // System.out.println("mapping array[0] is " + mappingArray[0]);
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        } else if (libraryType == "Opposing strand specific") {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start position in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        }
                    }
                    // find out which position the read is in
                    if (relativeReadStart == -1) {
                        continue READ_LOOP;
                    } else if (relativeReadStart % 3 == 0) {
                        pos3Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 1) % 3 == 0) {
                        pos2Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 2) % 3 == 0) {
                        pos1Count++;
                    }
                }
                // closing bracket for read loop
                // System.out.println("pos1Count for "+ features[p].name() + " is " + pos1Count);
                // System.out.println("pos2Count for "+ features[p].name() + " is " + pos2Count);
                // System.out.println("pos3Count for "+ features[p].name() + " is " + pos3Count);
                int interestingCodonCount = 0;
                int otherCodonCount = 0;
                if (optionsPanel.codonSubPosition() == 1) {
                    interestingCodonCount = pos1Count;
                    otherCodonCount = pos2Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 2) {
                    interestingCodonCount = pos2Count;
                    otherCodonCount = pos1Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 3) {
                    interestingCodonCount = pos3Count;
                    otherCodonCount = pos1Count + pos2Count;
                }
                int totalCount = interestingCodonCount + otherCodonCount;
                // BinomialDistribution bd = new BinomialDistribution(interestingCodonCount+otherCodonCount, 1/3d);
                BinomialDistribution bd = new BinomialDistribution(totalCount, 1 / 3d);
                // Since the binomial distribution gives the probability of getting a value higher than
                // this we need to subtract one so we get the probability of this or higher.
                double thisPValue = 1 - bd.cumulativeProbability(interestingCodonCount - 1);
                if (interestingCodonCount == 0)
                    thisPValue = 1;
                // We have to add all results at this stage so we don't mess up the multiple
                // testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(allProbes[probeCounter], thisPValue));
                float percentageCount;
                if (totalCount == 0) {
                    percentageCount = 0;
                } else {
                    percentageCount = ((float) interestingCodonCount / (float) totalCount) * 100;
                }
                data[d].setValueForProbe(allProbes[probeCounter], percentageCount);
            // System.out.println("totalCount = " + totalCount);
            // System.out.println("interestingCodonCount " + interestingCodonCount);
            // System.out.println("pValue = " + thisPValue);
            // System.out.println("percentageCount = " + percentageCount);
            // System.out.println("");
            }
            probeCounter++;
        }
    }
    // filtering those which pass our p-value cutoff
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);
        ProbeList newList = new ProbeList(collection().probeSet(), "Codon bias < " + pValue + " in " + data[d].name(), "Probes showing significant codon bias for position " + optionsPanel.codonSubPosition() + " with a cutoff of " + pValue, "FDR");
        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Codon bias pipeline using codon position " + optionsPanel.codonSubPosition() + " for " + optionsPanel.libraryType() + " library.");
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ArrayList(java.util.ArrayList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution) Vector(java.util.Vector)

Example 2 with ProbeTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in project SeqMonk by s-andrews.

the class ProportionOfLibraryStatisticsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    fromStores = optionsPanel.fromStores();
    toStores = optionsPanel.toStores();
    // System.err.println("Found "+fromStores.length+" from stores and "+toStores.length+" to stores");
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    testForIncreasesOnly = optionsPanel.increasesOnlyBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    // We'll pull the number of probes to sample from the preferences if they've changed it
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // Put something in the progress whilst we're ordering the probe values to make
    // the comparison.
    progressUpdated("Generating background model", 0, 1);
    try {
        for (int f = 0; f < fromStores.length; f++) {
            for (int t = 0; t < toStores.length; t++) {
                progressUpdated("Comparing " + fromStores[f] + " to " + toStores[t], 0, 1);
                // We need to work out the total counts in the probes we're using
                int fromTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    fromTotalCount += (int) fromStores[f].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }
                int toTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    toTotalCount += (int) toStores[t].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }
                for (int p = 0; p < probes.length; p++) {
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                    int n11 = (int) fromStores[f].getValueForProbe(probes[p]);
                    int n12 = fromTotalCount - n11;
                    int n21 = (int) toStores[f].getValueForProbe(probes[p]);
                    int n22 = toTotalCount - n21;
                    double[] pValues = FishersExactTest.fishersExactTest(n11, n12, n21, n22);
                    // The values in the array are 0=2-sided p-value, 1=left-sided p-value, 2=right-sided p-value
                    if (testForIncreasesOnly) {
                        if (pValues[1] < lowestPValues[p])
                            lowestPValues[p] = (float) pValues[1];
                    } else {
                        if (pValues[0] < lowestPValues[p])
                            lowestPValues[p] = (float) pValues[0];
                    }
                }
            }
        }
    } catch (SeqMonkException sme) {
        progressExceptionReceived(sme);
    }
    if (applyMultipleTestingCorrection) {
        ProbeTTestValue[] statsValues = new ProbeTTestValue[probes.length];
        for (int i = 0; i < probes.length; i++) {
            statsValues[i] = new ProbeTTestValue(probes[i], lowestPValues[i]);
        }
        BenjHochFDR.calculateQValues(statsValues);
        for (int i = 0; i < statsValues.length; i++) {
            if (statsValues[i].q < pValueLimit) {
                newList.addProbe(statsValues[i].probe, (float) statsValues[i].q);
            }
        }
    } else {
        for (int i = 0; i < lowestPValues.length; i++) {
            if (lowestPValues[i] < pValueLimit) {
                newList.addProbe(probes[i], lowestPValues[i]);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 3 with ProbeTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in project SeqMonk by s-andrews.

the class ReplicateSetStatsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    // Make up the list of DataStores in each replicate set
    DataStore[][] stores = new DataStore[replicateSets.length][];
    for (int i = 0; i < replicateSets.length; i++) {
        stores[i] = replicateSets[i].dataStores();
    }
    Vector<ProbeTTestValue> newListProbesVector = new Vector<ProbeTTestValue>();
    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing probes on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        for (int p = 0; p < probes.length; p++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            double[][] values = new double[replicateSets.length][];
            for (int i = 0; i < replicateSets.length; i++) {
                values[i] = new double[stores[i].length];
                for (int j = 0; j < stores[i].length; j++) {
                    try {
                        values[i][j] = stores[i][j].getValueForProbe(probes[p]);
                    } catch (SeqMonkException e) {
                    }
                }
            }
            double pValue = 0;
            try {
                if (replicateSets.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (replicateSets.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }
            newListProbesVector.add(new ProbeTTestValue(probes[p], pValue));
        }
    }
    ProbeTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeTTestValue[0]);
    // Do the multi-testing correction if necessary
    if (multiTest) {
        BenjHochFDR.calculateQValues(newListProbes);
    }
    ProbeList newList;
    if (multiTest) {
        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                newList.addProbe(newListProbes[i].probe, new Float(newListProbes[i].q));
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                newList.addProbe(newListProbes[i].probe, new Float(newListProbes[i].p));
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 4 with ProbeTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in project SeqMonk by s-andrews.

the class ChiSquareFilterFrontBack method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }
    QuantitationStrandType readFilter = (QuantitationStrandType) options.strandLimitBox.getSelectedItem();
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();
    ProbeList newList;
    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }
    Probe[] probes = startingList.getAllProbes();
    int[][] frontBackCounts = new int[stores.length][2];
    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
    PROBE: for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        Probe frontProbe;
        Probe backProbe;
        if (probes[p].strand() == Location.REVERSE) {
            backProbe = new Probe(probes[p].chromosome(), probes[p].start(), probes[p].middle(), probes[p].strand());
            frontProbe = new Probe(probes[p].chromosome(), probes[p].middle(), probes[p].end(), probes[p].strand());
        } else {
            frontProbe = new Probe(probes[p].chromosome(), probes[p].start(), probes[p].middle(), probes[p].strand());
            backProbe = new Probe(probes[p].chromosome(), probes[p].middle(), probes[p].end(), probes[p].strand());
        }
        // For each dataset make up a list of forward and reverse probes under this probe
        for (int d = 0; d < stores.length; d++) {
            long[] frontReads = stores[d].getReadsForProbe(frontProbe);
            long[] backReads = stores[d].getReadsForProbe(backProbe);
            frontBackCounts[d][0] = 0;
            frontBackCounts[d][1] = 0;
            for (int r = 0; r < frontReads.length; r++) {
                if (readFilter.useRead(frontProbe, frontReads[r]))
                    ++frontBackCounts[d][0];
            }
            for (int r = 0; r < backReads.length; r++) {
                if (readFilter.useRead(backProbe, backReads[r]))
                    ++frontBackCounts[d][1];
            }
        // System.err.println("Datset = "+stores[d].name()+" Front counts="+frontBackCounts[d][0]+" Back counts="+frontBackCounts[d][1]);
        }
        // See if we have enough counts and difference to go on with this
        double minPercent = 0;
        double maxPercent = 0;
        for (int d = 0; d < stores.length; d++) {
            if (frontBackCounts[d][0] < minObservations) {
                // System.err.println("Not enough counts to test");
                continue PROBE;
            }
            double percent = (((double) frontBackCounts[d][0]) / (frontBackCounts[d][0] + frontBackCounts[d][1])) * 100;
            if (d == 0 || percent < minPercent)
                minPercent = percent;
            if (d == 0 || percent > maxPercent)
                maxPercent = percent;
        }
        if (maxPercent - minPercent < minPercentShift) {
            // System.err.println("Not enough difference to test");
            continue PROBE;
        }
        // Now perform the Chi-Square test.
        double pValue = ChiSquareTest.chiSquarePvalue(frontBackCounts);
        // System.err.println("Raw p-value="+pValue);
        // Store this as a potential hit (after correcting p-values later)
        hits.add(new ProbeTTestValue(probes[p], pValue));
    }
    // Now we can correct the p-values if we need to
    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        BenjHochFDR.calculateQValues(rawHits);
    }
    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector)

Example 5 with ProbeTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in project SeqMonk by s-andrews.

the class AntisenseTranscriptionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    long[] senseCounts = new long[data.length];
    long[] antisenseCounts = new long[data.length];
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Getting total antisense rate for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
            for (int d = 0; d < data.length; d++) {
                long[] reads = data[d].getReadsForProbe(p);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(p, reads[r])) {
                        senseCounts[d] += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCounts[d] += SequenceRead.length(reads[r]);
                    }
                }
            }
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we can work out the overall antisense rate
    double[] antisenseProbability = new double[data.length];
    for (int d = 0; d < data.length; d++) {
        System.err.println("Antisense counts are " + antisenseCounts[d] + " sense counts are " + senseCounts[d]);
        antisenseProbability[d] = antisenseCounts[d] / (double) (antisenseCounts[d] + senseCounts[d]);
        System.err.println("Antisense probability for " + data[d].name() + " is " + antisenseProbability[d]);
    }
    // Now we can quantitate each individual feature and test for whether it is significantly
    // showing antisense expression
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }
    int[] readLengths = new int[data.length];
    for (int d = 0; d < readLengths.length; d++) {
        readLengths[d] = data[d].getMaxReadLength();
        System.err.println("For " + data[d].name() + " max read len is " + readLengths[d]);
    }
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Probe[] thisChrProbes = collection().probeSet().getProbesForChromosome(chrs[c]);
        for (int p = 0; p < thisChrProbes.length; p++) {
            for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long senseCount = 0;
                long antisenseCount = 0;
                long[] reads = data[d].getReadsForProbe(thisChrProbes[p]);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(thisChrProbes[p], reads[r])) {
                        // TODO: Just count overlap?
                        senseCount += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCount += SequenceRead.length(reads[r]);
                    }
                }
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw base counts are sense="+senseCount+" anti="+antisenseCount+" from "+reads.length+" reads");
                // }
                int senseReads = (int) (senseCount / readLengths[d]);
                int antisenseReads = (int) (antisenseCount / readLengths[d]);
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw read counts are sense="+senseReads+" anti="+antisenseReads+" from "+reads.length+" reads");
                // }
                BinomialDistribution bd = new BinomialDistribution(senseReads + antisenseReads, antisenseProbability[d]);
                // Since the binomial distribution gives the probability of getting a value higher than
                // this we need to subtract one so we get the probability of this or higher.
                double thisPValue = 1 - bd.cumulativeProbability(antisenseReads - 1);
                if (antisenseReads == 0)
                    thisPValue = 1;
                // We have to add all results at this stage so we don't mess up the multiple
                // testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(thisChrProbes[p], thisPValue));
                double expected = ((senseReads + antisenseReads) * antisenseProbability[d]);
                if (expected < 1)
                    expected = 1;
                float obsExp = antisenseReads / (float) expected;
                data[d].setValueForProbe(thisChrProbes[p], obsExp);
            }
        }
    }
    // filtering those which pass our p-value cutoff
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);
        ProbeList newList = new ProbeList(collection().probeSet(), "Antisense < " + pValue + " in " + data[d].name(), "Probes showing significant antisense transcription from a basal level of " + antisenseProbability[d] + " with a cutoff of " + pValue, "FDR");
        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].probe.name().equals("RP4-798A10.2")) {
                System.err.println("Raw p=" + ttestResults[i].p + " q=" + ttestResults[i].q);
            }
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Antisense transcription pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    if (optionsPanel.ignoreOverlaps()) {
        quantitationDescription.append(". Ignoring existing overlaps");
    }
    quantitationDescription.append(". P-value cutoff was ");
    quantitationDescription.append(optionsPanel.pValue());
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : ArrayList(java.util.ArrayList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution)

Aggregations

ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)8 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)8 Vector (java.util.Vector)7 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)4 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)3 ArrayList (java.util.ArrayList)2 BinomialDistribution (org.apache.commons.math3.distribution.BinomialDistribution)2 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)2 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)2 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)2 AlternativeHypothesis (org.apache.commons.math3.stat.inference.AlternativeHypothesis)1 BinomialTest (org.apache.commons.math3.stat.inference.BinomialTest)1 ConfidenceInterval (org.apache.commons.math3.stat.interval.ConfidenceInterval)1 WilsonScoreInterval (org.apache.commons.math3.stat.interval.WilsonScoreInterval)1 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)1