Search in sources :

Example 71 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class WindowedValuesFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    for (int c = 0; c < chromosomes.length; c++) {
        HashSet<Probe> passedProbes = new HashSet<Probe>();
        progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        ProbeGroupGenerator gen = null;
        if (windowType == DISTANCE_WINDOW) {
            gen = new ProbeWindowGenerator(probes, windowSize);
        } else if (windowType == CONSECUTIVE_WINDOW) {
            gen = new ConsecutiveProbeGenerator(probes, windowSize);
        } else if (windowType == FEATURE_WINDOW) {
            gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
        } else {
            progressExceptionReceived(new SeqMonkException("No window type known with number " + windowType));
            return;
        }
        while (true) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            Probe[] theseProbes = gen.nextSet();
            if (theseProbes == null) {
                break;
            }
            int count = 0;
            for (int s = 0; s < stores.length; s++) {
                double totalValue = 0;
                for (int i = 0; i < theseProbes.length; i++) {
                    // Get the values for the probes in this set
                    try {
                        totalValue += stores[s].getValueForProbe(theseProbes[i]);
                    } catch (SeqMonkException e) {
                    }
                }
                totalValue /= theseProbes.length;
                // Now we have the value we need to know if it passes the test
                if (upperLimit != null)
                    if (totalValue > upperLimit.doubleValue())
                        continue;
                if (lowerLimit != null)
                    if (totalValue < lowerLimit.doubleValue())
                        continue;
                // This one passes, we can add it to the count
                ++count;
            }
            // probe to the probe set.
            switch(limitType) {
                case EXACTLY:
                    if (count == storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
                case AT_LEAST:
                    if (count >= storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
                case NO_MORE_THAN:
                    if (count <= storesLimit)
                        for (int i = 0; i < theseProbes.length; i++) {
                            if (passedProbes.contains(theseProbes[i]))
                                continue;
                            newList.addProbe(theseProbes[i], null);
                            passedProbes.add(theseProbes[i]);
                        }
                    break;
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeWindowGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) ConsecutiveProbeGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) ProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) HashSet(java.util.HashSet)

Example 72 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class BinomialFilterForRev method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    boolean aboveOnly = false;
    boolean belowOnly = false;
    if (options.directionBox.getSelectedItem().equals("Above"))
        aboveOnly = true;
    else if (options.directionBox.getSelectedItem().equals("Below"))
        belowOnly = true;
    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }
    useCurrentQuant = options.useCurrentQuantBox.isSelected();
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();
    ProbeList newList;
    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }
    Probe[] probes = startingList.getAllProbes();
    // We need to create a set of mean end methylation values for all starting values
    // We found to the nearest percent so we'll end up with a set of 101 values (0-100)
    // which are the expected end points
    double[] expectedEnds = calculateEnds(probes);
    // They cancelled whilst calculating.
    if (expectedEnds == null)
        return;
    for (int i = 0; i < expectedEnds.length; i++) {
        System.err.println("" + i + "\t" + expectedEnds[i]);
    }
    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
    BinomialTest bt = new BinomialTest();
    AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
    if (aboveOnly)
        hypothesis = AlternativeHypothesis.GREATER_THAN;
    if (belowOnly)
        hypothesis = AlternativeHypothesis.LESS_THAN;
    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        long[] reads = fromStore.getReadsForProbe(probes[p]);
        int forCount = 0;
        int revCount = 0;
        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }
        if (forCount + revCount < minObservations)
            continue;
        int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));
        // We need to calculate the confidence range for the from reads and work
        // out the most pessimistic value we could take as a starting value
        WilsonScoreInterval wi = new WilsonScoreInterval();
        ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);
        // System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100);
        reads = toStore.getReadsForProbe(probes[p]);
        forCount = 0;
        revCount = 0;
        for (int r = 0; r < reads.length; r++) {
            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                ++forCount;
            } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                ++revCount;
            }
        }
        if (forCount + revCount < minObservations)
            continue;
        float toPercent = (forCount * 100f) / (forCount + revCount);
        // System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]);
        // Find the most pessimistic fromPercent such that the expected toPercent is as close
        // to the observed value based on the confidence interval we calculated before.
        double worseCaseExpectedPercent = 0;
        double smallestTheoreticalToActualDiff = 100;
        // Just taking the abs diff can still leave us with a closest value which is still
        // quite far from where we are.  We therefore also check if our confidence interval
        // gives us a potential value range which spans the actual value, and if it does we
        // fail it without even running the test.
        boolean seenLower = false;
        boolean seenHigher = false;
        for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math.min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
            double expectedPercent = expectedEnds[m];
            double diff = expectedPercent - toPercent;
            if (diff <= 0)
                seenLower = true;
            if (diff >= 0)
                seenHigher = true;
            if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
                worseCaseExpectedPercent = expectedPercent;
                smallestTheoreticalToActualDiff = Math.abs(diff);
            }
        }
        // Sanity check
        if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
            throw new IllegalStateException("Can't have a worst case which is better than the actual");
        }
        if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
            continue;
        // If they want to use the current quantitation as well then we can do that calculation now.
        if (useCurrentQuant) {
            try {
                if (Math.abs(toStore.getValueForProbe(probes[p]) - expectedEnds[Math.round(fromStore.getValueForProbe(probes[p]))]) < minPercentShift)
                    continue;
            } catch (SeqMonkException sme) {
                throw new IllegalStateException(sme);
            }
        }
        // Check the directionality
        if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
            continue;
        if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
            continue;
        // Now perform the Binomial test.
        double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d, hypothesis);
        // Our confidence range spanned the actual value we had so we can't be significant
        if (seenLower && seenHigher)
            pValue = 0.5;
        // System.err.println("P value is "+pValue);
        // Store this as a potential hit (after correcting p-values later)
        hits.add(new ProbeTTestValue(probes[p], pValue));
    }
    // Now we can correct the p-values if we need to
    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        // System.err.println("Correcting for "+rawHits.length+" tests");
        BenjHochFDR.calculateQValues(rawHits);
    }
    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }
    filterFinished(newList);
}
Also used : BinomialTest(org.apache.commons.math3.stat.inference.BinomialTest) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) AlternativeHypothesis(org.apache.commons.math3.stat.inference.AlternativeHypothesis) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) WilsonScoreInterval(org.apache.commons.math3.stat.interval.WilsonScoreInterval) Vector(java.util.Vector) ConfidenceInterval(org.apache.commons.math3.stat.interval.ConfidenceInterval)

Example 73 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class ChiSquareFilterForRev method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();
    resample = options.resampleBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    if (resample) {
        // Do a sanity check that the current quantitation actually looks like
        // percentage values.
        progressUpdated("Checking current quantitations", 0, 1);
        for (int p = 0; p < probes.length; p++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            try {
                for (int d = 0; d < stores.length; d++) {
                    float value = stores[d].getValueForProbe(probes[p]);
                    // NaN is OK
                    if (Float.isNaN(value))
                        continue;
                    if (value < 0 || value > 100) {
                        progressExceptionReceived(new SeqMonkException("For resampling quantitations must be between 0 and 100.  Probe " + probes[p].name() + " had value " + value + " in " + stores[d].name()));
                        return;
                    }
                }
            } catch (SeqMonkException sme) {
                progressExceptionReceived(sme);
                return;
            }
        }
    }
    ProbeList newList;
    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }
    int[][] forRevCounts = new int[stores.length][2];
    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
    PROBE: for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        // For each dataset make up a list of forward and reverse probes under this probe
        for (int d = 0; d < stores.length; d++) {
            long[] reads = stores[d].getReadsForProbe(probes[p]);
            forRevCounts[d][0] = 0;
            forRevCounts[d][1] = 0;
            if (resample) {
                try {
                    float value = stores[d].getValueForProbe(probes[p]);
                    // We redistribute the total count based on the percentage value
                    // in the quantitation
                    int methCount = Math.round((reads.length * value) / 100);
                    int unmethCount = reads.length - methCount;
                    forRevCounts[d][0] = methCount;
                    forRevCounts[d][1] = unmethCount;
                } catch (SeqMonkException sme) {
                    continue PROBE;
                }
            } else {
                for (int r = 0; r < reads.length; r++) {
                    if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                        ++forRevCounts[d][0];
                    } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                        ++forRevCounts[d][1];
                    }
                }
            }
        }
        // See if we have enough counts and difference to go on with this
        double minPercent = 0;
        double maxPercent = 0;
        for (int d = 0; d < stores.length; d++) {
            if (forRevCounts[d][0] + forRevCounts[d][1] < minObservations) {
                continue PROBE;
            }
            double percent = (((double) forRevCounts[d][0]) / (forRevCounts[d][0] + forRevCounts[d][1])) * 100;
            if (d == 0 || percent < minPercent)
                minPercent = percent;
            if (d == 0 || percent > maxPercent)
                maxPercent = percent;
        }
        if (maxPercent - minPercent < minPercentShift)
            continue PROBE;
        // Now perform the Chi-Square test.
        double pValue = ChiSquareTest.chiSquarePvalue(forRevCounts);
        // Store this as a potential hit (after correcting p-values later)
        hits.add(new ProbeTTestValue(probes[p], pValue));
    }
    // Now we can correct the p-values if we need to
    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        BenjHochFDR.calculateQValues(rawHits);
    }
    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector)

Example 74 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class CorrelationCluster method minRValue.

public double minRValue(Probe p, double cutoff) {
    // We test this probe against all of the probes in our
    // set and if it matches against all of them we keep it
    double minR = 1;
    try {
        for (int s = 0; s < stores.length; s++) {
            values1[s] = stores[s].getValueForProbe(p);
        }
        Enumeration<Probe> ep = probes.elements();
        while (ep.hasMoreElements()) {
            Probe p2 = ep.nextElement();
            for (int s = 0; s < stores.length; s++) {
                values2[s] = stores[s].getValueForProbe(p2);
            }
            double r = PearsonCorrelation.calculateCorrelation(values1, values2);
            if (r < minR) {
                minR = r;
            }
            // we're not keeping it anyway
            if (minR < cutoff) {
                return minR;
            }
        }
        return minR;
    } catch (SeqMonkException e) {
        return 0;
    }
}
Also used : SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 75 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class CorrelationFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "R-value");
    double[] referenceProfile = null;
    try {
        referenceProfile = getReferneceProfile();
    } catch (SeqMonkException ex) {
        progressExceptionReceived(ex);
    }
    double[] currentProfile = new double[stores.length];
    for (int p = 0; p < probes.length; p++) {
        progressUpdated(p, probes.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        try {
            for (int s = 0; s < stores.length; s++) {
                currentProfile[s] = stores[s].getValueForProbe(probes[p]);
            }
            float r = PearsonCorrelation.calculateCorrelation(referenceProfile, currentProfile);
            if (correlationCutoff > 0) {
                if (r >= correlationCutoff) {
                    newList.addProbe(probes[p], r);
                }
            } else {
                if (r <= correlationCutoff) {
                    newList.addProbe(probes[p], r);
                }
            }
        } catch (SeqMonkException ex) {
            continue;
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7