Search in sources :

Example 51 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ProbeNameFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
/**
 * Builds the filtered probe list by keeping every probe from the starting
 * list whose name matches one of the query names typed into the options
 * panel (one query per line).
 *
 * Both the queries and the probe names are passed through the same
 * normalisation (see {@link #normaliseProbeName(String)}) before an exact
 * string comparison is made. Passing probes keep the value they had in the
 * starting list. The method reports progress every 100 probes and stops
 * early, without producing a list, if the filter is cancelled.
 */
@Override
protected void generateProbeList() {
    // Capture the user's current choices from the options panel.
    queries = optionsPanel.queriesArea.getText().split("\n");
    stripSuffixes = optionsPanel.stripSuffixesBox.isSelected();
    stripTranscript = optionsPanel.stripTranscriptBox.isSelected();
    caseInsensitive = optionsPanel.caseInsensitiveBox.isSelected();
    ProbeList passedProbes = new ProbeList(startingList, "", "", startingList.getValueName());

    // Build the set of normalised query names we are going to check against.
    HashSet<String> queryStrings = new HashSet<String>();
    for (String rawQuery : queries) {
        String query = rawQuery.trim();
        if (query.isEmpty()) {
            // A blank line is not a query; adding "" to the set could
            // accidentally match a probe whose name normalises to nothing.
            continue;
        }
        queryStrings.add(normaliseProbeName(query));
    }

    // Step through the probes looking for a match to the stored query names.
    Probe[] probes = startingList.getAllProbes();
    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Filtering probes", p, probes.length);
        }
        if (cancel) {
            // Reset the flag so the filter can be run again later.
            cancel = false;
            progressCancelled();
            return;
        }
        String name = normaliseProbeName(probes[p].name());
        if (queryStrings.contains(name)) {
            passedProbes.addProbe(probes[p], startingList.getValueForProbe(probes[p]));
        }
    }
    filterFinished(passedProbes);
}

/**
 * Applies the currently selected normalisation options to a name so that
 * queries and probe names are compared on equal terms.
 *
 * The steps are applied in a fixed order: lower-casing (if case insensitive),
 * then removal of a trailing "_upstream", "_downstream" and "_gene" in turn
 * (if stripping suffixes), then removal of a trailing "-NNN" three digit
 * suffix (if stripping transcripts).
 *
 * @param name the raw query or probe name
 * @return the normalised name
 */
private String normaliseProbeName(String name) {
    String normalised = name;
    if (caseInsensitive) {
        normalised = normalised.toLowerCase();
    }
    if (stripSuffixes) {
        normalised = normalised.replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
    }
    if (stripTranscript) {
        normalised = normalised.replaceAll("-\\d\\d\\d$", "");
    }
    return normalised;
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) HashSet(java.util.HashSet)

Example 52 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ProportionOfLibraryStatisticsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
/**
 * Builds the filtered probe list by comparing, for every probe, its share of
 * the total count in each "from" store against its share in each "to" store
 * using Fisher's exact test.
 *
 * For each probe the lowest p-value seen over all from/to store pairs is
 * kept. Probes whose p-value (or q-value, when multiple testing correction
 * is selected) is below {@code pValueLimit} are added to the new list with
 * that value. The method stops without producing a list if the filter is
 * cancelled or a store lookup fails.
 */
protected void generateProbeList() {
    fromStores = optionsPanel.fromStores();
    toStores = optionsPanel.toStores();
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    testForIncreasesOnly = optionsPanel.increasesOnlyBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");

    // Track the lowest p-value seen for each probe; 1 means "nothing significant yet".
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }

    // Put something in the progress display before the comparisons start.
    progressUpdated("Generating background model", 0, 1);
    try {
        for (int f = 0; f < fromStores.length; f++) {
            for (int t = 0; t < toStores.length; t++) {
                progressUpdated("Comparing " + fromStores[f] + " to " + toStores[t], 0, 1);

                // Work out the total counts over the probes we're using, per store.
                int fromTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    fromTotalCount += (int) fromStores[f].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }
                int toTotalCount = 0;
                for (int p = 0; p < probes.length; p++) {
                    toTotalCount += (int) toStores[t].getValueForProbe(probes[p]);
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                }

                for (int p = 0; p < probes.length; p++) {
                    if (cancel) {
                        cancel = false;
                        progressCancelled();
                        return;
                    }
                    // 2x2 table: this probe vs. everything else, from store vs. to store.
                    int n11 = (int) fromStores[f].getValueForProbe(probes[p]);
                    int n12 = fromTotalCount - n11;
                    // Must use the "to" index here so the count matches toTotalCount.
                    int n21 = (int) toStores[t].getValueForProbe(probes[p]);
                    int n22 = toTotalCount - n21;
                    double[] pValues = FishersExactTest.fishersExactTest(n11, n12, n21, n22);
                    // The values in the array are 0=2-sided p-value, 1=left-sided p-value, 2=right-sided p-value
                    double candidate = testForIncreasesOnly ? pValues[1] : pValues[0];
                    if (candidate < lowestPValues[p]) {
                        lowestPValues[p] = (float) candidate;
                    }
                }
            }
        }
    } catch (SeqMonkException sme) {
        progressExceptionReceived(sme);
        // Don't publish a list built from a partially completed comparison.
        return;
    }

    if (applyMultipleTestingCorrection) {
        ProbeTTestValue[] statsValues = new ProbeTTestValue[probes.length];
        for (int i = 0; i < probes.length; i++) {
            statsValues[i] = new ProbeTTestValue(probes[i], lowestPValues[i]);
        }
        BenjHochFDR.calculateQValues(statsValues);
        for (int i = 0; i < statsValues.length; i++) {
            if (statsValues[i].q < pValueLimit) {
                newList.addProbe(statsValues[i].probe, (float) statsValues[i].q);
            }
        }
    } else {
        for (int i = 0; i < lowestPValues.length; i++) {
            if (lowestPValues[i] < pValueLimit) {
                newList.addProbe(probes[i], lowestPValues[i]);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 53 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ReplicateSetStatsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
/**
 * Builds the filtered probe list by testing, for every probe, whether its
 * values differ between the selected replicate sets.
 *
 * With one replicate set {@code TTest.calculatePValue(values, 0)} is used,
 * with two sets the two-sample {@code TTest.calculatePValue}, and with more
 * than two {@code AnovaTest.calculatePValue}. Probes whose p-value (or
 * q-value when {@code multiTest} is set) is at or below {@code cutoff} are
 * added to the new list with that value. The method stops without producing
 * a list if the filter is cancelled.
 *
 * @throws IllegalStateException if the statistical test itself fails
 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();

    // Make up the list of DataStores in each replicate set
    DataStore[][] stores = new DataStore[replicateSets.length][];
    for (int i = 0; i < replicateSets.length; i++) {
        stores[i] = replicateSets[i].dataStores();
    }

    Vector<ProbeTTestValue> newListProbesVector = new Vector<ProbeTTestValue>();
    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing probes on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        for (int p = 0; p < probes.length; p++) {
            if (cancel) {
                // Reset the flag so the filter can be run again later.
                cancel = false;
                progressCancelled();
                return;
            }

            // Collect this probe's value from every store, grouped by replicate set.
            double[][] values = new double[replicateSets.length][];
            for (int i = 0; i < replicateSets.length; i++) {
                values[i] = new double[stores[i].length];
                for (int j = 0; j < stores[i].length; j++) {
                    try {
                        values[i][j] = stores[i][j].getValueForProbe(probes[p]);
                    } catch (SeqMonkException ignored) {
                        // NOTE(review): a failed lookup is ignored, which leaves this
                        // slot at its default of 0.0. Kept as in the original code -
                        // TODO confirm this best-effort behaviour is intended.
                    }
                }
            }

            double pValue;
            try {
                if (replicateSets.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (replicateSets.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }
            newListProbesVector.add(new ProbeTTestValue(probes[p], pValue));
        }
    }

    ProbeTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeTTestValue[0]);
    ProbeList newList;
    if (multiTest) {
        // Do the multi-testing correction and filter on the corrected values.
        BenjHochFDR.calculateQValues(newListProbes);
        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                newList.addProbe(newListProbes[i].probe, (float) newListProbes[i].q);
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                newList.addProbe(newListProbes[i].probe, (float) newListProbes[i].p);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 54 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class ChiSquareFilterFrontBack method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
/**
 * Builds the filtered probe list by testing whether the split of reads
 * between the front half and the back half of each probe differs between
 * the data stores, using a Chi-Square test on the front/back counts.
 *
 * Each probe is split at its middle; for reverse strand probes the half
 * starting at the probe's start is treated as the back. A probe is only
 * tested if every store has at least {@code minObservations} reads over the
 * two halves combined, and the front percentage differs by at least
 * {@code minPercentShift} between the lowest and highest store. Probes whose
 * p-value (or q-value when multiple testing correction is selected) is below
 * {@code stringency} are added to the new list with that value. Blank option
 * fields fall back to 0.05, 10 and 10 respectively.
 */
protected void generateProbeList() {
    if (options.stringencyField.getText().length() == 0) {
        stringency = 0.05;
    } else {
        stringency = Double.parseDouble(options.stringencyField.getText());
    }
    if (options.minObservationsField.getText().length() == 0) {
        minObservations = 10;
    } else {
        minObservations = Integer.parseInt(options.minObservationsField.getText());
    }
    if (options.minDifferenceField.getText().length() == 0) {
        minPercentShift = 10;
    } else {
        minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
    }
    QuantitationStrandType readFilter = (QuantitationStrandType) options.strandLimitBox.getSelectedItem();
    applyMultipleTestingCorrection = options.multiTestBox.isSelected();

    ProbeList newList;
    if (applyMultipleTestingCorrection) {
        newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
    } else {
        newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
    }

    Probe[] probes = startingList.getAllProbes();
    // Reused for every probe: [store][0] = front count, [store][1] = back count.
    int[][] frontBackCounts = new int[stores.length][2];
    // This is where we'll store any hits
    Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();

    PROBE: for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            progressUpdated("Processed " + p + " probes", p, probes.length);
        }
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        // Split the probe at its middle, orienting front/back by strand.
        Probe frontProbe;
        Probe backProbe;
        if (probes[p].strand() == Location.REVERSE) {
            backProbe = new Probe(probes[p].chromosome(), probes[p].start(), probes[p].middle(), probes[p].strand());
            frontProbe = new Probe(probes[p].chromosome(), probes[p].middle(), probes[p].end(), probes[p].strand());
        } else {
            frontProbe = new Probe(probes[p].chromosome(), probes[p].start(), probes[p].middle(), probes[p].strand());
            backProbe = new Probe(probes[p].chromosome(), probes[p].middle(), probes[p].end(), probes[p].strand());
        }

        // For each dataset count the usable reads under the front and back halves
        for (int d = 0; d < stores.length; d++) {
            long[] frontReads = stores[d].getReadsForProbe(frontProbe);
            long[] backReads = stores[d].getReadsForProbe(backProbe);
            frontBackCounts[d][0] = 0;
            frontBackCounts[d][1] = 0;
            for (int r = 0; r < frontReads.length; r++) {
                if (readFilter.useRead(frontProbe, frontReads[r])) {
                    ++frontBackCounts[d][0];
                }
            }
            for (int r = 0; r < backReads.length; r++) {
                if (readFilter.useRead(backProbe, backReads[r])) {
                    ++frontBackCounts[d][1];
                }
            }
        }

        // See if we have enough counts and difference to go on with this
        double minPercent = 0;
        double maxPercent = 0;
        for (int d = 0; d < stores.length; d++) {
            // The observation threshold applies to all reads under the probe,
            // not just the front half. A zero total is always skipped since
            // the percentage below would otherwise be 0/0.
            int totalCount = frontBackCounts[d][0] + frontBackCounts[d][1];
            if (totalCount == 0 || totalCount < minObservations) {
                continue PROBE;
            }
            double percent = (((double) frontBackCounts[d][0]) / totalCount) * 100;
            if (d == 0 || percent < minPercent) {
                minPercent = percent;
            }
            if (d == 0 || percent > maxPercent) {
                maxPercent = percent;
            }
        }
        if (maxPercent - minPercent < minPercentShift) {
            // Not enough difference between stores to be worth testing.
            continue PROBE;
        }

        // Now perform the Chi-Square test.
        double pValue = ChiSquareTest.chiSquarePvalue(frontBackCounts);
        // Store this as a potential hit (after correcting p-values later)
        hits.add(new ProbeTTestValue(probes[p], pValue));
    }

    // Now we can correct the p-values if we need to
    ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
    if (applyMultipleTestingCorrection) {
        BenjHochFDR.calculateQValues(rawHits);
    }
    for (int h = 0; h < rawHits.length; h++) {
        if (applyMultipleTestingCorrection) {
            if (rawHits[h].q < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
            }
        } else {
            if (rawHits[h].p < stringency) {
                newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
            }
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector)

Example 55 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class HeatmapMatrix method createProbeListsFromClusters.

/**
 * Creates one probe list per connected cluster at the current R-value
 * threshold, all nested under a single parent list which is returned.
 *
 * Clusters are walked in order while a running position (the cumulative
 * number of indices seen so far) is tracked. A cluster is skipped if it
 * starts before {@code startIndex}, and the walk stops at the first cluster
 * which ends beyond {@code endIndex}. Clusters with fewer than
 * {@code minClusterSize} entries, or fewer than that many distinct probes,
 * are also skipped. Each probe is added to the parent list at most once.
 *
 * @param minClusterSize the minimum number of distinct probes a cluster
 *                       needs for a list to be made from it
 * @param startIndex     the lowest position a cluster may start at
 * @param endIndex       the highest position a cluster may end at
 * @return the parent list holding all cluster sub-lists, or null if no
 *         clustering has been done
 */
public ProbeList createProbeListsFromClusters(int minClusterSize, int startIndex, int endIndex) {
    if (cluster == null) {
        return null;
    }
    ClusterPair[] connectedClusters = cluster.getConnectedClusters(currentClusterRValue);
    ProbeList commonList = findCommonProbeListParent();
    // Describe the list by the R-value threshold which was used to select the clusters.
    ProbeList allClusterList = new ProbeList(commonList, "HiC Clusters", "HiC Clusters with R > " + currentClusterRValue, null);
    // Tracks which probes are already in the parent list so none is added twice.
    HashSet<Probe> allClusterProbes = new HashSet<Probe>();

    // Make up the same initial list of probes as before, so that the cluster
    // indices can be turned back into probes.
    Vector<Probe> originallyOrderedProbes = new Vector<Probe>();
    for (int l = 0; l < probeLists.length; l++) {
        Probe[] theseProbes = probeLists[l].getAllProbes();
        for (int p = 0; p < theseProbes.length; p++) {
            originallyOrderedProbes.add(theseProbes[p]);
        }
    }

    // Now we need to work our way through the connected clusters
    // to make the appropriate sub-lists
    int currentPosition = 0;
    for (int subListIndex = 0; subListIndex < connectedClusters.length; subListIndex++) {
        Integer[] indices = connectedClusters[subListIndex].getAllIndices();
        currentPosition += indices.length;
        // Skip clusters which start before the requested range...
        if (currentPosition - indices.length < startIndex) {
            continue;
        }
        // ...and stop at the first one which runs past the end of it.
        if (currentPosition > endIndex) {
            break;
        }
        // We may get rid of the list later if there are duplicates in it.
        if (indices.length < minClusterSize) {
            continue;
        }

        Probe[] theseProbes = new Probe[indices.length];
        for (int i = 0; i < theseProbes.length; i++) {
            theseProbes[i] = originallyOrderedProbes.elementAt(indices[i]);
        }
        // Sort so that repeated probes sit next to each other and can be
        // spotted by comparing neighbours (by reference) below.
        Arrays.sort(theseProbes);

        // Now find the non-redundant count
        int nonRedCount = 1;
        for (int i = 1; i < theseProbes.length; i++) {
            if (theseProbes[i] != theseProbes[i - 1]) {
                nonRedCount++;
            }
        }
        // There aren't enough different probes to keep this set.
        if (nonRedCount < minClusterSize) {
            continue;
        }

        ProbeList thisList = new ProbeList(allClusterList, "Cluster " + (subListIndex + 1), "HiC cluster list number " + (subListIndex + 1), "R-value");
        float rValue = connectedClusters[subListIndex].rValue();
        for (int i = 0; i < theseProbes.length; i++) {
            // Skip repeats of the probe we've just added.
            if (i > 0 && theseProbes[i] == theseProbes[i - 1]) {
                continue;
            }
            thisList.addProbe(theseProbes[i], rValue);
            if (!allClusterProbes.contains(theseProbes[i])) {
                allClusterList.addProbe(theseProbes[i], null);
                allClusterProbes.add(theseProbes[i]);
            }
        }
    }
    return allClusterList;
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ClusterPair(uk.ac.babraham.SeqMonk.DataTypes.Cluster.ClusterPair) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector) HashSet(java.util.HashSet)

Aggregations

Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)125 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)54 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)52 Vector (java.util.Vector)48 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)47 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)26 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)21 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)20 HashSet (java.util.HashSet)9 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)9 File (java.io.File)8 PrintWriter (java.io.PrintWriter)8 ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue)8 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)7 BufferedReader (java.io.BufferedReader)6 FileReader (java.io.FileReader)6 Hashtable (java.util.Hashtable)6 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)6 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)6