Search in sources :

Example 1 with ProbeGroupTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeGroupTTestValue in project SeqMonk by s-andrews.

the class WindowedReplicateStatsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<ProbeGroupTTestValue> newListProbesVector = new Vector<ProbeGroupTTestValue>();
    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        ProbeGroupGenerator gen = null;
        if (windowType == DISTANCE_WINDOW) {
            gen = new ProbeWindowGenerator(probes, windowSize);
        } else if (windowType == CONSECUTIVE_WINDOW) {
            gen = new ConsecutiveProbeGenerator(probes, windowSize);
        } else if (windowType == FEATURE_WINDOW) {
            gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
        }
        while (true) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            Probe[] theseProbes = gen.nextSet();
            if (theseProbes == null) {
                // System.err.println("List of probes was null");
                break;
            }
            // We need at least 3 probes in a set to calculate a p-value
            if (theseProbes.length < 3) {
                // System.err.println("Only "+theseProbes.length+" probes in the set");
                continue;
            }
            double[][] values = new double[stores.length][];
            for (int j = 0; j < stores.length; j++) {
                if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                    values[j] = new double[theseProbes.length * ((ReplicateSet) stores[j]).dataStores().length];
                } else {
                    values[j] = new double[theseProbes.length];
                }
            }
            for (int j = 0; j < stores.length; j++) {
                int index = 0;
                for (int i = 0; i < theseProbes.length; i++) {
                    try {
                        if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                            DataStore[] localStores = ((ReplicateSet) stores[j]).dataStores();
                            for (int l = 0; l < localStores.length; l++) {
                                values[j][index] = localStores[l].getValueForProbe(theseProbes[i]);
                                index++;
                            }
                        } else {
                            values[j][index] = stores[j].getValueForProbe(theseProbes[i]);
                            index++;
                        }
                    } catch (SeqMonkException e) {
                    }
                }
                if (index != values[j].length) {
                    throw new IllegalStateException("Didn't fill all values total=" + values[j].length + " index=" + index);
                }
            }
            double pValue = 0;
            try {
                if (stores.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (stores.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }
            newListProbesVector.add(new ProbeGroupTTestValue(theseProbes, pValue));
        }
    }
    ProbeGroupTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeGroupTTestValue[0]);
    // Do the multi-testing correction if necessary
    if (multiTest) {
        BenjHochFDR.calculateQValues(newListProbes);
    }
    ProbeList newList;
    // We need to handle duplicate hits internally since probe lists can't do
    // this themselves any more.
    Hashtable<Probe, Float> newListTemp = new Hashtable<Probe, Float>();
    if (multiTest) {
        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible q-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].q) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].q);
                }
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible p-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].p) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].p);
                }
            }
        }
    }
    // Add the cached hits to the new list
    Enumeration<Probe> en = newListTemp.keys();
    while (en.hasMoreElements()) {
        Probe p = en.nextElement();
        newList.addProbe(p, newListTemp.get(p));
    }
    filterFinished(newList);
}
Also used : ReplicateSet(uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet) ProbeWindowGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) ProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProbeGroupTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeGroupTTestValue) ConsecutiveProbeGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator)

Aggregations

Hashtable (java.util.Hashtable)1 Vector (java.util.Vector)1 ProbeGroupTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeGroupTTestValue)1 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)1 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)1 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)1 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)1 ReplicateSet (uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet)1 ConsecutiveProbeGenerator (uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator)1 FeatureProbeGroupGenerator (uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator)1 ProbeGroupGenerator (uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator)1 ProbeWindowGenerator (uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator)1 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)1