Search in sources :

Example 1 with IndexTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue in project SeqMonk by s-andrews.

the class IntensityDifferenceFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    // We'll pull the number of probes to sample from the preferences if they've changed it
    Integer updatedProbesPerSet = optionsPanel.probesPerSet();
    if (updatedProbesPerSet != null)
        probesPerSet = updatedProbesPerSet;
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // This is going to be the temporary array we populate with the set of
    // differences we are going to analyse.
    double[] currentDiffSet = new double[probesPerSet];
    // First work out the set of comparisons we're going to make
    Vector<SingleComparison> comparisons = new Vector<IntensityDifferenceFilter.SingleComparison>();
    for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
        for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
            if (fromStores[fromIndex] == toStores[toIndex])
                continue;
            // If we can find the fromStore in the toStores we've already done and the
            // toStore anywhere in the fromStores then we can skip this.
            boolean canSkip = false;
            for (int i = 0; i < fromIndex; i++) {
                if (fromStores[i] == toStores[toIndex]) {
                    for (int j = 0; j < toStores.length; j++) {
                        if (toStores[j] == fromStores[fromIndex]) {
                            canSkip = true;
                            break;
                        }
                    }
                    break;
                }
            }
            if (canSkip)
                continue;
            comparisons.add(new SingleComparison(fromIndex, toIndex));
        }
    }
    // Put something in the progress whilst we're ordering the probe values to make
    // the comparison.
    progressUpdated("Generating background model", 0, 1);
    for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
        int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
        int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
        // We need to generate a set of probe indices ordered by their average intensity
        Integer[] indices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            indices[i] = i;
        }
        Comparator<Integer> comp = new AverageIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
        Arrays.sort(indices, comp);
        progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
        IndexTTestValue[] currentPValues = new IndexTTestValue[indices.length];
        for (int i = 0; i < indices.length; i++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (i % 1000 == 0) {
                int progress = (i * 100) / indices.length;
                progress += 100 * comparisonIndex;
                progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
            }
            // We need to make up the set of differences to represent this probe
            int startingIndex = i - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + (probesPerSet + 1) >= probes.length)
                startingIndex = probes.length - (probesPerSet + 1);
            try {
                for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
                    if (// Don't include the point being tested in the background model
                    j == startingIndex)
                        // Don't include the point being tested in the background model
                        continue;
                    else if (j < startingIndex) {
                        currentDiffSet[j - startingIndex] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    } else {
                        currentDiffSet[(j - startingIndex) - 1] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    }
                }
                // Should we fix the mean at 0?
                double mean = 0;
                // double mean = SimpleStats.mean(currentDiffSet);
                double stdev = SimpleStats.stdev(currentDiffSet, mean);
                if (stdev == 0) {
                    currentPValues[indices[i]] = new IndexTTestValue(indices[i], 1);
                    continue;
                }
                // Get the difference for this point
                double diff = fromStores[fromIndex].getValueForProbe(probes[indices[i]]) - toStores[toIndex].getValueForProbe(probes[indices[i]]);
                NormalDistribution nd = new NormalDistribution(mean, stdev);
                double significance;
                if (diff < mean) {
                    significance = nd.cumulativeProbability(diff);
                } else {
                    significance = 1 - nd.cumulativeProbability(diff);
                }
                currentPValues[indices[i]] = new IndexTTestValue(indices[i], significance);
            } catch (SeqMonkException sme) {
                progressExceptionReceived(sme);
                return;
            }
        }
        // We now need to correct the set of pValues
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }
        // the combined set
        if (applyMultipleTestingCorrection) {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            }
        } else {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }
    // pass the filter.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) IndexTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)

Example 2 with IndexTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue in project SeqMonk by s-andrews.

the class VarianceIntensityDifferenceFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    int varianceType = optionsPanel.getVarianceMeasure();
    Probe[] probes = startingList.getAllProbes();
    // We'll pull the number of probes to sample from the preferences if they've changed it
    Integer updatedProbesPerSet = optionsPanel.probesPerSet();
    if (updatedProbesPerSet != null)
        probesPerSet = updatedProbesPerSet;
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // Put something in the progress whilst we're ordering the probe values to make
    // the comparison.
    progressUpdated("Generating background model", 0, 1);
    for (int r = 0; r < repSetsToUse.length; r++) {
        SmoothedVarianceDataset var = new SmoothedVarianceDataset(repSetsToUse[r], probes, varianceType, probesPerSet);
        progressUpdated("Processing " + repSetsToUse[r].name(), r, repSetsToUse.length);
        IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
        for (int p = 0; p < probes.length; p++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (p % 1000 == 0) {
                int progress = (p * 100) / probes.length;
                progress += 100 * r;
                progressUpdated("Made " + r + " out of " + repSetsToUse.length + " comparisons", progress, repSetsToUse.length * 100);
            }
            currentPValues[p] = new IndexTTestValue(p, var.getIntenstiyPValueForIndex(p, probesPerSet));
        }
        // We now need to correct the set of pValues
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }
        // the combined set
        for (int i = 0; i < currentPValues.length; i++) {
            if (!optionsPanel.wantHighVariation()) {
                if (var.getDifferenceForIndex(currentPValues[i].index) > 0)
                    continue;
            }
            if (!optionsPanel.wantLowVariation()) {
                if (var.getDifferenceForIndex(currentPValues[i].index) < 0)
                    continue;
            }
            if (applyMultipleTestingCorrection) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            } else {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }
    // pass the filter.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SmoothedVarianceDataset(uk.ac.babraham.SeqMonk.Analysis.Statistics.SmoothedVarianceDataset) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) IndexTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)

Example 3 with IndexTTestValue

use of uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue in project SeqMonk by s-andrews.

the class IntensityReplicateFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    // We need to work out how many probes are going to be put into
    // each sub-distribution we calculate.  The rule is going to be that
    // we use 1% of the total, or 100 probes whichever is the higher
    probesPerSet = probes.length / 100;
    if (probesPerSet < 100)
        probesPerSet = 100;
    if (probesPerSet > probes.length)
        probesPerSet = probes.length;
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // These arrays are going to hold the real SDs for the replicate
    // sets we're going to analyse.
    double[] realSDFrom = new double[probes.length];
    double[] realSDTo = new double[probes.length];
    // These arrays are going to hold the averaged SDs for the replicate
    // sets we're going to analyse.
    double[] averageSDFrom = new double[probes.length];
    double[] averageSDTo = new double[probes.length];
    // This is going to be the temporary array we populate with the set of
    // differences we are going to analyse.
    double[] localSDset = new double[probesPerSet];
    // First work out the set of comparisons we're going to make
    Vector<SingleComparison> comparisons = new Vector<IntensityReplicateFilter.SingleComparison>();
    for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
        for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
            if (fromStores[fromIndex] == toStores[toIndex])
                continue;
            // If we can find the fromStore in the toStores we've already done and the
            // toStore anywhere in the fromStores then we can skip this.
            boolean canSkip = false;
            for (int i = 0; i < fromIndex; i++) {
                if (fromStores[i] == toStores[toIndex]) {
                    for (int j = 0; j < toStores.length; j++) {
                        if (toStores[j] == fromStores[fromIndex]) {
                            canSkip = true;
                            break;
                        }
                    }
                    break;
                }
            }
            if (canSkip)
                continue;
            comparisons.add(new SingleComparison(fromIndex, toIndex));
        }
    }
    for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
        int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
        int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
        // We need to generate a set of probe indices ordered by their average intensity
        Integer[] fromIndices = new Integer[probes.length];
        Integer[] toIndices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            fromIndices[i] = i;
            toIndices[i] = i;
        }
        // This isn't ideal.  We're sorting the probes by average intensity which puts together
        // probes which should probably have different standard deviations.  It would be better
        // to sort on the intensities in each data store separately, but we get such a problem
        // from very low values contaminating the set by giving artificially low average SDs that
        // I don't think we can get away with this.
        Comparator<Integer> fromComp = new AveragePairedIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
        Comparator<Integer> toComp = new AveragePairedIntensityComparator(toStores[toIndex], fromStores[fromIndex], probes);
        // Comparator<Integer> fromComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
        // Comparator<Integer> toComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
        Arrays.sort(fromIndices, fromComp);
        Arrays.sort(toIndices, toComp);
        // We also need to get the raw SDs for the two replicate sets
        double[] fromValues = new double[fromStores[fromIndex].dataStores().length];
        double[] toValues = new double[toStores[toIndex].dataStores().length];
        try {
            for (int p = 0; p < probes.length; p++) {
                for (int f = 0; f < fromValues.length; f++) {
                    fromValues[f] = fromStores[fromIndex].dataStores()[f].getValueForProbe(probes[p]);
                }
                for (int t = 0; t < toValues.length; t++) {
                    toValues[t] = toStores[toIndex].dataStores()[t].getValueForProbe(probes[p]);
                }
                realSDFrom[p] = SimpleStats.stdev(fromValues);
                realSDTo[p] = SimpleStats.stdev(toValues);
            }
        } catch (SeqMonkException sme) {
            progressExceptionReceived(sme);
        }
        progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
        IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
        for (int i = 0; i < probes.length; i++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (i % 1000 == 0) {
                int progress = (i * 100) / probes.length;
                progress += 100 * comparisonIndex;
                progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
            }
            // We need to make up the set of SDs to represent this probe
            int startingIndex = i - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + probesPerSet >= probes.length)
                startingIndex = probes.length - probesPerSet;
            for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
                localSDset[j - startingIndex] = realSDFrom[fromIndices[j]];
            }
            // averageSDFrom[fromIndices[i]] = SimpleStats.percentile(localSDset,90);
            averageSDFrom[fromIndices[i]] = SimpleStats.mean(localSDset);
            for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
                localSDset[j - startingIndex] = realSDTo[toIndices[j]];
            }
            // averageSDTo[toIndices[i]] = SimpleStats.percentile(localSDset,90);
            averageSDTo[toIndices[i]] = SimpleStats.mean(localSDset);
        }
        for (int p = 0; p < probes.length; p++) {
            double fromValue = 0;
            double toValue = 0;
            try {
                fromValue = fromStores[fromIndex].getValueForProbe(probes[p]);
                toValue = toStores[toIndex].getValueForProbe(probes[p]);
            } catch (SeqMonkException sme) {
            }
            double fromSD = Math.max(realSDFrom[p], averageSDFrom[p]);
            double toSD = Math.max(realSDTo[p], averageSDTo[p]);
            // double fromSD = averageSDFrom[p];
            // double toSD = averageSDTo[p];
            currentPValues[p] = new IndexTTestValue(p, TTest.calculatePValue(fromValue, toValue, fromSD, toSD, fromStores[fromIndex].dataStores().length, toStores[toIndex].dataStores().length));
        // System.err.println("P value was "+currentPValues[p].p+" from "+fromValue+" "+toValue+" "+fromSD+" "+toSD+" "+fromStores[fromIndex].dataStores().length+" "+toStores[toIndex].dataStores().length);
        }
        // We now need to correct the set of pValues
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }
        // the combined set
        if (applyMultipleTestingCorrection) {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            }
        } else {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }
    // pass the filter.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) IndexTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)

Aggregations

IndexTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)3 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)3 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)3 Vector (java.util.Vector)2 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)2 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)1 SmoothedVarianceDataset (uk.ac.babraham.SeqMonk.Analysis.Statistics.SmoothedVarianceDataset)1