Example 36 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class CodonBiasPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    String libraryType = optionsPanel.libraryType();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making probes for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
        }
    }
    allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we can quantitate each individual feature and test whether it
    // shows significant codon bias.
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
    // data contains the data stores that this pipeline is going to use. We need to test each data store.
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }
    int probeCounter = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        for (int p = 0; p < features.length; p++) {
            // Get the corresponding feature and work out the mapping between genomic position and codon sub position.
            int[] mappingArray = createGenomeMappingArray(features[p]);
            DATASTORE_LOOP: for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long[] reads = data[d].getReadsForProbe(allProbes[probeCounter]);
                // TODO: make this configurable
                if (reads.length < 5) {
                    data[d].setValueForProbe(allProbes[probeCounter], Float.NaN);
                    continue DATASTORE_LOOP;
                }
                int pos1Count = 0;
                int pos2Count = 0;
                int pos3Count = 0;
                READ_LOOP: for (int r = 0; r < reads.length; r++) {
                    int genomicReadStart = SequenceRead.start(reads[r]);
                    int genomicReadEnd = SequenceRead.end(reads[r]);
                    int readStrand = SequenceRead.strand(reads[r]);
                    int relativeReadStart = -1;
                    // forward reads
                    if (readStrand == 1) {
                        if (libraryType.equals("Same strand specific")) {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start pos in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        } else if (libraryType.equals("Opposing strand specific")) {
                            if (features[p].location().strand() == Location.REVERSE) {
                                // The "start" of a reverse read/probe is actually the end
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        }
                    }
                    // reverse reads
                    if (readStrand == -1) {
                        if (libraryType.equals("Same strand specific")) {
                            if (features[p].location().strand() == Location.REVERSE) {
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // System.out.println("features[p].location().end() is " + features[p].location().end() + ", genomicReadEnd is " + genomicReadEnd);
                                    // System.out.println("mapping array[0] is " + mappingArray[0]);
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        } else if (libraryType.equals("Opposing strand specific")) {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start position in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        }
                    }
                    // find out which position the read is in
                    if (relativeReadStart == -1) {
                        continue READ_LOOP;
                    } else if (relativeReadStart % 3 == 0) {
                        pos3Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 1) % 3 == 0) {
                        pos2Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 2) % 3 == 0) {
                        pos1Count++;
                    }
                } // closing bracket for read loop
                // System.out.println("pos1Count for "+ features[p].name() + " is " + pos1Count);
                // System.out.println("pos2Count for "+ features[p].name() + " is " + pos2Count);
                // System.out.println("pos3Count for "+ features[p].name() + " is " + pos3Count);
                int interestingCodonCount = 0;
                int otherCodonCount = 0;
                if (optionsPanel.codonSubPosition() == 1) {
                    interestingCodonCount = pos1Count;
                    otherCodonCount = pos2Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 2) {
                    interestingCodonCount = pos2Count;
                    otherCodonCount = pos1Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 3) {
                    interestingCodonCount = pos3Count;
                    otherCodonCount = pos1Count + pos2Count;
                }
                int totalCount = interestingCodonCount + otherCodonCount;
                // BinomialDistribution bd = new BinomialDistribution(interestingCodonCount+otherCodonCount, 1/3d);
                BinomialDistribution bd = new BinomialDistribution(totalCount, 1 / 3d);
                // cumulativeProbability(k) gives P(X <= k), so we evaluate it at
                // interestingCodonCount - 1 and subtract from 1 to get the probability
                // of seeing this many or more reads at the position of interest.
                double thisPValue = 1 - bd.cumulativeProbability(interestingCodonCount - 1);
                if (interestingCodonCount == 0)
                    thisPValue = 1;
                // We have to add all results at this stage so we don't mess up the multiple
                // testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(allProbes[probeCounter], thisPValue));
                float percentageCount;
                if (totalCount == 0) {
                    percentageCount = 0;
                } else {
                    percentageCount = ((float) interestingCodonCount / (float) totalCount) * 100;
                }
                data[d].setValueForProbe(allProbes[probeCounter], percentageCount);
            // System.out.println("totalCount = " + totalCount);
            // System.out.println("interestingCodonCount " + interestingCodonCount);
            // System.out.println("pValue = " + thisPValue);
            // System.out.println("percentageCount = " + percentageCount);
            // System.out.println("");
            }
            probeCounter++;
        }
    }
    // Now correct for multiple testing and keep the probes which pass our p-value cutoff.
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);
        ProbeList newList = new ProbeList(collection().probeSet(), "Codon bias < " + pValue + " in " + data[d].name(), "Probes showing significant codon bias for position " + optionsPanel.codonSubPosition() + " with a cutoff of " + pValue, "FDR");
        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Codon bias pipeline using codon position " + optionsPanel.codonSubPosition() + " for " + optionsPanel.libraryType() + " library.");
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ArrayList(java.util.ArrayList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution) Vector(java.util.Vector)
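
The test at the heart of this pipeline is a one-sided binomial test: under the null hypothesis, read starts fall on each of the three codon sub-positions with probability 1/3, and the p-value is the probability of seeing at least as many reads at the position of interest as were observed. A minimal, self-contained sketch of that calculation, assuming commons-math3 is on the classpath and using hypothetical counts:

import org.apache.commons.math3.distribution.BinomialDistribution;

public class CodonBiasTestSketch {
    public static void main(String[] args) {
        // Hypothetical counts of read starts at the codon sub-position of
        // interest versus the other two sub-positions for a single feature.
        int interestingCodonCount = 60;
        int otherCodonCount = 90;
        int totalCount = interestingCodonCount + otherCodonCount;

        // Null hypothesis: reads land on each of the three sub-positions
        // with probability 1/3.
        BinomialDistribution bd = new BinomialDistribution(totalCount, 1 / 3d);

        // cumulativeProbability(k) is P(X <= k), so 1 - P(X <= k - 1) is
        // P(X >= k), the same one-sided p-value the pipeline stores.
        double pValue = interestingCodonCount == 0
                ? 1
                : 1 - bd.cumulativeProbability(interestingCodonCount - 1);

        System.out.println("One-sided binomial p-value = " + pValue);
    }
}

As in the pipeline, the raw p-values would then be passed through Benjamini-Hochberg correction (BenjHochFDR.calculateQValues) before filtering on the cutoff.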

Example 37 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class TranscriptionTerminationPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    int minCount = optionsPanel.minCount();
    int measurementLength = optionsPanel.measurementLength();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Creating probes for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = getValidFeatures(chrs[c], measurementLength);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            if (features[f].location().strand() == Location.REVERSE) {
                Probe p = new Probe(chrs[c], features[f].location().start() - measurementLength, features[f].location().start() + measurementLength, features[f].location().strand(), features[f].name());
                probes.add(p);
            } else {
                Probe p = new Probe(chrs[c], features[f].location().end() - measurementLength, features[f].location().end() + measurementLength, features[f].location().strand(), features[f].name());
                probes.add(p);
            }
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features " + measurementLength + "bp around the end of " + optionsPanel.getSelectedFeatureType(), allProbes));
    int probeIndex = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Feature[] features = getValidFeatures(chrs[c], measurementLength);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            for (int d = 0; d < data.length; d++) {
                if (allProbes[probeIndex].strand() == Location.REVERSE) {
                    Probe downstreamProbe = new Probe(chrs[c], features[f].location().start() - measurementLength, features[f].location().start(), features[f].location().strand(), features[f].name());
                    Probe upstreamProbe = new Probe(chrs[c], features[f].location().start(), features[f].location().start() + measurementLength, features[f].location().strand(), features[f].name());
                    long[] upstreamReads = data[d].getReadsForProbe(upstreamProbe);
                    long[] downstreamReads = data[d].getReadsForProbe(downstreamProbe);
                    int upstreamCount = 0;
                    for (int i = 0; i < upstreamReads.length; i++) {
                        if (readFilter.useRead(allProbes[probeIndex], upstreamReads[i]))
                            ++upstreamCount;
                    }
                    int downstreamCount = 0;
                    for (int i = 0; i < downstreamReads.length; i++) {
                        if (readFilter.useRead(allProbes[probeIndex], downstreamReads[i]))
                            ++downstreamCount;
                    }
                    float percentage = ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
                    if (upstreamCount >= minCount) {
                        data[d].setValueForProbe(allProbes[probeIndex], percentage);
                    } else {
                        data[d].setValueForProbe(allProbes[probeIndex], Float.NaN);
                    }
                } else {
                    Probe upstreamProbe = new Probe(chrs[c], features[f].location().end() - measurementLength, features[f].location().end(), features[f].location().strand(), features[f].name());
                    Probe downstreamProbe = new Probe(chrs[c], features[f].location().end(), features[f].location().end() + measurementLength, features[f].location().strand(), features[f].name());
                    long[] upstreamReads = data[d].getReadsForProbe(upstreamProbe);
                    long[] downstreamReads = data[d].getReadsForProbe(downstreamProbe);
                    int upstreamCount = 0;
                    for (int i = 0; i < upstreamReads.length; i++) {
                        if (readFilter.useRead(allProbes[probeIndex], upstreamReads[i]))
                            ++upstreamCount;
                    }
                    int downstreamCount = 0;
                    for (int i = 0; i < downstreamReads.length; i++) {
                        if (readFilter.useRead(allProbes[probeIndex], downstreamReads[i]))
                            ++downstreamCount;
                    }
                    float percentage = ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
                    if (upstreamCount >= minCount) {
                        data[d].setValueForProbe(allProbes[probeIndex], percentage);
                    } else {
                        data[d].setValueForProbe(allProbes[probeIndex], Float.NaN);
                    }
                }
            }
            ++probeIndex;
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Transcription termination pipeline quantitation");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    quantitationDescription.append(". Measurement length was ");
    quantitationDescription.append(optionsPanel.measurementLength());
    quantitationDescription.append(". Min count was ");
    quantitationDescription.append(optionsPanel.minCount());
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector)
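
The quantitation stored for each probe is simply the percentage drop in read count between the window immediately upstream of the feature end and the window immediately downstream of it, with low-coverage probes set to NaN. A minimal sketch of that metric using hypothetical counts:

public class TerminationPercentageSketch {

    // Percentage drop from the upstream window to the downstream window,
    // mirroring the calculation and the minCount check in the pipeline above.
    static float terminationPercentage(int upstreamCount, int downstreamCount, int minCount) {
        if (upstreamCount < minCount) {
            return Float.NaN; // too few reads to measure reliably
        }
        return ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
    }

    public static void main(String[] args) {
        System.out.println(terminationPercentage(200, 30, 10));  // 85.0  - strong termination
        System.out.println(terminationPercentage(200, 180, 10)); // 10.0  - weak termination
        System.out.println(terminationPercentage(5, 1, 10));     // NaN   - below minCount
    }
}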

Example 38 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class BoxWhiskerFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    Hashtable<Probe, Integer> hitCounts = new Hashtable<Probe, Integer>();
    for (int s = 0; s < stores.length; s++) {
        progressUpdated("Processing " + stores[s].name(), s, stores.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        BoxWhisker bw;
        try {
            bw = new BoxWhisker(stores[s], startingList, stringency);
        } catch (SeqMonkException e) {
            System.err.println("Ignoring unquantitated dataset");
            e.printStackTrace();
            continue;
        }
        if (useUpper) {
            Probe[] p = bw.upperProbeOutliers();
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            for (int i = 0; i < p.length; i++) {
                if (hitCounts.containsKey(p[i])) {
                    hitCounts.put(p[i], hitCounts.get(p[i]).intValue() + 1);
                } else {
                    hitCounts.put(p[i], 1);
                }
            }
        }
        if (useLower) {
            Probe[] p = bw.lowerProbeOutliers();
            for (int i = 0; i < p.length; i++) {
                if (cancel) {
                    cancel = false;
                    progressCancelled();
                    return;
                }
                if (hitCounts.containsKey(p[i])) {
                    hitCounts.put(p[i], hitCounts.get(p[i]).intValue() + 1);
                } else {
                    hitCounts.put(p[i], 1);
                }
            }
        }
    }
    // Now we can go through the probes which hit and see if
    // we had enough hits to put them into our final list.
    Enumeration<Probe> candidates = hitCounts.keys();
    while (candidates.hasMoreElements()) {
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        Probe candidate = candidates.nextElement();
        int count = hitCounts.get(candidate).intValue();
        // If we saw enough hits for this probe then we can add the
        // probe to the final probe list.
        switch(filterType) {
            case EXACTLY:
                if (count == storeCutoff)
                    newList.addProbe(candidate, null);
                break;
            case AT_LEAST:
                if (count >= storeCutoff)
                    newList.addProbe(candidate, null);
                break;
            case NO_MORE_THAN:
                if (count <= storeCutoff)
                    newList.addProbe(candidate, null);
                break;
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Hashtable(java.util.Hashtable) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) BoxWhisker(uk.ac.babraham.SeqMonk.Analysis.Statistics.BoxWhisker)
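
Beyond the BoxWhisker outlier calls themselves, the filter is just a per-probe tally of how many data stores flagged the probe, compared against the store cutoff in one of three ways. A small sketch of that bookkeeping with hypothetical store results:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class OutlierHitCountSketch {

    enum FilterType { EXACTLY, AT_LEAST, NO_MORE_THAN }

    public static void main(String[] args) {
        // Hypothetical outlier calls from three data stores.
        List<List<String>> outliersPerStore = Arrays.asList(
                Arrays.asList("probeA", "probeB"),
                Arrays.asList("probeA", "probeC"),
                Arrays.asList("probeA"));

        // Tally how many stores called each probe an outlier.
        Map<String, Integer> hitCounts = new HashMap<String, Integer>();
        for (List<String> storeOutliers : outliersPerStore) {
            for (String probe : storeOutliers) {
                Integer previous = hitCounts.get(probe);
                hitCounts.put(probe, previous == null ? 1 : previous + 1);
            }
        }

        // Keep probes whose tally satisfies the chosen cutoff rule.
        FilterType filterType = FilterType.AT_LEAST;
        int storeCutoff = 2;
        for (Map.Entry<String, Integer> entry : hitCounts.entrySet()) {
            int count = entry.getValue();
            boolean keep = false;
            switch (filterType) {
                case EXACTLY:      keep = count == storeCutoff; break;
                case AT_LEAST:     keep = count >= storeCutoff; break;
                case NO_MORE_THAN: keep = count <= storeCutoff; break;
            }
            if (keep) {
                System.out.println(entry.getKey() + " kept with " + count + " hits");
            }
        }
    }
}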

Example 39 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class VariancePlotPanel method calculateNonredundantSet.

/**
 * This collapses individual points which are over the same
 * pixel when redrawing the plot at a different scale
 */
private synchronized void calculateNonredundantSet() {
    closestPoint = null;
    ProbePairValue[][] grid = new ProbePairValue[getWidth()][getHeight()];
    Probe[] probes = probeList.getAllProbes();
    try {
        for (int p = 0; p < probes.length; p++) {
            float xValue = repSet.getValueForProbeExcludingUnmeasured(probes[p]);
            float yValue = getYValue(probes[p]);
            if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
                continue;
            }
            int x = getX(xValue);
            int y = getY(yValue);
            if (grid[x][y] == null) {
                grid[x][y] = new ProbePairValue(xValue, yValue, x, y);
                grid[x][y].setProbe(probes[p]);
            } else {
                // Only increment the count when we're not showing sublists, since in
                // that case the count records which sublist this point belongs to.
                if (subLists == null)
                    grid[x][y].count++;
                // As we have multiple probes at this point we remove the
                // specific probe annotation.
                grid[x][y].setProbe(null);
            }
        }
        if (subLists != null) {
            for (int s = 0; s < subLists.length; s++) {
                Probe[] subListProbes = subLists[s].getAllProbes();
                for (int p = 0; p < subListProbes.length; p++) {
                    float xValue = repSet.getValueForProbeExcludingUnmeasured(subListProbes[p]);
                    float yValue = getYValue(subListProbes[p]);
                    int x = getX(xValue);
                    int y = getY(yValue);
                    if (grid[x][y] == null) {
                        // This can happen if we catch the plot in the middle of a redraw, so just skip it.
                        continue;
                    // throw new IllegalArgumentException("Found subList position not in main list");
                    }
                    // 1 = no list so 2 is the lowest sublist index
                    grid[x][y].count = s + 2;
                }
            }
        }
    } catch (SeqMonkException e) {
        throw new IllegalStateException(e);
    }
    // Now we need to put all of the ProbePairValues into
    // a single array;
    int count = 0;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null)
                count++;
        }
    }
    ProbePairValue[] nonred = new ProbePairValue[count];
    count--;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null) {
                nonred[count] = grid[x][y];
                count--;
            }
        }
    }
    Arrays.sort(nonred);
    // Work out the 95th percentile count
    int minCount = 1;
    int maxCount = 2;
    if (nonred.length > 0) {
        minCount = nonred[0].count;
        maxCount = nonred[((nonred.length - 1) * 95) / 100].count;
    }
    // Go through every nonred assigning a suitable colour
    ColourGradient gradient = new HotColdColourGradient();
    for (int i = 0; i < nonred.length; i++) {
        if (subLists == null) {
            nonred[i].color = gradient.getColor(nonred[i].count, minCount, maxCount);
        } else {
            if (nonred[i].count > subLists.length + 1) {
                throw new IllegalArgumentException("Count above threshold when showing sublists");
            }
            if (nonred[i].count == 1) {
                nonred[i].color = VERY_LIGHT_GREY;
            } else {
                nonred[i].color = ColourIndexSet.getColour(nonred[i].count - 2);
            }
        }
    }
    nonRedundantValues = nonred;
    lastNonredWidth = getWidth();
    lastNonredHeight = getHeight();
// System.out.println("Nonred was "+nonRedundantValues.length+" from "+probes.length);
}
Also used : HotColdColourGradient(uk.ac.babraham.SeqMonk.Gradients.HotColdColourGradient) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) ColourGradient(uk.ac.babraham.SeqMonk.Gradients.ColourGradient) HotColdColourGradient(uk.ac.babraham.SeqMonk.Gradients.HotColdColourGradient)
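
The non-redundant set is built by binning every probe's (x, y) value into the pixel it would be drawn at, so each pixel carries a count rather than a list of points; the counts are then mapped onto a colour gradient (or a sublist colour). A minimal sketch of the binning step with hypothetical plot dimensions and data:

public class PixelBinningSketch {
    public static void main(String[] args) {
        // Hypothetical plot size and data ranges.
        int width = 80, height = 40;
        double minX = 0, maxX = 10, minY = 0, maxY = 10;

        // Hypothetical (x, y) values; the first two fall on the same pixel.
        double[][] points = { { 1.0, 2.0 }, { 1.01, 2.01 }, { 7.5, 3.3 } };

        int[][] counts = new int[width][height];
        for (double[] point : points) {
            // Convert data coordinates to pixel coordinates.
            int x = (int) (((point[0] - minX) / (maxX - minX)) * (width - 1));
            int y = (int) (((point[1] - minY) / (maxY - minY)) * (height - 1));
            counts[x][y]++;
        }

        // Count occupied pixels - this is the size of the non-redundant set.
        int occupiedPixels = 0;
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                if (counts[x][y] > 0) {
                    occupiedPixels++;
                }
            }
        }
        System.out.println("Occupied pixels: " + occupiedPixels); // prints 2
    }
}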

Example 40 with Probe

use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.

the class IntensityDifferenceFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
    Probe[] probes = startingList.getAllProbes();
    // We'll pull the number of probes to sample from the preferences if they've changed it
    Integer updatedProbesPerSet = optionsPanel.probesPerSet();
    if (updatedProbesPerSet != null)
        probesPerSet = updatedProbesPerSet;
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
    // We'll build up a set of p-values as we go along
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }
    // This is going to be the temporary array we populate with the set of
    // differences we are going to analyse.
    double[] currentDiffSet = new double[probesPerSet];
    // First work out the set of comparisons we're going to make
    Vector<SingleComparison> comparisons = new Vector<IntensityDifferenceFilter.SingleComparison>();
    for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
        for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
            if (fromStores[fromIndex] == toStores[toIndex])
                continue;
            // If we can find the fromStore in the toStores we've already done and the
            // toStore anywhere in the fromStores then we can skip this.
            boolean canSkip = false;
            for (int i = 0; i < fromIndex; i++) {
                if (fromStores[i] == toStores[toIndex]) {
                    for (int j = 0; j < toStores.length; j++) {
                        if (toStores[j] == fromStores[fromIndex]) {
                            canSkip = true;
                            break;
                        }
                    }
                    break;
                }
            }
            if (canSkip)
                continue;
            comparisons.add(new SingleComparison(fromIndex, toIndex));
        }
    }
    // Put something in the progress whilst we're ordering the probe values to make
    // the comparison.
    progressUpdated("Generating background model", 0, 1);
    for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
        int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
        int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
        // We need to generate a set of probe indices ordered by their average intensity
        Integer[] indices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            indices[i] = i;
        }
        Comparator<Integer> comp = new AverageIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
        Arrays.sort(indices, comp);
        progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
        IndexTTestValue[] currentPValues = new IndexTTestValue[indices.length];
        for (int i = 0; i < indices.length; i++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (i % 1000 == 0) {
                int progress = (i * 100) / indices.length;
                progress += 100 * comparisonIndex;
                progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
            }
            // We need to make up the set of differences to represent this probe
            int startingIndex = i - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + (probesPerSet + 1) >= probes.length)
                startingIndex = probes.length - (probesPerSet + 1);
            try {
                for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
                    // Don't include the point being tested in the background model
                    if (j == i)
                        continue;
                    else if (j < i) {
                        currentDiffSet[j - startingIndex] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    } else {
                        currentDiffSet[(j - startingIndex) - 1] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    }
                }
                // Should we fix the mean at 0?
                double mean = 0;
                // double mean = SimpleStats.mean(currentDiffSet);
                double stdev = SimpleStats.stdev(currentDiffSet, mean);
                if (stdev == 0) {
                    currentPValues[indices[i]] = new IndexTTestValue(indices[i], 1);
                    continue;
                }
                // Get the difference for this point
                double diff = fromStores[fromIndex].getValueForProbe(probes[indices[i]]) - toStores[toIndex].getValueForProbe(probes[indices[i]]);
                NormalDistribution nd = new NormalDistribution(mean, stdev);
                double significance;
                if (diff < mean) {
                    significance = nd.cumulativeProbability(diff);
                } else {
                    significance = 1 - nd.cumulativeProbability(diff);
                }
                currentPValues[indices[i]] = new IndexTTestValue(indices[i], significance);
            } catch (SeqMonkException sme) {
                progressExceptionReceived(sme);
                return;
            }
        }
        // We now need to correct the set of pValues
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }
        // Keep the lowest p-value (or q-value) seen for each probe across
        // all comparisons in the combined set.
        if (applyMultipleTestingCorrection) {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            }
        } else {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }
    // Finally, add any probes whose lowest value passes the filter cutoff.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) IndexTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.IndexTTestValue)
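
Each probe's significance comes from a local background model: the intensity differences of the probesPerSet probes with the most similar average intensity define a normal distribution (mean fixed at zero), and the probe's own difference is scored one-sidedly against it. A minimal sketch of that test with hypothetical numbers, using commons-math3; note that SeqMonk's SimpleStats.stdev may use a slightly different formula than the sample standard deviation shown here:

import org.apache.commons.math3.distribution.NormalDistribution;

public class LocalDifferenceTestSketch {
    public static void main(String[] args) {
        // Hypothetical differences (from - to) for the neighbouring probes
        // which make up the local background model.
        double[] backgroundDiffs = { -0.2, 0.1, 0.3, -0.4, 0.05, -0.15, 0.25, -0.1 };

        // The observed difference for the probe being tested.
        double observedDiff = 1.2;

        // Mean fixed at zero, as in the filter above.
        double mean = 0;
        double sumSq = 0;
        for (double d : backgroundDiffs) {
            sumSq += (d - mean) * (d - mean);
        }
        double stdev = Math.sqrt(sumSq / (backgroundDiffs.length - 1));

        // One-sided probability of a difference at least this extreme.
        NormalDistribution nd = new NormalDistribution(mean, stdev);
        double significance = observedDiff < mean
                ? nd.cumulativeProbability(observedDiff)
                : 1 - nd.cumulativeProbability(observedDiff);

        System.out.println("stdev = " + stdev + ", p = " + significance);
    }
}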

Aggregations

Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe): 125
ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList): 54
SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException): 52
Vector (java.util.Vector): 48
Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome): 47
ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet): 26
DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore): 21
Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature): 20
HashSet (java.util.HashSet): 9
Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location): 9
File (java.io.File): 8
PrintWriter (java.io.PrintWriter): 8
ProbeTTestValue (uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue): 8
SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation): 7
HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore): 7
BufferedReader (java.io.BufferedReader): 6
FileReader (java.io.FileReader): 6
Hashtable (java.util.Hashtable): 6
DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet): 6
HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection): 6