Search in sources :

Example 66 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class AntisenseTranscriptionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    long[] senseCounts = new long[data.length];
    long[] antisenseCounts = new long[data.length];
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Getting total antisense rate for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
            for (int d = 0; d < data.length; d++) {
                long[] reads = data[d].getReadsForProbe(p);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(p, reads[r])) {
                        senseCounts[d] += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCounts[d] += SequenceRead.length(reads[r]);
                    }
                }
            }
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we can work out the overall antisense rate
    double[] antisenseProbability = new double[data.length];
    for (int d = 0; d < data.length; d++) {
        System.err.println("Antisense counts are " + antisenseCounts[d] + " sense counts are " + senseCounts[d]);
        antisenseProbability[d] = antisenseCounts[d] / (double) (antisenseCounts[d] + senseCounts[d]);
        System.err.println("Antisense probability for " + data[d].name() + " is " + antisenseProbability[d]);
    }
    // Now we can quantitate each individual feature and test for whether it is significantly
    // showing antisense expression
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }
    int[] readLengths = new int[data.length];
    for (int d = 0; d < readLengths.length; d++) {
        readLengths[d] = data[d].getMaxReadLength();
        System.err.println("For " + data[d].name() + " max read len is " + readLengths[d]);
    }
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Probe[] thisChrProbes = collection().probeSet().getProbesForChromosome(chrs[c]);
        for (int p = 0; p < thisChrProbes.length; p++) {
            for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long senseCount = 0;
                long antisenseCount = 0;
                long[] reads = data[d].getReadsForProbe(thisChrProbes[p]);
                for (int r = 0; r < reads.length; r++) {
                    if (readFilter.useRead(thisChrProbes[p], reads[r])) {
                        // TODO: Just count overlap?
                        senseCount += SequenceRead.length(reads[r]);
                    } else {
                        antisenseCount += SequenceRead.length(reads[r]);
                    }
                }
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw base counts are sense="+senseCount+" anti="+antisenseCount+" from "+reads.length+" reads");
                // }
                int senseReads = (int) (senseCount / readLengths[d]);
                int antisenseReads = (int) (antisenseCount / readLengths[d]);
                // if (thisChrProbes[p].name().equals("RP4-798A10.2")) {
                // System.err.println("Raw read counts are sense="+senseReads+" anti="+antisenseReads+" from "+reads.length+" reads");
                // }
                BinomialDistribution bd = new BinomialDistribution(senseReads + antisenseReads, antisenseProbability[d]);
                // Since the binomial distribution gives the probability of getting a value higher than
                // this we need to subtract one so we get the probability of this or higher.
                double thisPValue = 1 - bd.cumulativeProbability(antisenseReads - 1);
                if (antisenseReads == 0)
                    thisPValue = 1;
                // We have to add all results at this stage so we don't mess up the multiple
                // testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(thisChrProbes[p], thisPValue));
                double expected = ((senseReads + antisenseReads) * antisenseProbability[d]);
                if (expected < 1)
                    expected = 1;
                float obsExp = antisenseReads / (float) expected;
                data[d].setValueForProbe(thisChrProbes[p], obsExp);
            }
        }
    }
    // filtering those which pass our p-value cutoff
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);
        ProbeList newList = new ProbeList(collection().probeSet(), "Antisense < " + pValue + " in " + data[d].name(), "Probes showing significant antisense transcription from a basal level of " + antisenseProbability[d] + " with a cutoff of " + pValue, "FDR");
        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].probe.name().equals("RP4-798A10.2")) {
                System.err.println("Raw p=" + ttestResults[i].p + " q=" + ttestResults[i].q);
            }
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Antisense transcription pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    if (optionsPanel.ignoreOverlaps()) {
        quantitationDescription.append(". Ignoring existing overlaps");
    }
    quantitationDescription.append(". P-value cutoff was ");
    quantitationDescription.append(optionsPanel.pValue());
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : ArrayList(java.util.ArrayList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ProbeTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) BinomialDistribution(org.apache.commons.math3.distribution.BinomialDistribution)

Example 67 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class IntronRegressionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    int minDensity = optionsPanel.minDensity();
    int minLength = optionsPanel.minLength();
    double maxPValue = optionsPanel.maxPValue();
    int binSize = optionsPanel.measurementBinSize();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        Vector<Probe> probesForThisChromosome = new Vector<Probe>();
        progressUpdated("Making probes", c, chrs.length);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            // There are no introns here
            if (!(features[f].location() instanceof SplitLocation))
                continue;
            Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
            // TODO: Reverse the subLocations if its a reverse feature
            for (int intron = 1; intron < subLocations.length; intron++) {
                int start = subLocations[intron - 1].end();
                int end = subLocations[intron].start();
                if ((end - start) + 1 < minLength) {
                    // This intron is too short.
                    continue;
                }
                // TODO: We could throw away any probes which didn't have enough reads in any feature
                Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
                probesForThisChromosome.add(p);
            }
        }
        // Now we can deduplicate the probes for this chromosome and add them to the main collection
        Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
        Arrays.sort(dupProbes);
        for (int p = 0; p < dupProbes.length; p++) {
            if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
                continue;
            probes.add(dupProbes[p]);
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we go back through the probes and quantitate them
    for (int p = 0; p < allProbes.length; p++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        if (p % 1000 == 0) {
            progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
        }
        for (int d = 0; d < data.length; d++) {
            long[] reads = data[d].getReadsForProbe(allProbes[p]);
            int[] countsPerSite = new int[allProbes[p].length()];
            int usableCounts = 0;
            for (int r = 0; r < reads.length; r++) {
                if (readFilter.useRead(allProbes[p], reads[r])) {
                    ++usableCounts;
                    for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
                        ++countsPerSite[pos];
                    }
                }
            }
            if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
                // We're going to do a linear regression rather than a correlation
                // We're analysing in bins so we'll work out the bin counts and
                // add them dynamically to the regression.
                SimpleRegression regression = new SimpleRegression();
                int binCount = 0;
                for (int i = 0; i < countsPerSite.length; i++) {
                    if (i > 0 && i % binSize == 0) {
                        regression.addData(i, binCount);
                        binCount = 0;
                    }
                    binCount += countsPerSite[i];
                }
                float slope = (float) (regression.getSlope() * 1000000);
                double pValue = regression.getSignificance();
                if (allProbes[p].strand() == Location.REVERSE) {
                    slope = 0 - slope;
                }
                if (pValue <= maxPValue) {
                    data[d].setValueForProbe(allProbes[p], slope);
                } else {
                    data[d].setValueForProbe(allProbes[p], Float.NaN);
                }
            } else {
                data[d].setValueForProbe(allProbes[p], Float.NaN);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Intron regression pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    quantitationDescription.append(". Min intron length was ");
    quantitationDescription.append(minLength);
    quantitationDescription.append(". Min read density was ");
    quantitationDescription.append(minDensity);
    quantitationDescription.append(". Max slope p-value was ");
    quantitationDescription.append(maxPValue);
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 68 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class ProbeWindowGenerator method nextSet.

/**
 * Provides the next set of probe from this generator
 *
 * @return A list of probes in the next set.
 */
public Probe[] nextSet() {
    if (startPos >= probes.length) {
        return null;
    }
    int windowEnd = probes[startPos].start() + windowSize;
    v.removeAllElements();
    v.add(probes[startPos]);
    Chromosome c = probes[startPos].chromosome();
    for (int i = startPos + 1; i < probes.length; i++) {
        if (probes[i].chromosome() != c)
            break;
        if (probes[i].start() > windowEnd)
            break;
        if (probes[i].end() <= windowEnd) {
            v.add(probes[i]);
        }
    }
    ++startPos;
    return v.toArray(new Probe[0]);
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Example 69 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class HiCLengthHistogramPlot method getData.

/**
 * Gets the data.
 *
 * @param pl the pl
 * @return the data
 */
private float[] getData() {
    Vector<Integer> data = new Vector<Integer>();
    // If there is a probeset then we get pairs for the current probe list
    if (probes != null) {
        for (int p = 0; p < probes.length; p++) {
            HiCHitCollection hiCHits = store.getHiCReadsForProbe(probes[p]);
            long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            for (int r = 0; r < sourcePositions.length; r++) {
                // Don't enter every pair twice
                if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
                    continue;
                data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
            }
        }
    } else {
        Chromosome[] chrs = SeqMonkApplication.getInstance().dataCollection().genome().getAllChromosomes();
        for (int c1 = 0; c1 < chrs.length; c1++) {
            HiCHitCollection hiCHits = store.getHiCReadsForChromosome(chrs[c1]);
            long[] sourcePositions = hiCHits.getSourcePositionsForChromosome(hiCHits.getSourceChromosomeName());
            long[] hitPositions = hiCHits.getHitPositionsForChromosome(hiCHits.getSourceChromosomeName());
            for (int r = 0; r < sourcePositions.length; r++) {
                // Don't enter every pair twice
                if (SequenceRead.compare(sourcePositions[r], hitPositions[r]) < 0)
                    continue;
                data.add(SequenceRead.fragmentLength(sourcePositions[r], hitPositions[r]));
            }
        }
    }
    // Convert to float [] for the return array
    float[] returnData = new float[data.size()];
    int index = 0;
    Enumeration<Integer> en = data.elements();
    while (en.hasMoreElements()) {
        returnData[index] = en.nextElement().floatValue();
        index++;
    }
    data.clear();
    return returnData;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Vector(java.util.Vector)

Example 70 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class HeatmapGenomeWindow method actionPerformed.

/* (non-Javadoc)
	 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
	 */
public void actionPerformed(ActionEvent ae) {
    if (ae.getActionCommand().equals("close")) {
        setVisible(false);
        dispose();
    } else if (ae.getActionCommand().equals("save_image")) {
        ImageSaver.saveImage(heatmapPanel);
    } else if (ae.getActionCommand().equals("cluster")) {
        if (clusterButton.getText().equals("Cluster Interactions")) {
            // First we need to make up an interaction cluster matrix which we
            // can then feed to the generic hierarchical clustering program
            InteractionClusterMatrix clusterMatrix = new InteractionClusterMatrix(matrix.filteredInteractions(), matrix.probeCount());
            clusterMatrix.addListener(new ProgressDialog(this, "HiC Interaction Clustering", clusterMatrix));
            clusterMatrix.addListener(this);
            clusterMatrix.startCorrelating();
            clusterButton.setEnabled(false);
        } else {
            matrix.setCluster(null);
            saveClustersButton.setEnabled(false);
            clusterButton.setText("Cluster Interactions");
        }
    } else if (ae.getActionCommand().equals("save_probes")) {
        ProbeList newList = matrix.createProbeListFromCurrentInteractions();
        // Ask for a name for the list
        String groupName = null;
        while (true) {
            groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + newList.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, newList.name());
            if (groupName == null) {
                // Since the list will automatically have been added to
                // the ProbeList tree we actively need to delete it if
                // they choose to cancel at this point.
                newList.delete();
                // They cancelled
                return;
            }
            if (groupName.length() == 0)
                // Try again
                continue;
            break;
        }
        newList.setName(groupName);
    } else if (ae.getActionCommand().equals("save_clusters")) {
        if (matrix.cluster() == null)
            return;
        // Get a limit for how many probes per cluster
        String howManyProbes = JOptionPane.showInputDialog(this, "Minimum number of probes per cluster", "10");
        if (howManyProbes == null)
            return;
        int minProbes;
        try {
            minProbes = Integer.parseInt(howManyProbes);
        } catch (NumberFormatException nfe) {
            JOptionPane.showMessageDialog(this, howManyProbes + " was not an integer", "Error", JOptionPane.ERROR_MESSAGE);
            return;
        }
        ProbeList newList = matrix.createProbeListsFromClusters(minProbes, heatmapPanel.genomePanel().currentYStartIndex(), heatmapPanel.genomePanel().currentYEndIndex());
        // This was called before clustering had completed.
        if (newList == null)
            return;
        // Ask for a name for the list
        String groupName = null;
        while (true) {
            groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + newList.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, newList.name());
            if (groupName == null) {
                // Since the list will automatically have been added to
                // the ProbeList tree we actively need to delete it if
                // they choose to cancel at this point.
                newList.delete();
                // They cancelled
                return;
            }
            if (groupName.length() == 0)
                // Try again
                continue;
            break;
        }
        newList.setName(groupName);
    } else if (ae.getActionCommand().equals("send_x")) {
        DisplayPreferences.getInstance().setLocation(xChr, SequenceRead.packPosition(xStart, xEnd, Location.UNKNOWN));
    } else if (ae.getActionCommand().equals("send_y")) {
        DisplayPreferences.getInstance().setLocation(yChr, SequenceRead.packPosition(yStart, yEnd, Location.UNKNOWN));
    } else if (ae.getActionCommand().equals("match")) {
        Chromosome chr = DisplayPreferences.getInstance().getCurrentChromosome();
        int start = SequenceRead.start(DisplayPreferences.getInstance().getCurrentLocation());
        int end = SequenceRead.end(DisplayPreferences.getInstance().getCurrentLocation());
        heatmapPanel.genomePanel().setCurrentPosition(chr, start, end);
    } else if (ae.getActionCommand().equals("make_report")) {
        new InteractionReportOptions(SeqMonkApplication.getInstance(), new AnnotatedInteractionReport(SeqMonkApplication.getInstance().dataCollection(), matrix));
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) InteractionClusterMatrix(uk.ac.babraham.SeqMonk.DataTypes.Interaction.InteractionClusterMatrix) InteractionReportOptions(uk.ac.babraham.SeqMonk.Displays.Report.InteractionReport.InteractionReportOptions) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProgressDialog(uk.ac.babraham.SeqMonk.Dialogs.ProgressDialog.ProgressDialog) AnnotatedInteractionReport(uk.ac.babraham.SeqMonk.Reports.Interaction.AnnotatedInteractionReport)

Aggregations

Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)78 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)47 Vector (java.util.Vector)36 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)23 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)23 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)22 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)12 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)11 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)8 ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)8 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)7 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)7 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)7 IOException (java.io.IOException)6 File (java.io.File)5 Hashtable (java.util.Hashtable)5 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)5 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)5 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)4