Search in sources :

Example 36 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class CisTransScatterPlotDialog method actionPerformed.

/* (non-Javadoc)
	 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
	 */
public void actionPerformed(ActionEvent ae) {
    if (ae.getActionCommand().equals("close")) {
        setVisible(false);
        dispose();
    } else if (ae.getActionCommand().equals("plot")) {
        HiCDataStore store = (HiCDataStore) hiCStores.getSelectedItem();
        getContentPane().remove(scatterPlotPanel);
        scatterPlotPanel = new CisTransScatterPlotPanel(store, collection.probeSet().getActiveList(), commonScaleBox.isSelected(), dotSizeSlider.getValue());
        getContentPane().add(scatterPlotPanel, BorderLayout.CENTER);
        validate();
    } else if (ae.getActionCommand().equals("filter_chromosome")) {
        Chromosome c;
        if (chromosomes.getSelectedItem() instanceof Chromosome) {
            c = (Chromosome) chromosomes.getSelectedItem();
        } else {
            c = null;
        }
        if (scatterPlotPanel instanceof CisTransScatterPlotPanel) {
            ((CisTransScatterPlotPanel) scatterPlotPanel).setChromosome(c);
        }
    } else if (ae.getActionCommand().equals("save_probe_list")) {
        if (scatterPlotPanel instanceof CisTransScatterPlotPanel) {
            ProbeList list = ((CisTransScatterPlotPanel) scatterPlotPanel).getFilteredProbes(collection.probeSet());
            if (list.getAllProbes().length == 0) {
                JOptionPane.showMessageDialog(this, "No probes were selected", "No probes", JOptionPane.INFORMATION_MESSAGE);
                return;
            }
            // Ask for a name for the list
            String groupName = null;
            while (true) {
                groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + list.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, list.name());
                if (groupName == null) {
                    // Remove the list which will have been created by this stage
                    list.delete();
                    // They cancelled
                    return;
                }
                if (groupName.length() == 0)
                    // Try again
                    continue;
                break;
            }
            list.setName(groupName);
        }
    } else if (ae.getActionCommand().equals("save_image")) {
        ImageSaver.saveImage(scatterPlotPanel);
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) HiCDataStore(uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Example 37 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class HeatmapMatrix method run.

public void run() {
    // First calculate chromosome offsets so we can access the
    // relevant probes more quickly when we come to adding other
    // end data.
    Hashtable<String, Integer> offsets = new Hashtable<String, Integer>();
    offsets.put(probes[0].probe.chromosome().name(), 0);
    for (int i = 1; i < probes.length; i++) {
        if (probes[i].probe.chromosome() != probes[i - 1].probe.chromosome()) {
            offsets.put(probes[i].probe.chromosome().name(), i);
        }
    }
    // We also need an initial list of total counts for all of our probes
    // so we can do the O/E calculations.  We can incorporate into this
    // the ability to correct for linkage.
    int[] totalCisCounts = new int[probes.length];
    int[] totalTransCounts = new int[probes.length];
    getTotalCounts(totalCisCounts, totalTransCounts);
    if (cancel) {
        Enumeration<ProgressListener> en2 = listeners.elements();
        while (en2.hasMoreElements()) {
            en2.nextElement().progressCancelled();
        }
        return;
    }
    // We'll make up an ever expanding list of interactions which we
    // care about
    Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
    // We'll need to correct for the number of tests we perform, but we're also able
    // to skip a load of tests based on the initial filters we supplied (non-statistical ones)
    // so we're going to keep a record of how many valid tests we actually need to correct for.
    // 
    // The worse case scenario is that we do every possible comparison (but only one way round).
    // After some testing this proved to be a bad idea.  We skipped so many tests where the
    // interaction wasn't observed that we ended up hugely under-correcting our final p-values
    // and making a load of false positive predictions.  We can do this more correctly later,
    // but for now we're either going to correct for every possible test, or for just every
    // possible cis test if we're excluding trans hits.
    long numberOfTestsPerformed = 0;
    if (initialMaxDistance > 0) {
        // We're just counting cis interactions
        int currentChrCount = 1;
        Chromosome currentChr = probes[0].probe.chromosome();
        for (int p = 1; p < probes.length; p++) {
            if (probes[p].probe.chromosome() == currentChr) {
                ++currentChrCount;
            } else {
                numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
                currentChrCount = 1;
                currentChr = probes[p].probe.chromosome();
            }
        }
        numberOfTestsPerformed += (currentChrCount * ((long) currentChrCount - 1)) / 2;
    } else {
        numberOfTestsPerformed = (probes.length * ((long) probes.length - 1)) / 2;
    }
    for (int p = 0; p < probes.length; p++) {
        if (p % 100 == 0) {
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressUpdated("Processed " + p + " probes", p, probes.length);
            }
            if (cancel) {
                Enumeration<ProgressListener> en2 = listeners.elements();
                while (en2.hasMoreElements()) {
                    en2.nextElement().progressCancelled();
                }
                return;
            }
        }
        // System.err.println("Getting interactions for "+probes[p].probe);
        // We temporarily store the interactions with this probe which means we
        // can make a decision about which ones we're keeping as we go along which
        // drastically reduces the amount we need to store.
        // This is going to be the data structure which holds the information
        // on the pairs of probes which have any interactions.  The key is the
        // index position of the pair (x+(y*no of probes), and the value is the
        // number of times this interaction was seen
        Hashtable<Long, Integer> interactionCounts = new Hashtable<Long, Integer>();
        HiCHitCollection hiCHits = dataSet.getHiCReadsForProbe(probes[p].probe);
        String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
        // System.err.println("Found hits on "+chromosomeNames.length+" chromosomes");
        CHROM: for (int c = 0; c < chromosomeNames.length; c++) {
            // Skip all trans reads if there is a max distance set.
            if (initialMaxDistance > 0) {
                if (!probes[p].probe.chromosome().name().equals(chromosomeNames[c])) {
                    continue;
                }
            }
            long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
            SequenceRead.sort(hitReads);
            // We're going to start when we hit the probes for this chromosome
            if (!offsets.containsKey(chromosomeNames[c])) {
                // System.err.println("No probes on chr "+chromosomeNames[c]+" skipping");
                continue;
            }
            int lastIndex = offsets.get(chromosomeNames[c]);
            READ: for (int o = 0; o < hitReads.length; o++) {
                if (cancel) {
                    Enumeration<ProgressListener> en = listeners.elements();
                    while (en.hasMoreElements()) {
                        en.nextElement().progressCancelled();
                    }
                    return;
                }
                // Check against the relevant reads
                int startIndex = lastIndex;
                for (int x = startIndex; x < probes.length; x++) {
                    // Check that we're on the right chromosome
                    if (!probes[x].probe.chromosome().name().equals(chromosomeNames[c])) {
                        // System.err.println("Stopping searching at position "+x+" since we changed to chr "+probes[x].probe.chromosome());
                        continue CHROM;
                    }
                    if (probes[x].probe.start() > SequenceRead.end(hitReads[o])) {
                        // to the next read.
                        continue READ;
                    }
                    if (SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
                        // We overlap with this probe
                        // System.err.println("Found hit to probe "+probes[x].probe);
                        // For probe list probes we could have the same probe several times
                        // so we can add the read to all of the probes which are the same
                        // from this point.
                        // We can skip over interactions where the matched index is less than our index
                        // since we'll duplicate this later anyway.
                        long interactionKey = p + (x * (long) probes.length);
                        if (!interactionCounts.containsKey(interactionKey)) {
                            if (probes[p].index < probes[x].index) {
                                interactionCounts.put(interactionKey, 0);
                            }
                        // else {
                        // System.err.println("Skipping earlier probe hit");
                        // }
                        }
                        // Add one to our counts for this interaction
                        if (probes[p].index < probes[x].index) {
                            interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
                        }
                        // Since we found our first hit here we can start looking from here next time.
                        lastIndex = x;
                        // this one.
                        for (x = x + 1; x < probes.length; x++) {
                            if (probes[x].probe.chromosome() != probes[x - 1].probe.chromosome()) {
                                // We've reached the end for this chromosome
                                break;
                            }
                            if (probes[x].probe == probes[x - 1].probe || SequenceRead.overlaps(hitReads[o], probes[x].probe.packedPosition())) {
                                if (probes[p].index >= probes[x].index)
                                    continue;
                                interactionKey = p + (x * (long) probes.length);
                                if (!interactionCounts.containsKey(interactionKey)) {
                                    if (interactionCounts.size() >= MAX_INTERACTIONS) {
                                        continue READ;
                                    }
                                    interactionCounts.put(interactionKey, 0);
                                }
                                interactionCounts.put(interactionKey, interactionCounts.get(interactionKey) + 1);
                            } else {
                                break;
                            }
                        }
                        continue READ;
                    }
                // End if overlaps
                }
            // End check each probe
            }
        // End each hit read
        }
        // End each hit chromosome
        // We can now go through the interactions we saw and decide if we
        // want to keep any of them.
        HiCInteractionStrengthCalculator strengthCalc = new HiCInteractionStrengthCalculator(dataSet, correctLinkage);
        Enumeration<Long> en = interactionCounts.keys();
        while (en.hasMoreElements()) {
            long index = en.nextElement();
            int absoluteValue = interactionCounts.get(index);
            if (absoluteValue < initialMinAbsolute)
                continue;
            ProbeWithIndex probe1 = probes[(int) (index % probes.length)];
            ProbeWithIndex probe2 = probes[(int) (index / probes.length)];
            // We calculate the obs/exp based on the total pair count
            // and the relative counts at each end of the interaction
            // Do the interaction strength calculation
            strengthCalc.calculateInteraction(absoluteValue, totalCisCounts[probe1.index], totalTransCounts[probe1.index], totalCisCounts[probe2.index], totalTransCounts[probe2.index], probe1.probe, probe2.probe);
            // We're not counting this here any more.  We work out theoretical numbers at the top instead
            // ++numberOfTestsPerformed;
            float obsExp = (float) strengthCalc.obsExp();
            float pValue = (float) strengthCalc.rawPValue();
            // interaction objects we have to create.
            if (obsExp < initialMinStrength)
                continue;
            // This isn't the final p-value check, but if the raw pvalue fails then the corrected value is never going to pass.
            if (initialMaxSignificance < 1 && pValue > initialMaxSignificance)
                continue;
            InteractionProbePair interaction = new InteractionProbePair(probe1.probe, probe1.index, probe2.probe, probe2.index, obsExp, absoluteValue);
            interaction.setSignificance(pValue);
            // We check against the current list of filters
            if (passesCurrentFilters(interaction)) {
                // See if the strength of any of the interactions is bigger than our current max
                if (obsExp > maxValue)
                    maxValue = obsExp;
                if (filteredInteractions.size() >= MAX_INTERACTIONS) {
                    Enumeration<ProgressListener> en2 = listeners.elements();
                    while (en2.hasMoreElements()) {
                        en2.nextElement().progressWarningReceived(new SeqMonkException("More than " + MAX_INTERACTIONS + " interactions passed the filters. Showing as many as I can"));
                    }
                } else {
                    filteredInteractions.add(interaction);
                }
            }
        }
    }
    // Put the interactions which worked into an Array
    interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
    // System.err.println("Found a set of "+interactions.length+" initially filtered interactions");
    // Apply multiple testing correction
    Arrays.sort(interactions, new Comparator<InteractionProbePair>() {

        public int compare(InteractionProbePair o1, InteractionProbePair o2) {
            return Float.compare(o1.signficance(), o2.signficance());
        }
    });
    for (int i = 0; i < interactions.length; i++) {
        interactions[i].setSignificance(interactions[i].signficance() * ((float) (numberOfTestsPerformed) / (i + 1)));
        // We can't allow corrected p-values to end up in a different order to the uncorrected ones
        if (i > 0 && interactions[i].signficance() < interactions[i - 1].signficance()) {
            interactions[i].setSignificance(interactions[i - 1].signficance());
        }
    }
    // Now re-filter the interactions to get those which finally pass the p-value filter
    filteredInteractions.clear();
    for (int i = 0; i < interactions.length; i++) {
        if (initialMaxSignificance >= 1 || interactions[i].signficance() < initialMaxSignificance) {
            filteredInteractions.add(interactions[i]);
        } else {
            break;
        }
    }
    // Put the interactions which worked into an Array
    interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
    // System.err.println("Found a set of "+interactions.length+" interactions after multiple testing correction");
    // We've processed all of the reads and the matrix is complete.  We can therefore
    // draw the plot
    Enumeration<ProgressListener> en2 = listeners.elements();
    while (en2.hasMoreElements()) {
        en2.nextElement().progressComplete("heatmap", null);
    }
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector)

Example 38 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class DataStorePropertiesDialog method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Chromosome[] chrs = dataStore.collection().genome().getAllChromosomes();
    double averageLength = 0;
    int totalCount = 0;
    int shortestLength = 0;
    int longestLength = 0;
    for (int c = 0; c < chrs.length; c++) {
        ReadsWithCounts reads = dataStore.getReadsForChromosome(chrs[c]);
        for (int i = 0; i < reads.reads.length; i++) {
            totalCount += reads.counts[i];
            if (i == 0) {
                shortestLength = SequenceRead.length(reads.reads[i]);
                longestLength = SequenceRead.length(reads.reads[i]);
            }
            if (SequenceRead.length(reads.reads[i]) < shortestLength)
                shortestLength = SequenceRead.length(reads.reads[i]);
            if (SequenceRead.length(reads.reads[i]) > longestLength)
                longestLength = SequenceRead.length(reads.reads[i]);
            averageLength += SequenceRead.length(reads.reads[i]) * reads.counts[i];
        }
    }
    averageLength /= totalCount;
    this.averageLength.setText("" + (int) averageLength + "bp (" + shortestLength + "-" + longestLength + ")");
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 39 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class SeqMonkDataWriter method printPairedDataSet.

private boolean printPairedDataSet(PairedDataSet set, PrintStream p, int index, int indexTotal) throws IOException {
    p.println(set.getTotalReadCount() * 2 + "\t" + set.name());
    // Go through one chromosome at a time.
    Chromosome[] chrs = data.genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        HiCHitCollection hiCHits = set.getHiCReadsForChromosome(chrs[c]);
        // Work out how many of these reads we're actually going to output
        int validReadCount = 0;
        for (int c2 = 0; c2 < chrs.length; c2++) {
            validReadCount += hiCHits.getSourcePositionsForChromosome(chrs[c2].name()).length;
        }
        p.println(chrs[c].name() + "\t" + validReadCount);
        for (int c2 = 0; c2 < chrs.length; c2++) {
            long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chrs[c2].name());
            long[] hitReads = hiCHits.getHitPositionsForChromosome(chrs[c2].name());
            for (int j = 0; j < sourceReads.length; j++) {
                if (cancel) {
                    cancelled(p);
                    return false;
                }
                // TODO: Fix the progress bar
                if ((j % (1 + (validReadCount / 10))) == 0) {
                    Enumeration<ProgressListener> e2 = listeners.elements();
                    while (e2.hasMoreElements()) {
                        e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
                    }
                }
                p.println(sourceReads[j] + "\t" + chrs[c2].name() + "\t" + hitReads[j]);
            }
        }
    }
    // Print a blank line after the last chromosome
    p.println("");
    return true;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Example 40 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class ChromosomeNameTranslator method getChromosomeForName.

/**
 * Tries to find a match for a name, but provides an option to not remember failures.
 * This can be used when the genome is still in a state of flux (with new chromosomes
 * being added) so we know that something which fails now may not necessarily fail
 * later.  This shouldn't be used once the genome is completely loaded since performance
 * will be adversely affected by not caching failures and rechecking them every time.
 *
 * @param name The name to match
 * @param rememberFailures Whether to cache names which couldn't be matched to a chromosome
 * @return The chromosome we matched.
 * @throws SeqMonkException If we couldn't match the name to any chromosome
 */
public ChromosomeWithOffset getChromosomeForName(String name, boolean rememberFailures) {
    // See if we've already worked this out for this name
    if (nameMap.containsKey(name)) {
        ChromosomeWithOffset chr = nameMap.get(name);
        if (chr != null) {
            return chr;
        }
        throw new IllegalArgumentException("Couldn't extract a valid name from '" + name + "'");
    }
    // Ensembl chromsome files look like:
    // Homo_sapiens.GRCh37.55.dna.chromosome.1.fa
    String[] dotSections = name.split("\\.");
    if (dotSections.length == 7) {
        // Try it
        Chromosome chr = genome.getExactChromsomeNameMatch(dotSections[5]);
        if (chr != null) {
            nameMap.put(name, new ChromosomeWithOffset(chr, 0));
            return nameMap.get(name);
        }
    }
    // Try filenames next
    String filename = name.trim();
    if (filename.toLowerCase().endsWith(".txt")) {
        filename = filename.substring(0, filename.length() - 4);
    }
    if (filename.toLowerCase().endsWith(".fa")) {
        filename = filename.substring(0, filename.length() - 3);
    }
    if (filename.toLowerCase().startsWith("chromosome")) {
        filename = filename.substring(10);
    }
    if (filename.toLowerCase().startsWith("chr")) {
        filename = filename.substring(3);
    }
    filename = filename.trim();
    Chromosome chr = genome.getExactChromsomeNameMatch(filename);
    if (chr != null) {
        nameMap.put(name, new ChromosomeWithOffset(chr, 0));
        return nameMap.get(name);
    }
    chr = genome.getExactChromsomeNameMatch(filename.toUpperCase());
    if (chr != null) {
        nameMap.put(name, new ChromosomeWithOffset(chr, 0));
        return nameMap.get(name);
    }
    chr = genome.getExactChromsomeNameMatch(filename.toLowerCase());
    if (chr != null) {
        nameMap.put(name, new ChromosomeWithOffset(chr, 0));
        return nameMap.get(name);
    }
    // We could also try the first text before a space in the string
    String[] spaceSections = name.trim().split("\\s+");
    chr = genome.getExactChromsomeNameMatch(spaceSections[0]);
    if (chr != null) {
        nameMap.put(name, new ChromosomeWithOffset(chr, 0));
        return nameMap.get(name);
    }
    // If we get this far then all is lost and we have to give up.
    if (rememberFailures) {
        nameMap.put(name, null);
    }
    throw new IllegalArgumentException("Couldn't extract a valid name from '" + name + "'");
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Aggregations

Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)78 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)47 Vector (java.util.Vector)36 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)23 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)23 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)22 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)12 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)11 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)8 ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)8 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)7 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)7 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)7 IOException (java.io.IOException)6 File (java.io.File)5 Hashtable (java.util.Hashtable)5 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)5 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)5 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)4