Search in sources :

Example 11 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class ContigProbeGenerator method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<Probe> newProbes = new Vector<Probe>();
    for (int c = 0; c < chromosomes.length; c++) {
        // Time for an update
        updateGenerationProgress("Processed " + c + " chromosomes", c, chromosomes.length);
        // We'll merge together the reads for all of the selected DataStores and
        // compute a single set of probes which covers all of them.
        ReadsWithCounts[] v = new ReadsWithCounts[selectedStores.length];
        for (int s = 0; s < selectedStores.length; s++) {
            v[s] = selectedStores[s].getReadsForChromosome(chromosomes[c]);
        }
        ReadsWithCounts rawReads = new ReadsWithCounts(v);
        v = null;
        // We now want to convert this list into a non-redundant set of
        // read positions with counts.  If we don't do this then we get
        // appalling performance where we have many reads mapped at the
        // same position
        // Our default is to do all strands at once
        int[] strandsToTry = new int[] { 100 };
        if (separateStrands.isSelected()) {
            strandsToTry = new int[] { Location.FORWARD, Location.REVERSE, Location.UNKNOWN };
        }
        for (int strand = 0; strand < strandsToTry.length; strand++) {
            ReadsWithCounts reads = getNonRedundantReads(rawReads, strandsToTry[strand]);
            if (reads.totalCount() == 0) {
                // System.err.println("Skipping strand "+strandsToTry[strand]+" on chr "+chromosomes[c]);
                continue;
            }
            int strandForNewProbes = Location.UNKNOWN;
            if (strandsToTry.length > 1) {
                strandForNewProbes = strandsToTry[strand];
            }
            int start = -1;
            // We now start a process where we work out at what point we cross the
            // threshold of having more than depthCutoff reads overlapping at any
            // point
            LinkedList<SequenceReadWithCount> currentSet = new LinkedList<SequenceReadWithCount>();
            int currentSetSize = 0;
            for (int r = 0; r < reads.reads.length; r++) {
                // See if we need to quit
                if (cancel) {
                    generationCancelled();
                    return;
                }
                while (currentSetSize > 0 && SequenceRead.end(currentSet.getFirst().read) < SequenceRead.start(reads.reads[r])) {
                    SequenceReadWithCount lastRead = currentSet.removeFirst();
                    currentSetSize -= lastRead.count;
                    if (start > 0 && currentSetSize < depthCutoff) {
                        // We just got to the end of a probe
                        Probe p = new Probe(chromosomes[c], start, SequenceRead.end(lastRead.read), strandForNewProbes);
                        // Check to see if we have a previous probe against which we can check
                        Probe lastProbe = null;
                        if (!newProbes.isEmpty())
                            lastProbe = newProbes.lastElement();
                        // Can we merge?
                        if (lastProbe != null && p.chromosome() == lastProbe.chromosome() && p.strand() == lastProbe.strand() && p.start() - lastProbe.end() <= distance) {
                            // Remove the last probe from the stored set
                            newProbes.remove(newProbes.size() - 1);
                            // Expand this probe to cover the last one and add it to the stored set
                            newProbes.add(new Probe(p.chromosome(), lastProbe.start(), p.end(), strandForNewProbes));
                        } else if (lastProbe != null) {
                            // We might still remove this if it's too small
                            if (lastProbe.length() < minSize) {
                                newProbes.remove(newProbes.size() - 1);
                            }
                            // We still need to add the new probe
                            newProbes.add(p);
                        } else {
                            newProbes.add(p);
                        }
                        start = -1;
                    }
                }
                // If there's nothing there already then just add it
                if (currentSetSize == 0) {
                    currentSet.add(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                    currentSetSize += reads.counts[r];
                } else // If there are reads in the current set then we need to add this read
                // so that the current set is ordered by the end positions of the
                // reads, with the earliest end first.  We therefore start from the back
                // and work our way to the front, as soon as we see an entry whose end
                // is lower than ours we add ourselves after that
                {
                    // Now we add this read at a position based on its end position
                    ListIterator<SequenceReadWithCount> it = currentSet.listIterator(currentSet.size());
                    while (true) {
                        // If we reach the front of the set then we add ourselves to the front
                        if (!it.hasPrevious()) {
                            currentSet.addFirst(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                            currentSetSize += reads.counts[r];
                            break;
                        } else {
                            SequenceReadWithCount previousRead = it.previous();
                            if (SequenceRead.end(previousRead.read) < SequenceRead.end(reads.reads[r])) {
                                // We want to add ourselves after this element so backtrack
                                // by one position (which must exist because we just went
                                // past it
                                it.next();
                                it.add(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                                currentSetSize += reads.counts[r];
                                break;
                            }
                        }
                    }
                }
                // See if we crossed the threshold for starting a new probe
                if (start < 0 && currentSetSize >= depthCutoff) {
                    start = SequenceRead.start(reads.reads[r]);
                }
            }
            // out of the so far unprocessed reads on this chromosome
            if (start > 0) {
                Probe p = new Probe(chromosomes[c], start, SequenceRead.end(currentSet.getFirst().read), strandForNewProbes);
                // Check to see if we can merge with the last probe made
                Probe lastProbe = null;
                if (!newProbes.isEmpty())
                    lastProbe = newProbes.lastElement();
                // Can we merge?
                if (lastProbe != null && p.chromosome() == lastProbe.chromosome() && p.start() - lastProbe.end() <= distance) {
                    newProbes.remove(newProbes.size() - 1);
                    newProbes.add(new Probe(p.chromosome(), lastProbe.start(), p.end(), strandForNewProbes));
                } else if (lastProbe != null) {
                    // We might still remove this if it's too small
                    if (lastProbe.length() < minSize) {
                        newProbes.remove(newProbes.size() - 1);
                    }
                    // final chance
                    if (p.length() > minSize) {
                        newProbes.add(p);
                    }
                } else {
                    // Add the remaining probe if it's big enough.
                    if (p.length() > minSize) {
                        newProbes.add(p);
                    }
                }
            }
        }
    }
    Probe[] finalList = newProbes.toArray(new Probe[0]);
    newProbes.clear();
    ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
    generationComplete(finalSet);
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) LinkedList(java.util.LinkedList) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) Vector(java.util.Vector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector)

Example 12 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class DataSet method loadCacheForChromosome.

private synchronized void loadCacheForChromosome(Chromosome c) {
    // Check if we need to reset which chromosome was loaded last.
    boolean needToUpdate = lastCachedChromosome == null || lastCachedChromosome != c;
    if (needToUpdate) {
        // System.err.println("Cache miss for "+this.name()+" requested "+c+" but last cached was "+lastCachedChromosome);
        lastCachedChromosome = c;
        lastProbeLocation = 0;
        lastIndex = 0;
        if (SeqMonkApplication.getInstance() != null) {
            SeqMonkApplication.getInstance().cacheUsed();
        }
        // Check to see if we even have any data for this chromosome
        if (!readData.containsKey(c)) {
            lastCachedReads = new ReadsWithCounts(new long[0]);
        } else {
            // and one for the counts.
            try {
                ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(readData.get(c).readsWithCountsTempFile)));
                lastCachedReads = (ReadsWithCounts) ois.readObject();
                ois.close();
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) ObjectInputStream(java.io.ObjectInputStream)

Example 13 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class SeqMonkDataWriter method printStandardDataSet.

private boolean printStandardDataSet(DataSet set, PrintStream p, int index, int indexTotal) throws IOException {
    p.println(set.getTotalReadCount() + "\t" + set.name());
    // Go through one chromosome at a time.
    Chromosome[] chrs = data.genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        ReadsWithCounts reads = set.getReadsForChromosome(chrs[c]);
        p.println(chrs[c].name() + "\t" + reads.totalCount());
        for (int j = 0; j < reads.reads.length; j++) {
            if (cancel) {
                cancelled(p);
                return false;
            }
            if ((j % (1 + (reads.reads.length / 10))) == 0) {
                Enumeration<ProgressListener> e2 = listeners.elements();
                while (e2.hasMoreElements()) {
                    e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
                }
            }
            p.println(reads.reads[j] + "\t" + reads.counts[j]);
        }
    }
    // Print a blank line after the last chromosome
    p.println("");
    return true;
}
Also used : ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Aggregations

ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)13 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)8 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)4 IntVector (uk.ac.babraham.SeqMonk.Utilities.IntVector)4 LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector)4 Vector (java.util.Vector)2 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)2 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)2 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 LinkedList (java.util.LinkedList)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)1