Use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews: the run method of the ContigProbeGenerator class.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<Probe> newProbes = new Vector<Probe>();

    for (int c = 0; c < chromosomes.length; c++) {

        // Time for an update
        updateGenerationProgress("Processed " + c + " chromosomes", c, chromosomes.length);

        // We'll merge together the reads for all of the selected DataStores and
        // compute a single set of probes which covers all of them.
        ReadsWithCounts[] v = new ReadsWithCounts[selectedStores.length];
        for (int s = 0; s < selectedStores.length; s++) {
            v[s] = selectedStores[s].getReadsForChromosome(chromosomes[c]);
        }
        ReadsWithCounts rawReads = new ReadsWithCounts(v);
        v = null;

        // We now want to convert this list into a non-redundant set of
        // read positions with counts. If we don't do this then we get
        // appalling performance where we have many reads mapped at the
        // same position.

        // Our default is to do all strands at once
        int[] strandsToTry = new int[] { 100 };
        if (separateStrands.isSelected()) {
            strandsToTry = new int[] { Location.FORWARD, Location.REVERSE, Location.UNKNOWN };
        }

        for (int strand = 0; strand < strandsToTry.length; strand++) {

            ReadsWithCounts reads = getNonRedundantReads(rawReads, strandsToTry[strand]);

            if (reads.totalCount() == 0) {
                // System.err.println("Skipping strand "+strandsToTry[strand]+" on chr "+chromosomes[c]);
                continue;
            }

            int strandForNewProbes = Location.UNKNOWN;
            if (strandsToTry.length > 1) {
                strandForNewProbes = strandsToTry[strand];
            }

            int start = -1;

            // We now start a process where we work out at what point we cross the
            // threshold of having more than depthCutoff reads overlapping at any
            // point
            LinkedList<SequenceReadWithCount> currentSet = new LinkedList<SequenceReadWithCount>();
            int currentSetSize = 0;

            for (int r = 0; r < reads.reads.length; r++) {

                // See if we need to quit
                if (cancel) {
                    generationCancelled();
                    return;
                }

                while (currentSetSize > 0 && SequenceRead.end(currentSet.getFirst().read) < SequenceRead.start(reads.reads[r])) {
                    SequenceReadWithCount lastRead = currentSet.removeFirst();
                    currentSetSize -= lastRead.count;

                    if (start > 0 && currentSetSize < depthCutoff) {
                        // We just got to the end of a probe
                        Probe p = new Probe(chromosomes[c], start, SequenceRead.end(lastRead.read), strandForNewProbes);

                        // Check to see if we have a previous probe against which we can check
                        Probe lastProbe = null;
                        if (!newProbes.isEmpty())
                            lastProbe = newProbes.lastElement();

                        // Can we merge?
                        if (lastProbe != null && p.chromosome() == lastProbe.chromosome() && p.strand() == lastProbe.strand() && p.start() - lastProbe.end() <= distance) {
                            // Remove the last probe from the stored set
                            newProbes.remove(newProbes.size() - 1);
                            // Expand this probe to cover the last one and add it to the stored set
                            newProbes.add(new Probe(p.chromosome(), lastProbe.start(), p.end(), strandForNewProbes));
                        } else if (lastProbe != null) {
                            // We might still remove this if it's too small
                            if (lastProbe.length() < minSize) {
                                newProbes.remove(newProbes.size() - 1);
                            }
                            // We still need to add the new probe
                            newProbes.add(p);
                        } else {
                            newProbes.add(p);
                        }

                        start = -1;
                    }
                }

                // If there's nothing there already then just add it
                if (currentSetSize == 0) {
                    currentSet.add(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                    currentSetSize += reads.counts[r];
                } else {
                    // If there are reads in the current set then we need to add this read
                    // so that the current set is ordered by the end positions of the
                    // reads, with the earliest end first. We therefore start from the back
                    // and work our way to the front; as soon as we see an entry whose end
                    // is lower than ours we add ourselves after that.

                    // Now we add this read at a position based on its end position
                    ListIterator<SequenceReadWithCount> it = currentSet.listIterator(currentSet.size());

                    while (true) {

                        // If we reach the front of the set then we add ourselves to the front
                        if (!it.hasPrevious()) {
                            currentSet.addFirst(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                            currentSetSize += reads.counts[r];
                            break;
                        } else {
                            SequenceReadWithCount previousRead = it.previous();
                            if (SequenceRead.end(previousRead.read) < SequenceRead.end(reads.reads[r])) {
                                // We want to add ourselves after this element so backtrack
                                // by one position (which must exist because we just went
                                // past it)
                                it.next();
                                it.add(new SequenceReadWithCount(reads.reads[r], reads.counts[r]));
                                currentSetSize += reads.counts[r];
                                break;
                            }
                        }
                    }
                }

                // See if we crossed the threshold for starting a new probe
                if (start < 0 && currentSetSize >= depthCutoff) {
                    start = SequenceRead.start(reads.reads[r]);
                }
            }

            // See if we need to make a final probe out of the so far unprocessed reads
            // on this chromosome
            if (start > 0) {
                Probe p = new Probe(chromosomes[c], start, SequenceRead.end(currentSet.getFirst().read), strandForNewProbes);

                // Check to see if we can merge with the last probe made
                Probe lastProbe = null;
                if (!newProbes.isEmpty())
                    lastProbe = newProbes.lastElement();

                // Can we merge?
                if (lastProbe != null && p.chromosome() == lastProbe.chromosome() && p.start() - lastProbe.end() <= distance) {
                    newProbes.remove(newProbes.size() - 1);
                    newProbes.add(new Probe(p.chromosome(), lastProbe.start(), p.end(), strandForNewProbes));
                } else if (lastProbe != null) {
                    // We might still remove this if it's too small
                    if (lastProbe.length() < minSize) {
                        newProbes.remove(newProbes.size() - 1);
                    }
                    // Final chance: add the remaining probe if it's big enough
                    if (p.length() > minSize) {
                        newProbes.add(p);
                    }
                } else {
                    // Add the remaining probe if it's big enough
                    if (p.length() > minSize) {
                        newProbes.add(p);
                    }
                }
            }
        }
    }

    Probe[] finalList = newProbes.toArray(new Probe[0]);
    newProbes.clear();

    ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
    generationComplete(finalSet);
}
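The subtle part of run() is the bookkeeping around currentSet, which is kept ordered by read end position so that reads can be retired as the sweep moves right along the chromosome. As a rough, stand-alone illustration of the same idea, the sketch below replaces SeqMonk's packed long reads and hand-sorted LinkedList with plain start/end/count arrays and a PriorityQueue ordered by end position. Every name in it is illustrative rather than part of SeqMonk, it assumes the reads arrive sorted by start position (which the sweep above also relies on), and it omits the probe-merging and minimum-size steps.

import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;

/** Illustrative, stand-alone sketch of the depth-cutoff sweep used by ContigProbeGenerator.run(). */
public class ContigSweepSketch {

    /** Returns [start, end] pairs over which the summed read depth stays at or above depthCutoff. */
    public static List<int[]> contigs(int[] starts, int[] ends, int[] counts, int depthCutoff) {
        // Open reads ordered by end position, playing the role of the LinkedList that
        // run() keeps sorted by SequenceRead.end(). Each entry is {end, count}.
        PriorityQueue<int[]> open = new PriorityQueue<int[]>((a, b) -> Integer.compare(a[0], b[0]));
        List<int[]> result = new ArrayList<int[]>();
        int depth = 0;
        int contigStart = -1;

        for (int r = 0; r < starts.length; r++) {
            // Retire any open reads which end before this read starts.
            while (!open.isEmpty() && open.peek()[0] < starts[r]) {
                int[] retired = open.poll();
                depth -= retired[1];
                // Dropping below the cutoff closes the contig at the retired read's end.
                if (contigStart > 0 && depth < depthCutoff) {
                    result.add(new int[] { contigStart, retired[0] });
                    contigStart = -1;
                }
            }

            open.add(new int[] { ends[r], counts[r] });
            depth += counts[r];

            // Crossing the cutoff opens a new contig at this read's start.
            if (contigStart < 0 && depth >= depthCutoff) {
                contigStart = starts[r];
            }
        }

        // Close out any contig still open at the end of the chromosome.
        if (contigStart > 0 && !open.isEmpty()) {
            result.add(new int[] { contigStart, open.peek()[0] });
        }
        return result;
    }
}

A heap is used here purely for brevity; the retirement condition and the open/close logic around the depth cutoff mirror the loop above.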
Use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews: the loadCacheForChromosome method of the DataSet class.
private synchronized void loadCacheForChromosome(Chromosome c) {
    // Check if we need to reset which chromosome was loaded last.
    boolean needToUpdate = lastCachedChromosome == null || lastCachedChromosome != c;

    if (needToUpdate) {
        // System.err.println("Cache miss for "+this.name()+" requested "+c+" but last cached was "+lastCachedChromosome);
        lastCachedChromosome = c;
        lastProbeLocation = 0;
        lastIndex = 0;

        if (SeqMonkApplication.getInstance() != null) {
            SeqMonkApplication.getInstance().cacheUsed();
        }

        // Check to see if we even have any data for this chromosome
        if (!readData.containsKey(c)) {
            lastCachedReads = new ReadsWithCounts(new long[0]);
        } else {
            // Read the cached reads and their counts back from the temp file.
            try {
                ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(readData.get(c).readsWithCountsTempFile)));
                lastCachedReads = (ReadsWithCounts) ois.readObject();
                ois.close();
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }
    }
}
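Only the read side of the cache is shown here. Assuming ReadsWithCounts is Serializable (implied by the ObjectInputStream cast above) and that readsWithCountsTempFile simply holds a serialized ReadsWithCounts object, a hypothetical write-side helper could look like the sketch below; the class and method names are illustrative, not SeqMonk's actual caching code.

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;

import uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts;

/** Hypothetical write-side counterpart to loadCacheForChromosome (not SeqMonk code). */
public class ReadsCacheWriterSketch {

    // Serialize one chromosome's ReadsWithCounts to a temp file so that it can
    // later be read back with an ObjectInputStream, as loadCacheForChromosome does.
    public static File cacheReadsForChromosome(ReadsWithCounts reads) throws IOException {
        File tempFile = File.createTempFile("seqmonk_reads", ".temp");
        tempFile.deleteOnExit();

        ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile)));
        oos.writeObject(reads);
        oos.close();

        return tempFile;
    }
}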
Use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews: the printStandardDataSet method of the SeqMonkDataWriter class.
private boolean printStandardDataSet(DataSet set, PrintStream p, int index, int indexTotal) throws IOException {
    p.println(set.getTotalReadCount() + "\t" + set.name());

    // Go through one chromosome at a time.
    Chromosome[] chrs = data.genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        ReadsWithCounts reads = set.getReadsForChromosome(chrs[c]);
        p.println(chrs[c].name() + "\t" + reads.totalCount());

        for (int j = 0; j < reads.reads.length; j++) {
            if (cancel) {
                cancelled(p);
                return false;
            }

            if ((j % (1 + (reads.reads.length / 10))) == 0) {
                Enumeration<ProgressListener> e2 = listeners.elements();
                while (e2.hasMoreElements()) {
                    e2.nextElement().progressUpdated("Writing data for " + set.name(), index * chrs.length + c, indexTotal * chrs.length);
                }
            }

            p.println(reads.reads[j] + "\t" + reads.counts[j]);
        }
    }

    // Print a blank line after the last chromosome
    p.println("");

    return true;
}
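The layout written above is a plain tab-separated block: one line per data set holding its total read count and name, then for each chromosome a header line with the chromosome name and its total count, followed by one line per stored position holding the packed read and its count, with a blank line at the end. As a sketch of how such a block could be read back, assuming totalCount() is the sum of the per-position counts (as its use in the other snippets suggests) and that ReadsWithCounts offers a matching (long[], int[]) constructor, a hypothetical parser might look like this:

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts;

/** Hypothetical reader for one chromosome block in the layout printed above (not SeqMonk's parser). */
public class ChromosomeBlockReaderSketch {

    public static ReadsWithCounts parseChromosomeBlock(BufferedReader br) throws IOException {
        // Header line: chromosome name, then the total read count for that chromosome.
        String[] header = br.readLine().split("\t");
        long expectedTotal = Long.parseLong(header[1]); // reads.totalCount() as written above

        List<Long> reads = new ArrayList<Long>();
        List<Integer> counts = new ArrayList<Integer>();

        // Each following line is a packed read position and its count; the counts
        // sum to the total in the header, which tells us when the block ends.
        long seen = 0;
        while (seen < expectedTotal) {
            String[] sections = br.readLine().split("\t");
            reads.add(Long.parseLong(sections[0]));
            counts.add(Integer.parseInt(sections[1]));
            seen += Integer.parseInt(sections[1]);
        }

        long[] readArray = new long[reads.size()];
        int[] countArray = new int[counts.size()];
        for (int i = 0; i < readArray.length; i++) {
            readArray[i] = reads.get(i);
            countArray[i] = counts.get(i);
        }

        // Assumes ReadsWithCounts has a (long[] reads, int[] counts) constructor
        // alongside the constructors used in the snippets above.
        return new ReadsWithCounts(readArray, countArray);
    }
}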