use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class ChromosomeDataTrack method assignSlots.
/**
* Assign slots.
*/
private void assignSlots() {
if (getHeight() == height && DisplayPreferences.getInstance().getReadDisplay() == lastSplitMode && thisReadDensity == lastReadDensity && drawProbes == lastDrawProbes) {
// Nothing to do.
return;
}
// Cache the values so we might be able to skip this next time.
height = getHeight();
lastReadDensity = thisReadDensity;
lastDrawProbes = drawProbes;
lastSplitMode = DisplayPreferences.getInstance().getReadDisplay();
// Lets recalculate the slot values
/*
* Each slot is a shaded area of [readHeight]px separated by a
* blank [readSpace]px area. This means there are [readHeight+readSpace]px between
* adjacent slots. Because height might not be even
* we need to calculate for the smallest half (hence
* the divide by 2 and later multiply by 2.
*
* Finally we leave the top and bottom slots empty so
* we can distinguish between tracks (hence the -2 at
* the end.
*
* I've changed the -2 to -1 since there should always be an odd
* number of slots (a central one and then pairs around it)
*
* If we don't have much space for each lane then we can get
* negative slot counts, and we can't let that happen!
*
* We also calculate differently depending on whether we have to
* draw probes as well. If we're drawing probes we only
* have half of the lane to work with. If we're just
* drawing reads we've got the whole space.
*/
// We'll only use half of the height if we're either drawing probes, or if
// we're a HiC dataset where the bottom half will show interactions.
// int halfHeightCorrection = (drawProbes ? 2 : 1);
int halfHeightCorrection = 1;
if (drawProbes || isHiC) {
halfHeightCorrection = 2;
}
/*
* This gets a value of 2 if we're drawing probes as well and 1
* if we're not.
*/
int slotCount = (((height / (2 * halfHeightCorrection)) / (readHeight + readSpace)) * 2) - 1;
if (slotCount < 1)
slotCount = 1;
slotYValues = new int[slotCount];
// System.err.println("There will be "+slotYValues.length+" slots");
int mid = height / (2 * halfHeightCorrection);
for (int i = 0; i < slotYValues.length; i++) {
if (i == 0) {
slotYValues[i] = mid;
} else if (i % 2 == 0) {
// We're going down
slotYValues[i] = mid + ((readHeight + readSpace) * (i / 2));
} else {
// We're going up
slotYValues[i] = mid - ((readHeight + readSpace) * ((i + 1) / 2));
}
}
// We now need to assign each probe to a slot
// We're going to go back to the original source for the reads. That way we only need to keep
// hold of the ones which are assignable in this height of view which could save us a lot of
// memory
ReadsWithCounts rwc = data.getReadsForChromosome(DisplayPreferences.getInstance().getCurrentChromosome());
// We'll start a temporary list of the reads which we can draw, and this will be what we put together.
LongVector drawableReads = new LongVector();
IntVector drawableSlotValues = new IntVector();
// We can also make the array of cached positions to optimise drawing
lastReadXEnds = new int[slotCount];
// The lastBase array keeps track of the last
// base to be drawn in each slot.
int[] lastBase = new int[slotCount];
for (int i = 0; i < lastBase.length; i++) {
lastBase[i] = 0;
}
// fit them
if (lastSplitMode == DisplayPreferences.READ_DISPLAY_COMBINED) {
// To save doing a lot of processing we're going to cache the
// next available position if we're off the end of the display
// so we can quickly skip over reads which are never going to
// fit
int nextPossibleSlot = 0;
for (int r = 0; r < rwc.reads.length; r++) {
long read = rwc.reads[r];
READ: for (int c = 0; c < rwc.counts[r]; c++) {
if (nextPossibleSlot != 0) {
// See if we can quickly skip this read
if (nextPossibleSlot > SequenceRead.start(reads[r])) {
continue;
} else {
// Reset this as we're adding reads again.
nextPossibleSlot = 0;
}
}
for (int s = 0; s < slotCount; s++) {
if (lastBase[s] < SequenceRead.start(read)) {
drawableReads.add(read);
drawableSlotValues.add(s);
lastBase[s] = SequenceRead.end(read);
continue READ;
}
}
// skip stuff quickly in future
for (int s = 0; s < slotCount; s++) {
if (lastBase[s] < nextPossibleSlot)
nextPossibleSlot = lastBase[s];
}
}
}
} else if (lastSplitMode == DisplayPreferences.READ_DISPLAY_SEPARATED) {
// reads go below.
for (int r = 0; r < rwc.reads.length; r++) {
long read = rwc.reads[r];
READ: for (int c = 0; c < rwc.counts[r]; c++) {
int startSlot = 0;
int interval = slotCount;
if (SequenceRead.strand(read) == Location.FORWARD) {
startSlot = 1;
interval = 2;
} else if (SequenceRead.strand(read) == Location.REVERSE) {
startSlot = 2;
interval = 2;
}
for (int s = startSlot; s < slotCount; s += interval) {
if (lastBase[s] < SequenceRead.start(read)) {
drawableSlotValues.add(s);
drawableReads.add(read);
lastBase[s] = SequenceRead.end(read);
continue READ;
}
}
// If we get here then we don't have enough
// slots to draw the reads in this chromosome.
// In this case we just don't draw them in this
// display. That just measns we don't add them
// to anything.
}
}
}
reads = drawableReads.toArray();
slotValues = drawableSlotValues.toArray();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class DataStorePropertiesDialog method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Chromosome[] chrs = dataStore.collection().genome().getAllChromosomes();
double averageLength = 0;
int totalCount = 0;
int shortestLength = 0;
int longestLength = 0;
for (int c = 0; c < chrs.length; c++) {
ReadsWithCounts reads = dataStore.getReadsForChromosome(chrs[c]);
for (int i = 0; i < reads.reads.length; i++) {
totalCount += reads.counts[i];
if (i == 0) {
shortestLength = SequenceRead.length(reads.reads[i]);
longestLength = SequenceRead.length(reads.reads[i]);
}
if (SequenceRead.length(reads.reads[i]) < shortestLength)
shortestLength = SequenceRead.length(reads.reads[i]);
if (SequenceRead.length(reads.reads[i]) > longestLength)
longestLength = SequenceRead.length(reads.reads[i]);
averageLength += SequenceRead.length(reads.reads[i]) * reads.counts[i];
}
}
averageLength /= totalCount;
this.averageLength.setText("" + (int) averageLength + "bp (" + shortestLength + "-" + longestLength + ")");
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class ExactOverlapQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
float[] corrections = new float[data.length];
if (correctTotal) {
float largest = 0;
if (correctPerMillion) {
largest = 1000000;
}
for (int d = 0; d < data.length; d++) {
if (correctOnlyInProbes) {
corrections[d] = getTotalCountInProbes(data[d], probes);
} else {
corrections[d] = data[d].getTotalReadCount();
}
if (d == 0 && !correctPerMillion) {
largest = corrections[d];
} else {
if (!correctPerMillion && corrections[d] > largest) {
largest = corrections[d];
}
}
}
// We correct everything by the largest count
for (int d = 0; d < corrections.length; d++) {
corrections[d] = largest / corrections[d];
}
}
// To make this more efficient we'll do this chromosome by chromosome
Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
progressUpdated("Quantiating probes on " + chrs[c].name(), c, chrs.length);
Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
Arrays.sort(thisChrProbes);
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
// We'll fetch all reads for this chr and then do a count per position
ReadsWithCounts reads = data[d].getReadsForChromosome(chrs[c]);
quantitationType.resetLastRead();
int startIndex = 0;
for (int p = 0; p < thisChrProbes.length; p++) {
int rawCount = 0;
for (int r = startIndex; r < reads.reads.length; r++) {
if (SequenceRead.start(reads.reads[r]) < thisChrProbes[p].start()) {
startIndex = r;
}
if (SequenceRead.start(reads.reads[r]) > thisChrProbes[p].start())
break;
if (quantitationType.useRead(thisChrProbes[p], reads.reads[r])) {
if (SequenceRead.start(reads.reads[r]) == thisChrProbes[p].start() && SequenceRead.end(reads.reads[r]) == thisChrProbes[p].end()) {
rawCount += reads.counts[r];
}
}
}
// We have the counts now work out any correction.
float count = rawCount;
if (logTransform && count == 0) {
count = 0.9f;
}
if (correctTotal) {
count *= corrections[d];
}
if (logTransform) {
count = (float) Math.log(count) / log2;
}
data[d].setValueForProbe(thisChrProbes[p], count);
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class VisibleStoresParser method processNormalDataStore.
private DataSet processNormalDataStore(DataStore store) {
int extendBy = prefs.extendReads();
boolean reverse = prefs.reverseReads();
boolean removeStrand = prefs.removeStrandInfo();
DataSet newData = new DataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates());
// Now process the data
Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
ReadsWithCounts reads = store.getReadsForChromosome(chrs[c]);
Feature[] features = null;
if (filterByFeature) {
features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
Arrays.sort(features);
}
int currentFeaturePostion = 0;
for (int r = 0; r < reads.reads.length; r++) {
for (int ct = 0; ct < reads.counts[r]; ct++) {
long thisRead = reads.reads[r];
if (cancel) {
progressCancelled();
return null;
}
if (downsample && downsampleProbabilty < 1) {
if (Math.random() > downsampleProbabilty) {
continue;
}
}
long read;
int start = SequenceRead.start(thisRead);
int end = SequenceRead.end(thisRead);
int strand = SequenceRead.strand(thisRead);
if (filterByStrand) {
if (strand == Location.FORWARD && !keepForward)
continue;
if (strand == Location.REVERSE && !keepReverse)
continue;
if (strand == Location.UNKNOWN && !keepUnknown)
continue;
}
if (filterByLength) {
int length = SequenceRead.length(thisRead);
if (minLength != null && length < minLength)
continue;
if (maxLength != null && length > maxLength)
continue;
}
if (strand == Location.FORWARD) {
start += forwardOffset;
end += forwardOffset;
}
if (strand == Location.REVERSE) {
start -= reverseOffset;
end -= reverseOffset;
}
if (filterByFeature && features.length == 0 && !excludeFeature)
continue;
if (filterByFeature && features.length > 0) {
// See if we're comparing against the right feature
while (SequenceRead.start(thisRead) > features[currentFeaturePostion].location().end() && currentFeaturePostion < (features.length - 1)) {
currentFeaturePostion++;
}
// Test to see if we overlap
if (SequenceRead.overlaps(thisRead, features[currentFeaturePostion].location().packedPosition())) {
if (excludeFeature)
continue;
} else {
if (!excludeFeature)
continue;
}
}
if (reverse) {
if (strand == Location.FORWARD) {
strand = Location.REVERSE;
} else if (strand == Location.REVERSE) {
strand = Location.FORWARD;
}
}
if (removeStrand) {
strand = Location.UNKNOWN;
}
if (extractCentres) {
int centre = start + ((end - start) / 2);
start = centre - centreExtractContext;
end = centre + centreExtractContext;
}
if (extendBy != 0) {
// We now allow negative extensions to shorten reads
if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
end += extendBy;
if (end < start)
end = start;
} else if (strand == Location.REVERSE) {
start -= extendBy;
if (start > end)
start = end;
}
}
// We don't allow reads before the start of the chromosome
if (start < 1) {
int overrun = (0 - start) + 1;
progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// We also don't allow readings which are beyond the end of the chromosome
if (end > chrs[c].length()) {
int overrun = end - chrs[c].length();
progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// We can now make the new reading
try {
read = SequenceRead.packPosition(start, end, strand);
if (!prefs.isHiC()) {
// HiC additions are deferred until we know the other end is OK too.
newData.addData(chrs[c], read);
}
} catch (SeqMonkException e) {
progressWarningReceived(e);
continue;
}
}
}
}
return newData;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class ContigProbeGenerator method getNonRedundantReads.
/**
* Gets the non redundant reads.
*
* @param reads the reads
* @return the non redundant reads
*/
private ReadsWithCounts getNonRedundantReads(ReadsWithCounts reads, int limitToStrand) {
boolean limitStrand = false;
if (limitToStrand == Location.FORWARD || limitToStrand == Location.REVERSE || limitToStrand == Location.UNKNOWN) {
limitStrand = true;
}
if (!limitStrand) {
// It's already done.
return reads;
}
LongVector keptReads = new LongVector();
IntVector keptCounts = new IntVector();
for (int r = 0; r < reads.reads.length; r++) {
if (!readStrandType.useRead(reads.reads[r])) {
continue;
}
if (limitStrand && (SequenceRead.strand(reads.reads[r]) != limitToStrand))
continue;
keptReads.add(reads.reads[r]);
keptCounts.add(reads.counts[r]);
}
return new ReadsWithCounts(keptReads.toArray(), keptCounts.toArray());
}
Aggregations