use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.
the class DataSet method getReadsForProbe.
/*
 * (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.DataTypes.DataStore#getReadsForProbe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)
 *
 * Returns the reads overlapping the supplied probe. The data set is finalised
 * first if necessary and the cache for the probe's chromosome is loaded. The
 * fields lastIndex and lastProbeLocation remember where the previous lookup
 * ended up so that a following lookup on the same chromosome does not have to
 * rescan the read array from the start.
 */
public long[] getReadsForProbe(Probe p) {
    if (!isFinalised) {
        finalise();
    }

    loadCacheForChromosome(p.chromosome());

    // Keep a local reference to the cached reads so that we keep working on the
    // same arrays even if something else replaces the cache while we're busy;
    // otherwise we can get index errors.
    ReadsWithCounts cached = lastCachedReads;
    if (cached.reads.length == 0) {
        return new long[0];
    }

    LongVector hitReads = new LongVector();
    IntVector hitCounts = new IntVector();

    boolean sameChromosome = lastCachedChromosome != null && p.chromosome() == lastCachedChromosome;

    int scanFrom;
    if (sameChromosome && (lastProbeLocation == 0 || SequenceRead.compare(p.packedPosition(), lastProbeLocation) >= 0)) {
        // This probe doesn't sort before the previous one, so we can carry on
        // from the index we cached last time.
        scanFrom = lastIndex;
    } else if (sameChromosome) {
        // Same chromosome, but this probe sorts before the previous one. Rewind
        // the cached index until we're far enough back that we can't have
        // missed even the longest read in the set.
        int maxLength = getMaxReadLength();
        while (lastIndex > 0 && p.start() - SequenceRead.start(cached.reads[lastIndex]) <= maxLength) {
            lastIndex--;
        }
        scanFrom = lastIndex;
    } else {
        // If we're on a different chromosome then start from the very beginning
        scanFrom = 0;
        lastIndex = 0;
    }

    // Can't see how this would happen, but we had a report showing this.
    if (scanFrom < 0) {
        scanFrom = 0;
    }

    lastProbeLocation = p.packedPosition();

    // We now go forward to see what we can find
    boolean indexCached = false;
    for (int i = scanFrom; i < cached.reads.length; i++) {
        long candidate = cached.reads[i];

        // Reads come in order, so we can stop when we've seen enough.
        if (SequenceRead.start(candidate) > p.end()) {
            break;
        }
        if (!SequenceRead.overlaps(candidate, p.packedPosition())) {
            continue;
        }

        // The first overlapping read becomes the cached start point for the
        // next lookup.
        if (!indexCached) {
            lastIndex = i;
            indexCached = true;
        }
        hitReads.add(candidate);
        hitCounts.add(cached.counts[i]);
    }

    return expandReadsAndCounts(hitReads.toArray(), hitCounts.toArray());
}
use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.
the class HiCHitCollection method getAllSourcePositions.
/**
 * Gathers the source positions for every key held in {@code hits} into a
 * single array and sorts that array with {@code SequenceRead.sort}.
 *
 * @return the combined, sorted source positions
 */
public long[] getAllSourcePositions() {
    LongVector collected = new LongVector();
    for (Enumeration<String> names = hits.keys(); names.hasMoreElements();) {
        for (long position : getSourcePositionsForChromosome(names.nextElement())) {
            collected.add(position);
        }
    }
    long[] sortedPositions = collected.toArray();
    SequenceRead.sort(sortedPositions);
    return sortedPositions;
}
use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.
the class FeatureGroup method getSubLocations.
/**
 * Builds the merged set of sub-locations covered by the features in this
 * group.
 *
 * If the group holds exactly one feature its location is returned directly
 * (expanded to its sub-locations when it is a {@code SplitLocation}).
 * Otherwise the packed positions of every location / sub-location are
 * collected, sorted with {@code SequenceRead.sort}, and overlapping entries
 * are merged into single locations. A merged location keeps the strand of
 * the first position which started it.
 *
 * @return the merged locations, or an empty array if the features contribute
 *         no locations at all
 */
public Location[] getSubLocations() {
    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }

    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();

    // With nothing collected there is nothing to merge. Without this guard the
    // locs[0] access below would throw an ArrayIndexOutOfBoundsException.
    if (locs.length == 0) {
        return new Location[0];
    }

    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // They overlap and the new one reaches further, so extend the current region.
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // This ends no later than the current region, so it adds nothing new.
            continue;
        } else {
            // They don't overlap, so finish the current region and start a new one.
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));

    return mergedLocs.toArray(new Location[0]);
}
use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.
the class ChromosomeDataTrack method assignSlots.
/**
 * Recalculates the y positions of the read slots for the current track height
 * and then assigns the reads on the currently displayed chromosome to those
 * slots.
 *
 * The work is skipped entirely if the height, read display mode, read density
 * and probe drawing flag are all unchanged since the last call. On completion
 * the fields {@code slotYValues}, {@code lastReadXEnds}, {@code reads} and
 * {@code slotValues} have been replaced. Reads which do not fit into any slot
 * are simply not included in {@code reads}.
 */
private void assignSlots() {
    if (getHeight() == height && DisplayPreferences.getInstance().getReadDisplay() == lastSplitMode && thisReadDensity == lastReadDensity && drawProbes == lastDrawProbes) {
        // Nothing to do.
        return;
    }

    // Cache the values so we might be able to skip this next time.
    height = getHeight();
    lastReadDensity = thisReadDensity;
    lastDrawProbes = drawProbes;
    lastSplitMode = DisplayPreferences.getInstance().getReadDisplay();

    // Lets recalculate the slot values
    /*
     * Each slot is a shaded area of [readHeight]px separated by a
     * blank [readSpace]px area. This means there are [readHeight+readSpace]px between
     * adjacent slots. Because height might not be even
     * we need to calculate for the smallest half (hence
     * the divide by 2 and later multiply by 2).
     *
     * Finally we leave the top and bottom slots empty so
     * we can distinguish between tracks (hence the -2 at
     * the end).
     *
     * I've changed the -2 to -1 since there should always be an odd
     * number of slots (a central one and then pairs around it)
     *
     * If we don't have much space for each lane then we can get
     * negative slot counts, and we can't let that happen!
     *
     * We also calculate differently depending on whether we have to
     * draw probes as well. If we're drawing probes we only
     * have half of the lane to work with. If we're just
     * drawing reads we've got the whole space.
     */

    // We'll only use half of the height if we're either drawing probes, or if
    // we're a HiC dataset where the bottom half will show interactions. The
    // correction is therefore 2 in those cases and 1 otherwise.
    int halfHeightCorrection = 1;
    if (drawProbes || isHiC) {
        halfHeightCorrection = 2;
    }

    int slotCount = (((height / (2 * halfHeightCorrection)) / (readHeight + readSpace)) * 2) - 1;
    if (slotCount < 1) {
        slotCount = 1;
    }

    slotYValues = new int[slotCount];

    // Slot 0 sits on the mid line, then slots alternate above (odd indices)
    // and below (even indices) it, moving one step further out each pair.
    int mid = height / (2 * halfHeightCorrection);
    for (int i = 0; i < slotYValues.length; i++) {
        if (i == 0) {
            slotYValues[i] = mid;
        } else if (i % 2 == 0) {
            // We're going down
            slotYValues[i] = mid + ((readHeight + readSpace) * (i / 2));
        } else {
            // We're going up
            slotYValues[i] = mid - ((readHeight + readSpace) * ((i + 1) / 2));
        }
    }

    // We now need to assign each read to a slot.
    // We're going to go back to the original source for the reads. That way we only need to keep
    // hold of the ones which are assignable in this height of view which could save us a lot of
    // memory
    ReadsWithCounts rwc = data.getReadsForChromosome(DisplayPreferences.getInstance().getCurrentChromosome());

    // We'll start a temporary list of the reads which we can draw, and this will be what we put together.
    LongVector drawableReads = new LongVector();
    IntVector drawableSlotValues = new IntVector();

    // We can also make the array of cached positions to optimise drawing
    lastReadXEnds = new int[slotCount];

    // The lastBase array keeps track of the last base to be drawn in each
    // slot. A new int[] is already filled with zeros so no explicit
    // initialisation is needed.
    int[] lastBase = new int[slotCount];

    if (lastSplitMode == DisplayPreferences.READ_DISPLAY_COMBINED) {

        // To save doing a lot of processing we cache the earliest position
        // at which any slot becomes free whenever a read fails to fit, so
        // we can quickly skip over reads which are never going to fit.
        // A value of 0 means no skip position is currently cached.
        int nextPossibleSlot = 0;

        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];

            READ: for (int c = 0; c < rwc.counts[r]; c++) {

                if (nextPossibleSlot != 0) {
                    // See if we can quickly skip this read. This must look at the
                    // read currently being placed, not at the 'reads' field, which
                    // holds the drawable reads from a previous call.
                    if (nextPossibleSlot > SequenceRead.start(read)) {
                        continue;
                    } else {
                        // Reset this as we're adding reads again.
                        nextPossibleSlot = 0;
                    }
                }

                for (int s = 0; s < slotCount; s++) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableReads.add(read);
                        drawableSlotValues.add(s);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }

                // Nothing fitted. Record the smallest lastBase value so we can
                // skip stuff quickly in future: a read can only fit once its
                // start is beyond the last base of at least one slot. slotCount
                // is always at least 1 so lastBase[0] exists.
                int earliestFree = lastBase[0];
                for (int s = 1; s < slotCount; s++) {
                    if (lastBase[s] < earliestFree) {
                        earliestFree = lastBase[s];
                    }
                }
                nextPossibleSlot = earliestFree;
            }
        }
    } else if (lastSplitMode == DisplayPreferences.READ_DISPLAY_SEPARATED) {

        // Forward reads use the odd slots (above the mid line) and reverse
        // reads use the even slots from 2 upwards (below it). Reads with any
        // other strand can only go in slot 0.
        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];

            READ: for (int c = 0; c < rwc.counts[r]; c++) {

                int startSlot = 0;
                int interval = slotCount;
                if (SequenceRead.strand(read) == Location.FORWARD) {
                    startSlot = 1;
                    interval = 2;
                } else if (SequenceRead.strand(read) == Location.REVERSE) {
                    startSlot = 2;
                    interval = 2;
                }

                for (int s = startSlot; s < slotCount; s += interval) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableSlotValues.add(s);
                        drawableReads.add(read);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }

                // If we get here then we don't have enough
                // slots to draw the reads in this chromosome.
                // In this case we just don't draw them in this
                // display. That just means we don't add them
                // to anything.
            }
        }
    }

    reads = drawableReads.toArray();
    slotValues = drawableSlotValues.toArray();
}
use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.
the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.
/**
 * Collapses the locations of a set of features into a sorted array of
 * merged, non-overlapping locations.
 *
 * A single feature is returned as is (expanded to its sub-locations when its
 * location is a {@code SplitLocation}). Otherwise the packed positions of
 * every location / sub-location are collected, sorted with
 * {@code SequenceRead.sort}, and overlapping entries are merged. A merged
 * location keeps the strand of the first position which started it.
 *
 * @param features the features whose locations should be merged
 * @param useSubFeatures not read by this method; sub-locations of split
 *        locations are always used
 * @return the merged locations sorted by their natural ordering, or an empty
 *         array if the features contribute no locations
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }

    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();

    // With nothing collected there is nothing to merge. Without this guard the
    // locs[0] access below would throw an ArrayIndexOutOfBoundsException.
    if (locs.length == 0) {
        return new Location[0];
    }

    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // They overlap and the new one reaches further, so extend the current region.
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // This ends no later than the current region, so it adds nothing new.
            continue;
        } else {
            // They don't overlap, so finish the current region and start a new one.
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));

    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);

    // Return the sorted array itself. Passing it back into
    // mergedLocs.toArray(...) would refill it in the Vector's order and throw
    // the sort away.
    return finalLocations;
}
Aggregations