Search in sources :

Example 1 with LongVector

use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.

the class DataSet method getReadsForProbe.

/*
 * (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.DataTypes.DataStore#getReadsForProbe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)
 *
 * Collects the cached reads on the probe's chromosome which overlap the probe
 * and returns them after passing them, with their counts, through
 * expandReadsAndCounts.
 *
 * The scan start point is remembered between calls in lastIndex /
 * lastProbeLocation so that successive probes on the same chromosome do not
 * have to rescan the read array from the beginning.
 *
 * Returns an empty array when the chromosome has no cached reads.
 */
public long[] getReadsForProbe(Probe p) {
    // Make sure the data set has been finalised before any reads are served.
    if (!isFinalised)
        finalise();
    ReadsWithCounts allReads;
    loadCacheForChromosome(p.chromosome());
    // We take a local reference to the cached reads now so that we don't get a
    // problem if something else replaces the cache whilst we're still working,
    // otherwise we get index errors.
    allReads = lastCachedReads;
    if (allReads.reads.length == 0)
        return new long[0];
    // Overlapping reads and their matching counts, collected in parallel.
    LongVector reads = new LongVector();
    IntVector counts = new IntVector();
    int startPos;
    // Case 1: same chromosome as last time and this probe is not before the
    // previous probe (or there was no previous probe location recorded), so
    // the cached index can be reused as it stands.
    if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome && (lastProbeLocation == 0 || SequenceRead.compare(p.packedPosition(), lastProbeLocation) >= 0)) {
        startPos = lastIndex;
    } else // Case 2: same chromosome, but the probe is before the previous one. Wind
    // the cached index back until we are far enough back that we can't have
    // missed even the longest read in the set.
    if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome) {
        int longestRead = getMaxReadLength();
        // NOTE: this loop decrements the lastIndex field itself, not a local copy.
        for (; lastIndex > 0; lastIndex--) {
            if (p.start() - SequenceRead.start(allReads.reads[lastIndex]) > longestRead) {
                break;
            }
        }
        startPos = lastIndex;
    } else // Case 3: if we're on a different chromosome then start from the very beginning
    {
        startPos = 0;
        lastIndex = 0;
    }
    // Can't see how this would happen, but we had a report showing this.
    if (startPos < 0)
        startPos = 0;
    // Remember where this probe was so the next call can decide whether the
    // cached index is still usable.
    lastProbeLocation = p.packedPosition();
    // We now go forward to see what we can find
    boolean cacheSet = false;
    for (int i = startPos; i < allReads.reads.length; i++) {
        // Reads come in order, so we can stop when we've seen enough.
        if (SequenceRead.start(allReads.reads[i]) > p.end()) {
            break;
        }
        if (SequenceRead.overlaps(allReads.reads[i], p.packedPosition())) {
            // The first overlapping read found becomes the new cached start index.
            if (!cacheSet) {
                lastIndex = i;
                cacheSet = true;
            }
            reads.add(allReads.reads[i]);
            counts.add(allReads.counts[i]);
        }
    }
    // expandReadsAndCounts is defined elsewhere; presumably it repeats each read
    // according to its count - confirm against its implementation.
    long[] returnReads = expandReadsAndCounts(reads.toArray(), counts.toArray());
    return returnReads;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 2 with LongVector

use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.

the class HiCHitCollection method getAllSourcePositions.

/**
 * Gathers the source positions for every chromosome key held in the hit
 * table into a single array and sorts it with {@code SequenceRead.sort}.
 *
 * @return the pooled, sorted source positions
 */
public long[] getAllSourcePositions() {
    LongVector pooled = new LongVector();
    // Walk every chromosome key and append all of its source positions.
    for (Enumeration<String> chromosomes = hits.keys(); chromosomes.hasMoreElements();) {
        for (long position : getSourcePositionsForChromosome(chromosomes.nextElement())) {
            pooled.add(position);
        }
    }
    long[] sorted = pooled.toArray();
    SequenceRead.sort(sorted);
    return sorted;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector)

Example 3 with LongVector

use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.

the class FeatureGroup method getSubLocations.

/**
 * Builds the set of non-overlapping locations covered by the features in
 * this group.
 *
 * Split locations contribute each of their sub-locations individually. All
 * positions are sorted with {@code SequenceRead.sort} and then merged in a
 * single pass: a position which overlaps the current region and ends after
 * it extends that region, a position which ends inside it is dropped, and
 * anything else closes the current region and starts a new one.
 *
 * @return the merged locations; an empty array if the group holds no
 *         features (previously this case threw an
 *         ArrayIndexOutOfBoundsException)
 */
public Location[] getSubLocations() {
    // Nothing to merge - avoid indexing into an empty position array below.
    if (features.size() == 0) {
        return new Location[0];
    }
    // Shortcut: a single feature needs no merging.
    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }
    // Flatten every feature (and every sub-location of a split feature) into
    // a single list of packed positions.
    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the region being grown; it is flushed to mergedLocs when a
    // position which can't be merged into it is encountered.
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlaps and reaches further: extend, keeping the strand of the current region.
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends within the current region: it adds nothing, so ignore it.
            continue;
        } else {
            // No overlap: the current region is complete, start a new one.
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    // Flush the final region.
    mergedLocs.add(new Location(current));
    return mergedLocs.toArray(new Location[0]);
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 4 with LongVector

use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.

the class ChromosomeDataTrack method assignSlots.

/**
 * Recalculates the vertical slot layout for this track and assigns every
 * read which can be drawn to one of those slots.
 *
 * The work is skipped when the track height, read display mode, read
 * density and probe-drawing flag are all unchanged since the last call.
 * Otherwise the method rebuilds slotYValues, resets lastReadXEnds, and
 * replaces reads / slotValues with the drawable reads and the slot index
 * assigned to each of them. Reads which do not fit in any slot are simply
 * left out of the display.
 */
private void assignSlots() {
    if (getHeight() == height && DisplayPreferences.getInstance().getReadDisplay() == lastSplitMode && thisReadDensity == lastReadDensity && drawProbes == lastDrawProbes) {
        // Nothing to do.
        return;
    }
    // Cache the values so we might be able to skip this next time.
    height = getHeight();
    lastReadDensity = thisReadDensity;
    lastDrawProbes = drawProbes;
    lastSplitMode = DisplayPreferences.getInstance().getReadDisplay();
    /*
     * Each slot is a shaded area of [readHeight]px separated by a blank
     * [readSpace]px area, so there are [readHeight+readSpace]px between
     * adjacent slots. Because the height might not be even we calculate
     * for the smallest half (hence the divide by 2 and later multiply by 2).
     *
     * There should always be an odd number of slots (a central one and then
     * pairs around it), hence the -1 at the end.
     *
     * If we don't have much space for each lane the calculation can give a
     * count below one, so the count is clamped to a minimum of 1.
     */
    // We only use half of the height if we're either drawing probes, or if
    // we're a HiC dataset where the bottom half will show interactions.
    // The correction is therefore 2 in those cases and 1 otherwise.
    int halfHeightCorrection = 1;
    if (drawProbes || isHiC) {
        halfHeightCorrection = 2;
    }
    int slotCount = (((height / (2 * halfHeightCorrection)) / (readHeight + readSpace)) * 2) - 1;
    if (slotCount < 1)
        slotCount = 1;
    slotYValues = new int[slotCount];
    // Slot 0 sits on the mid line; after that odd slots step upwards and even
    // slots step downwards, one slot spacing further out per pair.
    int mid = height / (2 * halfHeightCorrection);
    for (int i = 0; i < slotYValues.length; i++) {
        if (i == 0) {
            slotYValues[i] = mid;
        } else if (i % 2 == 0) {
            // We're going down
            slotYValues[i] = mid + ((readHeight + readSpace) * (i / 2));
        } else {
            // We're going up
            slotYValues[i] = mid - ((readHeight + readSpace) * ((i + 1) / 2));
        }
    }
    // We now need to assign each read to a slot.
    // We go back to the original source for the reads. That way we only need to keep
    // hold of the ones which are assignable in this height of view which could save us a lot of
    // memory.
    ReadsWithCounts rwc = data.getReadsForChromosome(DisplayPreferences.getInstance().getCurrentChromosome());
    // Temporary lists of the reads which we can draw and the slot each one was given.
    LongVector drawableReads = new LongVector();
    IntVector drawableSlotValues = new IntVector();
    // We can also make the array of cached positions to optimise drawing
    lastReadXEnds = new int[slotCount];
    // The lastBase array keeps track of the last base to be drawn in each
    // slot. A new int array is already zero-filled, so no explicit reset is needed.
    int[] lastBase = new int[slotCount];
    if (lastSplitMode == DisplayPreferences.READ_DISPLAY_COMBINED) {
        // To save doing a lot of processing we cache the lowest last-drawn
        // base over all slots once a read has failed to fit anywhere. Any
        // later read starting before that position cannot fit either, so it
        // can be skipped without scanning the slots. Zero means "not set".
        int nextPossibleSlot = 0;
        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];
            READ: for (int c = 0; c < rwc.counts[r]; c++) {
                if (nextPossibleSlot != 0) {
                    // See if we can quickly skip this read. The test must use the
                    // read currently being placed, not the track's stored reads field.
                    if (nextPossibleSlot > SequenceRead.start(read)) {
                        continue;
                    } else {
                        // Reset this as we're adding reads again.
                        nextPossibleSlot = 0;
                    }
                }
                // Take the first slot whose last drawn base is before this read starts.
                for (int s = 0; s < slotCount; s++) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableReads.add(read);
                        drawableSlotValues.add(s);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }
                // No slot was free. Record the earliest point at which any slot
                // becomes free so we can skip stuff quickly in future. The scan
                // starts from a real slot value (slotCount is always >= 1);
                // starting from zero would never find anything smaller.
                nextPossibleSlot = lastBase[0];
                for (int s = 1; s < slotCount; s++) {
                    if (lastBase[s] < nextPossibleSlot)
                        nextPossibleSlot = lastBase[s];
                }
            }
        }
    } else if (lastSplitMode == DisplayPreferences.READ_DISPLAY_SEPARATED) {
        // Strands are kept apart: forward reads use the odd slots (1, 3, ...),
        // reverse reads use the even slots from 2 upwards, and reads with any
        // other strand can only use slot 0.
        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];
            READ: for (int c = 0; c < rwc.counts[r]; c++) {
                int startSlot = 0;
                int interval = slotCount;
                if (SequenceRead.strand(read) == Location.FORWARD) {
                    startSlot = 1;
                    interval = 2;
                } else if (SequenceRead.strand(read) == Location.REVERSE) {
                    startSlot = 2;
                    interval = 2;
                }
                for (int s = startSlot; s < slotCount; s += interval) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableSlotValues.add(s);
                        drawableReads.add(read);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }
                // If we get here then we don't have enough
                // slots to draw the reads in this chromosome.
                // In this case we just don't draw them in this
                // display. That just means we don't add them
                // to anything.
            }
        }
    }
    reads = drawableReads.toArray();
    slotValues = drawableSlotValues.toArray();
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 5 with LongVector

use of uk.ac.babraham.SeqMonk.Utilities.LongVector in project SeqMonk by s-andrews.

the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.

/**
 * Collapses the locations of a set of features into non-overlapping
 * locations, returned in the natural sort order of {@code Location}.
 *
 * Split locations contribute each of their sub-locations individually. All
 * positions are sorted with {@code SequenceRead.sort} and then merged in a
 * single pass: a position which overlaps the current region and ends after
 * it extends that region, a position which ends inside it is dropped, and
 * anything else closes the current region and starts a new one.
 *
 * @param features the features to merge; the method name implies they share
 *        a chromosome, but that is not checked here
 * @param useSubFeatures not consulted by this method body
 * @return the merged, sorted locations; an empty array when no features are
 *         supplied (previously this case threw an
 *         ArrayIndexOutOfBoundsException)
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // Nothing to merge - avoid indexing into an empty position array below.
    if (features.length == 0) {
        return new Location[0];
    }
    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }
    // Flatten every feature (and every sub-location of a split feature) into
    // a single list of packed positions.
    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the region being grown; it is flushed to mergedLocs when a
    // position which can't be merged into it is encountered.
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlaps and reaches further: extend, keeping the strand of the current region.
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends within the current region: it adds nothing, so ignore it.
            continue;
        } else {
            // No overlap: the current region is complete, start a new one.
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    // Flush the final region.
    mergedLocs.add(new Location(current));
    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);
    // Return the sorted array itself. Passing it back through
    // mergedLocs.toArray(finalLocations) would copy the vector's order over
    // the sorted contents and throw the sort away.
    return finalLocations;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Aggregations

LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector): 9 · Vector (java.util.Vector): 3 · Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location): 3 · SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation): 3 · ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts): 3 · IntVector (uk.ac.babraham.SeqMonk.Utilities.IntVector): 3 · Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature): 2