Search in sources :

Example 6 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class ChromosomeDataTrack method assignSlots.

/**
 * Assign slots.
 */
private void assignSlots() {
    if (getHeight() == height && DisplayPreferences.getInstance().getReadDisplay() == lastSplitMode && thisReadDensity == lastReadDensity && drawProbes == lastDrawProbes) {
        // Nothing to do.
        return;
    }
    // Cache the values so we might be able to skip this next time.
    height = getHeight();
    lastReadDensity = thisReadDensity;
    lastDrawProbes = drawProbes;
    lastSplitMode = DisplayPreferences.getInstance().getReadDisplay();
    // Lets recalculate the slot values
    /* 
		 * Each slot is a shaded area of [readHeight]px separated by a
		 * blank [readSpace]px area.  This means there are [readHeight+readSpace]px between
		 * adjacent slots.  Because height might not be even
		 * we need to calculate for the smallest half (hence
		 * the divide by 2 and later multiply by 2.
		 *
		 * Finally we leave the top and bottom slots empty so
		 * we can distinguish between tracks (hence the -2 at
		 * the end.
		 * 
		 * I've changed the -2 to -1 since there should always be an odd
		 * number of slots (a central one and then pairs around it)
		 * 
		 * If we don't have much space for each lane then we can get
		 * negative slot counts, and we can't let that happen!
		 * 
		 * We also calculate differently depending on whether we have to
		 * draw probes as well.  If we're drawing probes we only
		 * have half of the lane to work with.  If we're just
		 * drawing reads we've got the whole space.
		 */
    // We'll only use half of the height if we're either drawing probes, or if
    // we're a HiC dataset where the bottom half will show interactions.
    // int halfHeightCorrection = (drawProbes ? 2 : 1);
    int halfHeightCorrection = 1;
    if (drawProbes || isHiC) {
        halfHeightCorrection = 2;
    }
    /*
		 * This gets a value of 2 if we're drawing probes as well and 1
		 * if we're not.
		 */
    int slotCount = (((height / (2 * halfHeightCorrection)) / (readHeight + readSpace)) * 2) - 1;
    if (slotCount < 1)
        slotCount = 1;
    slotYValues = new int[slotCount];
    // System.err.println("There will be "+slotYValues.length+" slots");
    int mid = height / (2 * halfHeightCorrection);
    for (int i = 0; i < slotYValues.length; i++) {
        if (i == 0) {
            slotYValues[i] = mid;
        } else if (i % 2 == 0) {
            // We're going down
            slotYValues[i] = mid + ((readHeight + readSpace) * (i / 2));
        } else {
            // We're going up
            slotYValues[i] = mid - ((readHeight + readSpace) * ((i + 1) / 2));
        }
    }
    // We now need to assign each probe to a slot
    // We're going to go back to the original source for the reads.  That way we only need to keep
    // hold of the ones which are assignable in this height of view which could save us a lot of
    // memory
    ReadsWithCounts rwc = data.getReadsForChromosome(DisplayPreferences.getInstance().getCurrentChromosome());
    // We'll start a temporary list of the reads which we can draw, and this will be what we put together.
    LongVector drawableReads = new LongVector();
    IntVector drawableSlotValues = new IntVector();
    // We can also make the array of cached positions to optimise drawing
    lastReadXEnds = new int[slotCount];
    // The lastBase array keeps track of the last
    // base to be drawn in each slot.
    int[] lastBase = new int[slotCount];
    for (int i = 0; i < lastBase.length; i++) {
        lastBase[i] = 0;
    }
    // fit them
    if (lastSplitMode == DisplayPreferences.READ_DISPLAY_COMBINED) {
        // To save doing a lot of processing we're going to cache the
        // next available position if we're off the end of the display
        // so we can quickly skip over reads which are never going to
        // fit
        int nextPossibleSlot = 0;
        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];
            READ: for (int c = 0; c < rwc.counts[r]; c++) {
                if (nextPossibleSlot != 0) {
                    // See if we can quickly skip this read
                    if (nextPossibleSlot > SequenceRead.start(reads[r])) {
                        continue;
                    } else {
                        // Reset this as we're adding reads again.
                        nextPossibleSlot = 0;
                    }
                }
                for (int s = 0; s < slotCount; s++) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableReads.add(read);
                        drawableSlotValues.add(s);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }
                // skip stuff quickly in future
                for (int s = 0; s < slotCount; s++) {
                    if (lastBase[s] < nextPossibleSlot)
                        nextPossibleSlot = lastBase[s];
                }
            }
        }
    } else if (lastSplitMode == DisplayPreferences.READ_DISPLAY_SEPARATED) {
        // reads go below.
        for (int r = 0; r < rwc.reads.length; r++) {
            long read = rwc.reads[r];
            READ: for (int c = 0; c < rwc.counts[r]; c++) {
                int startSlot = 0;
                int interval = slotCount;
                if (SequenceRead.strand(read) == Location.FORWARD) {
                    startSlot = 1;
                    interval = 2;
                } else if (SequenceRead.strand(read) == Location.REVERSE) {
                    startSlot = 2;
                    interval = 2;
                }
                for (int s = startSlot; s < slotCount; s += interval) {
                    if (lastBase[s] < SequenceRead.start(read)) {
                        drawableSlotValues.add(s);
                        drawableReads.add(read);
                        lastBase[s] = SequenceRead.end(read);
                        continue READ;
                    }
                }
            // If we get here then we don't have enough
            // slots to draw the reads in this chromosome.
            // In this case we just don't draw them in this
            // display.  That just measns we don't add them
            // to anything.
            }
        }
    }
    reads = drawableReads.toArray();
    slotValues = drawableSlotValues.toArray();
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 7 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class DataStorePropertiesDialog method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Chromosome[] chrs = dataStore.collection().genome().getAllChromosomes();
    double averageLength = 0;
    int totalCount = 0;
    int shortestLength = 0;
    int longestLength = 0;
    for (int c = 0; c < chrs.length; c++) {
        ReadsWithCounts reads = dataStore.getReadsForChromosome(chrs[c]);
        for (int i = 0; i < reads.reads.length; i++) {
            totalCount += reads.counts[i];
            if (i == 0) {
                shortestLength = SequenceRead.length(reads.reads[i]);
                longestLength = SequenceRead.length(reads.reads[i]);
            }
            if (SequenceRead.length(reads.reads[i]) < shortestLength)
                shortestLength = SequenceRead.length(reads.reads[i]);
            if (SequenceRead.length(reads.reads[i]) > longestLength)
                longestLength = SequenceRead.length(reads.reads[i]);
            averageLength += SequenceRead.length(reads.reads[i]) * reads.counts[i];
        }
    }
    averageLength /= totalCount;
    this.averageLength.setText("" + (int) averageLength + "bp (" + shortestLength + "-" + longestLength + ")");
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 8 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class ExactOverlapQuantitation method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Probe[] probes = application.dataCollection().probeSet().getAllProbes();
    float[] corrections = new float[data.length];
    if (correctTotal) {
        float largest = 0;
        if (correctPerMillion) {
            largest = 1000000;
        }
        for (int d = 0; d < data.length; d++) {
            if (correctOnlyInProbes) {
                corrections[d] = getTotalCountInProbes(data[d], probes);
            } else {
                corrections[d] = data[d].getTotalReadCount();
            }
            if (d == 0 && !correctPerMillion) {
                largest = corrections[d];
            } else {
                if (!correctPerMillion && corrections[d] > largest) {
                    largest = corrections[d];
                }
            }
        }
        // We correct everything by the largest count
        for (int d = 0; d < corrections.length; d++) {
            corrections[d] = largest / corrections[d];
        }
    }
    // To make this more efficient we'll do this chromosome by chromosome
    Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Quantiating probes on " + chrs[c].name(), c, chrs.length);
        Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
        Arrays.sort(thisChrProbes);
        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            // We'll fetch all reads for this chr and then do a count per position
            ReadsWithCounts reads = data[d].getReadsForChromosome(chrs[c]);
            quantitationType.resetLastRead();
            int startIndex = 0;
            for (int p = 0; p < thisChrProbes.length; p++) {
                int rawCount = 0;
                for (int r = startIndex; r < reads.reads.length; r++) {
                    if (SequenceRead.start(reads.reads[r]) < thisChrProbes[p].start()) {
                        startIndex = r;
                    }
                    if (SequenceRead.start(reads.reads[r]) > thisChrProbes[p].start())
                        break;
                    if (quantitationType.useRead(thisChrProbes[p], reads.reads[r])) {
                        if (SequenceRead.start(reads.reads[r]) == thisChrProbes[p].start() && SequenceRead.end(reads.reads[r]) == thisChrProbes[p].end()) {
                            rawCount += reads.counts[r];
                        }
                    }
                }
                // We have the counts now work out any correction.
                float count = rawCount;
                if (logTransform && count == 0) {
                    count = 0.9f;
                }
                if (correctTotal) {
                    count *= corrections[d];
                }
                if (logTransform) {
                    count = (float) Math.log(count) / log2;
                }
                data[d].setValueForProbe(thisChrProbes[p], count);
            }
        }
    }
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 9 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class VisibleStoresParser method processNormalDataStore.

private DataSet processNormalDataStore(DataStore store) {
    int extendBy = prefs.extendReads();
    boolean reverse = prefs.reverseReads();
    boolean removeStrand = prefs.removeStrandInfo();
    DataSet newData = new DataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates());
    // Now process the data
    Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
        ReadsWithCounts reads = store.getReadsForChromosome(chrs[c]);
        Feature[] features = null;
        if (filterByFeature) {
            features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
            Arrays.sort(features);
        }
        int currentFeaturePostion = 0;
        for (int r = 0; r < reads.reads.length; r++) {
            for (int ct = 0; ct < reads.counts[r]; ct++) {
                long thisRead = reads.reads[r];
                if (cancel) {
                    progressCancelled();
                    return null;
                }
                if (downsample && downsampleProbabilty < 1) {
                    if (Math.random() > downsampleProbabilty) {
                        continue;
                    }
                }
                long read;
                int start = SequenceRead.start(thisRead);
                int end = SequenceRead.end(thisRead);
                int strand = SequenceRead.strand(thisRead);
                if (filterByStrand) {
                    if (strand == Location.FORWARD && !keepForward)
                        continue;
                    if (strand == Location.REVERSE && !keepReverse)
                        continue;
                    if (strand == Location.UNKNOWN && !keepUnknown)
                        continue;
                }
                if (filterByLength) {
                    int length = SequenceRead.length(thisRead);
                    if (minLength != null && length < minLength)
                        continue;
                    if (maxLength != null && length > maxLength)
                        continue;
                }
                if (strand == Location.FORWARD) {
                    start += forwardOffset;
                    end += forwardOffset;
                }
                if (strand == Location.REVERSE) {
                    start -= reverseOffset;
                    end -= reverseOffset;
                }
                if (filterByFeature && features.length == 0 && !excludeFeature)
                    continue;
                if (filterByFeature && features.length > 0) {
                    // See if we're comparing against the right feature
                    while (SequenceRead.start(thisRead) > features[currentFeaturePostion].location().end() && currentFeaturePostion < (features.length - 1)) {
                        currentFeaturePostion++;
                    }
                    // Test to see if we overlap
                    if (SequenceRead.overlaps(thisRead, features[currentFeaturePostion].location().packedPosition())) {
                        if (excludeFeature)
                            continue;
                    } else {
                        if (!excludeFeature)
                            continue;
                    }
                }
                if (reverse) {
                    if (strand == Location.FORWARD) {
                        strand = Location.REVERSE;
                    } else if (strand == Location.REVERSE) {
                        strand = Location.FORWARD;
                    }
                }
                if (removeStrand) {
                    strand = Location.UNKNOWN;
                }
                if (extractCentres) {
                    int centre = start + ((end - start) / 2);
                    start = centre - centreExtractContext;
                    end = centre + centreExtractContext;
                }
                if (extendBy != 0) {
                    // We now allow negative extensions to shorten reads
                    if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
                        end += extendBy;
                        if (end < start)
                            end = start;
                    } else if (strand == Location.REVERSE) {
                        start -= extendBy;
                        if (start > end)
                            start = end;
                    }
                }
                // We don't allow reads before the start of the chromosome
                if (start < 1) {
                    int overrun = (0 - start) + 1;
                    progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                // We also don't allow readings which are beyond the end of the chromosome
                if (end > chrs[c].length()) {
                    int overrun = end - chrs[c].length();
                    progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                // We can now make the new reading
                try {
                    read = SequenceRead.packPosition(start, end, strand);
                    if (!prefs.isHiC()) {
                        // HiC additions are deferred until we know the other end is OK too.
                        newData.addData(chrs[c], read);
                    }
                } catch (SeqMonkException e) {
                    progressWarningReceived(e);
                    continue;
                }
            }
        }
    }
    return newData;
}
Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Example 10 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class ContigProbeGenerator method getNonRedundantReads.

/**
 * Gets the non redundant reads.
 *
 * @param reads the reads
 * @return the non redundant reads
 */
private ReadsWithCounts getNonRedundantReads(ReadsWithCounts reads, int limitToStrand) {
    boolean limitStrand = false;
    if (limitToStrand == Location.FORWARD || limitToStrand == Location.REVERSE || limitToStrand == Location.UNKNOWN) {
        limitStrand = true;
    }
    if (!limitStrand) {
        // It's already done.
        return reads;
    }
    LongVector keptReads = new LongVector();
    IntVector keptCounts = new IntVector();
    for (int r = 0; r < reads.reads.length; r++) {
        if (!readStrandType.useRead(reads.reads[r])) {
            continue;
        }
        if (limitStrand && (SequenceRead.strand(reads.reads[r]) != limitToStrand))
            continue;
        keptReads.add(reads.reads[r]);
        keptCounts.add(reads.counts[r]);
    }
    return new ReadsWithCounts(keptReads.toArray(), keptCounts.toArray());
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Aggregations

ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)13 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)8 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)4 IntVector (uk.ac.babraham.SeqMonk.Utilities.IntVector)4 LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector)4 Vector (java.util.Vector)2 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)2 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)2 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 LinkedList (java.util.LinkedList)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)1