Search in sources :

Example 1 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class ReadLengthHistogramPlot method getReadLengths.

/**
 * Gets the read lengths.
 *
 * @param d the d
 * @return the read lengths
 */
private float[] getReadLengths(DataStore d) {
    float[] data = new float[d.getTotalReadCount()];
    int index = 0;
    Chromosome[] chrs = d.collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        ReadsWithCounts reads = d.getReadsForChromosome(chrs[c]);
        for (int r = 0; r < reads.reads.length; r++) {
            for (int ct = 0; ct < reads.counts[r]; ct++) {
                data[index] = SequenceRead.length(reads.reads[r]);
                ++index;
            }
        }
    }
    return data;
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 2 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class RNAQCCalcualtor method run.

public void run() {
    updateProgress("Getting features", 0, 1);
    this.geneFeatures = collection.genome().annotationCollection().getFeaturesForType(geneFeatureName);
    if (transcriptFeatureName != null) {
        this.transcriptFeatures = collection.genome().annotationCollection().getFeaturesForType(transcriptFeatureName);
    }
    if (rRNAFeatureName != null) {
        this.rRNAFeatures = collection.genome().annotationCollection().getFeaturesForType(rRNAFeatureName);
    }
    updateProgress("Getting merged genes", 0, 1);
    // Get the merged set of gene locations
    Feature[] mergedGenes = FeatureMerging.getNonOverlappingLocationsForFeatures(geneFeatures, false);
    if (cancel) {
        progressCancelled();
        return;
    }
    updateProgress("Getting merged transcripts", 0, 1);
    // Get the merged set of transcript features
    Feature[] mergedTranscripts = null;
    if (transcriptFeatureName != null) {
        mergedTranscripts = FeatureMerging.getNonOverlappingLocationsForFeatures(transcriptFeatures, true);
    }
    if (cancel) {
        progressCancelled();
        return;
    }
    // Quantitate the genes.
    long[] geneBaseCounts = new long[stores.length];
    int[] measuredGenesCounts = new int[stores.length];
    for (int s = 0; s < stores.length; s++) {
        updateProgress("Quantitating " + stores[s].name() + " over genes", s, stores.length * 4);
        String lastChr = "";
        Chromosome lastChrObject = null;
        for (int f = 0; f < mergedGenes.length; f++) {
            if (mergedGenes[f].chromosomeName() != lastChr) {
                lastChr = mergedGenes[f].chromosomeName();
                lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
            }
            long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedGenes[f].location().packedPosition()));
            // See if we measured anything for this gene
            if (reads.length > 0) {
                ++measuredGenesCounts[s];
            }
            for (int r = 0; r < reads.length; r++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                // Get the length of the overlap
                int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedGenes[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedGenes[f].location().start()));
                geneBaseCounts[s] += overlap;
            }
        }
    }
    // Quantitate the transcripts
    long[] transcriptBaseCounts = null;
    long[] transcriptSameStrandBaseCounts = null;
    long[] transcriptOpposingStrandBaseCounts = null;
    if (transcriptFeatureName != null) {
        transcriptBaseCounts = new long[stores.length];
        transcriptSameStrandBaseCounts = new long[stores.length];
        transcriptOpposingStrandBaseCounts = new long[stores.length];
        for (int s = 0; s < stores.length; s++) {
            updateProgress("Quantitating " + stores[s].name() + " over transcripts", s + stores.length, stores.length * 4);
            String lastChr = "";
            Chromosome lastChrObject = null;
            for (int f = 0; f < mergedTranscripts.length; f++) {
                if (mergedTranscripts[f].chromosomeName() != lastChr) {
                    lastChr = mergedTranscripts[f].chromosomeName();
                    lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
                }
                long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedTranscripts[f].location().packedPosition()));
                for (int r = 0; r < reads.length; r++) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    // Get the length of the overlap
                    int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedTranscripts[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedTranscripts[f].location().start()));
                    transcriptBaseCounts[s] += overlap;
                    if (SequenceRead.strand(reads[r]) == mergedTranscripts[f].location().strand()) {
                        transcriptSameStrandBaseCounts[s] += overlap;
                    } else {
                        transcriptOpposingStrandBaseCounts[s] += overlap;
                    }
                }
            }
        }
    }
    // Quantitate the rRNA
    long[] rRNABaseCounts = null;
    if (rRNAFeatureName != null) {
        rRNABaseCounts = new long[stores.length];
        for (int s = 0; s < stores.length; s++) {
            updateProgress("Quantitating " + stores[s].name() + " over rRNAs", s + (stores.length * 2), stores.length * 4);
            String lastChr = "";
            Chromosome lastChrObject = null;
            for (int f = 0; f < rRNAFeatures.length; f++) {
                if (rRNAFeatures[f].chromosomeName() != lastChr) {
                    lastChr = rRNAFeatures[f].chromosomeName();
                    lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
                }
                long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, rRNAFeatures[f].location().packedPosition()));
                for (int r = 0; r < reads.length; r++) {
                    if (cancel) {
                        progressCancelled();
                        return;
                    }
                    // Get the length of the overlap
                    int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), rRNAFeatures[f].location().end()) - Math.max(SequenceRead.start(reads[r]), rRNAFeatures[f].location().start()));
                    rRNABaseCounts[s] += overlap;
                }
            }
        }
    }
    // Quantitate the chromosomes
    long[][] chromosomeBaseCounts = new long[chromosomes.length][stores.length];
    for (int s = 0; s < stores.length; s++) {
        for (int c = 0; c < chromosomes.length; c++) {
            updateProgress("Quantitating " + stores[s].name() + " over " + chromosomes[c].name(), s + (stores.length * 3), stores.length * 4);
            ReadsWithCounts reads = stores[s].getReadsForChromosome(chromosomes[c]);
            for (int r = 0; r < reads.reads.length; r++) {
                chromosomeBaseCounts[c][s] += SequenceRead.length(reads.reads[r]) * reads.counts[r];
            }
        }
    }
    // Finally we make up the data sets we're going to pass back.
    RNAQCResult result = new RNAQCResult(stores);
    double[] percentInGene = new double[stores.length];
    for (int i = 0; i < geneBaseCounts.length; i++) {
        percentInGene[i] = (geneBaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
        if (percentInGene[i] > 100) {
            progressWarning("Percent in gene was >100 for " + stores[i]);
            percentInGene[i] = 100;
        }
    }
    result.addPercentageSet("Percent in Gene", percentInGene);
    double[] percentInTranscript = null;
    if (transcriptFeatureName != null) {
        percentInTranscript = new double[stores.length];
        for (int i = 0; i < geneBaseCounts.length; i++) {
            percentInTranscript[i] = (transcriptBaseCounts[i] / (double) geneBaseCounts[i]) * 100;
            if (percentInTranscript[i] > 100) {
                progressWarning("Percent in exons was >100 for " + stores[i]);
                percentInTranscript[i] = 100;
            }
        }
        result.addPercentageSet("Percent in exons", percentInTranscript);
    }
    double[] percentInrRNA = null;
    if (rRNAFeatureName != null) {
        percentInrRNA = new double[stores.length];
        for (int i = 0; i < rRNABaseCounts.length; i++) {
            percentInrRNA[i] = (rRNABaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
            if (percentInrRNA[i] > 100) {
                progressWarning("Percent in rRNA was >100 for " + stores[i]);
                percentInrRNA[i] = 100;
            }
        }
        result.addPercentageSet("Percent in rRNA", percentInrRNA);
    }
    double[] percentageMeasuredGenes = new double[stores.length];
    for (int i = 0; i < measuredGenesCounts.length; i++) {
        percentageMeasuredGenes[i] = measuredGenesCounts[i] / (double) mergedGenes.length * 100;
    }
    result.addPercentageSet("Percent Genes Measured", percentageMeasuredGenes);
    // Work out the relative coverage
    double[] percentageOfMaxCoverage = new double[stores.length];
    long maxLength = 0;
    for (int i = 0; i < stores.length; i++) {
        if (stores[i].getTotalReadLength() > maxLength)
            maxLength = stores[i].getTotalReadLength();
    }
    for (int i = 0; i < stores.length; i++) {
        percentageOfMaxCoverage[i] = (stores[i].getTotalReadLength() * 100d) / maxLength;
    }
    result.addPercentageSet("Percentage of max data size", percentageOfMaxCoverage);
    double[][] percentInChromosomes = new double[chromosomes.length][stores.length];
    for (int c = 0; c < percentInChromosomes.length; c++) {
        for (int i = 0; i < chromosomeBaseCounts[c].length; i++) {
            percentInChromosomes[c][i] = (chromosomeBaseCounts[c][i] / (double) stores[i].getTotalReadLength()) * 100;
            if (percentInChromosomes[c][i] > 100) {
                progressWarning("Percent in " + chromosomes[c] + " was >100 for " + stores[i]);
                percentInChromosomes[c][i] = 100;
            }
        }
    }
    for (int c = 0; c < percentInChromosomes.length; c++) {
        result.addPercentageSet("Percent in " + chromosomes[c].name(), percentInChromosomes[c]);
    }
    double[] percentOnSenseStrand = null;
    if (transcriptFeatureName != null) {
        percentOnSenseStrand = new double[stores.length];
        for (int i = 0; i < transcriptBaseCounts.length; i++) {
            percentOnSenseStrand[i] = (transcriptSameStrandBaseCounts[i] / (double) transcriptBaseCounts[i]) * 100;
            if (percentOnSenseStrand[i] > 100) {
                progressWarning("Percent on sense strand was >100 for " + stores[i]);
                percentOnSenseStrand[i] = 100;
            }
        }
        result.addPercentageSet("Percent on sense strand", percentOnSenseStrand);
    }
    progressComplete(result);
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Example 3 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class DataSet method getReadsForProbe.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.DataTypes.DataStore#getReadsForProbe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)
	 */
public long[] getReadsForProbe(Probe p) {
    if (!isFinalised)
        finalise();
    ReadsWithCounts allReads;
    loadCacheForChromosome(p.chromosome());
    // We take a copy of the arrays now so that we don't get a problem if something
    // else updates them whilst we're still working otherwise we get index errors.
    allReads = lastCachedReads;
    if (allReads.reads.length == 0)
        return new long[0];
    LongVector reads = new LongVector();
    IntVector counts = new IntVector();
    int startPos;
    if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome && (lastProbeLocation == 0 || SequenceRead.compare(p.packedPosition(), lastProbeLocation) >= 0)) {
        startPos = lastIndex;
    // System.out.println("Using cached start pos "+lastIndex);
    } else // enough back that we can't have missed even the longest read in the set.
    if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome) {
        // System.out.println("Last chr="+lastCachedChromosome+" this chr="+p.chromosome()+" lastProbeLocation="+lastProbeLocation+" diff="+SequenceRead.compare(p.packedPosition(), lastProbeLocation));
        int longestRead = getMaxReadLength();
        for (; lastIndex > 0; lastIndex--) {
            if (p.start() - SequenceRead.start(allReads.reads[lastIndex]) > longestRead) {
                break;
            }
        }
        // System.out.println("Starting from index "+lastIndex+" which starts at "+SequenceRead.start(allReads[lastIndex])+" for "+p.start()+" when max length is "+longestRead);
        startPos = lastIndex;
    } else // If we're on a different chromosome then start from the very beginning
    {
        startPos = 0;
        lastIndex = 0;
    // System.out.println("Starting from the beginning");
    }
    // Can't see how this would happen, but we had a report showing this.
    if (startPos < 0)
        startPos = 0;
    lastProbeLocation = p.packedPosition();
    // We now go forward to see what we can find
    boolean cacheSet = false;
    for (int i = startPos; i < allReads.reads.length; i++) {
        // Reads come in order, so we can stop when we've seen enough.
        if (SequenceRead.start(allReads.reads[i]) > p.end()) {
            break;
        }
        if (SequenceRead.overlaps(allReads.reads[i], p.packedPosition())) {
            // then update the cache
            if (!cacheSet) {
                lastIndex = i;
                cacheSet = true;
            }
            reads.add(allReads.reads[i]);
            counts.add(allReads.counts[i]);
        }
    }
    long[] returnReads = expandReadsAndCounts(reads.toArray(), counts.toArray());
    // SequenceRead.sort(returnReads);
    return returnReads;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) IntVector(uk.ac.babraham.SeqMonk.Utilities.IntVector) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Example 4 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class EvenCoverageProbeGenerator method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<Probe> newProbes = new Vector<Probe>();
    for (int c = 0; c < chromosomes.length; c++) {
        // Time for an update
        updateGenerationProgress("Processed " + c + " chromosomes", c, chromosomes.length);
        // We'll merge together the reads for all of the selected DataStores and
        // compute a single set of probes which covers all of them.
        ReadsWithCounts[] v = new ReadsWithCounts[selectedStores.length];
        for (int s = 0; s < selectedStores.length; s++) {
            v[s] = selectedStores[s].getReadsForChromosome(chromosomes[c]);
        }
        ReadsWithCounts reads = new ReadsWithCounts(v);
        v = null;
        if (reads.totalCount() == 0) {
            // System.err.println("Skipping strand "+strandsToTry[strand]+" on chr "+chromosomes[c]);
            continue;
        }
        int strandForNewProbes = Location.UNKNOWN;
        int start = SequenceRead.start(reads.reads[0]);
        int end = SequenceRead.end(reads.reads[0]);
        int count = reads.counts[0];
        for (int r = 1; r < reads.reads.length; r++) {
            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }
            if (count > 0 && ((maxSize > 0 && (SequenceRead.end(reads.reads[r]) - start) + 1 > maxSize)) || (count + reads.counts[r] > targetReadCount)) {
                // Make a probe out of what we have and start a new one with the
                // read we're currently looking at
                Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
                newProbes.add(p);
                start = end + 1;
                end = Math.max(end + 2, SequenceRead.end(reads.reads[r]));
                count = SequenceRead.end(reads.counts[r]);
            } else {
                count += reads.counts[r];
                if (SequenceRead.end(reads.reads[r]) > end) {
                    end = SequenceRead.end(reads.reads[r]);
                }
            }
        }
        if (count > 0) {
            Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
            newProbes.add(p);
        }
    }
    Probe[] finalList = newProbes.toArray(new Probe[0]);
    newProbes.clear();
    ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
    generationComplete(finalSet);
}
Also used : ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Vector(java.util.Vector)

Example 5 with ReadsWithCounts

use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.

the class MacsPeakCaller method getReadsFromDataStoreCollection.

// TODO: Report back ReadsWithCounts rather than flattening to a huge array.
private long[] getReadsFromDataStoreCollection(Chromosome c, DataStore[] stores, int strandOffset) {
    ReadsWithCounts[] allChrReads = new ReadsWithCounts[stores.length];
    for (int d = 0; d < stores.length; d++) {
        allChrReads[d] = stores[d].getReadsForChromosome(c);
    }
    ReadsWithCounts mergedReadsCounts = new ReadsWithCounts(allChrReads);
    int totalProbeReadCount = mergedReadsCounts.totalCount();
    long[] mergedChrReads = new long[totalProbeReadCount];
    int index = 0;
    for (int i = 0; i < mergedReadsCounts.reads.length; i++) {
        if (strandOffset == 0 || SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.UNKNOWN) {
            for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
                mergedChrReads[index] = mergedReadsCounts.reads[i];
                ++index;
            }
        } else if (SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.FORWARD) {
            long newValue = SequenceRead.packPosition(Math.min(SequenceRead.start(mergedReadsCounts.reads[i]) + strandOffset, c.length()), Math.min(SequenceRead.end(mergedReadsCounts.reads[i]) + strandOffset, c.length()), SequenceRead.strand(mergedReadsCounts.reads[i]));
            for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
                mergedChrReads[index] = newValue;
                index++;
            }
        } else if (SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.REVERSE) {
            long newValue = SequenceRead.packPosition(Math.max(SequenceRead.start(mergedReadsCounts.reads[i]) - strandOffset, 1), Math.max(SequenceRead.end(mergedReadsCounts.reads[i]) - strandOffset, 1), SequenceRead.strand(mergedReadsCounts.reads[i]));
            for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
                mergedChrReads[index] = newValue;
                ++index;
            }
        }
    }
    return mergedChrReads;
}
Also used : ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)

Aggregations

ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)13 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)8 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)4 IntVector (uk.ac.babraham.SeqMonk.Utilities.IntVector)4 LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector)4 Vector (java.util.Vector)2 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)2 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)2 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ObjectInputStream (java.io.ObjectInputStream)1 LinkedList (java.util.LinkedList)1 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)1 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)1 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)1