use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class ReadLengthHistogramPlot method getReadLengths.
/**
* Gets the read lengths.
*
* @param d the d
* @return the read lengths
*/
private float[] getReadLengths(DataStore d) {
float[] data = new float[d.getTotalReadCount()];
int index = 0;
Chromosome[] chrs = d.collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
ReadsWithCounts reads = d.getReadsForChromosome(chrs[c]);
for (int r = 0; r < reads.reads.length; r++) {
for (int ct = 0; ct < reads.counts[r]; ct++) {
data[index] = SequenceRead.length(reads.reads[r]);
++index;
}
}
}
return data;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class RNAQCCalcualtor method run.
public void run() {
updateProgress("Getting features", 0, 1);
this.geneFeatures = collection.genome().annotationCollection().getFeaturesForType(geneFeatureName);
if (transcriptFeatureName != null) {
this.transcriptFeatures = collection.genome().annotationCollection().getFeaturesForType(transcriptFeatureName);
}
if (rRNAFeatureName != null) {
this.rRNAFeatures = collection.genome().annotationCollection().getFeaturesForType(rRNAFeatureName);
}
updateProgress("Getting merged genes", 0, 1);
// Get the merged set of gene locations
Feature[] mergedGenes = FeatureMerging.getNonOverlappingLocationsForFeatures(geneFeatures, false);
if (cancel) {
progressCancelled();
return;
}
updateProgress("Getting merged transcripts", 0, 1);
// Get the merged set of transcript features
Feature[] mergedTranscripts = null;
if (transcriptFeatureName != null) {
mergedTranscripts = FeatureMerging.getNonOverlappingLocationsForFeatures(transcriptFeatures, true);
}
if (cancel) {
progressCancelled();
return;
}
// Quantitate the genes.
long[] geneBaseCounts = new long[stores.length];
int[] measuredGenesCounts = new int[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over genes", s, stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < mergedGenes.length; f++) {
if (mergedGenes[f].chromosomeName() != lastChr) {
lastChr = mergedGenes[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedGenes[f].location().packedPosition()));
// See if we measured anything for this gene
if (reads.length > 0) {
++measuredGenesCounts[s];
}
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedGenes[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedGenes[f].location().start()));
geneBaseCounts[s] += overlap;
}
}
}
// Quantitate the transcripts
long[] transcriptBaseCounts = null;
long[] transcriptSameStrandBaseCounts = null;
long[] transcriptOpposingStrandBaseCounts = null;
if (transcriptFeatureName != null) {
transcriptBaseCounts = new long[stores.length];
transcriptSameStrandBaseCounts = new long[stores.length];
transcriptOpposingStrandBaseCounts = new long[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over transcripts", s + stores.length, stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < mergedTranscripts.length; f++) {
if (mergedTranscripts[f].chromosomeName() != lastChr) {
lastChr = mergedTranscripts[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedTranscripts[f].location().packedPosition()));
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedTranscripts[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedTranscripts[f].location().start()));
transcriptBaseCounts[s] += overlap;
if (SequenceRead.strand(reads[r]) == mergedTranscripts[f].location().strand()) {
transcriptSameStrandBaseCounts[s] += overlap;
} else {
transcriptOpposingStrandBaseCounts[s] += overlap;
}
}
}
}
}
// Quantitate the rRNA
long[] rRNABaseCounts = null;
if (rRNAFeatureName != null) {
rRNABaseCounts = new long[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over rRNAs", s + (stores.length * 2), stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < rRNAFeatures.length; f++) {
if (rRNAFeatures[f].chromosomeName() != lastChr) {
lastChr = rRNAFeatures[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, rRNAFeatures[f].location().packedPosition()));
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), rRNAFeatures[f].location().end()) - Math.max(SequenceRead.start(reads[r]), rRNAFeatures[f].location().start()));
rRNABaseCounts[s] += overlap;
}
}
}
}
// Quantitate the chromosomes
long[][] chromosomeBaseCounts = new long[chromosomes.length][stores.length];
for (int s = 0; s < stores.length; s++) {
for (int c = 0; c < chromosomes.length; c++) {
updateProgress("Quantitating " + stores[s].name() + " over " + chromosomes[c].name(), s + (stores.length * 3), stores.length * 4);
ReadsWithCounts reads = stores[s].getReadsForChromosome(chromosomes[c]);
for (int r = 0; r < reads.reads.length; r++) {
chromosomeBaseCounts[c][s] += SequenceRead.length(reads.reads[r]) * reads.counts[r];
}
}
}
// Finally we make up the data sets we're going to pass back.
RNAQCResult result = new RNAQCResult(stores);
double[] percentInGene = new double[stores.length];
for (int i = 0; i < geneBaseCounts.length; i++) {
percentInGene[i] = (geneBaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInGene[i] > 100) {
progressWarning("Percent in gene was >100 for " + stores[i]);
percentInGene[i] = 100;
}
}
result.addPercentageSet("Percent in Gene", percentInGene);
double[] percentInTranscript = null;
if (transcriptFeatureName != null) {
percentInTranscript = new double[stores.length];
for (int i = 0; i < geneBaseCounts.length; i++) {
percentInTranscript[i] = (transcriptBaseCounts[i] / (double) geneBaseCounts[i]) * 100;
if (percentInTranscript[i] > 100) {
progressWarning("Percent in exons was >100 for " + stores[i]);
percentInTranscript[i] = 100;
}
}
result.addPercentageSet("Percent in exons", percentInTranscript);
}
double[] percentInrRNA = null;
if (rRNAFeatureName != null) {
percentInrRNA = new double[stores.length];
for (int i = 0; i < rRNABaseCounts.length; i++) {
percentInrRNA[i] = (rRNABaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInrRNA[i] > 100) {
progressWarning("Percent in rRNA was >100 for " + stores[i]);
percentInrRNA[i] = 100;
}
}
result.addPercentageSet("Percent in rRNA", percentInrRNA);
}
double[] percentageMeasuredGenes = new double[stores.length];
for (int i = 0; i < measuredGenesCounts.length; i++) {
percentageMeasuredGenes[i] = measuredGenesCounts[i] / (double) mergedGenes.length * 100;
}
result.addPercentageSet("Percent Genes Measured", percentageMeasuredGenes);
// Work out the relative coverage
double[] percentageOfMaxCoverage = new double[stores.length];
long maxLength = 0;
for (int i = 0; i < stores.length; i++) {
if (stores[i].getTotalReadLength() > maxLength)
maxLength = stores[i].getTotalReadLength();
}
for (int i = 0; i < stores.length; i++) {
percentageOfMaxCoverage[i] = (stores[i].getTotalReadLength() * 100d) / maxLength;
}
result.addPercentageSet("Percentage of max data size", percentageOfMaxCoverage);
double[][] percentInChromosomes = new double[chromosomes.length][stores.length];
for (int c = 0; c < percentInChromosomes.length; c++) {
for (int i = 0; i < chromosomeBaseCounts[c].length; i++) {
percentInChromosomes[c][i] = (chromosomeBaseCounts[c][i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInChromosomes[c][i] > 100) {
progressWarning("Percent in " + chromosomes[c] + " was >100 for " + stores[i]);
percentInChromosomes[c][i] = 100;
}
}
}
for (int c = 0; c < percentInChromosomes.length; c++) {
result.addPercentageSet("Percent in " + chromosomes[c].name(), percentInChromosomes[c]);
}
double[] percentOnSenseStrand = null;
if (transcriptFeatureName != null) {
percentOnSenseStrand = new double[stores.length];
for (int i = 0; i < transcriptBaseCounts.length; i++) {
percentOnSenseStrand[i] = (transcriptSameStrandBaseCounts[i] / (double) transcriptBaseCounts[i]) * 100;
if (percentOnSenseStrand[i] > 100) {
progressWarning("Percent on sense strand was >100 for " + stores[i]);
percentOnSenseStrand[i] = 100;
}
}
result.addPercentageSet("Percent on sense strand", percentOnSenseStrand);
}
progressComplete(result);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class DataSet method getReadsForProbe.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.DataTypes.DataStore#getReadsForProbe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)
*/
public long[] getReadsForProbe(Probe p) {
if (!isFinalised)
finalise();
ReadsWithCounts allReads;
loadCacheForChromosome(p.chromosome());
// We take a copy of the arrays now so that we don't get a problem if something
// else updates them whilst we're still working otherwise we get index errors.
allReads = lastCachedReads;
if (allReads.reads.length == 0)
return new long[0];
LongVector reads = new LongVector();
IntVector counts = new IntVector();
int startPos;
if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome && (lastProbeLocation == 0 || SequenceRead.compare(p.packedPosition(), lastProbeLocation) >= 0)) {
startPos = lastIndex;
// System.out.println("Using cached start pos "+lastIndex);
} else // enough back that we can't have missed even the longest read in the set.
if (lastCachedChromosome != null && p.chromosome() == lastCachedChromosome) {
// System.out.println("Last chr="+lastCachedChromosome+" this chr="+p.chromosome()+" lastProbeLocation="+lastProbeLocation+" diff="+SequenceRead.compare(p.packedPosition(), lastProbeLocation));
int longestRead = getMaxReadLength();
for (; lastIndex > 0; lastIndex--) {
if (p.start() - SequenceRead.start(allReads.reads[lastIndex]) > longestRead) {
break;
}
}
// System.out.println("Starting from index "+lastIndex+" which starts at "+SequenceRead.start(allReads[lastIndex])+" for "+p.start()+" when max length is "+longestRead);
startPos = lastIndex;
} else // If we're on a different chromosome then start from the very beginning
{
startPos = 0;
lastIndex = 0;
// System.out.println("Starting from the beginning");
}
// Can't see how this would happen, but we had a report showing this.
if (startPos < 0)
startPos = 0;
lastProbeLocation = p.packedPosition();
// We now go forward to see what we can find
boolean cacheSet = false;
for (int i = startPos; i < allReads.reads.length; i++) {
// Reads come in order, so we can stop when we've seen enough.
if (SequenceRead.start(allReads.reads[i]) > p.end()) {
break;
}
if (SequenceRead.overlaps(allReads.reads[i], p.packedPosition())) {
// then update the cache
if (!cacheSet) {
lastIndex = i;
cacheSet = true;
}
reads.add(allReads.reads[i]);
counts.add(allReads.counts[i]);
}
}
long[] returnReads = expandReadsAndCounts(reads.toArray(), counts.toArray());
// SequenceRead.sort(returnReads);
return returnReads;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class EvenCoverageProbeGenerator method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<Probe> newProbes = new Vector<Probe>();
for (int c = 0; c < chromosomes.length; c++) {
// Time for an update
updateGenerationProgress("Processed " + c + " chromosomes", c, chromosomes.length);
// We'll merge together the reads for all of the selected DataStores and
// compute a single set of probes which covers all of them.
ReadsWithCounts[] v = new ReadsWithCounts[selectedStores.length];
for (int s = 0; s < selectedStores.length; s++) {
v[s] = selectedStores[s].getReadsForChromosome(chromosomes[c]);
}
ReadsWithCounts reads = new ReadsWithCounts(v);
v = null;
if (reads.totalCount() == 0) {
// System.err.println("Skipping strand "+strandsToTry[strand]+" on chr "+chromosomes[c]);
continue;
}
int strandForNewProbes = Location.UNKNOWN;
int start = SequenceRead.start(reads.reads[0]);
int end = SequenceRead.end(reads.reads[0]);
int count = reads.counts[0];
for (int r = 1; r < reads.reads.length; r++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
if (count > 0 && ((maxSize > 0 && (SequenceRead.end(reads.reads[r]) - start) + 1 > maxSize)) || (count + reads.counts[r] > targetReadCount)) {
// Make a probe out of what we have and start a new one with the
// read we're currently looking at
Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
newProbes.add(p);
start = end + 1;
end = Math.max(end + 2, SequenceRead.end(reads.reads[r]));
count = SequenceRead.end(reads.counts[r]);
} else {
count += reads.counts[r];
if (SequenceRead.end(reads.reads[r]) > end) {
end = SequenceRead.end(reads.reads[r]);
}
}
}
if (count > 0) {
Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
newProbes.add(p);
}
}
Probe[] finalList = newProbes.toArray(new Probe[0]);
newProbes.clear();
ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
generationComplete(finalSet);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts in project SeqMonk by s-andrews.
the class MacsPeakCaller method getReadsFromDataStoreCollection.
// TODO: Report back ReadsWithCounts rather than flattening to a huge array.
private long[] getReadsFromDataStoreCollection(Chromosome c, DataStore[] stores, int strandOffset) {
ReadsWithCounts[] allChrReads = new ReadsWithCounts[stores.length];
for (int d = 0; d < stores.length; d++) {
allChrReads[d] = stores[d].getReadsForChromosome(c);
}
ReadsWithCounts mergedReadsCounts = new ReadsWithCounts(allChrReads);
int totalProbeReadCount = mergedReadsCounts.totalCount();
long[] mergedChrReads = new long[totalProbeReadCount];
int index = 0;
for (int i = 0; i < mergedReadsCounts.reads.length; i++) {
if (strandOffset == 0 || SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.UNKNOWN) {
for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
mergedChrReads[index] = mergedReadsCounts.reads[i];
++index;
}
} else if (SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.FORWARD) {
long newValue = SequenceRead.packPosition(Math.min(SequenceRead.start(mergedReadsCounts.reads[i]) + strandOffset, c.length()), Math.min(SequenceRead.end(mergedReadsCounts.reads[i]) + strandOffset, c.length()), SequenceRead.strand(mergedReadsCounts.reads[i]));
for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
mergedChrReads[index] = newValue;
index++;
}
} else if (SequenceRead.strand(mergedReadsCounts.reads[i]) == Location.REVERSE) {
long newValue = SequenceRead.packPosition(Math.max(SequenceRead.start(mergedReadsCounts.reads[i]) - strandOffset, 1), Math.max(SequenceRead.end(mergedReadsCounts.reads[i]) - strandOffset, 1), SequenceRead.strand(mergedReadsCounts.reads[i]));
for (int j = 0; j < mergedReadsCounts.counts[i]; j++) {
mergedChrReads[index] = newValue;
++index;
}
}
}
return mergedChrReads;
}
Aggregations