use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FeaturePositionSelectorPanel method getCoreProbes.
/**
* Gets the set of locations for the core of each feature. This wouldn't
* include additional context added by the options, but would have subtracted
* context removed by the options.
*
* @return
*/
public Probe[] getCoreProbes() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<Probe> newProbes = new Vector<Probe>();
for (int c = 0; c < chromosomes.length; c++) {
Vector<Feature> allFeatures = new Vector<Feature>();
String[] selectedFeatureTypes = selectedFeatureTypes();
for (int f = 0; f < selectedFeatureTypes.length; f++) {
Feature[] features = collection.genome().annotationCollection().getFeaturesForType(chromosomes[c], selectedFeatureTypes[f]);
for (int i = 0; i < features.length; i++) {
allFeatures.add(features[i]);
}
}
Feature[] features = allFeatures.toArray(new Feature[0]);
for (int f = 0; f < features.length; f++) {
if (useSubFeatures()) {
// We need to split this up so get the sub-features
if (features[f].location() instanceof SplitLocation) {
SplitLocation location = (SplitLocation) features[f].location();
Location[] subLocations = location.subLocations();
if (useExonSubfeatures()) {
for (int s = 0; s < subLocations.length; s++) {
makeProbes(features[f], chromosomes[c], subLocations[s], newProbes, true);
}
} else {
// We're making introns
for (int s = 1; s < subLocations.length; s++) {
makeProbes(features[f], chromosomes[c], new Location(subLocations[s - 1].end() + 1, subLocations[s].start() - 1, features[f].location().strand()), newProbes, true);
}
}
} else {
if (useExonSubfeatures()) {
// We can still make a single probe
makeProbes(features[f], chromosomes[c], features[f].location(), newProbes, true);
}
// If we're making introns then we're stuffed and we give up.
}
} else {
makeProbes(features[f], chromosomes[c], features[f].location(), newProbes, true);
}
}
}
Probe[] finalList = newProbes.toArray(new Probe[0]);
if (removeDuplicates()) {
finalList = removeDuplicates(finalList);
}
return finalList;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class InteractionReportTableDialog method mouseClicked.
/* (non-Javadoc)
* @see java.awt.event.MouseListener#mouseClicked(java.awt.event.MouseEvent)
*/
public void mouseClicked(MouseEvent me) {
// We're only interested in double clicks
if (me.getClickCount() != 2)
return;
// This is only linked from the report JTable
JTable t = (JTable) me.getSource();
int r = t.getSelectedRow();
int c = t.getSelectedColumn();
Object clickedOn = t.getModel().getValueAt(r, c);
if (clickedOn == null)
return;
if (clickedOn instanceof Probe) {
Probe p = (Probe) clickedOn;
DisplayPreferences.getInstance().setLocation(p.chromosome(), p.packedPosition());
}
if (clickedOn instanceof Feature) {
Feature f = (Feature) clickedOn;
DisplayPreferences.getInstance().setLocation(application.dataCollection().genome().getChromosome(f.chromosomeName()).chromosome(), f.location().packedPosition());
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class SmallRNAQCCalcualtor method run.
public void run() {
SmallRNAQCResult[] results = new SmallRNAQCResult[stores.length];
for (int s = 0; s < stores.length; s++) {
results[s] = new SmallRNAQCResult(stores[s], minLength, maxLength, features);
}
for (int f = 0; f < features.length; f++) {
for (int d = 0; d < stores.length; d++) {
updateProgress("Quantitating " + features[f] + " for " + stores[d], (stores.length * f) + d, stores.length * features.length);
int[] lengthCounts = new int[(maxLength - minLength) + 1];
String lastChr = "";
Chromosome lastChrObject = null;
Feature[] theseFeatures = collection.genome().annotationCollection().getFeaturesForType(features[f]);
Arrays.sort(theseFeatures);
for (int i = 0; i < theseFeatures.length; i++) {
if (theseFeatures[i].chromosomeName() != lastChr) {
lastChr = theseFeatures[i].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[d].getReadsForProbe(new Probe(lastChrObject, theseFeatures[i].location().packedPosition()));
for (int r = 0; r < reads.length; r++) {
int length = SequenceRead.length(reads[r]);
if (length >= minLength && length <= maxLength) {
lengthCounts[length - minLength]++;
if (cancel) {
progressCancelled();
return;
}
}
}
// End each read
results[d].addCountsForFeatureIndex(f, lengthCounts);
}
// End each feature instance
}
// End each data store
}
// End each feature
// Finally we make up the data sets we're going to pass back.
progressComplete(results);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class RNAQCCalcualtor method run.
public void run() {
updateProgress("Getting features", 0, 1);
this.geneFeatures = collection.genome().annotationCollection().getFeaturesForType(geneFeatureName);
if (transcriptFeatureName != null) {
this.transcriptFeatures = collection.genome().annotationCollection().getFeaturesForType(transcriptFeatureName);
}
if (rRNAFeatureName != null) {
this.rRNAFeatures = collection.genome().annotationCollection().getFeaturesForType(rRNAFeatureName);
}
updateProgress("Getting merged genes", 0, 1);
// Get the merged set of gene locations
Feature[] mergedGenes = FeatureMerging.getNonOverlappingLocationsForFeatures(geneFeatures, false);
if (cancel) {
progressCancelled();
return;
}
updateProgress("Getting merged transcripts", 0, 1);
// Get the merged set of transcript features
Feature[] mergedTranscripts = null;
if (transcriptFeatureName != null) {
mergedTranscripts = FeatureMerging.getNonOverlappingLocationsForFeatures(transcriptFeatures, true);
}
if (cancel) {
progressCancelled();
return;
}
// Quantitate the genes.
long[] geneBaseCounts = new long[stores.length];
int[] measuredGenesCounts = new int[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over genes", s, stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < mergedGenes.length; f++) {
if (mergedGenes[f].chromosomeName() != lastChr) {
lastChr = mergedGenes[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedGenes[f].location().packedPosition()));
// See if we measured anything for this gene
if (reads.length > 0) {
++measuredGenesCounts[s];
}
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedGenes[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedGenes[f].location().start()));
geneBaseCounts[s] += overlap;
}
}
}
// Quantitate the transcripts
long[] transcriptBaseCounts = null;
long[] transcriptSameStrandBaseCounts = null;
long[] transcriptOpposingStrandBaseCounts = null;
if (transcriptFeatureName != null) {
transcriptBaseCounts = new long[stores.length];
transcriptSameStrandBaseCounts = new long[stores.length];
transcriptOpposingStrandBaseCounts = new long[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over transcripts", s + stores.length, stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < mergedTranscripts.length; f++) {
if (mergedTranscripts[f].chromosomeName() != lastChr) {
lastChr = mergedTranscripts[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, mergedTranscripts[f].location().packedPosition()));
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), mergedTranscripts[f].location().end()) - Math.max(SequenceRead.start(reads[r]), mergedTranscripts[f].location().start()));
transcriptBaseCounts[s] += overlap;
if (SequenceRead.strand(reads[r]) == mergedTranscripts[f].location().strand()) {
transcriptSameStrandBaseCounts[s] += overlap;
} else {
transcriptOpposingStrandBaseCounts[s] += overlap;
}
}
}
}
}
// Quantitate the rRNA
long[] rRNABaseCounts = null;
if (rRNAFeatureName != null) {
rRNABaseCounts = new long[stores.length];
for (int s = 0; s < stores.length; s++) {
updateProgress("Quantitating " + stores[s].name() + " over rRNAs", s + (stores.length * 2), stores.length * 4);
String lastChr = "";
Chromosome lastChrObject = null;
for (int f = 0; f < rRNAFeatures.length; f++) {
if (rRNAFeatures[f].chromosomeName() != lastChr) {
lastChr = rRNAFeatures[f].chromosomeName();
lastChrObject = collection.genome().getExactChromsomeNameMatch(lastChr);
}
long[] reads = stores[s].getReadsForProbe(new Probe(lastChrObject, rRNAFeatures[f].location().packedPosition()));
for (int r = 0; r < reads.length; r++) {
if (cancel) {
progressCancelled();
return;
}
// Get the length of the overlap
int overlap = 1 + (Math.min(SequenceRead.end(reads[r]), rRNAFeatures[f].location().end()) - Math.max(SequenceRead.start(reads[r]), rRNAFeatures[f].location().start()));
rRNABaseCounts[s] += overlap;
}
}
}
}
// Quantitate the chromosomes
long[][] chromosomeBaseCounts = new long[chromosomes.length][stores.length];
for (int s = 0; s < stores.length; s++) {
for (int c = 0; c < chromosomes.length; c++) {
updateProgress("Quantitating " + stores[s].name() + " over " + chromosomes[c].name(), s + (stores.length * 3), stores.length * 4);
ReadsWithCounts reads = stores[s].getReadsForChromosome(chromosomes[c]);
for (int r = 0; r < reads.reads.length; r++) {
chromosomeBaseCounts[c][s] += SequenceRead.length(reads.reads[r]) * reads.counts[r];
}
}
}
// Finally we make up the data sets we're going to pass back.
RNAQCResult result = new RNAQCResult(stores);
double[] percentInGene = new double[stores.length];
for (int i = 0; i < geneBaseCounts.length; i++) {
percentInGene[i] = (geneBaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInGene[i] > 100) {
progressWarning("Percent in gene was >100 for " + stores[i]);
percentInGene[i] = 100;
}
}
result.addPercentageSet("Percent in Gene", percentInGene);
double[] percentInTranscript = null;
if (transcriptFeatureName != null) {
percentInTranscript = new double[stores.length];
for (int i = 0; i < geneBaseCounts.length; i++) {
percentInTranscript[i] = (transcriptBaseCounts[i] / (double) geneBaseCounts[i]) * 100;
if (percentInTranscript[i] > 100) {
progressWarning("Percent in exons was >100 for " + stores[i]);
percentInTranscript[i] = 100;
}
}
result.addPercentageSet("Percent in exons", percentInTranscript);
}
double[] percentInrRNA = null;
if (rRNAFeatureName != null) {
percentInrRNA = new double[stores.length];
for (int i = 0; i < rRNABaseCounts.length; i++) {
percentInrRNA[i] = (rRNABaseCounts[i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInrRNA[i] > 100) {
progressWarning("Percent in rRNA was >100 for " + stores[i]);
percentInrRNA[i] = 100;
}
}
result.addPercentageSet("Percent in rRNA", percentInrRNA);
}
double[] percentageMeasuredGenes = new double[stores.length];
for (int i = 0; i < measuredGenesCounts.length; i++) {
percentageMeasuredGenes[i] = measuredGenesCounts[i] / (double) mergedGenes.length * 100;
}
result.addPercentageSet("Percent Genes Measured", percentageMeasuredGenes);
// Work out the relative coverage
double[] percentageOfMaxCoverage = new double[stores.length];
long maxLength = 0;
for (int i = 0; i < stores.length; i++) {
if (stores[i].getTotalReadLength() > maxLength)
maxLength = stores[i].getTotalReadLength();
}
for (int i = 0; i < stores.length; i++) {
percentageOfMaxCoverage[i] = (stores[i].getTotalReadLength() * 100d) / maxLength;
}
result.addPercentageSet("Percentage of max data size", percentageOfMaxCoverage);
double[][] percentInChromosomes = new double[chromosomes.length][stores.length];
for (int c = 0; c < percentInChromosomes.length; c++) {
for (int i = 0; i < chromosomeBaseCounts[c].length; i++) {
percentInChromosomes[c][i] = (chromosomeBaseCounts[c][i] / (double) stores[i].getTotalReadLength()) * 100;
if (percentInChromosomes[c][i] > 100) {
progressWarning("Percent in " + chromosomes[c] + " was >100 for " + stores[i]);
percentInChromosomes[c][i] = 100;
}
}
}
for (int c = 0; c < percentInChromosomes.length; c++) {
result.addPercentageSet("Percent in " + chromosomes[c].name(), percentInChromosomes[c]);
}
double[] percentOnSenseStrand = null;
if (transcriptFeatureName != null) {
percentOnSenseStrand = new double[stores.length];
for (int i = 0; i < transcriptBaseCounts.length; i++) {
percentOnSenseStrand[i] = (transcriptSameStrandBaseCounts[i] / (double) transcriptBaseCounts[i]) * 100;
if (percentOnSenseStrand[i] > 100) {
progressWarning("Percent on sense strand was >100 for " + stores[i]);
percentOnSenseStrand[i] = 100;
}
}
result.addPercentageSet("Percent on sense strand", percentOnSenseStrand);
}
progressComplete(result);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class GFF3AnnotationParser method parseAnnotation.
public AnnotationSet[] parseAnnotation(File file, Genome genome, String prefix) throws Exception {
System.err.println("Parsing " + file);
if (prefix == null) {
featurePrefix = JOptionPane.showInputDialog(SeqMonkApplication.getInstance(), "Feature prefix", "GFFv3/GTP Options", JOptionPane.QUESTION_MESSAGE);
} else {
featurePrefix = prefix;
}
if (featurePrefix == null)
featurePrefix = "";
Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
AnnotationSet currentAnnotation = new AnnotationSet(genome, file.getName());
annotationSets.add(currentAnnotation);
Hashtable<String, FeatureGroup> groupedFeatures = new Hashtable<String, FeatureGroup>();
BufferedReader br;
if (file.getName().toLowerCase().endsWith(".gz")) {
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file))));
} else {
br = new BufferedReader(new FileReader(file));
}
String line;
int count = 0;
while ((line = br.readLine()) != null) {
if (cancel) {
progressCancelled();
br.close();
return null;
}
if (count % 1000 == 0) {
progressUpdated("Read " + count + " lines from " + file.getName(), 0, 1);
}
if (count > 1000000 && count % 1000000 == 0) {
progressUpdated("Caching...", 0, 1);
currentAnnotation.finalise();
currentAnnotation = new AnnotationSet(genome, file.getName() + "[" + annotationSets.size() + "]");
annotationSets.add(currentAnnotation);
}
++count;
// Ignore blank lines
if (line.trim().length() == 0)
continue;
// Skip comments
if (line.startsWith("#"))
continue;
String[] sections = line.split("\t");
// Check to see if we've got enough data to work with
if (sections.length < 7) {
progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
continue;
}
int strand;
int start;
int end;
try {
start = Integer.parseInt(sections[3]);
end = Integer.parseInt(sections[4]);
// End must always be later than start
if (end < start) {
int temp = start;
start = end;
end = temp;
}
if (sections.length >= 7) {
if (sections[6].equals("+")) {
strand = Location.FORWARD;
} else if (sections[6].equals("-")) {
strand = Location.REVERSE;
} else {
strand = Location.UNKNOWN;
}
} else {
strand = Location.UNKNOWN;
}
} catch (NumberFormatException e) {
progressWarningReceived(new SeqMonkException("Location " + sections[3] + "-" + sections[4] + " was not an integer"));
continue;
}
ChromosomeWithOffset c;
try {
c = genome.getChromosome(sections[0]);
} catch (IllegalArgumentException e) {
progressWarningReceived(new SeqMonkException("Couldn't find a chromosome called " + sections[0]));
continue;
}
start = c.position(start);
end = c.position(end);
// We also don't allow readings which are beyond the end of the chromosome
if (end > c.chromosome().length()) {
int overrun = end - c.chromosome().length();
progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
continue;
}
if (sections.length > 8 && sections[8].trim().length() > 0) {
// Should check for escaped colons
String[] attributes = sections[8].split(" *; *");
// Make up a data structure of the attributes we have
Hashtable<String, Vector<String>> keyValuePairs = new Hashtable<String, Vector<String>>();
for (int a = 0; a < attributes.length; a++) {
// Should check for escaped equals
String[] keyValue = attributes[a].split("=", 2);
// See if we didn't get split
if (keyValue.length == 1) {
// This could be a GTF file which uses quoted values in space delimited fields
keyValue = attributes[a].split(" \"");
if (keyValue.length == 2) {
// We need to remove the quote from the end of the value
keyValue[1] = keyValue[1].substring(0, keyValue[1].length() - 1);
// System.out.println("Key='"+keyValue[0]+"' value='"+keyValue[1]+"'");
}
}
if (keyValue.length == 2) {
if (keyValuePairs.containsKey(keyValue[0])) {
keyValuePairs.get(keyValue[0]).add(keyValue[1]);
} else {
Vector<String> newVector = new Vector<String>();
newVector.add(keyValue[1]);
keyValuePairs.put(keyValue[0], newVector);
}
} else {
progressWarningReceived(new SeqMonkException("No key value delimiter in " + attributes[a]));
}
}
if (keyValuePairs.containsKey("Parent") && !sections[2].equals("mRNA")) {
// We change exons to mRNA so we don't end up with spliced exon objects
if (sections[2].equals("exon"))
sections[2] = "mRNA";
String[] parents = keyValuePairs.get("Parent").elementAt(0).split(",");
for (int p = 0; p < parents.length; p++) {
if (!groupedFeatures.containsKey(sections[2] + "_" + parents[p])) {
// Make a new feature to which we can add this
Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
groupedFeatures.put(sections[2] + "_" + parents[p], new FeatureGroup(feature, strand, feature.location()));
Enumeration<String> en = keyValuePairs.keys();
while (en.hasMoreElements()) {
String key = en.nextElement();
String[] values = keyValuePairs.get(key).toArray(new String[0]);
for (int v = 0; v < values.length; v++) {
feature.addAttribute(key, values[v]);
}
}
}
groupedFeatures.get(sections[2] + "_" + parents[p]).addSublocation(new Location(start, end, strand));
}
} else // parent feature
if (keyValuePairs.containsKey("transcript_id")) {
if (sections[2].equals("exon"))
sections[2] = "mRNA";
if (!groupedFeatures.containsKey(sections[2] + "_" + keyValuePairs.get("transcript_id").elementAt(0))) {
Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
Enumeration<String> en = keyValuePairs.keys();
while (en.hasMoreElements()) {
String key = en.nextElement();
String[] values = keyValuePairs.get(key).toArray(new String[0]);
for (int v = 0; v < values.length; v++) {
feature.addAttribute(key, values[v]);
}
}
groupedFeatures.put(sections[2] + "_" + keyValuePairs.get("transcript_id").elementAt(0), new FeatureGroup(feature, strand, feature.location()));
}
groupedFeatures.get(sections[2] + "_" + keyValuePairs.get("transcript_id").elementAt(0)).addSublocation(new Location(start, end, strand));
} else {
// If we get here we're making a feature with attributes
Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
feature.setLocation(new Location(start, end, strand));
Enumeration<String> en = keyValuePairs.keys();
while (en.hasMoreElements()) {
String key = en.nextElement();
String[] values = keyValuePairs.get(key).toArray(new String[0]);
for (int v = 0; v < values.length; v++) {
feature.addAttribute(key, values[v]);
}
}
if (keyValuePairs.containsKey("ID")) {
// This is a feature which may end up having subfeatures
groupedFeatures.put(sections[2] + "_" + keyValuePairs.get("ID").elementAt(0), new FeatureGroup(feature, strand, feature.location()));
// System.out.println("Making new entry for "+keyValuePairs.get("ID").elementAt(0));
} else {
// We can just add this to the annotation collection
currentAnnotation.addFeature(feature);
}
}
} else {
// No group parameter to worry about
Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
feature.setLocation(new Location(start, end, strand));
currentAnnotation.addFeature(feature);
}
}
br.close();
// Now go through the grouped features adding them to the annotation set
Iterator<FeatureGroup> i = groupedFeatures.values().iterator();
while (i.hasNext()) {
Feature f = i.next().feature();
currentAnnotation.addFeature(f);
}
return annotationSets.toArray(new AnnotationSet[0]);
}
Aggregations