use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FeatureMerging method getNonOverlappingLocationsForFeatures.
/**
 * Collapses a set of features into merged locations, working on one
 * chromosome at a time.
 *
 * @param features the features to merge
 * @param useSubFeatures passed through unchanged to
 *        getNonOverlappingFeaturesWithinChromosome
 * @return one new feature of type "merged_location" for every location
 *         returned by the per-chromosome merge
 */
public static Feature[] getNonOverlappingLocationsForFeatures(Feature[] features, boolean useSubFeatures) {
    // Bucket the incoming features by the name of the chromosome they sit on
    Hashtable<String, Vector<Feature>> featuresByChromosome = new Hashtable<String, Vector<Feature>>();
    for (Feature feature : features) {
        String chromosomeName = feature.chromosomeName();
        Vector<Feature> bucket = featuresByChromosome.get(chromosomeName);
        if (bucket == null) {
            bucket = new Vector<Feature>();
            featuresByChromosome.put(chromosomeName, bucket);
        }
        bucket.add(feature);
    }

    // Merge each bucket separately and wrap every resulting location in a new feature
    Vector<Feature> merged = new Vector<Feature>();
    for (Enumeration<String> names = featuresByChromosome.keys(); names.hasMoreElements();) {
        String chromosomeName = names.nextElement();
        Feature[] onThisChromosome = featuresByChromosome.get(chromosomeName).toArray(new Feature[0]);
        for (Location location : getNonOverlappingFeaturesWithinChromosome(onThisChromosome, useSubFeatures)) {
            Feature mergedFeature = new Feature("merged_location", chromosomeName);
            mergedFeature.setLocation(location);
            merged.add(mergedFeature);
        }
    }
    return merged.toArray(new Feature[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class SeqMonkParser method parseAnnotation.
/**
 * Parses an external set of annotations.
 *
 * The header line supplies the annotation set name and the number of
 * feature lines which follow. Each feature line is tab delimited and holds
 * the feature type, the chromosome name and the location string, optionally
 * followed by tag/value pairs. Features whose chromosome can't be found in
 * the current genome are reported to the listeners as warnings and skipped.
 *
 * @param sections The tab split initial annotation line
 * @return the finalised annotation set
 * @throws SeqMonkException if the header line doesn't have 3 sections, if
 *         the data ends before all of the promised features have been read,
 *         or if a feature line has fewer than 3 sections
 * @throws IOException Signals that an I/O exception has occurred.
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }
        // readLine() returns null at the end of the stream, so a truncated
        // file has to be reported rather than dereferenced.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of data whilst parsing annotation in " + set.name() + " (read " + i + " of " + featureCount + " features)");
        }
        sections = line.split("\\t");
        // Type, chromosome and location are all required below
        if (sections.length < 3) {
            throw new SeqMonkException("Annotation feature line didn't contain at least 3 sections");
        }
        Chromosome c;
        try {
            c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        } catch (Exception sme) {
            // An unknown chromosome isn't fatal - warn everyone and move on
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(new SeqMonkException("Annotation feature could not be mapped to chromosome '" + sections[1] + "'"));
            }
            continue;
        }
        Feature f = new Feature(sections[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(sections[2]));
        // Anything after the location comes as tag/value pairs; an unpaired
        // trailing tag is ignored.
        for (int a = 3; a + 1 < sections.length; a += 2) {
            f.addAttribute(sections[a], sections[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class VisibleStoresParser method processNormalDataStore.
/**
 * Re-imports the reads held in a single data store into a new DataSet,
 * applying the filters and position adjustments configured on this parser.
 *
 * Every chromosome returned by the genome is processed in turn, and each
 * stored read is handled once per unit of its associated count. A read is
 * dropped if it is rejected by downsampling or by the strand, length or
 * feature filters, or if its adjusted position falls outside the
 * chromosome (the latter also raises a progress warning).
 *
 * @param store the data store whose reads are to be reprocessed
 * @return the new data set, or null if the operation was cancelled
 */
private DataSet processNormalDataStore(DataStore store) {
// Read-level adjustments taken from the import preferences
int extendBy = prefs.extendReads();
boolean reverse = prefs.reverseReads();
boolean removeStrand = prefs.removeStrandInfo();
DataSet newData = new DataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates());
// Now process the data
Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
ReadsWithCounts reads = store.getReadsForChromosome(chrs[c]);
// Features are only fetched (and sorted) when filtering by feature,
// otherwise this stays null and is never dereferenced.
Feature[] features = null;
if (filterByFeature) {
features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
Arrays.sort(features);
}
// Index of the feature currently being compared against. It only ever
// moves forwards within a chromosome.
// NOTE(review): this presumably relies on the reads being supplied in
// ascending position order - confirm against ReadsWithCounts.
int currentFeaturePostion = 0;
for (int r = 0; r < reads.reads.length; r++) {
// Each stored read is processed once for every copy recorded in counts
for (int ct = 0; ct < reads.counts[r]; ct++) {
long thisRead = reads.reads[r];
if (cancel) {
progressCancelled();
return null;
}
// Randomly discard copies so that each one is kept with a
// probability of downsampleProbabilty
if (downsample && downsampleProbabilty < 1) {
if (Math.random() > downsampleProbabilty) {
continue;
}
}
long read;
int start = SequenceRead.start(thisRead);
int end = SequenceRead.end(thisRead);
int strand = SequenceRead.strand(thisRead);
// Strand filtering uses the strand as originally stored
if (filterByStrand) {
if (strand == Location.FORWARD && !keepForward)
continue;
if (strand == Location.REVERSE && !keepReverse)
continue;
if (strand == Location.UNKNOWN && !keepUnknown)
continue;
}
// Length filtering; a null limit means that bound isn't applied
if (filterByLength) {
int length = SequenceRead.length(thisRead);
if (minLength != null && length < minLength)
continue;
if (maxLength != null && length > maxLength)
continue;
}
// Shift the read by the strand specific offset. Reads of unknown
// strand are not shifted.
if (strand == Location.FORWARD) {
start += forwardOffset;
end += forwardOffset;
}
if (strand == Location.REVERSE) {
start -= reverseOffset;
end -= reverseOffset;
}
// With no features on this chromosome nothing can overlap, so when
// we're keeping only overlapping reads every read is rejected.
if (filterByFeature && features.length == 0 && !excludeFeature)
continue;
// Note that the feature tests below use the original packed read
// (thisRead) and not the offset adjusted start/end.
if (filterByFeature && features.length > 0) {
// See if we're comparing against the right feature
while (SequenceRead.start(thisRead) > features[currentFeaturePostion].location().end() && currentFeaturePostion < (features.length - 1)) {
currentFeaturePostion++;
}
// Test to see if we overlap
if (SequenceRead.overlaps(thisRead, features[currentFeaturePostion].location().packedPosition())) {
if (excludeFeature)
continue;
} else {
if (!excludeFeature)
continue;
}
}
// Swap forward and reverse strands; unknown is left as it is
if (reverse) {
if (strand == Location.FORWARD) {
strand = Location.REVERSE;
} else if (strand == Location.REVERSE) {
strand = Location.FORWARD;
}
}
if (removeStrand) {
strand = Location.UNKNOWN;
}
// Replace the read with a window of centreExtractContext positions
// either side of its (integer) midpoint
if (extractCentres) {
int centre = start + ((end - start) / 2);
start = centre - centreExtractContext;
end = centre + centreExtractContext;
}
if (extendBy != 0) {
// We now allow negative extensions to shorten reads
// Forward and unknown reads are extended at their end, reverse
// reads at their start. A read can't shrink past a single position.
if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
end += extendBy;
if (end < start)
end = start;
} else if (strand == Location.REVERSE) {
start -= extendBy;
if (start > end)
start = end;
}
}
// We don't allow reads before the start of the chromosome
if (start < 1) {
int overrun = (0 - start) + 1;
progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// We also don't allow readings which are beyond the end of the chromosome
if (end > chrs[c].length()) {
int overrun = end - chrs[c].length();
progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
continue;
}
// We can now make the new reading
// A failure to pack or add the read is reported as a warning and
// only that read is skipped.
try {
read = SequenceRead.packPosition(start, end, strand);
if (!prefs.isHiC()) {
// HiC additions are deferred until we know the other end is OK too.
// NOTE(review): nothing in this method adds the read in the HiC
// case - confirm where that is expected to happen.
newData.addData(chrs[c], read);
}
} catch (SeqMonkException e) {
progressWarningReceived(e);
continue;
}
}
}
}
return newData;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FeatureListViewer method mouseClicked.
/* (non-Javadoc)
 * @see java.awt.event.MouseListener#mouseClicked(java.awt.event.MouseEvent)
 *
 * On a double click, moves the display to the feature held in the first
 * column of the selected table row. The strand of the new location is
 * always set to unknown.
 */
public void mouseClicked(MouseEvent me) {
    // We're only interested in double clicks
    if (me.getClickCount() != 2) {
        return;
    }
    // This is only linked from the report JTable
    JTable t = (JTable) me.getSource();
    int r = t.getSelectedRow();
    // getSelectedRow() returns -1 when no row is selected, and asking the
    // table for a value in row -1 would throw.
    if (r < 0) {
        return;
    }
    Feature f = (Feature) t.getValueAt(r, 0);
    DisplayPreferences.getInstance().setLocation(
            application.dataCollection().genome().getChromosome(f.chromosomeName()).chromosome(),
            SequenceRead.packPosition(f.location().start(), f.location().end(), Location.UNKNOWN));
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FindFeaturesByNameDialog method actionPerformed.
/* (non-Javadoc)
 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
 *
 * Handles the "close", "search", "save_annotation_all" and
 * "save_annotation_selected" commands. Any other command is ignored.
 */
public void actionPerformed(ActionEvent ae) {
    // Comparing from the literal side means a null command is simply ignored
    String command = ae.getActionCommand();
    if ("close".equals(command)) {
        setVisible(false);
        dispose();
    } else if ("search".equals(command)) {
        // The search itself runs in this object's run() method on a new thread
        Thread t = new Thread(this);
        t.start();
    } else if ("save_annotation_all".equals(command)) {
        saveHitsAsAnnotationTrack(lastHits, "Name matched features");
    } else if ("save_annotation_selected".equals(command)) {
        Feature[] selectedHits = viewer.getSelectedFeatures();
        if (selectedHits.length == 0) {
            JOptionPane.showMessageDialog(this, "There are no selected features from which to make a track", "Can't make track", JOptionPane.INFORMATION_MESSAGE);
            return;
        }
        saveHitsAsAnnotationTrack(selectedHits, "Selected name matched features");
    }
}

/**
 * Asks the user for a feature type and then copies the supplied hits into
 * a new annotation set which is added to the genome's annotation collection.
 * Nothing is created if the user cancels the prompt.
 *
 * @param hits the features to copy into the new track
 * @param suggestedName the initial value offered in the feature type prompt
 */
private void saveHitsAsAnnotationTrack(Feature[] hits, String suggestedName) {
    // Find a name for the type of feature they want to create
    String name = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, suggestedName);
    // They cancelled
    if (name == null) {
        return;
    }
    // Now we can go ahead and make the new annotation set
    AnnotationSet searchAnnotations = new AnnotationSet(dataCollection.genome(), "Named features search results");
    for (int f = 0; f < hits.length; f++) {
        // Each hit is copied under the new type, keeping its location and tags
        Feature feature = new Feature(name, hits[f].chromosomeName());
        feature.setLocation(hits[f].location());
        AnnotationTagValue[] tags = hits[f].getAnnotationTagValues();
        for (int t = 0; t < tags.length; t++) {
            feature.addAttribute(tags[t].tag(), tags[t].value());
        }
        searchAnnotations.addFeature(feature);
    }
    dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { searchAnnotations });
}
Aggregations