use of uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet in project SeqMonk by s-andrews.
the class GFF3AnnotationParser method parseAnnotation.
/**
 * Parses a tab-delimited GFF3 / GTF file into one or more annotation sets.
 *
 * <p>Each non-blank, non-comment line needs at least 7 tab-separated columns:
 * column 0 is the chromosome name, column 2 the feature type, columns 3 and 4
 * the start and end positions, column 6 the strand and (optionally) column 8
 * the attribute list. Lines which cannot be used (too few columns, non-integer
 * positions, unknown chromosome, end beyond the chromosome length) are reported
 * through {@code progressWarningReceived} and skipped.
 *
 * <p>Features which declare a {@code Parent}, a {@code transcript_id} or an
 * {@code ID} attribute are collected into feature groups keyed on
 * {@code type_identifier}; these groups are only added to the last annotation
 * set once the whole file has been read. All other features are added to the
 * current annotation set immediately.
 *
 * <p>Once more than a million lines have been read, every further million
 * lines finalises the current annotation set and starts a new one.
 *
 * @param file   the annotation file to read; names ending in ".gz" are read
 *               through a {@link GZIPInputStream}
 * @param genome the genome used to resolve chromosome names and to build the
 *               annotation sets
 * @param prefix text prepended to every feature type; if null the user is
 *               prompted for one (and an empty prefix is used if they cancel)
 * @return the annotation sets which were built, or null if parsing was
 *         cancelled
 * @throws Exception if the file cannot be opened or read
 */
public AnnotationSet[] parseAnnotation(File file, Genome genome, String prefix) throws Exception {
    System.err.println("Parsing " + file);
    if (prefix == null) {
        featurePrefix = JOptionPane.showInputDialog(SeqMonkApplication.getInstance(), "Feature prefix", "GFFv3/GTP Options", JOptionPane.QUESTION_MESSAGE);
    } else {
        featurePrefix = prefix;
    }
    if (featurePrefix == null) {
        featurePrefix = "";
    }
    Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
    AnnotationSet currentAnnotation = new AnnotationSet(genome, file.getName());
    annotationSets.add(currentAnnotation);
    Hashtable<String, FeatureGroup> groupedFeatures = new Hashtable<String, FeatureGroup>();
    BufferedReader br;
    if (file.getName().toLowerCase().endsWith(".gz")) {
        br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file))));
    } else {
        br = new BufferedReader(new FileReader(file));
    }
    // The reader is closed in a finally block so that the file handle is
    // released on every exit path: normal completion, cancellation and any
    // exception thrown part way through the file.
    try {
        String line;
        int count = 0;
        while ((line = br.readLine()) != null) {
            if (cancel) {
                progressCancelled();
                return null;
            }
            if (count % 1000 == 0) {
                progressUpdated("Read " + count + " lines from " + file.getName(), 0, 1);
            }
            if (count > 1000000 && count % 1000000 == 0) {
                // Start a fresh annotation set so the previous one can be finalised
                progressUpdated("Caching...", 0, 1);
                currentAnnotation.finalise();
                currentAnnotation = new AnnotationSet(genome, file.getName() + "[" + annotationSets.size() + "]");
                annotationSets.add(currentAnnotation);
            }
            ++count;
            // Ignore blank lines
            if (line.trim().length() == 0) {
                continue;
            }
            // Skip comments
            if (line.startsWith("#")) {
                continue;
            }
            String[] sections = line.split("\t");
            // Check to see if we've got enough data to work with
            if (sections.length < 7) {
                progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
                continue;
            }
            int start;
            int end;
            try {
                start = Integer.parseInt(sections[3]);
                end = Integer.parseInt(sections[4]);
            } catch (NumberFormatException e) {
                progressWarningReceived(new SeqMonkException("Location " + sections[3] + "-" + sections[4] + " was not an integer"));
                continue;
            }
            // End must always be later than start
            if (end < start) {
                int temp = start;
                start = end;
                end = temp;
            }
            // The strand column is guaranteed to exist by the length check above
            int strand;
            if (sections[6].equals("+")) {
                strand = Location.FORWARD;
            } else if (sections[6].equals("-")) {
                strand = Location.REVERSE;
            } else {
                strand = Location.UNKNOWN;
            }
            ChromosomeWithOffset c;
            try {
                c = genome.getChromosome(sections[0]);
            } catch (IllegalArgumentException e) {
                progressWarningReceived(new SeqMonkException("Couldn't find a chromosome called " + sections[0]));
                continue;
            }
            // Translate the file positions through the chromosome's position mapping
            start = c.position(start);
            end = c.position(end);
            // We also don't allow readings which are beyond the end of the chromosome
            if (end > c.chromosome().length()) {
                int overrun = end - c.chromosome().length();
                progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
                continue;
            }
            if (sections.length > 8 && sections[8].trim().length() > 0) {
                // Should check for escaped colons
                String[] attributes = sections[8].split(" *; *");
                // Make up a data structure of the attributes we have
                Hashtable<String, Vector<String>> keyValuePairs = new Hashtable<String, Vector<String>>();
                for (int a = 0; a < attributes.length; a++) {
                    // Should check for escaped equals
                    String[] keyValue = attributes[a].split("=", 2);
                    // See if we didn't get split
                    if (keyValue.length == 1) {
                        // This could be a GTF file which uses quoted values in space delimited fields
                        keyValue = attributes[a].split(" \"");
                        if (keyValue.length == 2) {
                            // We need to remove the quote from the end of the value
                            keyValue[1] = keyValue[1].substring(0, keyValue[1].length() - 1);
                        }
                    }
                    if (keyValue.length == 2) {
                        if (keyValuePairs.containsKey(keyValue[0])) {
                            keyValuePairs.get(keyValue[0]).add(keyValue[1]);
                        } else {
                            Vector<String> newVector = new Vector<String>();
                            newVector.add(keyValue[1]);
                            keyValuePairs.put(keyValue[0], newVector);
                        }
                    } else {
                        progressWarningReceived(new SeqMonkException("No key value delimiter in " + attributes[a]));
                    }
                }
                if (keyValuePairs.containsKey("Parent") && !sections[2].equals("mRNA")) {
                    // We change exons to mRNA so we don't end up with spliced exon objects
                    if (sections[2].equals("exon")) {
                        sections[2] = "mRNA";
                    }
                    String[] parents = keyValuePairs.get("Parent").elementAt(0).split(",");
                    for (int p = 0; p < parents.length; p++) {
                        String groupKey = sections[2] + "_" + parents[p];
                        if (!groupedFeatures.containsKey(groupKey)) {
                            // Make a new feature to which we can add this
                            Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                            groupedFeatures.put(groupKey, new FeatureGroup(feature, strand, feature.location()));
                            copyAttributesToFeature(keyValuePairs, feature);
                        }
                        groupedFeatures.get(groupKey).addSublocation(new Location(start, end, strand));
                    }
                } else if (keyValuePairs.containsKey("transcript_id")) {
                    // GTF style grouping: sub-features share a transcript_id
                    if (sections[2].equals("exon")) {
                        sections[2] = "mRNA";
                    }
                    String groupKey = sections[2] + "_" + keyValuePairs.get("transcript_id").elementAt(0);
                    if (!groupedFeatures.containsKey(groupKey)) {
                        Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                        copyAttributesToFeature(keyValuePairs, feature);
                        groupedFeatures.put(groupKey, new FeatureGroup(feature, strand, feature.location()));
                    }
                    groupedFeatures.get(groupKey).addSublocation(new Location(start, end, strand));
                } else {
                    // If we get here we're making a feature with attributes
                    Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                    feature.setLocation(new Location(start, end, strand));
                    copyAttributesToFeature(keyValuePairs, feature);
                    if (keyValuePairs.containsKey("ID")) {
                        // This is a feature which may end up having subfeatures
                        groupedFeatures.put(sections[2] + "_" + keyValuePairs.get("ID").elementAt(0), new FeatureGroup(feature, strand, feature.location()));
                    } else {
                        // We can just add this to the annotation collection
                        currentAnnotation.addFeature(feature);
                    }
                }
            } else {
                // No group parameter to worry about
                Feature feature = new Feature(featurePrefix + sections[2], c.chromosome().name());
                feature.setLocation(new Location(start, end, strand));
                currentAnnotation.addFeature(feature);
            }
        }
    } finally {
        br.close();
    }
    // Now go through the grouped features adding them to the annotation set
    Iterator<FeatureGroup> i = groupedFeatures.values().iterator();
    while (i.hasNext()) {
        Feature f = i.next().feature();
        currentAnnotation.addFeature(f);
    }
    return annotationSets.toArray(new AnnotationSet[0]);
}

/**
 * Adds every key/value pair parsed from an attribute column to a feature.
 * A key which occurred several times contributes one attribute per value.
 */
private void copyAttributesToFeature(Hashtable<String, Vector<String>> keyValuePairs, Feature feature) {
    Enumeration<String> keys = keyValuePairs.keys();
    while (keys.hasMoreElements()) {
        String key = keys.nextElement();
        Vector<String> values = keyValuePairs.get(key);
        for (int v = 0; v < values.size(); v++) {
            feature.addAttribute(key, values.elementAt(v));
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet in project SeqMonk by s-andrews.
the class GenomeParser method parseGenomeFiles.
/**
 * Loads the chromosome definitions and all core annotation for a genome
 * from a genome folder.
 *
 * <p>The chromosome list file is parsed first, then every {@code .dat} file
 * in the folder is passed to {@code processEMBLFile}, and finally every
 * GFF/GTF file (optionally gzipped) is parsed and its features merged into
 * the same core annotation set. The core annotation set is only added to the
 * genome's annotation collection if every step succeeds; on the first
 * exception the listeners are told about it and loading stops.
 *
 * @param genome       the genome being populated
 * @param baseLocation the folder holding the genome's files
 */
private void parseGenomeFiles(SingleGenome genome, File baseLocation) {
    // Start with the chromosome list file
    // which defines the size and extent of the chromosomes
    try {
        parseChrListFile(genome, baseLocation);
    } catch (Exception ex) {
        reportExceptionToListeners(ex);
        return;
    }
    // We need a list of all of the .dat files inside the baseLocation
    File[] files = baseLocation.listFiles(new FileFilter() {
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".dat");
        }
    });
    // listFiles returns null if the location isn't a directory or can't be read
    if (files == null) {
        files = new File[0];
    }
    AnnotationSet coreAnnotation = new CoreAnnotationSet(genome);
    for (int i = 0; i < files.length; i++) {
        reportProgressToListeners("Loading Genome File " + files[i].getName(), i, files.length);
        try {
            processEMBLFile(files[i], coreAnnotation, genome);
        } catch (Exception ex) {
            reportExceptionToListeners(ex);
            return;
        }
    }
    reportProgressToListeners("Caching annotation data", 1, 1);
    // Now do the same thing for gff files.
    // We need a list of all of the .gff/gtf files inside the baseLocation
    files = baseLocation.listFiles(new FileFilter() {
        public boolean accept(File f) {
            String name = f.getName().toLowerCase();
            return name.endsWith(".gff") || name.endsWith(".gtf") || name.endsWith(".gff3") || name.endsWith(".gff.gz") || name.endsWith(".gtf.gz") || name.endsWith(".gff3.gz");
        }
    });
    if (files == null) {
        files = new File[0];
    }
    GFF3AnnotationParser gffParser = new GFF3AnnotationParser(genome);
    for (int i = 0; i < files.length; i++) {
        reportProgressToListeners("Loading Genome File " + files[i].getName(), i, files.length);
        try {
            // An empty prefix stops the parser prompting the user for one
            AnnotationSet[] newSets = gffParser.parseAnnotation(files[i], genome, "");
            // The parser returns null rather than an empty array if it was cancelled
            if (newSets == null) {
                continue;
            }
            for (int s = 0; s < newSets.length; s++) {
                Feature[] features = newSets[s].getAllFeatures();
                for (int f = 0; f < features.length; f++) {
                    coreAnnotation.addFeature(features[f]);
                }
            }
        } catch (Exception ex) {
            reportExceptionToListeners(ex);
            return;
        }
    }
    reportProgressToListeners("Caching annotation data", 1, 1);
    genome.annotationCollection().addAnnotationSets(new AnnotationSet[] { coreAnnotation });
}

/** Passes a progress message on to every registered progress listener. */
private void reportProgressToListeners(String message, int current, int total) {
    Enumeration<ProgressListener> en = listeners.elements();
    while (en.hasMoreElements()) {
        en.nextElement().progressUpdated(message, current, total);
    }
}

/** Passes an exception on to every registered progress listener. */
private void reportExceptionToListeners(Exception ex) {
    Enumeration<ProgressListener> en = listeners.elements();
    while (en.hasMoreElements()) {
        en.nextElement().progressExceptionReceived(ex);
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet in project SeqMonk by s-andrews.
the class ProbeListAnnotationParser method parseAnnotation.
/**
 * Converts every probe in the probe list into an annotation feature.
 *
 * <p>Each probe becomes one feature of type {@code featureType} at the
 * probe's chromosome, start, end and strand. Probes with a defined name
 * carry it over as a "name" attribute. Once more than a million probes
 * have been converted, every further million finalises the current
 * annotation set and starts a new one.
 *
 * @param file   not used by this implementation; the probes come from the
 *               probe list held by this parser
 * @param genome the genome the annotation sets are built against
 * @return the annotation sets holding the converted probes
 * @throws Exception declared by the overridden method
 * @see uk.ac.babraham.SeqMonk.AnnotationParsers.AnnotationParser#parseAnnotation(java.io.File, uk.ac.babraham.SeqMonk.DataTypes.Genome.Genome)
 */
protected AnnotationSet[] parseAnnotation(File file, Genome genome) throws Exception {
    Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
    AnnotationSet currentAnnotation = new AnnotationSet(genome, probeList.name());
    annotationSets.add(currentAnnotation);
    Probe[] probes = probeList.getAllProbes();
    // Report progress roughly once per percent. The "1 +" keeps the interval
    // non-zero for lists of fewer than 100 probes. The original expression
    // "p % 1 + (probes.length / 100) == 0" parsed as (p % 1) + (...), so it
    // either fired on every probe or never fired at all.
    int progressInterval = 1 + (probes.length / 100);
    for (int p = 0; p < probes.length; p++) {
        if (p % progressInterval == 0) {
            progressUpdated("Converted " + p + " probes", p, probes.length);
        }
        if (p > 1000000 && p % 1000000 == 0) {
            // Start a fresh annotation set so the previous one can be finalised
            progressUpdated("Caching...", 0, 1);
            currentAnnotation.finalise();
            currentAnnotation = new AnnotationSet(genome, probeList.name() + "[" + annotationSets.size() + "]");
            annotationSets.add(currentAnnotation);
        }
        Feature feature = new Feature(featureType, probes[p].chromosome().name());
        if (probes[p].hasDefinedName()) {
            feature.addAttribute("name", probes[p].name());
        }
        feature.setLocation(new Location(probes[p].start(), probes[p].end(), probes[p].strand()));
        currentAnnotation.addFeature(feature);
    }
    return annotationSets.toArray(new AnnotationSet[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet in project SeqMonk by s-andrews.
the class DataTreeRenderer method getTreeCellRendererComponent.
/**
 * Builds the component used to draw one node of the data tree.
 *
 * <p>Data sets, data groups, replicate sets, probe lists and annotation sets
 * each get a left-aligned label showing the value's string form next to the
 * icon for that type. For the first three types the text is prefixed with
 * "[HiC] " when the value is a HiC data store which reports itself as valid
 * HiC. Selected nodes are drawn on an opaque light grey background. Any other
 * value is handed to the default renderer.
 *
 * @see javax.swing.tree.DefaultTreeCellRenderer#getTreeCellRendererComponent(javax.swing.JTree, java.lang.Object, boolean, boolean, boolean, int, boolean)
 */
public Component getTreeCellRendererComponent(JTree tree, Object value, boolean selected, boolean expanded, boolean leaf, int row, boolean hasFocus) {
    final JLabel nodeLabel;
    // Only the data store types can carry the HiC marker
    final boolean canBeHiC;

    if (value instanceof DataSet) {
        nodeLabel = new JLabel(value.toString(), dataSetIcon, JLabel.LEFT);
        canBeHiC = true;
    } else if (value instanceof DataGroup) {
        nodeLabel = new JLabel(value.toString(), dataGroupIcon, JLabel.LEFT);
        canBeHiC = true;
    } else if (value instanceof ReplicateSet) {
        nodeLabel = new JLabel(value.toString(), replicateSetIcon, JLabel.LEFT);
        canBeHiC = true;
    } else if (value instanceof ProbeList) {
        nodeLabel = new JLabel(value.toString(), probeListIcon, JLabel.LEFT);
        canBeHiC = false;
    } else if (value instanceof AnnotationSet) {
        nodeLabel = new JLabel(value.toString(), annotationSetIcon, JLabel.LEFT);
        canBeHiC = false;
    } else {
        // Not one of our types, so let the default renderer deal with it
        return super.getTreeCellRendererComponent(tree, value, selected, expanded, leaf, row, hasFocus);
    }

    if (canBeHiC && value instanceof HiCDataStore && ((HiCDataStore) value).isValidHiC()) {
        nodeLabel.setText("[HiC] " + nodeLabel.getText());
    }

    if (selected) {
        // Labels are transparent by default so the background wouldn't show
        nodeLabel.setOpaque(true);
        nodeLabel.setBackground(Color.LIGHT_GRAY);
    }

    return nodeLabel;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet in project SeqMonk by s-andrews.
the class FindFeatureDialog method actionPerformed.
/**
 * Responds to the dialog's action commands.
 *
 * <ul>
 * <li>"close" hides and disposes of the dialog.</li>
 * <li>"search" starts this object running in a new thread.</li>
 * <li>"save_annotation_all" asks for a feature type, then copies every one
 * of the last search hits into a new annotation set.</li>
 * <li>"save_annotation_selected" does the same for the features currently
 * selected in the viewer, after checking that there are some.</li>
 * </ul>
 *
 * Cancelling the feature type prompt abandons the save. Unrecognised
 * commands are ignored.
 *
 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
 */
public void actionPerformed(ActionEvent ae) {
    final String command = ae.getActionCommand();

    if (command.equals("close")) {
        setVisible(false);
        dispose();
        return;
    }

    if (command.equals("search")) {
        new Thread(this).start();
        return;
    }

    if (command.equals("save_annotation_all")) {
        // Ask what type the new features should be given
        String typeName = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, search.getText() + " " + featureType.getSelectedItem() + " search");
        if (typeName == null) {
            // They cancelled
            return;
        }

        // Copy each hit, with all of its tags, into a new annotation set
        AnnotationSet track = new AnnotationSet(dataCollection.genome(), search.getText() + " " + featureType.getSelectedItem() + " search");
        for (int hit = 0; hit < lastHits.length; hit++) {
            Feature copy = new Feature(typeName, lastHits[hit].chromosomeName());
            copy.setLocation(lastHits[hit].location());
            AnnotationTagValue[] tagValues = lastHits[hit].getAnnotationTagValues();
            for (int tv = 0; tv < tagValues.length; tv++) {
                copy.addAttribute(tagValues[tv].tag(), tagValues[tv].value());
            }
            track.addFeature(copy);
        }
        dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { track });
        return;
    }

    if (command.equals("save_annotation_selected")) {
        Feature[] chosen = viewer.getSelectedFeatures();
        if (chosen.length == 0) {
            JOptionPane.showMessageDialog(this, "There are no selected features from which to make a track", "Can't make track", JOptionPane.INFORMATION_MESSAGE);
            return;
        }

        // Ask what type the new features should be given
        String typeName = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, "selected " + search.getText());
        if (typeName == null) {
            // They cancelled
            return;
        }

        // Copy each selected feature, with all of its tags, into a new annotation set
        AnnotationSet track = new AnnotationSet(dataCollection.genome(), search.getText() + " search results");
        for (int hit = 0; hit < chosen.length; hit++) {
            Feature copy = new Feature(typeName, chosen[hit].chromosomeName());
            copy.setLocation(chosen[hit].location());
            AnnotationTagValue[] tagValues = chosen[hit].getAnnotationTagValues();
            for (int tv = 0; tv < tagValues.length; tv++) {
                copy.addAttribute(tagValues[tv].tag(), tagValues[tv].value());
            }
            track.addFeature(copy);
        }
        dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { track });
    }
}
Aggregations