Search in sources :

Example 21 with Feature

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.

the class FeatureMerging method getNonOverlappingLocationsForFeatures.

/**
 * Merges a set of features into a new set of "merged_location" features,
 * processing each chromosome independently.
 *
 * The features are grouped by chromosome name and each group is handed to
 * getNonOverlappingFeaturesWithinChromosome. Every location which comes back
 * is wrapped in a new feature on the same chromosome.
 *
 * @param features The features to merge
 * @param useSubFeatures Passed through unchanged to getNonOverlappingFeaturesWithinChromosome
 * @return One new feature per location returned across all chromosomes
 */
public static Feature[] getNonOverlappingLocationsForFeatures(Feature[] features, boolean useSubFeatures) {
    // Group the incoming features by the name of the chromosome they are on
    Hashtable<String, Vector<Feature>> featuresByChromosome = new Hashtable<String, Vector<Feature>>();
    for (Feature feature : features) {
        String chromosomeName = feature.chromosomeName();
        Vector<Feature> group = featuresByChromosome.get(chromosomeName);
        if (group == null) {
            group = new Vector<Feature>();
            featuresByChromosome.put(chromosomeName, group);
        }
        group.add(feature);
    }

    // Merge within each chromosome and wrap every resulting location in a feature
    Vector<Feature> mergedFeatures = new Vector<Feature>();
    for (Enumeration<String> names = featuresByChromosome.keys(); names.hasMoreElements(); ) {
        String chromosomeName = names.nextElement();
        Feature[] chromosomeFeatures = featuresByChromosome.get(chromosomeName).toArray(new Feature[0]);
        Location[] mergedLocations = getNonOverlappingFeaturesWithinChromosome(chromosomeFeatures, useSubFeatures);
        for (Location mergedLocation : mergedLocations) {
            Feature merged = new Feature("merged_location", chromosomeName);
            merged.setLocation(mergedLocation);
            mergedFeatures.add(merged);
        }
    }
    return mergedFeatures.toArray(new Feature[0]);
}
Also used : Hashtable(java.util.Hashtable) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 22 with Feature

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.

the class SeqMonkParser method parseAnnotation.

/**
 * Parses an external set of annotations.
 *
 * The header line supplies the name of the set (second section) and the
 * number of feature lines which follow it (third section). Each feature line
 * is tab delimited and is read as: feature type, chromosome name, location
 * string, then any number of tag/value pairs. A trailing unpaired tag is
 * ignored.
 *
 * Feature lines which have fewer than 3 sections, or whose chromosome can't
 * be found in the current genome, are reported to the progress listeners as
 * warnings and skipped rather than aborting the whole parse.
 *
 * @param sections The tab split initial annotation line
 * @return The finalised annotation set
 * @throws SeqMonkException If the header doesn't have exactly 3 sections, or
 *         the data ends before the declared number of features has been read
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws NumberFormatException If the feature count in the header isn't an integer
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }
        // readLine() returns null at the end of the stream, which means the
        // file has fewer feature lines than its header promised.
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of data whilst parsing annotation in " + set.name() + " (read " + i + " of " + featureCount + " features)");
        }
        String[] fields = line.split("\\t");
        Chromosome c = null;
        String problem = null;
        if (fields.length < 3) {
            // We need at least a type, a chromosome and a location to make a feature
            problem = "Annotation feature line had " + fields.length + " sections when at least 3 were expected";
        } else {
            try {
                c = application.dataCollection().genome().getChromosome(fields[1]).chromosome();
            } catch (Exception sme) {
                problem = "Annotation feature could not be mapped to chromosome '" + fields[1] + "'";
            }
        }
        if (problem != null) {
            // Tell the listeners what went wrong and carry on with the next line
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(new SeqMonkException(problem));
            }
            continue;
        }
        Feature f = new Feature(fields[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(fields[2]));
        // Everything after the location is read as alternating tag, value
        for (int a = 3; a + 1 < fields.length; a += 2) {
            f.addAttribute(fields[a], fields[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
Also used : Enumeration(java.util.Enumeration) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) FileNotFoundException(java.io.FileNotFoundException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 23 with Feature

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.

the class VisibleStoresParser method processNormalDataStore.

/**
 * Builds a new DataSet by re-reading every read in an existing data store and
 * applying the filters and transformations currently configured on this parser
 * (downsampling, strand/length/feature filters, offsets, strand reversal or
 * removal, centre extraction and extension).
 *
 * Reads which end up before position 1 or beyond the end of their chromosome
 * are reported as warnings and dropped.
 *
 * NOTE(review): when prefs.isHiC() is true nothing is added to the returned
 * data set by this method - the existing comment says HiC additions are
 * deferred, but the deferred step isn't visible here.
 *
 * @param store The data store whose reads are to be reimported
 * @return The new data set, or null if the operation was cancelled
 */
private DataSet processNormalDataStore(DataStore store) {
    int extendBy = prefs.extendReads();
    boolean reverse = prefs.reverseReads();
    boolean removeStrand = prefs.removeStrandInfo();
    DataSet newData = new DataSet(store.name() + "_reimport", "Reimported from " + store.name(), prefs.removeDuplicates());
    // Now process the data
    Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Processing " + store.name() + " chr " + chrs[c].name(), c, chrs.length);
        ReadsWithCounts reads = store.getReadsForChromosome(chrs[c]);
        // Features are only fetched (and sorted) if we're filtering by feature,
        // otherwise this stays null and is never dereferenced below.
        Feature[] features = null;
        if (filterByFeature) {
            features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], featureType);
            Arrays.sort(features);
        }
        // Index of the feature we're currently comparing reads against. This
        // only ever moves forwards within a chromosome.
        // NOTE(review): that presumably relies on the reads being in position
        // order - confirm against ReadsWithCounts.
        int currentFeaturePostion = 0;
        for (int r = 0; r < reads.reads.length; r++) {
            // Each distinct read is processed once per copy, so every 'continue'
            // below skips just one copy (which matters for downsampling, where
            // each copy gets its own random draw).
            for (int ct = 0; ct < reads.counts[r]; ct++) {
                long thisRead = reads.reads[r];
                if (cancel) {
                    progressCancelled();
                    return null;
                }
                // Randomly discard reads so that roughly downsampleProbabilty
                // of them are kept
                if (downsample && downsampleProbabilty < 1) {
                    if (Math.random() > downsampleProbabilty) {
                        continue;
                    }
                }
                long read;
                // Unpack the read so we can modify its position and strand
                int start = SequenceRead.start(thisRead);
                int end = SequenceRead.end(thisRead);
                int strand = SequenceRead.strand(thisRead);
                // The strand filter works on the original strand, before any
                // reversal or strand removal
                if (filterByStrand) {
                    if (strand == Location.FORWARD && !keepForward)
                        continue;
                    if (strand == Location.REVERSE && !keepReverse)
                        continue;
                    if (strand == Location.UNKNOWN && !keepUnknown)
                        continue;
                }
                // The length filter works on the original length. A null
                // limit means that end of the range is unbounded.
                if (filterByLength) {
                    int length = SequenceRead.length(thisRead);
                    if (minLength != null && length < minLength)
                        continue;
                    if (maxLength != null && length > maxLength)
                        continue;
                }
                // Offsets shift the whole read. Forward reads move up by
                // forwardOffset, reverse reads move down by reverseOffset and
                // reads with unknown strand aren't moved.
                if (strand == Location.FORWARD) {
                    start += forwardOffset;
                    end += forwardOffset;
                }
                if (strand == Location.REVERSE) {
                    start -= reverseOffset;
                    end -= reverseOffset;
                }
                // If we're keeping only reads which overlap features and there
                // are no features on this chromosome then nothing can pass
                if (filterByFeature && features.length == 0 && !excludeFeature)
                    continue;
                if (filterByFeature && features.length > 0) {
                    // See if we're comparing against the right feature
                    // Note that this uses the original read position (thisRead)
                    // and not the offset start/end calculated above. The cursor
                    // stops at the last feature so the index is always valid.
                    while (SequenceRead.start(thisRead) > features[currentFeaturePostion].location().end() && currentFeaturePostion < (features.length - 1)) {
                        currentFeaturePostion++;
                    }
                    // Test to see if we overlap
                    // Overlapping reads are dropped when excluding features and
                    // non-overlapping reads are dropped when we're not.
                    if (SequenceRead.overlaps(thisRead, features[currentFeaturePostion].location().packedPosition())) {
                        if (excludeFeature)
                            continue;
                    } else {
                        if (!excludeFeature)
                            continue;
                    }
                }
                // Swap forward and reverse. Unknown strand is left alone.
                if (reverse) {
                    if (strand == Location.FORWARD) {
                        strand = Location.REVERSE;
                    } else if (strand == Location.REVERSE) {
                        strand = Location.FORWARD;
                    }
                }
                // Strand removal comes after reversal, so it always wins
                if (removeStrand) {
                    strand = Location.UNKNOWN;
                }
                // Replace the read with a window of centreExtractContext
                // either side of its (integer) midpoint
                if (extractCentres) {
                    int centre = start + ((end - start) / 2);
                    start = centre - centreExtractContext;
                    end = centre + centreExtractContext;
                }
                if (extendBy != 0) {
                    // We now allow negative extensions to shorten reads
                    // Forward and unknown reads move their end, reverse reads
                    // move their start. A read can't be shortened to less
                    // than a single position.
                    if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
                        end += extendBy;
                        if (end < start)
                            end = start;
                    } else if (strand == Location.REVERSE) {
                        start -= extendBy;
                        if (start > end)
                            start = end;
                    }
                }
                // We don't allow reads before the start of the chromosome
                if (start < 1) {
                    int overrun = (0 - start) + 1;
                    progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                // We also don't allow readings which are beyond the end of the chromosome
                if (end > chrs[c].length()) {
                    int overrun = end - chrs[c].length();
                    progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                    continue;
                }
                // We can now make the new reading
                try {
                    read = SequenceRead.packPosition(start, end, strand);
                    if (!prefs.isHiC()) {
                        // HiC additions are deferred until we know the other end is OK too.
                        newData.addData(chrs[c], read);
                    }
                } catch (SeqMonkException e) {
                    // A read which can't be packed or added is reported and skipped
                    progressWarningReceived(e);
                    continue;
                }
            }
        }
    }
    return newData;
}
Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) PairedDataSet(uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ReadsWithCounts(uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Example 24 with Feature

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.

the class FeatureListViewer method mouseClicked.

/*
 * (non-Javadoc)
 * @see java.awt.event.MouseListener#mouseClicked(java.awt.event.MouseEvent)
 *
 * On a double click, sets the DisplayPreferences location to the chromosome
 * and start/end of the feature held in the first column of the selected row
 * of the table which was clicked. Does nothing if no row is selected.
 */
public void mouseClicked(MouseEvent me) {
    // We're only interested in double clicks
    if (me.getClickCount() != 2)
        return;
    // This is only linked from the report JTable
    JTable t = (JTable) me.getSource();
    int r = t.getSelectedRow();
    // getSelectedRow() returns -1 when nothing is selected (for example a
    // double click on the empty area below the last row), and asking the
    // table for the value in row -1 would throw.
    if (r < 0)
        return;
    Feature f = (Feature) t.getValueAt(r, 0);
    DisplayPreferences.getInstance().setLocation(application.dataCollection().genome().getChromosome(f.chromosomeName()).chromosome(), SequenceRead.packPosition(f.location().start(), f.location().end(), Location.UNKNOWN));
}
Also used : JTable(javax.swing.JTable) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Example 25 with Feature

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.

the class FindFeaturesByNameDialog method actionPerformed.

/*
 * (non-Javadoc)
 * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
 *
 * Handles the dialog's commands:
 *   close                     - hides and disposes of the dialog
 *   search                    - runs this object in a new thread
 *   save_annotation_all       - makes an annotation track from all of the last hits
 *   save_annotation_selected  - makes an annotation track from the hits selected in the viewer
 * Any other command (including a null command) is ignored.
 */
public void actionPerformed(ActionEvent ae) {
    // The command can be null, so compare with the constant on the left
    // rather than calling equals() on the command itself.
    String command = ae.getActionCommand();
    if ("close".equals(command)) {
        setVisible(false);
        dispose();
    } else if ("search".equals(command)) {
        Thread t = new Thread(this);
        t.start();
    } else if ("save_annotation_all".equals(command)) {
        saveHitsAsAnnotationTrack(lastHits, "Name matched features");
    } else if ("save_annotation_selected".equals(command)) {
        Feature[] selectedHits = viewer.getSelectedFeatures();
        if (selectedHits.length == 0) {
            JOptionPane.showMessageDialog(this, "There are no selected features from which to make a track", "Can't make track", JOptionPane.INFORMATION_MESSAGE);
            return;
        }
        saveHitsAsAnnotationTrack(selectedHits, "Selected name matched features");
    }
}

/**
 * Asks the user for a feature type and then adds a new annotation set to the
 * genome containing a copy of each supplied feature under that type. The
 * copies keep the chromosome, location and tag/value annotations of the
 * originals. Nothing is created if the user cancels the prompt.
 *
 * @param hits The features to copy into the new annotation set
 * @param defaultTypeName The feature type initially offered in the prompt
 */
private void saveHitsAsAnnotationTrack(Feature[] hits, String defaultTypeName) {
    // Find a name for the type of feature they want to create
    String name = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, defaultTypeName);
    // They cancelled
    if (name == null)
        return;
    // Now we can go ahead and make the new annotation set
    AnnotationSet searchAnnotations = new AnnotationSet(dataCollection.genome(), "Named features search results");
    for (int f = 0; f < hits.length; f++) {
        Feature feature = new Feature(name, hits[f].chromosomeName());
        feature.setLocation(hits[f].location());
        AnnotationTagValue[] tags = hits[f].getAnnotationTagValues();
        for (int t = 0; t < tags.length; t++) {
            feature.addAttribute(tags[t].tag(), tags[t].value());
        }
        searchAnnotations.addFeature(feature);
    }
    dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { searchAnnotations });
}
Also used : AnnotationTagValue(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationTagValue) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)

Aggregations

Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)37 Vector (java.util.Vector)23 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)23 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)20 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)13 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)11 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)9 AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet)7 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)7 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)5 Hashtable (java.util.Hashtable)4 AbstractTableModel (javax.swing.table.AbstractTableModel)4 TableModel (javax.swing.table.TableModel)4 LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector)4 BufferedReader (java.io.BufferedReader)3 FileReader (java.io.FileReader)3 Enumeration (java.util.Enumeration)3 HashSet (java.util.HashSet)3 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)3 FileInputStream (java.io.FileInputStream)2