Search in sources :

Example 1 with SplitLocation

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.

the class FeaturePositionSelectorPanel method getCoreProbes.

/**
 * Builds the probes for the core of every feature of the selected types,
 * chromosome by chromosome.  The core wouldn't include additional context
 * added by the options, but would have subtracted context removed by the
 * options.
 * <p>
 * When sub-features are requested, a feature whose location is a
 * SplitLocation gets one makeProbes call per sub-location (exon mode) or per
 * gap between consecutive sub-locations (intron mode).  A feature without a
 * SplitLocation is probed whole in exon mode and skipped in intron mode.
 *
 * @return the generated probes, passed through removeDuplicates(Probe[])
 *         when removeDuplicates() reports true
 */
public Probe[] getCoreProbes() {
    Vector<Probe> coreProbes = new Vector<Probe>();
    for (Chromosome chromosome : collection.genome().getAllChromosomes()) {
        // Pool the features of every selected type on this chromosome
        Vector<Feature> pooled = new Vector<Feature>();
        for (String featureType : selectedFeatureTypes()) {
            for (Feature candidate : collection.genome().annotationCollection().getFeaturesForType(chromosome, featureType)) {
                pooled.add(candidate);
            }
        }
        for (Feature feature : pooled) {
            if (!useSubFeatures()) {
                // Whole-feature mode: one call for the complete location
                makeProbes(feature, chromosome, feature.location(), coreProbes, true);
                continue;
            }
            if (!(feature.location() instanceof SplitLocation)) {
                // Nothing to split.  In exon mode the whole feature still
                // counts as a single exon; in intron mode there is nothing
                // we can make, so the feature is skipped.
                if (useExonSubfeatures()) {
                    makeProbes(feature, chromosome, feature.location(), coreProbes, true);
                }
                continue;
            }
            Location[] parts = ((SplitLocation) feature.location()).subLocations();
            if (useExonSubfeatures()) {
                for (Location exon : parts) {
                    makeProbes(feature, chromosome, exon, coreProbes, true);
                }
                continue;
            }
            // Intron mode: each intron is the gap between the end of one
            // sub-location and the start of the next, on the feature's strand
            for (int next = 1; next < parts.length; next++) {
                Location intron = new Location(parts[next - 1].end() + 1, parts[next].start() - 1, feature.location().strand());
                makeProbes(feature, chromosome, intron, coreProbes, true);
            }
        }
    }
    Probe[] result = coreProbes.toArray(new Probe[0]);
    return removeDuplicates() ? removeDuplicates(result) : result;
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 2 with SplitLocation

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.

the class FeaturePercentileProbeGenerator method run.

/*
 * (non-Javadoc)
 * @see java.lang.Runnable#run()
 *
 * Walks every chromosome, asks makeProbes to generate probes for each
 * feature of the configured type (per sub-location when sub-features are
 * enabled and the feature has a SplitLocation), and reports the resulting
 * ProbeSet via generationComplete.  Stops early, reporting
 * generationCancelled, as soon as the cancel flag is seen.
 */
public void run() {
    Vector<Probe> generated = new Vector<Probe>();
    Chromosome[] allChromosomes = collection.genome().getAllChromosomes();
    for (int index = 0; index < allChromosomes.length; index++) {
        Chromosome chromosome = allChromosomes[index];
        // Report progress once per chromosome, before processing it
        updateGenerationProgress("Processed " + index + " chromosomes", index, allChromosomes.length);
        for (Feature feature : collection.genome().annotationCollection().getFeaturesForType(chromosome, featureType)) {
            // The cancel flag is polled before every feature
            if (cancel) {
                generationCancelled();
                return;
            }
            if (!(useSubfeatures && (feature.location() instanceof SplitLocation))) {
                // Either sub-features are off or there is nothing to split
                makeProbes(feature, chromosome, feature.location(), generated);
                continue;
            }
            for (Location part : ((SplitLocation) feature.location()).subLocations()) {
                makeProbes(feature, chromosome, part, generated);
            }
        }
    }
    Probe[] allProbes = generated.toArray(new Probe[0]);
    generationComplete(new ProbeSet(getDescription(), allProbes));
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 3 with SplitLocation

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.

the class FeatureGroup method getSubLocations.

/**
 * Returns the sub-locations covered by the features in this group, with
 * overlapping regions merged into single locations.
 * <p>
 * A group holding exactly one feature returns that feature's own
 * sub-locations (or its single location) unmodified.  Otherwise every
 * location, or sub-location of a SplitLocation, is collected as a packed
 * position, sorted with SequenceRead.sort and then merged in a single pass.
 * A merged region keeps the strand of the first region that started it.
 *
 * @return the merged locations; an empty array when the group contributes
 *         no locations at all
 */
public Location[] getSubLocations() {
    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }
    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    // An empty group (or one whose features have no sub-locations) would
    // otherwise fail on locs[0] below.
    if (locs.length == 0) {
        return new Location[0];
    }
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the region being grown; it is only emitted once a later
    // region fails to overlap it.
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlapping and reaching further: extend the current region
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends no later than the current region: treated as a subset and ignored
            continue;
        } else {
            // No overlap: emit the finished region and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));
    return mergedLocs.toArray(new Location[0]);
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 4 with SplitLocation

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.

the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.

/**
 * Collapses the locations of a set of features into a sorted array of
 * non-overlapping locations.
 * <p>
 * A single feature is returned as its own sub-locations (or its single
 * location) without further processing.  Otherwise every location, or
 * sub-location of a SplitLocation, is collected as a packed position,
 * sorted with SequenceRead.sort and merged in a single pass.  A merged
 * region keeps the strand of the first region that started it.
 *
 * @param features the features whose locations should be merged
 * @param useSubFeatures NOTE(review): this flag is not read anywhere in the
 *        method body, so SplitLocations are always expanded - confirm
 *        whether that is intended
 * @return the merged locations in their natural (Arrays.sort) order; an
 *         empty array when there are no locations to merge
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }
    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    // With no features (or no sub-locations) there is nothing to merge, and
    // locs[0] below would be out of bounds.
    if (locs.length == 0) {
        return new Location[0];
    }
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the region being grown; it is only emitted once a later
    // region fails to overlap it.
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlapping and reaching further: extend the current region
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends no later than the current region: treated as a subset and ignored
            continue;
        } else {
            // No overlap: emit the finished region and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));
    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);
    // Return the sorted array itself.  Passing it back through
    // mergedLocs.toArray(finalLocations) would overwrite the sorted contents
    // with the vector's insertion order, because the array is exactly large
    // enough to be reused as the copy target.
    return finalLocations;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 5 with SplitLocation

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.

the class SeqMonkParser method parseAnnotation.

/**
 * Parses an external set of annotations.
 * <p>
 * The initial line supplies the annotation set name (second section) and
 * the number of feature lines that follow (third section).  Each feature
 * line is tab-delimited: feature type, chromosome name, location string,
 * then optional attribute key/value pairs.  Features whose chromosome
 * cannot be resolved are reported to the progress listeners as warnings
 * and skipped.
 *
 * @param sections The tab split initial annotation line
 * @return the finalised annotation set
 * @throws SeqMonkException if the initial line doesn't have exactly 3
 *         sections, if the input ends before all announced features have
 *         been read, or if a feature line has too few sections
 * @throws IOException Signals that an I/O exception has occurred.
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }
        String line = br.readLine();
        if (line == null) {
            // readLine returns null at end of stream: the file is truncated
            throw new SeqMonkException("Ran out of annotation data in " + set.name() + " after " + i + " of " + featureCount + " features");
        }
        sections = line.split("\\t");
        if (sections.length < 2) {
            // Without a chromosome column we can't even report a useful warning
            throw new SeqMonkException("Annotation feature " + i + " in " + set.name() + " didn't contain a chromosome");
        }
        Chromosome c;
        try {
            c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        } catch (Exception sme) {
            // Unmappable chromosomes are a warning, not a failure: tell the
            // listeners and move on to the next feature
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(new SeqMonkException("Annotation feature could not be mapped to chromosome '" + sections[1] + "'"));
            }
            continue;
        }
        if (sections.length < 3) {
            throw new SeqMonkException("Annotation feature " + i + " in " + set.name() + " didn't contain a location");
        }
        Feature f = new Feature(sections[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(sections[2]));
        // Remaining sections are attribute key/value pairs; a trailing
        // unpaired key is ignored
        for (int a = 3; a + 1 < sections.length; a += 2) {
            f.addAttribute(sections[a], sections[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
Also used : Enumeration(java.util.Enumeration) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) FileNotFoundException(java.io.FileNotFoundException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Aggregations

SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) 10, Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) 9, Vector (java.util.Vector) 8, Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) 8, Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) 6, Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) 6, ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) 3, LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector) 3, QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) 2, SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException) 2, FileNotFoundException (java.io.FileNotFoundException) 1, IOException (java.io.IOException) 1, UnknownHostException (java.net.UnknownHostException) 1, Enumeration (java.util.Enumeration) 1, Hashtable (java.util.Hashtable) 1, SimpleRegression (org.apache.commons.math3.stat.regression.SimpleRegression) 1, AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) 1