Search in sources :

Example 6 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.

/**
 * Collapses the locations of a set of features into a sorted array of
 * non-overlapping locations.
 *
 * Split locations are always expanded into their sub-locations before merging.
 * Overlapping locations are fused into a single location which keeps the start
 * and strand of the first location in sort order; locations wholly contained in
 * the current merged location are dropped.
 *
 * @param features the features to merge; the caller is expected to pass features
 *        from a single chromosome since only positions are compared here
 * @param useSubFeatures not consulted by this method
 * @return the merged locations, or an empty array if there was nothing to merge
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // With no features there is nothing to merge.  Without this guard we would
    // fall through to locs[0] below and fail with an ArrayIndexOutOfBoundsException.
    if (features.length == 0) {
        return new Location[0];
    }
    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }
    // Flatten every (sub)location into its packed long representation
    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    // If every feature was a split location with no sub-locations we have nothing left
    if (locs.length == 0) {
        return new Location[0];
    }
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the merged location being grown as we sweep through the sorted list
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // They overlap and the new one reaches further, so extend the current location
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // This doesn't reach past the end of the current location, so ignore it
            continue;
        } else {
            // They don't overlap, so bank the current location and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));
    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);
    // Return the sorted array itself.  Passing it back through
    // mergedLocs.toArray(finalLocations) would overwrite the sorted contents
    // with the vector's own ordering and throw the sort away.
    return finalLocations;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 7 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class FeatureMerging method getNonOverlappingLocationsForFeatures.

/**
 * Merges a set of features into non-overlapping "merged_location" features.
 *
 * The features are grouped by chromosome name, each group is merged
 * independently by getNonOverlappingFeaturesWithinChromosome, and every
 * resulting location is wrapped in a new feature on that chromosome.
 *
 * @param features the features to merge
 * @param useSubFeatures passed through unchanged to the per-chromosome merge
 * @return one new feature per merged location, across all chromosomes
 */
public static Feature[] getNonOverlappingLocationsForFeatures(Feature[] features, boolean useSubFeatures) {
    // Bucket the incoming features by the name of the chromosome they sit on
    Hashtable<String, Vector<Feature>> featuresByChromosome = new Hashtable<String, Vector<Feature>>();
    for (Feature feature : features) {
        String chromosomeName = feature.chromosomeName();
        Vector<Feature> bucket = featuresByChromosome.get(chromosomeName);
        if (bucket == null) {
            bucket = new Vector<Feature>();
            featuresByChromosome.put(chromosomeName, bucket);
        }
        bucket.add(feature);
    }
    // Merge each chromosome's bucket and wrap every merged location in a feature
    Vector<Feature> mergedFeatures = new Vector<Feature>();
    for (Enumeration<String> names = featuresByChromosome.keys(); names.hasMoreElements();) {
        String chromosomeName = names.nextElement();
        Feature[] chromosomeFeatures = featuresByChromosome.get(chromosomeName).toArray(new Feature[0]);
        Location[] mergedLocations = getNonOverlappingFeaturesWithinChromosome(chromosomeFeatures, useSubFeatures);
        for (Location mergedLocation : mergedLocations) {
            Feature mergedFeature = new Feature("merged_location", chromosomeName);
            mergedFeature.setLocation(mergedLocation);
            mergedFeatures.add(mergedFeature);
        }
    }
    return mergedFeatures.toArray(new Feature[0]);
}
Also used : Hashtable(java.util.Hashtable) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 8 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class CodonBiasPanel method run.

/**
 * Counts reads at each position of the feature's transcript and publishes
 * the result for painting.
 *
 * The feature's sub-locations (or its single location when it is not split)
 * are laid end to end to form the transcript.  A lookup table maps an offset
 * within the feature's genomic span to a transcript position, with -1 for
 * offsets which fall between sub-locations.  Offsets are measured from the
 * feature start on the forward strand and from the feature end on the
 * reverse strand.  Each read from the store is then reduced to a single
 * genomic position, chosen by the feature strand and the 'reverse' flag, and
 * counted at the matching transcript position.
 *
 * On completion the abundance and exonBoundaries fields are replaced, the
 * calculated flag is set and a repaint is requested.
 */
public void run() {
    // First we need to know the length of the feature we'll be analysing.  This isn't the
    // full length in the genome but the sum length of the exons.  We'll also make up an
    // array of offsets so that we can convert genomic positions into positions within the
    // feature easily.
    Location[] subLocations;
    if (feature.location() instanceof SplitLocation) {
        subLocations = ((SplitLocation) (feature.location())).subLocations();
    } else {
        subLocations = new Location[] { feature.location() };
    }
    System.err.println("Working with " + feature.name());
    System.err.println("There are " + subLocations.length + " sublocations");
    // First work out the total transcript length so we can make an appropriate data structure
    int totalLength = 0;
    for (int e = 0; e < subLocations.length; e++) {
        totalLength += subLocations[e].length();
    }
    System.err.println("Total exon length is " + totalLength);
    int[] abundance = new int[totalLength];
    // Now work out the exon boundary positions within the feature
    // We can also work out a mapping between relative genomic position and
    // feature position.
    int[] exonBoundaries = new int[subLocations.length];
    int[] genomeToFeatureMap = new int[1 + feature.location().end() - feature.location().start()];
    // -1 marks an offset which isn't covered by any sublocation
    for (int j = 0; j < genomeToFeatureMap.length; j++) {
        genomeToFeatureMap[j] = -1;
    }
    System.err.println("Genome to feature map length is " + genomeToFeatureMap.length);
    if (feature.location().strand() == Location.FORWARD) {
        System.err.println("Feature is forward strand");
        int length = 0;
        int positionInFeature = 0;
        for (int i = 0; i < subLocations.length; i++) {
            System.err.println("Looking at sublocation " + i + " from " + subLocations[i].start() + " to " + subLocations[i].end());
            exonBoundaries[i] = length;
            System.err.println("Added exon boundary at " + exonBoundaries[i]);
            length += subLocations[i].length();
            for (int x = 0; x < subLocations[i].length(); x++) {
                int genomePostion = subLocations[i].start() + x;
                int relativeGenomePosition = genomePostion - feature.location().start();
                System.err.println("Sublocation Pos=" + x + " Genome Pos=" + genomePostion + " Rel Genome Pos=" + relativeGenomePosition + " Feature pos=" + positionInFeature);
                genomeToFeatureMap[relativeGenomePosition] = positionInFeature;
                positionInFeature++;
            }
        }
    } else if (feature.location().strand() == Location.REVERSE) {
        int length = 0;
        int positionInFeature = 0;
        // Walk the sublocations from the highest genomic position downwards
        for (int i = subLocations.length - 1; i >= 0; i--) {
            exonBoundaries[i] = length;
            length += subLocations[i].length();
            for (int x = 0; x < subLocations[i].length(); x++) {
                int genomePosition = subLocations[i].end() - x;
                // The map only spans the feature, and reads on a reverse feature are
                // looked up by their distance from the feature end (see below), so we
                // must index the same way here.  Using the raw genomic position would
                // run off the end of the array or fill in the wrong slot.
                int relativeGenomePosition = feature.location().end() - genomePosition;
                genomeToFeatureMap[relativeGenomePosition] = positionInFeature;
                positionInFeature++;
            }
        }
    }
    // Now we can get all of the reads and position them within the read.
    long[] reads = store.getReadsForProbe(new Probe(SeqMonkApplication.getInstance().dataCollection().genome().getExactChromsomeNameMatch(feature.chromosomeName()), feature.location().packedPosition()));
    for (int r = 0; r < reads.length; r++) {
        // We need to work out the position of this read in the feature.  This will depend
        // on whether the feature is forward or reverse strand, and whether we're reversing
        // the direction of reads.
        System.err.println("Looking at read " + SequenceRead.toString(reads[r]));
        int genomicPosition = 0;
        if (feature.location().strand() == Location.FORWARD) {
            System.err.println("It's a forward feature");
            if (reverse) {
                System.err.println("We're a same strand library");
                if (SequenceRead.strand(reads[r]) != Location.REVERSE)
                    continue;
                genomicPosition = SequenceRead.end(reads[r]);
            } else {
                System.err.println("We're an opposing strand library");
                if (SequenceRead.strand(reads[r]) != Location.FORWARD)
                    continue;
                genomicPosition = SequenceRead.start(reads[r]);
            }
            System.err.println("Raw genomic position is " + genomicPosition);
            // Convert to an offset from the feature start to match the forward map
            genomicPosition = genomicPosition - feature.location().start();
            System.err.println("Corrected genomic position is " + genomicPosition);
        } else if (feature.location().strand() == Location.REVERSE) {
            if (reverse) {
                if (SequenceRead.strand(reads[r]) != Location.REVERSE)
                    continue;
                genomicPosition = SequenceRead.start(reads[r]);
            } else {
                if (SequenceRead.strand(reads[r]) != Location.FORWARD)
                    continue;
                genomicPosition = SequenceRead.end(reads[r]);
            }
            // Convert to an offset from the feature end to match the reverse map
            genomicPosition = feature.location().end() - genomicPosition;
        }
        System.err.println("Final genomic position is " + genomicPosition);
        // Reads whose chosen position lies outside the feature can't be placed
        if (genomicPosition < 0 || genomicPosition >= genomeToFeatureMap.length)
            continue;
        System.err.println("Position in feature is " + genomeToFeatureMap[genomicPosition]);
        if (genomeToFeatureMap[genomicPosition] != -1) {
            abundance[genomeToFeatureMap[genomicPosition]]++;
        }
    }
    this.abundance = abundance;
    this.exonBoundaries = exonBoundaries;
    calculated = true;
    repaint();
}
Also used : SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 9 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class FeaturePositionSelectorPanel method getProbes.

/**
 * Gets the set of probes with appropriate context for the options
 * currently set.
 *
 * For every chromosome the features of all selected types are collected.
 * When sub-features are not in use each feature's whole location is used.
 * Otherwise a split location yields either one probe location per
 * sub-location (exons) or one per gap between consecutive sub-locations
 * (introns); an unsplit location is used as-is for exons and skipped for
 * introns.
 *
 * @return the generated probes, passed through removeDuplicates(Probe[])
 *         when the remove duplicates option is set
 */
public Probe[] getProbes() {
    Vector<Probe> newProbes = new Vector<Probe>();
    for (Chromosome chromosome : collection.genome().getAllChromosomes()) {
        // Gather the features of every selected type on this chromosome
        Vector<Feature> chromosomeFeatures = new Vector<Feature>();
        for (String featureType : selectedFeatureTypes()) {
            Feature[] typedFeatures = collection.genome().annotationCollection().getFeaturesForType(chromosome, featureType);
            for (Feature typedFeature : typedFeatures) {
                chromosomeFeatures.add(typedFeature);
            }
        }
        for (Feature feature : chromosomeFeatures) {
            if (!useSubFeatures()) {
                // Whole features - just use the location as it stands
                makeProbes(feature, chromosome, feature.location(), newProbes, false);
                continue;
            }
            if (!(feature.location() instanceof SplitLocation)) {
                // There are no sub-features to split out.  For exons we can still make
                // a single probe; for introns there is nothing we can do.
                if (useExonSubfeatures()) {
                    makeProbes(feature, chromosome, feature.location(), newProbes, false);
                }
                continue;
            }
            Location[] exons = ((SplitLocation) feature.location()).subLocations();
            if (useExonSubfeatures()) {
                for (Location exon : exons) {
                    makeProbes(feature, chromosome, exon, newProbes, false);
                }
            } else {
                // Introns are the gaps between each pair of neighbouring sub-locations
                for (int next = 1; next < exons.length; next++) {
                    int intronStart = exons[next - 1].end() + 1;
                    int intronEnd = exons[next].start() - 1;
                    Location intron = new Location(intronStart, intronEnd, feature.location().strand());
                    makeProbes(feature, chromosome, intron, newProbes, false);
                }
            }
        }
    }
    Probe[] allProbes = newProbes.toArray(new Probe[0]);
    return removeDuplicates() ? removeDuplicates(allProbes) : allProbes;
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 10 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class IntronFeatureGroup method getSubLocations.

/**
 * Gets the gaps between the merged sub-locations of the features in this group.
 *
 * For a single feature the gaps between its consecutive sub-locations are
 * returned directly.  For several features all (sub)locations are pooled,
 * overlapping ones are merged, and the gaps between consecutive merged
 * locations are returned.  Each gap runs from one past the end of a location
 * to one before the start of the next and takes the strand of the location
 * which precedes it.
 *
 * @return the gap locations; empty when the group has no features or when
 *         there are fewer than two locations to form a gap between
 */
public Location[] getSubLocations() {
    // An empty group has no gaps.  Without this guard we would fall through
    // to locs[0] below and fail with an ArrayIndexOutOfBoundsException.
    if (features.size() == 0) {
        return new Location[0];
    }
    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            // Math.max stops a split location with no sub-locations from asking
            // for a negative array size
            Location[] interLocs = new Location[Math.max(0, subLocs.length - 1)];
            for (int i = 0; i < subLocs.length - 1; i++) {
                interLocs[i] = new Location(subLocs[i].end() + 1, subLocs[i + 1].start() - 1, subLocs[i].strand());
            }
            return interLocs;
        } else {
            return new Location[0];
        }
    }
    // Flatten every (sub)location into its packed long representation
    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }
    long[] locs = allLocs.toArray();
    // If every feature was a split location with no sub-locations we have nothing left
    if (locs.length == 0) {
        return new Location[0];
    }
    SequenceRead.sort(locs);
    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the merged location being grown as we sweep through the sorted list
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // They overlap and the new one reaches further, so extend the current location
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // This doesn't reach past the end of the current location, so ignore it
            continue;
        } else {
            // They don't overlap, so bank the current location and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));
    // mergedLocs always holds at least one entry here, so the size is never negative
    Location[] interLocs = new Location[mergedLocs.size() - 1];
    for (int i = 0; i < mergedLocs.size() - 1; i++) {
        interLocs[i] = new Location(mergedLocs.elementAt(i).end() + 1, mergedLocs.elementAt(i + 1).start() - 1, mergedLocs.elementAt(i).strand());
    }
    return interLocs;
}
Also used : LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Aggregations

Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location): 15; Vector (java.util.Vector): 14; Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature): 13; SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation): 12; Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe): 9; Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome): 7; ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet): 5; LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector): 5; Hashtable (java.util.Hashtable): 3; AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet): 3; QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType): 3; SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException): 3; BufferedReader (java.io.BufferedReader): 2; FileInputStream (java.io.FileInputStream): 2; FileReader (java.io.FileReader): 2; InputStreamReader (java.io.InputStreamReader): 2; GZIPInputStream (java.util.zip.GZIPInputStream): 2; ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset): 2; Enumeration (java.util.Enumeration): 1; HashSet (java.util.HashSet): 1