use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.
the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.
/**
 * Collapses the locations of a set of features into a set of merged
 * locations.  Features with a SplitLocation contribute each of their
 * sub-locations individually; everything else contributes its single
 * location.
 *
 * The merge walks the packed positions in the order produced by
 * SequenceRead.sort and assumes that order is such that a later entry
 * whose end does not pass the current end is contained in the current
 * region (i.e. sorted by start position - TODO confirm against
 * SequenceRead.sort).
 *
 * @param features the features to merge (the method name implies they all
 *                 come from one chromosome; this is not checked here)
 * @param useSubFeatures not used by this method
 * @return the merged locations, sorted by Location's natural ordering, or
 *         an empty array if there were no locations to merge.  When only a
 *         single feature is supplied its location(s) are returned as-is.
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }

    // Flatten every (sub)location into its packed long representation
    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();

    // No features (or only empty split locations) means there is nothing
    // to merge, and reading locs[0] below would fail.
    if (locs.length == 0) {
        return new Location[0];
    }

    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlapping and reaching further - extend the current region,
            // keeping the start and strand of the region being extended.
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Doesn't reach past the current region - nothing to add
            continue;
        } else {
            // No overlap - close off the current region and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));

    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);

    // Return the sorted array itself.  Passing it back through
    // mergedLocs.toArray(...) would overwrite it with the vector's order
    // and throw away the sort.
    return finalLocations;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.
the class FeatureMerging method getNonOverlappingLocationsForFeatures.
/**
 * Builds a set of "merged_location" features covering the supplied
 * features.  The features are grouped by chromosome name, each group is
 * passed to getNonOverlappingFeaturesWithinChromosome, and every location
 * returned is wrapped in a new Feature on that chromosome.
 *
 * @param features the features to merge
 * @param useSubFeatures passed through unchanged to the per-chromosome merge
 * @return one new feature per location returned by the per-chromosome merge
 */
public static Feature[] getNonOverlappingLocationsForFeatures(Feature[] features, boolean useSubFeatures) {

    // Bucket the incoming features by the chromosome they sit on
    Hashtable<String, Vector<Feature>> featuresByChromosome = new Hashtable<String, Vector<Feature>>();
    for (Feature feature : features) {
        String chromosomeName = feature.chromosomeName();
        Vector<Feature> bucket = featuresByChromosome.get(chromosomeName);
        if (bucket == null) {
            bucket = new Vector<Feature>();
            featuresByChromosome.put(chromosomeName, bucket);
        }
        bucket.add(feature);
    }

    // Merge within each chromosome and wrap every resulting location
    Vector<Feature> mergedFeatures = new Vector<Feature>();
    for (Enumeration<String> names = featuresByChromosome.keys(); names.hasMoreElements();) {
        String chromosomeName = names.nextElement();
        Feature[] chromosomeFeatures = featuresByChromosome.get(chromosomeName).toArray(new Feature[0]);
        Location[] mergedLocations = getNonOverlappingFeaturesWithinChromosome(chromosomeFeatures, useSubFeatures);

        for (Location mergedLocation : mergedLocations) {
            Feature mergedFeature = new Feature("merged_location", chromosomeName);
            mergedFeature.setLocation(mergedLocation);
            mergedFeatures.add(mergedFeature);
        }
    }

    return mergedFeatures.toArray(new Feature[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.
the class CodonBiasPanel method run.
/**
 * Counts reads at each position along the spliced feature.
 *
 * The feature's sub-locations (or its single location if it isn't split)
 * are laid end to end to form the feature coordinate space.  A lookup
 * table maps each position within the feature's genomic span - expressed
 * as an offset from the feature start for forward features, or from the
 * feature end for reverse features - to a position in that space, with -1
 * for positions which fall outside every sub-location.  One end of each
 * read on the selected strand is then looked up in the table and counted.
 *
 * The results are stored in the abundance and exonBoundaries fields,
 * calculated is set to true and a repaint is requested.  Features whose
 * strand is neither FORWARD nor REVERSE get an all-zero abundance.
 */
public void run() {

    // First we need to know the length of the feature we'll be analysing.  This isn't the
    // full length in the genome but the sum length of the exons.  We'll also make up an
    // array of offsets so that we can convert genomic positions into positions within the
    // feature easily.
    Location[] subLocations;
    if (feature.location() instanceof SplitLocation) {
        subLocations = ((SplitLocation) (feature.location())).subLocations();
    } else {
        subLocations = new Location[] { feature.location() };
    }

    System.err.println("Working with " + feature.name());
    System.err.println("There are " + subLocations.length + " sublocations");

    // First work out the total transcript length so we can make an appropriate data structure
    int totalLength = 0;
    for (int e = 0; e < subLocations.length; e++) {
        totalLength += subLocations[e].length();
    }
    System.err.println("Total exon length is " + totalLength);

    int[] abundance = new int[totalLength];

    // Now work out the exon boundary positions within the feature
    // We can also work out a mapping between relative genomic position and
    // feature position.
    int[] exonBoundaries = new int[subLocations.length];

    // One slot per base of the feature's genomic span; -1 means "not in any sublocation"
    int[] genomeToFeatureMap = new int[1 + feature.location().end() - feature.location().start()];
    for (int j = 0; j < genomeToFeatureMap.length; j++) {
        genomeToFeatureMap[j] = -1;
    }
    System.err.println("Genome to feature map length is " + genomeToFeatureMap.length);

    if (feature.location().strand() == Location.FORWARD) {
        System.err.println("Feature is forward strand");
        int length = 0;
        int positionInFeature = 0;
        for (int i = 0; i < subLocations.length; i++) {
            System.err.println("Looking at sublocation " + i + " from " + subLocations[i].start() + " to " + subLocations[i].end());
            exonBoundaries[i] = length;
            System.err.println("Added exon boundary at " + exonBoundaries[i]);
            length += subLocations[i].length();
            for (int x = 0; x < subLocations[i].length(); x++) {
                int genomePosition = subLocations[i].start() + x;
                // The map is indexed by offset from the feature start
                int relativeGenomePosition = genomePosition - feature.location().start();
                System.err.println("Sublocation Pos=" + x + " Genome Pos=" + genomePosition + " Rel Genome Pos=" + relativeGenomePosition + " Feature pos=" + positionInFeature);
                genomeToFeatureMap[relativeGenomePosition] = positionInFeature;
                positionInFeature++;
            }
        }
    } else if (feature.location().strand() == Location.REVERSE) {
        int length = 0;
        int positionInFeature = 0;
        // Walk the sublocations from last to first, and each one from its
        // end backwards, so feature positions count up from the feature end.
        for (int i = subLocations.length - 1; i >= 0; i--) {
            exonBoundaries[i] = length;
            length += subLocations[i].length();
            for (int x = 0; x < subLocations[i].length(); x++) {
                int genomePosition = subLocations[i].end() - x;
                // The map only spans the feature, so it must be indexed by
                // the offset from the feature end (matching the lookup done
                // for reads below), not by the raw genomic coordinate.
                int relativeGenomePosition = feature.location().end() - genomePosition;
                genomeToFeatureMap[relativeGenomePosition] = positionInFeature;
                positionInFeature++;
            }
        }
    }

    // Now we can get all of the reads and position them within the feature.
    long[] reads = store.getReadsForProbe(new Probe(SeqMonkApplication.getInstance().dataCollection().genome().getExactChromsomeNameMatch(feature.chromosomeName()), feature.location().packedPosition()));

    for (int r = 0; r < reads.length; r++) {
        // We need to work out the position of this read in the feature.  This will depend
        // on whether the feature is forward or reverse strand, and whether we're reversing
        // the direction of reads.
        // NOTE(review): the strand selection and the "same strand"/"opposing strand"
        // debug labels below are kept exactly as they were - confirm they match
        // the intended meaning of the reverse flag.
        System.err.println("Looking at read " + SequenceRead.toString(reads[r]));
        int genomicPosition = 0;
        if (feature.location().strand() == Location.FORWARD) {
            System.err.println("It's a forward feature");
            if (reverse) {
                System.err.println("We're a same strand library");
                if (SequenceRead.strand(reads[r]) != Location.REVERSE) {
                    continue;
                }
                genomicPosition = SequenceRead.end(reads[r]);
            } else {
                System.err.println("We're an opposing strand library");
                if (SequenceRead.strand(reads[r]) != Location.FORWARD) {
                    continue;
                }
                genomicPosition = SequenceRead.start(reads[r]);
            }
            System.err.println("Raw genomic position is " + genomicPosition);
            // Convert to an offset from the feature start
            genomicPosition = genomicPosition - feature.location().start();
            System.err.println("Corrected genomic position is " + genomicPosition);
        } else if (feature.location().strand() == Location.REVERSE) {
            if (reverse) {
                if (SequenceRead.strand(reads[r]) != Location.REVERSE) {
                    continue;
                }
                genomicPosition = SequenceRead.start(reads[r]);
            } else {
                if (SequenceRead.strand(reads[r]) != Location.FORWARD) {
                    continue;
                }
                genomicPosition = SequenceRead.end(reads[r]);
            }
            // Convert to an offset from the feature end
            genomicPosition = feature.location().end() - genomicPosition;
        }
        System.err.println("Final genomic position is " + genomicPosition);

        // Reads can hang off either end of the feature
        if (genomicPosition < 0 || genomicPosition >= genomeToFeatureMap.length) {
            continue;
        }
        System.err.println("Position in feature is " + genomeToFeatureMap[genomicPosition]);

        // Positions outside every sublocation are marked -1 and aren't counted
        if (genomeToFeatureMap[genomicPosition] != -1) {
            abundance[genomeToFeatureMap[genomicPosition]]++;
        }
    }

    this.abundance = abundance;
    this.exonBoundaries = exonBoundaries;
    calculated = true;
    repaint();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.
the class FeaturePositionSelectorPanel method getProbes.
/**
 * Builds the probes for the currently selected feature types across every
 * chromosome in the genome, honouring the sub-feature options currently
 * set.
 *
 * When sub-features are in use, split features yield either one set of
 * probes per sub-location (exons) or one per gap between consecutive
 * sub-locations (introns).  Unsplit features yield probes for the whole
 * feature in exon mode and nothing in intron mode.  When sub-features are
 * not in use every feature yields probes for its whole location.
 *
 * @return the probes which were made, passed through removeDuplicates
 *         when that option is enabled
 */
public Probe[] getProbes() {

    Vector<Probe> probes = new Vector<Probe>();

    for (Chromosome chromosome : collection.genome().getAllChromosomes()) {

        // Gather the features of every selected type on this chromosome
        Vector<Feature> chromosomeFeatures = new Vector<Feature>();
        for (String featureType : selectedFeatureTypes()) {
            for (Feature typedFeature : collection.genome().annotationCollection().getFeaturesForType(chromosome, featureType)) {
                chromosomeFeatures.add(typedFeature);
            }
        }

        for (Feature feature : chromosomeFeatures.toArray(new Feature[0])) {

            if (!useSubFeatures()) {
                // Whole-feature probes
                makeProbes(feature, chromosome, feature.location(), probes, false);
                continue;
            }

            if (!(feature.location() instanceof SplitLocation)) {
                // An unsplit feature can stand in as a single exon, but
                // there is nothing to make an intron from.
                if (useExonSubfeatures()) {
                    makeProbes(feature, chromosome, feature.location(), probes, false);
                }
                continue;
            }

            Location[] parts = ((SplitLocation) feature.location()).subLocations();

            if (useExonSubfeatures()) {
                for (Location part : parts) {
                    makeProbes(feature, chromosome, part, probes, false);
                }
            } else {
                // Introns are the gaps between consecutive sub-locations
                for (int next = 1; next < parts.length; next++) {
                    int intronStart = parts[next - 1].end() + 1;
                    int intronEnd = parts[next].start() - 1;
                    makeProbes(feature, chromosome, new Location(intronStart, intronEnd, feature.location().strand()), probes, false);
                }
            }
        }
    }

    Probe[] result = probes.toArray(new Probe[0]);
    if (removeDuplicates()) {
        result = removeDuplicates(result);
    }
    return result;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.
the class IntronFeatureGroup method getSubLocations.
/**
 * Gets the gaps between the (sub)locations of the features in this group.
 *
 * For a single feature the gaps lie between its consecutive sub-locations
 * as they are stored.  For several features, all of their (sub)locations
 * are first merged into non-overlapping regions and the gaps between
 * consecutive merged regions are returned.
 *
 * The merge assumes SequenceRead.sort orders packed positions so that a
 * later entry whose end does not pass the current end is contained in the
 * current region (i.e. sorted by start position - TODO confirm).
 *
 * @return one location per gap, taking its strand from the region before
 *         the gap; an empty array when there are fewer than two regions
 */
public Location[] getSubLocations() {

    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            // Fewer than two sub-locations means there are no gaps.  This
            // also stops a negative array size if there are none at all.
            if (subLocs.length < 2) {
                return new Location[0];
            }
            Location[] interLocs = new Location[subLocs.length - 1];
            for (int i = 0; i < subLocs.length - 1; i++) {
                interLocs[i] = new Location(subLocs[i].end() + 1, subLocs[i + 1].start() - 1, subLocs[i].strand());
            }
            return interLocs;
        } else {
            return new Location[0];
        }
    }

    // Flatten every (sub)location into its packed long representation
    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();

    // An empty group has no regions and therefore no gaps, and reading
    // locs[0] below would fail.
    if (locs.length == 0) {
        return new Location[0];
    }

    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlapping and reaching further - extend the current region
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Doesn't reach past the current region - nothing to add
            continue;
        } else {
            // No overlap - close off the current region and start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    mergedLocs.add(new Location(current));

    // The gaps between consecutive merged regions are what we return
    Location[] interLocs = new Location[mergedLocs.size() - 1];
    for (int i = 0; i < mergedLocs.size() - 1; i++) {
        interLocs[i] = new Location(mergedLocs.elementAt(i).end() + 1, mergedLocs.elementAt(i + 1).start() - 1, mergedLocs.elementAt(i).strand());
    }
    return interLocs;
}
Aggregations