use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.
the class FeaturePositionSelectorPanel method getCoreProbes.
/**
 * Builds the probes for the core of each selected feature on every
 * chromosome of the genome. As described by the original author, the core
 * excludes any additional context added by the options but reflects any
 * context the options remove.
 *
 * <p>Every probe is created through makeProbes with its final argument set
 * to true. When sub-features are in use a SplitLocation is broken into its
 * sub-locations (exons) or into the gaps between consecutive sub-locations
 * (introns); a feature without a SplitLocation yields one probe set in exon
 * mode and nothing in intron mode.
 *
 * @return the generated probes, passed through removeDuplicates(Probe[])
 *         when removeDuplicates() reports true
 */
public Probe[] getCoreProbes() {
    Vector<Probe> coreProbes = new Vector<Probe>();

    for (Chromosome chromosome : collection.genome().getAllChromosomes()) {

        // Pool the features of every selected type found on this chromosome
        Vector<Feature> pooled = new Vector<Feature>();
        for (String type : selectedFeatureTypes()) {
            for (Feature candidate : collection.genome().annotationCollection().getFeaturesForType(chromosome, type)) {
                pooled.add(candidate);
            }
        }

        for (Feature feature : pooled) {

            // Whole-feature mode: one call covering the full location
            if (!useSubFeatures()) {
                makeProbes(feature, chromosome, feature.location(), coreProbes, true);
                continue;
            }

            // A feature with no split location can only act as a single exon;
            // there is nothing to derive introns from, so intron mode skips it.
            if (!(feature.location() instanceof SplitLocation)) {
                if (useExonSubfeatures()) {
                    makeProbes(feature, chromosome, feature.location(), coreProbes, true);
                }
                continue;
            }

            SplitLocation split = (SplitLocation) feature.location();
            Location[] parts = split.subLocations();

            if (useExonSubfeatures()) {
                for (Location part : parts) {
                    makeProbes(feature, chromosome, part, coreProbes, true);
                }
            } else {
                // Introns: the gap between each sub-location and its predecessor
                for (int next = 1; next < parts.length; next++) {
                    Location intron = new Location(parts[next - 1].end() + 1, parts[next].start() - 1, feature.location().strand());
                    makeProbes(feature, chromosome, intron, coreProbes, true);
                }
            }
        }
    }

    Probe[] result = coreProbes.toArray(new Probe[0]);
    return removeDuplicates() ? removeDuplicates(result) : result;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.
the class FeaturePercentileProbeGenerator method run.
/*
 * Creates probes for every feature of type featureType on each chromosome
 * and hands the resulting ProbeSet to generationComplete. Progress is
 * reported once per chromosome; the cancel flag is checked before each
 * feature and, when set, generationCancelled is called and the run stops.
 *
 * (non-Javadoc)
 * @see java.lang.Runnable#run()
 */
public void run() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<Probe> generated = new Vector<Probe>();

    for (int chr = 0; chr < chromosomes.length; chr++) {
        Chromosome chromosome = chromosomes[chr];

        // Report how far we have got before starting this chromosome
        updateGenerationProgress("Processed " + chr + " chromosomes", chr, chromosomes.length);

        for (Feature feature : collection.genome().annotationCollection().getFeaturesForType(chromosome, featureType)) {

            // Bail out as soon as a cancellation has been requested
            if (cancel) {
                generationCancelled();
                return;
            }

            boolean splitIntoParts = useSubfeatures && (feature.location() instanceof SplitLocation);
            if (!splitIntoParts) {
                makeProbes(feature, chromosome, feature.location(), generated);
                continue;
            }

            // One makeProbes call per sub-location of the split feature
            SplitLocation split = (SplitLocation) feature.location();
            for (Location part : split.subLocations()) {
                makeProbes(feature, chromosome, part, generated);
            }
        }
    }

    Probe[] probes = generated.toArray(new Probe[0]);
    generationComplete(new ProbeSet(getDescription(), probes));
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.
the class FeatureGroup method getSubLocations.
/**
 * Returns the regions covered by the features in this group, with
 * overlapping regions collapsed together.
 *
 * <p>An empty group yields an empty array. A group holding exactly one
 * feature returns that feature's location as-is (its sub-locations when it
 * is a SplitLocation, otherwise the location itself) with no merging
 * applied. For larger groups every location or sub-location is reduced to
 * its packed long form, the packed values are sorted, and regions that
 * overlap or are contained in the region being built are folded into it.
 *
 * @return the locations for this group
 */
public Location[] getSubLocations() {
    // Nothing to merge, and the merge below would index into an empty array.
    if (features.isEmpty()) {
        return new Location[0];
    }

    // Shortcut: a single feature needs no merging
    if (features.size() == 1) {
        Location loc = features.elementAt(0).location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }

    // Collect the packed position of every (sub-)location in the group
    LongVector allLocs = new LongVector();
    Enumeration<Feature> en = features.elements();
    while (en.hasMoreElements()) {
        Location loc = en.nextElement().location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();
    // NOTE(review): the merge below only works if this orders the packed
    // positions by start - presumably it does; confirm against SequenceRead.
    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the merged region being grown
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlaps and reaches further: extend the end, keeping the
            // start and strand of the region being grown
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends no later than the current region: treated as a subset
            continue;
        } else {
            // No overlap: the current region is finished, start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    // Flush the region still being grown when the loop ended
    mergedLocs.add(new Location(current));

    return mergedLocs.toArray(new Location[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.
the class FeatureMerging method getNonOverlappingFeaturesWithinChromosome.
/**
 * Collapses the locations of the supplied features into a set of regions in
 * which overlapping or contained regions have been merged.
 *
 * <p>An empty input yields an empty array. A single feature is returned
 * as-is (its sub-locations when it is a SplitLocation, otherwise the
 * location itself) with no merging. Otherwise every location or
 * sub-location is reduced to its packed long form, sorted, merged, and the
 * merged regions are returned sorted by Location's natural ordering.
 *
 * <p>NOTE(review): useSubFeatures is never read - SplitLocations are always
 * expanded into their sub-locations. Confirm whether callers passing false
 * expect the whole location to be used instead.
 *
 * @param features the features whose locations should be merged
 * @param useSubFeatures currently unused (see note above)
 * @return the merged locations
 */
private static Location[] getNonOverlappingFeaturesWithinChromosome(Feature[] features, boolean useSubFeatures) {
    // Nothing to merge, and the merge below would index into an empty array.
    if (features.length == 0) {
        return new Location[0];
    }

    // See if we can take some shortcuts
    if (features.length == 1) {
        Location loc = features[0].location();
        if (loc instanceof SplitLocation) {
            return ((SplitLocation) loc).subLocations();
        } else {
            return new Location[] { loc };
        }
    }

    // Collect the packed position of every (sub-)location
    LongVector allLocs = new LongVector();
    for (int f = 0; f < features.length; f++) {
        Location loc = features[f].location();
        if (loc instanceof SplitLocation) {
            Location[] subLocs = ((SplitLocation) loc).subLocations();
            for (int s = 0; s < subLocs.length; s++) {
                allLocs.add(subLocs[s].packedPosition());
            }
        } else {
            allLocs.add(loc.packedPosition());
        }
    }

    long[] locs = allLocs.toArray();
    // NOTE(review): the merge below only works if this orders the packed
    // positions by start - presumably it does; confirm against SequenceRead.
    SequenceRead.sort(locs);

    Vector<Location> mergedLocs = new Vector<Location>();
    // 'current' is the merged region being grown
    long current = locs[0];
    for (int i = 1; i < locs.length; i++) {
        if (SequenceRead.overlaps(current, locs[i]) && SequenceRead.end(locs[i]) > SequenceRead.end(current)) {
            // Overlaps and reaches further: extend the end, keeping the
            // start and strand of the region being grown
            current = SequenceRead.packPosition(SequenceRead.start(current), SequenceRead.end(locs[i]), SequenceRead.strand(current));
        } else if (SequenceRead.end(locs[i]) <= SequenceRead.end(current)) {
            // Ends no later than the current region: treated as a subset
            continue;
        } else {
            // No overlap: the current region is finished, start a new one
            mergedLocs.add(new Location(current));
            current = locs[i];
        }
    }
    // Flush the region still being grown when the loop ended
    mergedLocs.add(new Location(current));

    Location[] finalLocations = mergedLocs.toArray(new Location[0]);
    Arrays.sort(finalLocations);
    // Return the sorted array itself. Calling mergedLocs.toArray(finalLocations)
    // here would copy the vector's order back over the array and undo the sort.
    return finalLocations;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation in project SeqMonk by s-andrews.
the class SeqMonkParser method parseAnnotation.
/**
 * Parses an external set of annotations.
 *
 * <p>The header line supplies the annotation set name (second section) and
 * the number of feature lines that follow (third section). Each feature
 * line is tab-delimited: the first field is passed to the Feature
 * constructor, the second names the chromosome, the third is the location
 * string handed to SplitLocation, and any remaining fields are read in
 * pairs as attribute key and value. A feature whose chromosome cannot be
 * resolved is reported to the progress listeners as a warning and skipped.
 *
 * @param sections The tab split initial annotation line
 * @return the finalised annotation set
 * @throws SeqMonkException if the header does not contain 3 sections, the
 *         input ends before all declared features have been read, or a
 *         feature line has fewer than 3 fields
 * @throws IOException Signals that an I/O exception has occurred.
 */
private AnnotationSet parseAnnotation(String[] sections) throws SeqMonkException, IOException {
    if (sections.length != 3) {
        throw new SeqMonkException("Annotation line didn't contain 3 sections");
    }
    AnnotationSet set = new AnnotationSet(application.dataCollection().genome(), sections[1]);
    int featureCount = Integer.parseInt(sections[2]);
    for (int i = 0; i < featureCount; i++) {
        if (i % 1000 == 0) {
            progressUpdated("Parsing annotation in " + set.name(), i, featureCount);
        }

        // A null line means the reader had nothing more to give us, so the
        // file is shorter than its header claimed. Fail with a clear message
        // rather than a NullPointerException on split().
        String line = br.readLine();
        if (line == null) {
            throw new SeqMonkException("Ran out of data after reading " + i + " of " + featureCount + " features in annotation '" + set.name() + "'");
        }

        sections = line.split("\\t");
        // The type, chromosome and location fields are all read below
        if (sections.length < 3) {
            throw new SeqMonkException("Annotation feature line didn't contain at least 3 sections");
        }

        Chromosome c;
        try {
            c = application.dataCollection().genome().getChromosome(sections[1]).chromosome();
        } catch (Exception sme) {
            // Best effort: warn every listener and carry on with the next feature
            Enumeration<ProgressListener> e = listeners.elements();
            while (e.hasMoreElements()) {
                e.nextElement().progressWarningReceived(new SeqMonkException("Annotation feature could not be mapped to chromosome '" + sections[1] + "'"));
            }
            continue;
        }
        Feature f = new Feature(sections[0], c.name());
        // TODO: Can we improve this to not use a Split Location each time?
        f.setLocation(new SplitLocation(sections[2]));
        // Remaining fields come in key/value pairs; a trailing unpaired field is ignored
        for (int a = 3; a + 1 < sections.length; a += 2) {
            f.addAttribute(sections[a], sections[a + 1]);
        }
        set.addFeature(f);
    }
    set.finalise();
    return set;
}
Aggregations