use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class CodonBiasPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
double pValue = optionsPanel.pValue();
String libraryType = optionsPanel.libraryType();
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Making probes for chr" + chrs[c].name(), c, chrs.length * 2);
Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
probes.add(p);
}
}
allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
// Now we can quantitate each individual feature and test for whether it is significantly
// showing codon bias
ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();
// data contains the data stores that this pipeline is going to use. We need to test each data store.
for (int d = 0; d < data.length; d++) {
significantProbes.add(new Vector<ProbeTTestValue>());
}
int probeCounter = 0;
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
for (int p = 0; p < features.length; p++) {
// Get the corresponding feature and work out the mapping between genomic position and codon sub position.
int[] mappingArray = createGenomeMappingArray(features[p]);
DATASTORE_LOOP: for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
long[] reads = data[d].getReadsForProbe(allProbes[probeCounter]);
// TODO: make this configurable
if (reads.length < 5) {
data[d].setValueForProbe(allProbes[probeCounter], Float.NaN);
continue DATASTORE_LOOP;
}
int pos1Count = 0;
int pos2Count = 0;
int pos3Count = 0;
READ_LOOP: for (int r = 0; r < reads.length; r++) {
int genomicReadStart = SequenceRead.start(reads[r]);
int genomicReadEnd = SequenceRead.end(reads[r]);
int readStrand = SequenceRead.strand(reads[r]);
int relativeReadStart = -1;
// forward reads
if (readStrand == 1) {
if (libraryType == "Same strand specific") {
if (features[p].location().strand() == Location.FORWARD) {
// The start of the read needs to be within the feature
if (genomicReadStart - features[p].location().start() < 0) {
continue READ_LOOP;
} else {
// look up the read start pos in the mapping array
relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
}
}
} else if (libraryType == "Opposing strand specific") {
if (features[p].location().strand() == Location.REVERSE) {
// The "start" of a reverse read/probe is actually the end
if (features[p].location().end() - genomicReadEnd < 0) {
continue READ_LOOP;
} else {
relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
}
}
}
}
// reverse reads
if (readStrand == -1) {
if (libraryType == "Same strand specific") {
if (features[p].location().strand() == Location.REVERSE) {
if (features[p].location().end() - genomicReadEnd < 0) {
continue READ_LOOP;
} else {
// System.out.println("features[p].location().end() is " + features[p].location().end() + ", genomicReadEnd is " + genomicReadEnd);
// System.out.println("mapping array[0] is " + mappingArray[0]);
relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
}
}
} else if (libraryType == "Opposing strand specific") {
if (features[p].location().strand() == Location.FORWARD) {
// The start of the read needs to be within the feature
if (genomicReadStart - features[p].location().start() < 0) {
continue READ_LOOP;
} else {
// look up the read start position in the mapping array
relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
}
}
}
}
// find out which position the read is in
if (relativeReadStart == -1) {
continue READ_LOOP;
} else if (relativeReadStart % 3 == 0) {
pos3Count++;
continue READ_LOOP;
} else if ((relativeReadStart + 1) % 3 == 0) {
pos2Count++;
continue READ_LOOP;
} else if ((relativeReadStart + 2) % 3 == 0) {
pos1Count++;
}
}
// closing bracket for read loop
// System.out.println("pos1Count for "+ features[p].name() + " is " + pos1Count);
// System.out.println("pos2Count for "+ features[p].name() + " is " + pos2Count);
// System.out.println("pos3Count for "+ features[p].name() + " is " + pos3Count);
int interestingCodonCount = 0;
int otherCodonCount = 0;
if (optionsPanel.codonSubPosition() == 1) {
interestingCodonCount = pos1Count;
otherCodonCount = pos2Count + pos3Count;
} else if (optionsPanel.codonSubPosition() == 2) {
interestingCodonCount = pos2Count;
otherCodonCount = pos1Count + pos3Count;
} else if (optionsPanel.codonSubPosition() == 3) {
interestingCodonCount = pos3Count;
otherCodonCount = pos1Count + pos2Count;
}
int totalCount = interestingCodonCount + otherCodonCount;
// BinomialDistribution bd = new BinomialDistribution(interestingCodonCount+otherCodonCount, 1/3d);
BinomialDistribution bd = new BinomialDistribution(totalCount, 1 / 3d);
// Since the binomial distribution gives the probability of getting a value higher than
// this we need to subtract one so we get the probability of this or higher.
double thisPValue = 1 - bd.cumulativeProbability(interestingCodonCount - 1);
if (interestingCodonCount == 0)
thisPValue = 1;
// We have to add all results at this stage so we don't mess up the multiple
// testing correction later on.
significantProbes.get(d).add(new ProbeTTestValue(allProbes[probeCounter], thisPValue));
float percentageCount;
if (totalCount == 0) {
percentageCount = 0;
} else {
percentageCount = ((float) interestingCodonCount / (float) totalCount) * 100;
}
data[d].setValueForProbe(allProbes[probeCounter], percentageCount);
// System.out.println("totalCount = " + totalCount);
// System.out.println("interestingCodonCount " + interestingCodonCount);
// System.out.println("pValue = " + thisPValue);
// System.out.println("percentageCount = " + percentageCount);
// System.out.println("");
}
probeCounter++;
}
}
// filtering those which pass our p-value cutoff
for (int d = 0; d < data.length; d++) {
ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
BenjHochFDR.calculateQValues(ttestResults);
ProbeList newList = new ProbeList(collection().probeSet(), "Codon bias < " + pValue + " in " + data[d].name(), "Probes showing significant codon bias for position " + optionsPanel.codonSubPosition() + " with a cutoff of " + pValue, "FDR");
for (int i = 0; i < ttestResults.length; i++) {
if (ttestResults[i].q < pValue) {
newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
}
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Codon bias pipeline using codon position " + optionsPanel.codonSubPosition() + " for " + optionsPanel.libraryType() + " library.");
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class TranscriptionTerminationPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
int minCount = optionsPanel.minCount();
int measurementLength = optionsPanel.measurementLength();
QuantitationStrandType readFilter = optionsPanel.readFilter();
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Creating Probes" + chrs[c].name(), c, chrs.length * 2);
Feature[] features = getValidFeatures(chrs[c], measurementLength);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
if (features[f].location().strand() == Location.REVERSE) {
Probe p = new Probe(chrs[c], features[f].location().start() - measurementLength, features[f].location().start() + measurementLength, features[f].location().strand(), features[f].name());
probes.add(p);
} else {
Probe p = new Probe(chrs[c], features[f].location().end() - measurementLength, features[f].location().end() + measurementLength, features[f].location().strand(), features[f].name());
probes.add(p);
}
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
collection().setProbeSet(new ProbeSet("Features " + measurementLength + "bp around the end of " + optionsPanel.getSelectedFeatureType(), allProbes));
int probeIndex = 0;
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
Feature[] features = getValidFeatures(chrs[c], measurementLength);
for (int f = 0; f < features.length; f++) {
if (cancel) {
progressCancelled();
return;
}
for (int d = 0; d < data.length; d++) {
if (allProbes[probeIndex].strand() == Location.REVERSE) {
Probe downstreamProbe = new Probe(chrs[c], features[f].location().start() - measurementLength, features[f].location().start(), features[f].location().strand(), features[f].name());
Probe upstreamProbe = new Probe(chrs[c], features[f].location().start(), features[f].location().start() + measurementLength, features[f].location().strand(), features[f].name());
long[] upstreamReads = data[d].getReadsForProbe(upstreamProbe);
long[] downstreamReads = data[d].getReadsForProbe(downstreamProbe);
int upstreamCount = 0;
for (int i = 0; i < upstreamReads.length; i++) {
if (readFilter.useRead(allProbes[probeIndex], upstreamReads[i]))
++upstreamCount;
}
int downstreamCount = 0;
for (int i = 0; i < downstreamReads.length; i++) {
if (readFilter.useRead(allProbes[probeIndex], downstreamReads[i]))
++downstreamCount;
}
float percentage = ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
if (upstreamCount >= minCount) {
data[d].setValueForProbe(allProbes[probeIndex], percentage);
} else {
data[d].setValueForProbe(allProbes[probeIndex], Float.NaN);
}
} else {
Probe upstreamProbe = new Probe(chrs[c], features[f].location().end() - measurementLength, features[f].location().end(), features[f].location().strand(), features[f].name());
Probe downstreamProbe = new Probe(chrs[c], features[f].location().end(), features[f].location().end() + measurementLength, features[f].location().strand(), features[f].name());
long[] upstreamReads = data[d].getReadsForProbe(upstreamProbe);
long[] downstreamReads = data[d].getReadsForProbe(downstreamProbe);
int upstreamCount = 0;
for (int i = 0; i < upstreamReads.length; i++) {
if (readFilter.useRead(allProbes[probeIndex], upstreamReads[i]))
++upstreamCount;
}
int downstreamCount = 0;
for (int i = 0; i < downstreamReads.length; i++) {
if (readFilter.useRead(allProbes[probeIndex], downstreamReads[i]))
++downstreamCount;
}
float percentage = ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
if (upstreamCount >= minCount) {
data[d].setValueForProbe(allProbes[probeIndex], percentage);
} else {
data[d].setValueForProbe(allProbes[probeIndex], Float.NaN);
}
}
}
++probeIndex;
}
}
StringBuffer quantitationDescription = new StringBuffer();
quantitationDescription.append("Transcription termination pipeline quantitation ");
quantitationDescription.append(". Directionality was ");
quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
quantitationDescription.append(". Measurement length was ");
quantitationDescription.append(optionsPanel.measurementLength());
quantitationDescription.append(". Min count was ");
quantitationDescription.append(optionsPanel.minCount());
collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FeatureNameFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
annotationType = optionsPanel.annotationTypeBox.getSelectedItem().toString();
stripSuffixes = optionsPanel.stripSuffixesBox.isSelected();
stripTranscriptSuffixes = optionsPanel.stripTranscriptSuffixesBox.isSelected();
ProbeList passedProbes = new ProbeList(startingList, "", "", startingList.getValueName());
// Since we're going to be making the annotations on the
// basis of position we should go through all probes one
// chromosome at a time. We therefore make a stipulation that
// not only do the feature names have to match, so do the
// chromosomes.
Chromosome[] chrs = collection.genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
// We start by building a list of the feature names we're going to
// check against.
HashSet<String> featureNames = new HashSet<String>();
progressUpdated("Processing features on Chr " + chrs[c].name(), c, chrs.length);
Probe[] probes = startingList.getProbesForChromosome(chrs[c]);
Feature[] features = collection.genome().annotationCollection().getFeaturesForType(chrs[c], annotationType);
for (int f = 0; f < features.length; f++) {
String name = features[f].name();
if (stripSuffixes) {
name = name.replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
}
if (stripTranscriptSuffixes) {
name = name.replaceAll("-\\d\\d\\d$", "");
}
featureNames.add(name);
}
// We can now step through the probes looking for a match to the stored feature names
for (int p = 0; p < probes.length; p++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
String name = probes[p].name();
if (stripSuffixes) {
name = name.replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
}
if (stripTranscriptSuffixes) {
name = name.replaceAll("-\\d\\d\\d$", "");
}
if (featureNames.contains(name)) {
passedProbes.addProbe(probes[p], startingList.getValueForProbe(probes[p]));
}
}
}
filterFinished(passedProbes);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class FindFeatureDialog method actionPerformed.
/* (non-Javadoc)
* @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
*/
public void actionPerformed(ActionEvent ae) {
if (ae.getActionCommand().equals("close")) {
setVisible(false);
dispose();
} else if (ae.getActionCommand().equals("search")) {
Thread t = new Thread(this);
t.start();
} else if (ae.getActionCommand().equals("save_annotation_all")) {
// Find a name for the type of feature they want to create
String name = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, search.getText() + " " + featureType.getSelectedItem() + " search");
// They cancelled
if (name == null)
return;
// Now we can go ahead and make the new annotation set
AnnotationSet searchAnnotations = new AnnotationSet(dataCollection.genome(), search.getText() + " " + featureType.getSelectedItem() + " search");
for (int f = 0; f < lastHits.length; f++) {
Feature feature = new Feature(name, lastHits[f].chromosomeName());
feature.setLocation(lastHits[f].location());
AnnotationTagValue[] tags = lastHits[f].getAnnotationTagValues();
for (int t = 0; t < tags.length; t++) {
feature.addAttribute(tags[t].tag(), tags[t].value());
}
searchAnnotations.addFeature(feature);
}
dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { searchAnnotations });
} else if (ae.getActionCommand().equals("save_annotation_selected")) {
Feature[] selectedHits = viewer.getSelectedFeatures();
if (selectedHits.length == 0) {
JOptionPane.showMessageDialog(this, "There are no selected features from which to make a track", "Can't make track", JOptionPane.INFORMATION_MESSAGE);
return;
}
// Find a name for the type of feature they want to create
String name = (String) JOptionPane.showInputDialog(this, "Feature type", "Make Annotation Track", JOptionPane.QUESTION_MESSAGE, null, null, "selected " + search.getText());
// They cancelled
if (name == null)
return;
// Now we can go ahead and make the new annotation set
AnnotationSet searchAnnotations = new AnnotationSet(dataCollection.genome(), search.getText() + " search results");
for (int f = 0; f < selectedHits.length; f++) {
Feature feature = new Feature(name, selectedHits[f].chromosomeName());
feature.setLocation(selectedHits[f].location());
AnnotationTagValue[] tags = selectedHits[f].getAnnotationTagValues();
for (int t = 0; t < tags.length; t++) {
feature.addAttribute(tags[t].tag(), tags[t].value());
}
searchAnnotations.addFeature(feature);
}
dataCollection.genome().annotationCollection().addAnnotationSets(new AnnotationSet[] { searchAnnotations });
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature in project SeqMonk by s-andrews.
the class AnnotatedInteractionReport method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
String annotationTypeValue = (String) annotationType.getSelectedItem();
int distanceLimit = 0;
// Check what to do with unannotated probes
boolean includeAll = true;
if (((String) excludes.getSelectedItem()).equals("Exclude")) {
includeAll = false;
}
String annotationPositionValue = (String) annotationPosition.getSelectedItem();
// We're going to set up a set of booleans which tell us which kinds
// of relationships we're allowed to look for later.
boolean surrounding = true;
boolean upstream = true;
boolean downstream = true;
boolean matchname = false;
if (annotationPositionValue.equals("[Don't annotate]")) {
upstream = false;
downstream = false;
surrounding = false;
} else if (annotationPositionValue.equals("overlapping")) {
upstream = false;
downstream = false;
} else if (annotationPositionValue.equals("surrounding or upstream")) {
downstream = false;
} else if (annotationPositionValue.equals("surrounding or downstream")) {
upstream = false;
} else if (annotationPositionValue.equals("upstream")) {
surrounding = false;
downstream = false;
} else if (annotationPositionValue.equals("downstream")) {
surrounding = false;
upstream = false;
} else if (annotationPositionValue.equals("closest")) {
// Leave things as they are!
} else if (annotationPositionValue.equals("name matched")) {
matchname = true;
upstream = false;
surrounding = false;
downstream = false;
} else {
System.err.println("Didn't recognise position value '" + annotationPositionValue + "'");
}
// surrounding.
if (!annotationPositionValue.equals("surrounding")) {
if (annotationLimit.getText().length() > 0) {
distanceLimit = Integer.parseInt(annotationLimit.getText());
}
}
// Since we're going to be making the annotations on the
// basis of position we should go through all probes one
// chromosome at a time.
Feature[] features = null;
Chromosome lastChr = null;
// We can now step through the probes looking for the best feature match
for (int p = 0; p < probes.length; p++) {
if (cancel) {
progressCancelled();
return;
}
if (p % 100 == 0) {
progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (!probes[p].chromosome().equals(lastChr)) {
features = collection.genome().annotationCollection().getFeaturesForType(probes[p].chromosome(), annotationTypeValue);
lastChr = probes[p].chromosome();
}
String nameWithoutExtensions = "";
String nameWithoutTranscript = "";
if (matchname) {
nameWithoutExtensions = probes[p].name().replaceFirst("_upstream$", "").replaceAll("_downstream$", "").replaceAll("_gene$", "");
nameWithoutTranscript = nameWithoutExtensions.replaceAll("-\\d\\d\\d$", "");
}
Feature bestFeature = null;
int closestDistance = 0;
for (int f = 0; f < features.length; f++) {
if (matchname) {
// Simplest check is if the name matches exactly
if (features[f].name().equals(probes[p].name()) || features[f].name().equals(nameWithoutExtensions) || features[f].name().equals(nameWithoutTranscript)) {
bestFeature = features[f];
closestDistance = 0;
break;
}
}
if (surrounding) {
if (probes[p].start() <= features[f].location().end() && probes[p].end() >= features[f].location().start()) {
bestFeature = features[f];
closestDistance = 0;
// Once we've found an overlapping feature we quit.
break;
}
}
if (downstream) {
// Check if the feature is downstream
// Get the distance to the start
int d = 0;
if (features[f].location().strand() == Location.FORWARD) {
d = features[f].location().start() - probes[p].end();
} else {
d = probes[p].start() - features[f].location().end();
}
if (d >= 0) {
if (d > distanceLimit || (bestFeature != null && d > closestDistance)) {
continue;
}
// See if this is the closest feature we have so far...
if (bestFeature == null || d < closestDistance) {
bestFeature = features[f];
closestDistance = d;
}
continue;
}
}
if (upstream) {
// Check if the feature is upstream
// Get the distance to the start
int d = 0;
if (features[f].location().strand() == Location.FORWARD) {
d = probes[p].start() - features[f].location().end();
} else {
d = features[f].location().start() - probes[p].end();
}
if (d >= 0) {
if (d > distanceLimit || (bestFeature != null && d > closestDistance)) {
continue;
}
// See if this is the closest feature we have so far...
if (bestFeature == null || d < closestDistance) {
bestFeature = features[f];
closestDistance = d;
}
continue;
}
}
}
if (bestFeature == null) {
continue;
}
probeAnnotations.put(probes[p], bestFeature);
}
if (!includeAll) {
// We need to filter the interaction list to include only those which
// have annotations on both probes
Vector<InteractionProbePair> filteredInteractions = new Vector<InteractionProbePair>();
for (int i = 0; i < interactions.length; i++) {
if (probeAnnotations.containsKey(interactions[i].probe1()) && probeAnnotations.containsKey(interactions[i].probe2())) {
filteredInteractions.add(interactions[i]);
}
}
interactions = filteredInteractions.toArray(new InteractionProbePair[0]);
}
TableModel model = new AnnotationTableModel();
reportComplete(model);
}
Aggregations