Use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
The class CodonBiasPipeline, method startPipeline:
/**
 * Runs the codon bias pipeline.
 *
 * <p>Generates one probe per feature of the selected type (covering the whole
 * genomic span of the feature), then for each data store counts how many read
 * starts fall on each of the three codon sub-positions.  Each probe is
 * quantitated as the percentage of reads at the chosen sub-position, and a
 * binomial test (expected proportion 1/3) is used to build per-store probe
 * lists of features showing significant codon bias after FDR correction.
 */
protected void startPipeline() {

    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.

    Vector<Probe> probes = new Vector<Probe>();
    double pValue = optionsPanel.pValue();
    String libraryType = optionsPanel.libraryType();

    Chromosome[] chrs = collection().genome().getAllChromosomes();

    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated("Making probes for chr" + chrs[c].name(), c, chrs.length * 2);

        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());

        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe p = new Probe(chrs[c], features[f].location().start(), features[f].location().end(), features[f].location().strand(), features[f].name());
            probes.add(p);
        }
    }

    allProbes = probes.toArray(new Probe[0]);

    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));

    // Now we can quantitate each individual feature and test for whether it is
    // significantly showing codon bias.

    // One result vector per data store, so the multiple testing correction
    // later on is applied within each store independently.
    ArrayList<Vector<ProbeTTestValue>> significantProbes = new ArrayList<Vector<ProbeTTestValue>>();

    // data contains the data stores that this pipeline is going to use.
    // We need to test each data store.
    for (int d = 0; d < data.length; d++) {
        significantProbes.add(new Vector<ProbeTTestValue>());
    }

    int probeCounter = 0;

    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);

        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());

        for (int p = 0; p < features.length; p++) {

            // Get the corresponding feature and work out the mapping between
            // genomic position and codon sub-position.
            int[] mappingArray = createGenomeMappingArray(features[p]);

            DATASTORE_LOOP: for (int d = 0; d < data.length; d++) {

                if (cancel) {
                    progressCancelled();
                    return;
                }

                long[] reads = data[d].getReadsForProbe(allProbes[probeCounter]);

                // Too few reads to say anything useful about bias.
                // TODO: make this configurable
                if (reads.length < 5) {
                    data[d].setValueForProbe(allProbes[probeCounter], Float.NaN);
                    continue DATASTORE_LOOP;
                }

                int pos1Count = 0;
                int pos2Count = 0;
                int pos3Count = 0;

                READ_LOOP: for (int r = 0; r < reads.length; r++) {

                    int genomicReadStart = SequenceRead.start(reads[r]);
                    int genomicReadEnd = SequenceRead.end(reads[r]);
                    int readStrand = SequenceRead.strand(reads[r]);
                    int relativeReadStart = -1;

                    // NOTE: string comparison uses equals(), not == — the
                    // library type string from the options panel is not
                    // guaranteed to be the same interned instance as the
                    // literal.

                    // forward reads
                    if (readStrand == 1) {
                        if ("Same strand specific".equals(libraryType)) {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start pos in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        } else if ("Opposing strand specific".equals(libraryType)) {
                            if (features[p].location().strand() == Location.REVERSE) {
                                // The "start" of a reverse read/probe is actually the end
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        }
                    }

                    // reverse reads
                    if (readStrand == -1) {
                        if ("Same strand specific".equals(libraryType)) {
                            if (features[p].location().strand() == Location.REVERSE) {
                                if (features[p].location().end() - genomicReadEnd < 0) {
                                    continue READ_LOOP;
                                } else {
                                    relativeReadStart = mappingArray[features[p].location().end() - genomicReadEnd];
                                }
                            }
                        } else if ("Opposing strand specific".equals(libraryType)) {
                            if (features[p].location().strand() == Location.FORWARD) {
                                // The start of the read needs to be within the feature
                                if (genomicReadStart - features[p].location().start() < 0) {
                                    continue READ_LOOP;
                                } else {
                                    // look up the read start position in the mapping array
                                    relativeReadStart = mappingArray[genomicReadStart - features[p].location().start()];
                                }
                            }
                        }
                    }

                    // Find out which codon sub-position the read start is in.
                    // relativeReadStart == -1 means the read didn't match the
                    // strand/library combination above.
                    if (relativeReadStart == -1) {
                        continue READ_LOOP;
                    } else if (relativeReadStart % 3 == 0) {
                        pos3Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 1) % 3 == 0) {
                        pos2Count++;
                        continue READ_LOOP;
                    } else if ((relativeReadStart + 2) % 3 == 0) {
                        pos1Count++;
                    }
                }

                // Split the counts into the sub-position of interest vs the
                // other two positions.
                int interestingCodonCount = 0;
                int otherCodonCount = 0;
                if (optionsPanel.codonSubPosition() == 1) {
                    interestingCodonCount = pos1Count;
                    otherCodonCount = pos2Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 2) {
                    interestingCodonCount = pos2Count;
                    otherCodonCount = pos1Count + pos3Count;
                } else if (optionsPanel.codonSubPosition() == 3) {
                    interestingCodonCount = pos3Count;
                    otherCodonCount = pos1Count + pos2Count;
                }

                int totalCount = interestingCodonCount + otherCodonCount;

                // Under no bias each sub-position is equally likely (p = 1/3).
                BinomialDistribution bd = new BinomialDistribution(totalCount, 1 / 3d);

                // cumulativeProbability(x) is P(X <= x), so we use x-1 to get
                // the probability of seeing this count or higher.
                double thisPValue = 1 - bd.cumulativeProbability(interestingCodonCount - 1);

                if (interestingCodonCount == 0)
                    thisPValue = 1;

                // We have to add all results at this stage so we don't mess up
                // the multiple testing correction later on.
                significantProbes.get(d).add(new ProbeTTestValue(allProbes[probeCounter], thisPValue));

                float percentageCount;
                if (totalCount == 0) {
                    percentageCount = 0;
                } else {
                    percentageCount = ((float) interestingCodonCount / (float) totalCount) * 100;
                }

                data[d].setValueForProbe(allProbes[probeCounter], percentageCount);
            }
            probeCounter++;
        }
    }

    // Filter those which pass our p-value cutoff after FDR correction.
    for (int d = 0; d < data.length; d++) {
        ProbeTTestValue[] ttestResults = significantProbes.get(d).toArray(new ProbeTTestValue[0]);
        BenjHochFDR.calculateQValues(ttestResults);

        ProbeList newList = new ProbeList(collection().probeSet(), "Codon bias < " + pValue + " in " + data[d].name(), "Probes showing significant codon bias for position " + optionsPanel.codonSubPosition() + " with a cutoff of " + pValue, "FDR");

        for (int i = 0; i < ttestResults.length; i++) {
            if (ttestResults[i].q < pValue) {
                newList.addProbe(ttestResults[i].probe, (float) ttestResults[i].q);
            }
        }
    }

    collection().probeSet().setCurrentQuantitation("Codon bias pipeline using codon position " + optionsPanel.codonSubPosition() + " for " + optionsPanel.libraryType() + " library.");

    quantitatonComplete();
}
Use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
The class TranscriptionTerminationPipeline, method startPipeline:
/**
 * Runs the transcription termination pipeline.
 *
 * <p>Builds one probe per valid feature, centred on the feature's 3' end
 * (its start for reverse-strand features) and extending measurementLength bp
 * either side.  Each probe is quantitated as the percentage drop in read
 * count from the window before the end (upstream) to the window after it
 * (downstream); probes with fewer than minCount upstream reads get NaN.
 */
protected void startPipeline() {

    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.

    Vector<Probe> probes = new Vector<Probe>();
    int minCount = optionsPanel.minCount();
    int measurementLength = optionsPanel.measurementLength();

    QuantitationStrandType readFilter = optionsPanel.readFilter();

    Chromosome[] chrs = collection().genome().getAllChromosomes();

    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated("Creating Probes" + chrs[c].name(), c, chrs.length * 2);

        Feature[] features = getValidFeatures(chrs[c], measurementLength);

        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }

            // The probe straddles the transcription end point: for a reverse
            // feature that is its genomic start, otherwise its genomic end.
            int endPoint = (features[f].location().strand() == Location.REVERSE) ? features[f].location().start() : features[f].location().end();
            probes.add(new Probe(chrs[c], endPoint - measurementLength, endPoint + measurementLength, features[f].location().strand(), features[f].name()));
        }
    }

    Probe[] allProbes = probes.toArray(new Probe[0]);

    collection().setProbeSet(new ProbeSet("Features " + measurementLength + "bp around the end of " + optionsPanel.getSelectedFeatureType(), allProbes));

    int probeIndex = 0;

    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }

        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);

        Feature[] features = getValidFeatures(chrs[c], measurementLength);

        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }

            // "Upstream" is the window inside the feature ending at the
            // termination point; "downstream" is the window just past it.
            boolean reverse = allProbes[probeIndex].strand() == Location.REVERSE;
            int endPoint = reverse ? features[f].location().start() : features[f].location().end();

            Probe upstreamProbe;
            Probe downstreamProbe;
            if (reverse) {
                downstreamProbe = new Probe(chrs[c], endPoint - measurementLength, endPoint, features[f].location().strand(), features[f].name());
                upstreamProbe = new Probe(chrs[c], endPoint, endPoint + measurementLength, features[f].location().strand(), features[f].name());
            } else {
                upstreamProbe = new Probe(chrs[c], endPoint - measurementLength, endPoint, features[f].location().strand(), features[f].name());
                downstreamProbe = new Probe(chrs[c], endPoint, endPoint + measurementLength, features[f].location().strand(), features[f].name());
            }

            for (int d = 0; d < data.length; d++) {
                int upstreamCount = countUsableReads(data[d].getReadsForProbe(upstreamProbe), readFilter, allProbes[probeIndex]);
                int downstreamCount = countUsableReads(data[d].getReadsForProbe(downstreamProbe), readFilter, allProbes[probeIndex]);

                // Only quantitate if we have enough upstream signal; this also
                // guards the division below against upstreamCount == 0 when
                // minCount is zero.
                if (upstreamCount >= minCount && upstreamCount > 0) {
                    float percentage = ((upstreamCount - downstreamCount) / (float) upstreamCount) * 100f;
                    data[d].setValueForProbe(allProbes[probeIndex], percentage);
                } else {
                    data[d].setValueForProbe(allProbes[probeIndex], Float.NaN);
                }
            }
            ++probeIndex;
        }
    }

    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Transcription termination pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    quantitationDescription.append(". Measurement length was ");
    quantitationDescription.append(optionsPanel.measurementLength());
    quantitationDescription.append(". Min count was ");
    quantitationDescription.append(optionsPanel.minCount());

    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());

    quantitatonComplete();
}

/**
 * Counts the reads in the given array which pass the strand filter for the
 * given probe.
 */
private int countUsableReads(long[] reads, QuantitationStrandType readFilter, Probe probe) {
    int count = 0;
    for (int i = 0; i < reads.length; i++) {
        if (readFilter.useRead(probe, reads[i]))
            ++count;
    }
    return count;
}
Use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
The class BoxWhiskerFilter, method generateProbeList:
/* (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 *
 * Runs a BoxWhisker outlier analysis over each selected store and keeps the
 * probes which were outliers in EXACTLY / AT_LEAST / NO_MORE_THAN
 * storeCutoff stores, depending on filterType.
 */
protected void generateProbeList() {

    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);

    // Per-probe tally of how many stores flagged it as an outlier.
    Hashtable<Probe, Integer> hitCounts = new Hashtable<Probe, Integer>();

    for (int s = 0; s < stores.length; s++) {

        progressUpdated("Processing " + stores[s].name(), s, stores.length);

        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        BoxWhisker bw;
        try {
            bw = new BoxWhisker(stores[s], startingList, stringency);
        } catch (SeqMonkException e) {
            // A store with no quantitation can't contribute outliers; skip it
            // rather than aborting the whole filter.
            System.err.println("Ignoring unquantitated dataset");
            e.printStackTrace();
            continue;
        }

        if (useUpper) {
            Probe[] p = bw.upperProbeOutliers();
            for (int i = 0; i < p.length; i++) {
                if (cancel) {
                    cancel = false;
                    progressCancelled();
                    return;
                }
                Integer existing = hitCounts.get(p[i]);
                hitCounts.put(p[i], existing == null ? 1 : existing + 1);
            }
        }

        if (useLower) {
            Probe[] p = bw.lowerProbeOutliers();
            for (int i = 0; i < p.length; i++) {
                if (cancel) {
                    cancel = false;
                    progressCancelled();
                    return;
                }
                Integer existing = hitCounts.get(p[i]);
                hitCounts.put(p[i], existing == null ? 1 : existing + 1);
            }
        }
    }

    // Now we can go through the probes which hit and see if
    // we had enough hits to put them into our final list.
    Enumeration<Probe> candidates = hitCounts.keys();

    while (candidates.hasMoreElements()) {
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }

        Probe candidate = candidates.nextElement();
        int count = hitCounts.get(candidate).intValue();

        // Compare the hit count against the store cutoff to decide whether to
        // add the probe to the result list.
        switch(filterType) {
        case EXACTLY:
            if (count == storeCutoff)
                newList.addProbe(candidate, null);
            break;
        case AT_LEAST:
            if (count >= storeCutoff)
                newList.addProbe(candidate, null);
            break;
        case NO_MORE_THAN:
            if (count <= storeCutoff)
                newList.addProbe(candidate, null);
            break;
        }
    }

    filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
The class VariancePlotPanel, method calculateNonredundantSet:
/**
* This collapses individual points which are over the same
* pixel when redrawing the plot at a different scale
*/
private synchronized void calculateNonredundantSet() {
    closestPoint = null;
    // One cell per screen pixel at the current panel size; multiple probes
    // mapping to the same pixel collapse into a single ProbePairValue.
    ProbePairValue[][] grid = new ProbePairValue[getWidth()][getHeight()];
    Probe[] probes = probeList.getAllProbes();
    try {
        for (int p = 0; p < probes.length; p++) {
            float xValue = repSet.getValueForProbeExcludingUnmeasured(probes[p]);
            float yValue = getYValue(probes[p]);
            // Skip probes with no drawable value.
            if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
                continue;
            }
            int x = getX(xValue);
            int y = getY(yValue);
            if (grid[x][y] == null) {
                grid[x][y] = new ProbePairValue(xValue, yValue, x, y);
                grid[x][y].setProbe(probes[p]);
            } else {
                // Pixel already occupied.  When no sublists are shown, count
                // is a density tally; when sublists are shown it instead
                // encodes which sublist the point belongs to (set below), so
                // we don't increment it here.
                if (subLists == null)
                    grid[x][y].count++;
                // As we have multiple probes at this point we remove the
                // specific probe annotation.
                grid[x][y].setProbe(null);
            }
        }
        if (subLists != null) {
            for (int s = 0; s < subLists.length; s++) {
                Probe[] subListProbes = subLists[s].getAllProbes();
                for (int p = 0; p < subListProbes.length; p++) {
                    float xValue = repSet.getValueForProbeExcludingUnmeasured(subListProbes[p]);
                    float yValue = getYValue(subListProbes[p]);
                    int x = getX(xValue);
                    int y = getY(yValue);
                    if (grid[x][y] == null) {
                        // This messes up where we catch it in the middle of a redraw
                        continue;
                        // throw new IllegalArgumentException("Found subList position not in main list");
                    }
                    // 1 = no list so 2 is the lowest sublist index
                    grid[x][y].count = s + 2;
                }
            }
        }
    } catch (SeqMonkException e) {
        throw new IllegalStateException(e);
    }
    // Now we need to put all of the ProbePairValues into
    // a single array;
    int count = 0;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null)
                count++;
        }
    }
    ProbePairValue[] nonred = new ProbePairValue[count];
    // Fill the array from the back; after this loop count has returned to -1.
    count--;
    for (int x = 0; x < grid.length; x++) {
        for (int y = 0; y < grid[x].length; y++) {
            if (grid[x][y] != null) {
                nonred[count] = grid[x][y];
                count--;
            }
        }
    }
    Arrays.sort(nonred);
    // Work out the 95% percentile count
    int minCount = 1;
    int maxCount = 2;
    if (nonred.length > 0) {
        minCount = nonred[0].count;
        maxCount = nonred[((nonred.length - 1) * 95) / 100].count;
    }
    // Go through every nonred assigning a suitable colour
    ColourGradient gradient = new HotColdColourGradient();
    for (int i = 0; i < nonred.length; i++) {
        if (subLists == null) {
            // Density mode: colour by count within the 95th-percentile range.
            nonred[i].color = gradient.getColor(nonred[i].count, minCount, maxCount);
        } else {
            // Sublist mode: count encodes sublist index + 2 (1 = no list).
            if (nonred[i].count > subLists.length + 1) {
                throw new IllegalArgumentException("Count above threshold when showing sublists");
            }
            if (nonred[i].count == 1) {
                nonred[i].color = VERY_LIGHT_GREY;
            } else {
                nonred[i].color = ColourIndexSet.getColour(nonred[i].count - 2);
            }
        }
    }
    nonRedundantValues = nonred;
    // Remember the panel size this set was built for so we can tell when a
    // resize forces a recalculation.
    lastNonredWidth = getWidth();
    lastNonredHeight = getHeight();
}
Use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
The class IntensityDifferenceFilter, method generateProbeList:
/* (non-Javadoc)
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 *
 * For each from/to store comparison, builds a local background model of
 * intensity differences from the probesPerSet probes with the closest average
 * intensity, fits a normal distribution to it, and scores each probe's
 * difference against that distribution.  Keeps probes whose lowest
 * (optionally FDR-corrected) p-value across all comparisons beats pValueLimit.
 */
protected void generateProbeList() {

    applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();

    Probe[] probes = startingList.getAllProbes();

    // We'll pull the number of probes to sample from the preferences if they've changed it
    Integer updatedProbesPerSet = optionsPanel.probesPerSet();
    if (updatedProbesPerSet != null)
        probesPerSet = updatedProbesPerSet;

    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");

    // We'll build up the set of lowest p-values per probe as we go along.
    float[] lowestPValues = new float[probes.length];
    for (int p = 0; p < lowestPValues.length; p++) {
        lowestPValues[p] = 1;
    }

    // This is going to be the temporary array we populate with the set of
    // differences we are going to analyse.
    double[] currentDiffSet = new double[probesPerSet];

    // First work out the set of comparisons we're going to make.
    Vector<SingleComparison> comparisons = new Vector<IntensityDifferenceFilter.SingleComparison>();
    for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
        for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
            if (fromStores[fromIndex] == toStores[toIndex])
                continue;

            // If we can find the fromStore in the toStores we've already done and the
            // toStore anywhere in the fromStores then we can skip this.
            boolean canSkip = false;
            for (int i = 0; i < fromIndex; i++) {
                if (fromStores[i] == toStores[toIndex]) {
                    for (int j = 0; j < toStores.length; j++) {
                        if (toStores[j] == fromStores[fromIndex]) {
                            canSkip = true;
                            break;
                        }
                    }
                    break;
                }
            }
            if (canSkip)
                continue;

            comparisons.add(new SingleComparison(fromIndex, toIndex));
        }
    }

    // Put something in the progress whilst we're ordering the probe values to make
    // the comparison.
    progressUpdated("Generating background model", 0, 1);

    for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {

        int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
        int toIndex = comparisons.elementAt(comparisonIndex).toIndex;

        // We need to generate a set of probe indices ordered by their average intensity.
        Integer[] indices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            indices[i] = i;
        }
        Comparator<Integer> comp = new AverageIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
        Arrays.sort(indices, comp);

        progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());

        IndexTTestValue[] currentPValues = new IndexTTestValue[indices.length];

        for (int i = 0; i < indices.length; i++) {

            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }

            if (i % 1000 == 0) {
                int progress = (i * 100) / indices.length;
                // Each completed comparison accounts for 100 progress units.
                progress += 100 * comparisonIndex;
                progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
            }

            // We need to make up the set of differences to represent this probe:
            // the probesPerSet probes closest in average intensity, centred on i
            // where possible and clamped at either end of the ordering.
            int startingIndex = i - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + (probesPerSet + 1) >= probes.length)
                startingIndex = probes.length - (probesPerSet + 1);

            try {
                for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
                    // Don't include the point being tested in the background model.
                    // (BUGFIX: this previously skipped j == startingIndex, which
                    // left the tested probe inside its own background model.)
                    if (j == i)
                        continue;
                    else if (j < i) {
                        currentDiffSet[j - startingIndex] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    } else {
                        currentDiffSet[(j - startingIndex) - 1] = fromStores[fromIndex].getValueForProbe(probes[indices[j]]) - toStores[toIndex].getValueForProbe(probes[indices[j]]);
                    }
                }

                // Should we fix the mean at 0?
                double mean = 0;
                // double mean = SimpleStats.mean(currentDiffSet);
                double stdev = SimpleStats.stdev(currentDiffSet, mean);

                // A degenerate background model can't produce a significance.
                if (stdev == 0) {
                    currentPValues[indices[i]] = new IndexTTestValue(indices[i], 1);
                    continue;
                }

                // Get the difference for this point.
                double diff = fromStores[fromIndex].getValueForProbe(probes[indices[i]]) - toStores[toIndex].getValueForProbe(probes[indices[i]]);

                // One-tailed p-value for the observed difference under the
                // fitted normal background.
                NormalDistribution nd = new NormalDistribution(mean, stdev);
                double significance;
                if (diff < mean) {
                    significance = nd.cumulativeProbability(diff);
                } else {
                    significance = 1 - nd.cumulativeProbability(diff);
                }
                currentPValues[indices[i]] = new IndexTTestValue(indices[i], significance);
            } catch (SeqMonkException sme) {
                progressExceptionReceived(sme);
                return;
            }
        }

        // We now need to correct the set of pValues.
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(currentPValues);
        }

        // Fold this comparison's (corrected) values into the combined set,
        // keeping the lowest value seen for each probe.
        if (applyMultipleTestingCorrection) {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
                }
            }
        } else {
            for (int i = 0; i < currentPValues.length; i++) {
                if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
                    lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
                }
            }
        }
    }

    // Keep the probes whose best p-value passes the filter.
    for (int i = 0; i < lowestPValues.length; i++) {
        if (lowestPValues[i] < pValueLimit) {
            newList.addProbe(probes[i], lowestPValues[i]);
        }
    }

    filterFinished(newList);
}
Aggregations