use of org.apache.commons.math3.stat.interval.ConfidenceInterval in project SeqMonk by s-andrews.
the class BinomialFilterForRev method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
boolean aboveOnly = false;
boolean belowOnly = false;
if (options.directionBox.getSelectedItem().equals("Above"))
aboveOnly = true;
else if (options.directionBox.getSelectedItem().equals("Below"))
belowOnly = true;
if (options.stringencyField.getText().length() == 0) {
stringency = 0.05;
} else {
stringency = Double.parseDouble(options.stringencyField.getText());
}
if (options.minObservationsField.getText().length() == 0) {
minObservations = 10;
} else {
minObservations = Integer.parseInt(options.minObservationsField.getText());
}
if (options.minDifferenceField.getText().length() == 0) {
minPercentShift = 10;
} else {
minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
}
useCurrentQuant = options.useCurrentQuantBox.isSelected();
applyMultipleTestingCorrection = options.multiTestBox.isSelected();
ProbeList newList;
if (applyMultipleTestingCorrection) {
newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
} else {
newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
}
Probe[] probes = startingList.getAllProbes();
// We need to create a set of mean end methylation values for all starting values
// We found to the nearest percent so we'll end up with a set of 101 values (0-100)
// which are the expected end points
double[] expectedEnds = calculateEnds(probes);
// They cancelled whilst calculating.
if (expectedEnds == null)
return;
for (int i = 0; i < expectedEnds.length; i++) {
System.err.println("" + i + "\t" + expectedEnds[i]);
}
// This is where we'll store any hits
Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
BinomialTest bt = new BinomialTest();
AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
if (aboveOnly)
hypothesis = AlternativeHypothesis.GREATER_THAN;
if (belowOnly)
hypothesis = AlternativeHypothesis.LESS_THAN;
for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
cancel = false;
progressCancelled();
return;
}
long[] reads = fromStore.getReadsForProbe(probes[p]);
int forCount = 0;
int revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));
// We need to calculate the confidence range for the from reads and work
// out the most pessimistic value we could take as a starting value
WilsonScoreInterval wi = new WilsonScoreInterval();
ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);
// System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100);
reads = toStore.getReadsForProbe(probes[p]);
forCount = 0;
revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
float toPercent = (forCount * 100f) / (forCount + revCount);
// System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]);
// Find the most pessimistic fromPercent such that the expected toPercent is as close
// to the observed value based on the confidence interval we calculated before.
double worseCaseExpectedPercent = 0;
double smallestTheoreticalToActualDiff = 100;
// Just taking the abs diff can still leave us with a closest value which is still
// quite far from where we are. We therefore also check if our confidence interval
// gives us a potential value range which spans the actual value, and if it does we
// fail it without even running the test.
boolean seenLower = false;
boolean seenHigher = false;
for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math.min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
double expectedPercent = expectedEnds[m];
double diff = expectedPercent - toPercent;
if (diff <= 0)
seenLower = true;
if (diff >= 0)
seenHigher = true;
if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
worseCaseExpectedPercent = expectedPercent;
smallestTheoreticalToActualDiff = Math.abs(diff);
}
}
// Sanity check
if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
throw new IllegalStateException("Can't have a worst case which is better than the actual");
}
if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
continue;
// If they want to use the current quantitation as well then we can do that calculation now.
if (useCurrentQuant) {
try {
if (Math.abs(toStore.getValueForProbe(probes[p]) - expectedEnds[Math.round(fromStore.getValueForProbe(probes[p]))]) < minPercentShift)
continue;
} catch (SeqMonkException sme) {
throw new IllegalStateException(sme);
}
}
// Check the directionality
if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
continue;
if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
continue;
// Now perform the Binomial test.
double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d, hypothesis);
// Our confidence range spanned the actual value we had so we can't be significant
if (seenLower && seenHigher)
pValue = 0.5;
// System.err.println("P value is "+pValue);
// Store this as a potential hit (after correcting p-values later)
hits.add(new ProbeTTestValue(probes[p], pValue));
}
// Now we can correct the p-values if we need to
ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
if (applyMultipleTestingCorrection) {
// System.err.println("Correcting for "+rawHits.length+" tests");
BenjHochFDR.calculateQValues(rawHits);
}
for (int h = 0; h < rawHits.length; h++) {
if (applyMultipleTestingCorrection) {
if (rawHits[h].q < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
}
} else {
if (rawHits[h].p < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
}
}
}
filterFinished(newList);
}
Aggregations