Use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in the SeqMonk project by s-andrews:
the generateProbeList method of the BinomialFilterForRev class.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
boolean aboveOnly = false;
boolean belowOnly = false;
if (options.directionBox.getSelectedItem().equals("Above"))
aboveOnly = true;
else if (options.directionBox.getSelectedItem().equals("Below"))
belowOnly = true;
if (options.stringencyField.getText().length() == 0) {
stringency = 0.05;
} else {
stringency = Double.parseDouble(options.stringencyField.getText());
}
if (options.minObservationsField.getText().length() == 0) {
minObservations = 10;
} else {
minObservations = Integer.parseInt(options.minObservationsField.getText());
}
if (options.minDifferenceField.getText().length() == 0) {
minPercentShift = 10;
} else {
minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
}
useCurrentQuant = options.useCurrentQuantBox.isSelected();
applyMultipleTestingCorrection = options.multiTestBox.isSelected();
ProbeList newList;
if (applyMultipleTestingCorrection) {
newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
} else {
newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
}
Probe[] probes = startingList.getAllProbes();
// We need to create a set of mean end methylation values for all starting values
// We found to the nearest percent so we'll end up with a set of 101 values (0-100)
// which are the expected end points
double[] expectedEnds = calculateEnds(probes);
// They cancelled whilst calculating.
if (expectedEnds == null)
return;
for (int i = 0; i < expectedEnds.length; i++) {
System.err.println("" + i + "\t" + expectedEnds[i]);
}
// This is where we'll store any hits
Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
BinomialTest bt = new BinomialTest();
AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
if (aboveOnly)
hypothesis = AlternativeHypothesis.GREATER_THAN;
if (belowOnly)
hypothesis = AlternativeHypothesis.LESS_THAN;
for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
cancel = false;
progressCancelled();
return;
}
long[] reads = fromStore.getReadsForProbe(probes[p]);
int forCount = 0;
int revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));
// We need to calculate the confidence range for the from reads and work
// out the most pessimistic value we could take as a starting value
WilsonScoreInterval wi = new WilsonScoreInterval();
ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);
// System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100);
reads = toStore.getReadsForProbe(probes[p]);
forCount = 0;
revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
float toPercent = (forCount * 100f) / (forCount + revCount);
// System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]);
// Find the most pessimistic fromPercent such that the expected toPercent is as close
// to the observed value based on the confidence interval we calculated before.
double worseCaseExpectedPercent = 0;
double smallestTheoreticalToActualDiff = 100;
// Just taking the abs diff can still leave us with a closest value which is still
// quite far from where we are. We therefore also check if our confidence interval
// gives us a potential value range which spans the actual value, and if it does we
// fail it without even running the test.
boolean seenLower = false;
boolean seenHigher = false;
for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math.min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
double expectedPercent = expectedEnds[m];
double diff = expectedPercent - toPercent;
if (diff <= 0)
seenLower = true;
if (diff >= 0)
seenHigher = true;
if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
worseCaseExpectedPercent = expectedPercent;
smallestTheoreticalToActualDiff = Math.abs(diff);
}
}
// Sanity check
if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
throw new IllegalStateException("Can't have a worst case which is better than the actual");
}
if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
continue;
// If they want to use the current quantitation as well then we can do that calculation now.
if (useCurrentQuant) {
try {
if (Math.abs(toStore.getValueForProbe(probes[p]) - expectedEnds[Math.round(fromStore.getValueForProbe(probes[p]))]) < minPercentShift)
continue;
} catch (SeqMonkException sme) {
throw new IllegalStateException(sme);
}
}
// Check the directionality
if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
continue;
if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
continue;
// Now perform the Binomial test.
double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d, hypothesis);
// Our confidence range spanned the actual value we had so we can't be significant
if (seenLower && seenHigher)
pValue = 0.5;
// System.err.println("P value is "+pValue);
// Store this as a potential hit (after correcting p-values later)
hits.add(new ProbeTTestValue(probes[p], pValue));
}
// Now we can correct the p-values if we need to
ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
if (applyMultipleTestingCorrection) {
// System.err.println("Correcting for "+rawHits.length+" tests");
BenjHochFDR.calculateQValues(rawHits);
}
for (int h = 0; h < rawHits.length; h++) {
if (applyMultipleTestingCorrection) {
if (rawHits[h].q < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
}
} else {
if (rawHits[h].p < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
}
}
}
filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in the SeqMonk project by s-andrews:
the generateProbeList method of the ChiSquareFilter class.
/**
 * Runs the filter. Every probe with sufficient observations in each selected
 * store pair, and whose store1 percentage spread across the pairs meets the
 * minimum shift, is scored with a chi-square test on the per-pair count
 * table. Scores are optionally Benjamini-Hochberg corrected before probes
 * passing the stringency cut-off are added to the new list.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
protected void generateProbeList() {

	// Read the option fields, substituting defaults for anything left blank.
	String stringencyText = options.stringencyField.getText();
	stringency = stringencyText.length() == 0 ? 0.05 : Double.parseDouble(stringencyText);

	String minObsText = options.minObservationsField.getText();
	minObservations = minObsText.length() == 0 ? 10 : Integer.parseInt(minObsText);

	String minDiffText = options.minDifferenceField.getText();
	minPercentShift = minDiffText.length() == 0 ? 10 : Integer.parseInt(minDiffText);

	applyMultipleTestingCorrection = options.multiTestBox.isSelected();
	pairs = options.getSelectedPairs();

	// The value annotation depends on whether we'll store q-values or p-values.
	String valueName = applyMultipleTestingCorrection ? "Q-value" : "P-value";
	ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", valueName);

	Probe[] allProbes = startingList.getAllProbes();
	int[][] counts = new int[pairs.length][2];

	// Candidate hits, scored but not yet thresholded.
	Vector<ProbeTTestValue> candidates = new Vector<ProbeTTestValue>();

	PROBE: for (int index = 0; index < allProbes.length; index++) {

		if (index % 100 == 0) {
			progressUpdated("Processed " + index + " probes", index, allProbes.length);
		}
		if (cancel) {
			cancel = false;
			progressCancelled();
			return;
		}

		// Tally the reads from both stores of every pair under this probe.
		for (int d = 0; d < pairs.length; d++) {
			counts[d][0] = pairs[d].store1.getReadsForProbe(allProbes[index]).length;
			counts[d][1] = pairs[d].store2.getReadsForProbe(allProbes[index]).length;
		}

		// Require the minimum observation count in every pair, and track the
		// spread of store1 percentages across the pairs as we go.
		double lowestPercent = 0;
		double highestPercent = 0;
		for (int d = 0; d < pairs.length; d++) {
			int total = counts[d][0] + counts[d][1];
			if (total < minObservations) {
				continue PROBE;
			}
			double percent = (((double) counts[d][0]) / total) * 100;
			if (d == 0 || percent < lowestPercent)
				lowestPercent = percent;
			if (d == 0 || percent > highestPercent)
				highestPercent = percent;
		}

		// Skip probes whose percentage spread is below the requested shift.
		if (highestPercent - lowestPercent < minPercentShift)
			continue PROBE;

		// Score the count table with a chi-square test and keep it as a candidate.
		candidates.add(new ProbeTTestValue(allProbes[index], ChiSquareTest.chiSquarePvalue(counts)));
	}

	// Apply multiple testing correction across all candidates if requested.
	ProbeTTestValue[] rawHits = candidates.toArray(new ProbeTTestValue[0]);
	if (applyMultipleTestingCorrection) {
		BenjHochFDR.calculateQValues(rawHits);
	}

	// Keep only the candidates passing the stringency cut-off.
	for (int h = 0; h < rawHits.length; h++) {
		double score = applyMultipleTestingCorrection ? rawHits[h].q : rawHits[h].p;
		if (score < stringency) {
			newList.addProbe(rawHits[h].probe, (float) score);
		}
	}

	filterFinished(newList);
}
Use of uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeTTestValue in the SeqMonk project by s-andrews:
the generateProbeList method of the ChiSquareFilterForRev class.
/**
 * Runs the filter. For each probe a forward/reverse count table is built per
 * store - either from actual read strands, or (when resampling) by
 * redistributing the total read count according to the store's current
 * percentage quantitation. Probes with sufficient observations in every
 * store and a forward-percentage spread of at least the minimum shift are
 * scored with a chi-square test, optionally Benjamini-Hochberg corrected,
 * and those passing the stringency cut-off are added to the new list.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
protected void generateProbeList() {

	// Fall back to sensible defaults for any option field left blank.
	if (options.stringencyField.getText().length() == 0) {
		stringency = 0.05;
	} else {
		stringency = Double.parseDouble(options.stringencyField.getText());
	}
	if (options.minObservationsField.getText().length() == 0) {
		minObservations = 10;
	} else {
		minObservations = Integer.parseInt(options.minObservationsField.getText());
	}
	if (options.minDifferenceField.getText().length() == 0) {
		minPercentShift = 10;
	} else {
		minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
	}

	applyMultipleTestingCorrection = options.multiTestBox.isSelected();
	resample = options.resampleBox.isSelected();

	Probe[] probes = startingList.getAllProbes();

	if (resample) {
		// Do a sanity check that the current quantitation actually looks like
		// percentage values.
		progressUpdated("Checking current quantitations", 0, 1);
		for (int p = 0; p < probes.length; p++) {
			if (cancel) {
				// Reset the flag so a later run isn't immediately cancelled
				// (this matches the cancel handling in the main loop below).
				cancel = false;
				progressCancelled();
				return;
			}
			try {
				for (int d = 0; d < stores.length; d++) {
					float value = stores[d].getValueForProbe(probes[p]);
					// NaN is OK
					if (Float.isNaN(value))
						continue;
					if (value < 0 || value > 100) {
						progressExceptionReceived(new SeqMonkException("For resampling quantitations must be between 0 and 100. Probe " + probes[p].name() + " had value " + value + " in " + stores[d].name()));
						return;
					}
				}
			} catch (SeqMonkException sme) {
				progressExceptionReceived(sme);
				return;
			}
		}
	}

	// The value annotation on the list depends on whether we store raw
	// p-values or corrected q-values.
	ProbeList newList;
	if (applyMultipleTestingCorrection) {
		newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
	} else {
		newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
	}

	int[][] forRevCounts = new int[stores.length][2];

	// This is where we'll store any hits
	Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();

	PROBE: for (int p = 0; p < probes.length; p++) {

		if (p % 100 == 0) {
			progressUpdated("Processed " + p + " probes", p, probes.length);
		}
		if (cancel) {
			cancel = false;
			progressCancelled();
			return;
		}

		// For each dataset make up a list of forward and reverse counts under
		// this probe.
		for (int d = 0; d < stores.length; d++) {
			long[] reads = stores[d].getReadsForProbe(probes[p]);
			forRevCounts[d][0] = 0;
			forRevCounts[d][1] = 0;

			if (resample) {
				try {
					float value = stores[d].getValueForProbe(probes[p]);
					// We redistribute the total count based on the percentage value
					// in the quantitation.
					// NOTE(review): a NaN quantitation passes the earlier sanity
					// check but Math.round(NaN) is 0, so every read would be
					// counted as reverse here - confirm this is intended.
					int methCount = Math.round((reads.length * value) / 100);
					int unmethCount = reads.length - methCount;
					forRevCounts[d][0] = methCount;
					forRevCounts[d][1] = unmethCount;
				} catch (SeqMonkException sme) {
					continue PROBE;
				}
			} else {
				for (int r = 0; r < reads.length; r++) {
					if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
						++forRevCounts[d][0];
					} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
						++forRevCounts[d][1];
					}
				}
			}
		}

		// See if we have enough counts and difference to go on with this probe.
		double minPercent = 0;
		double maxPercent = 0;
		for (int d = 0; d < stores.length; d++) {
			if (forRevCounts[d][0] + forRevCounts[d][1] < minObservations) {
				continue PROBE;
			}
			double percent = (((double) forRevCounts[d][0]) / (forRevCounts[d][0] + forRevCounts[d][1])) * 100;
			if (d == 0 || percent < minPercent)
				minPercent = percent;
			if (d == 0 || percent > maxPercent)
				maxPercent = percent;
		}

		if (maxPercent - minPercent < minPercentShift)
			continue PROBE;

		// Now perform the Chi-Square test.
		double pValue = ChiSquareTest.chiSquarePvalue(forRevCounts);

		// Store this as a potential hit (p-values are corrected later if requested).
		hits.add(new ProbeTTestValue(probes[p], pValue));
	}

	// Now we can correct the p-values if we need to
	ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
	if (applyMultipleTestingCorrection) {
		BenjHochFDR.calculateQValues(rawHits);
	}

	// Keep only hits passing the stringency cut-off, annotating with q or p.
	for (int h = 0; h < rawHits.length; h++) {
		if (applyMultipleTestingCorrection) {
			if (rawHits[h].q < stringency) {
				newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
			}
		} else {
			if (rawHits[h].p < stringency) {
				newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
			}
		}
	}

	filterFinished(newList);
}
Aggregations of ProbeTTestValue usages end here.