use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class WindowedValuesFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
for (int c = 0; c < chromosomes.length; c++) {
HashSet<Probe> passedProbes = new HashSet<Probe>();
progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
ProbeGroupGenerator gen = null;
if (windowType == DISTANCE_WINDOW) {
gen = new ProbeWindowGenerator(probes, windowSize);
} else if (windowType == CONSECUTIVE_WINDOW) {
gen = new ConsecutiveProbeGenerator(probes, windowSize);
} else if (windowType == FEATURE_WINDOW) {
gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
} else {
progressExceptionReceived(new SeqMonkException("No window type known with number " + windowType));
return;
}
while (true) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
Probe[] theseProbes = gen.nextSet();
if (theseProbes == null) {
break;
}
int count = 0;
for (int s = 0; s < stores.length; s++) {
double totalValue = 0;
for (int i = 0; i < theseProbes.length; i++) {
// Get the values for the probes in this set
try {
totalValue += stores[s].getValueForProbe(theseProbes[i]);
} catch (SeqMonkException e) {
}
}
totalValue /= theseProbes.length;
// Now we have the value we need to know if it passes the test
if (upperLimit != null)
if (totalValue > upperLimit.doubleValue())
continue;
if (lowerLimit != null)
if (totalValue < lowerLimit.doubleValue())
continue;
// This one passes, we can add it to the count
++count;
}
// probe to the probe set.
switch(limitType) {
case EXACTLY:
if (count == storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
case AT_LEAST:
if (count >= storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
case NO_MORE_THAN:
if (count <= storesLimit)
for (int i = 0; i < theseProbes.length; i++) {
if (passedProbes.contains(theseProbes[i]))
continue;
newList.addProbe(theseProbes[i], null);
passedProbes.add(theseProbes[i]);
}
break;
}
}
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class BinomialFilterForRev method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
boolean aboveOnly = false;
boolean belowOnly = false;
if (options.directionBox.getSelectedItem().equals("Above"))
aboveOnly = true;
else if (options.directionBox.getSelectedItem().equals("Below"))
belowOnly = true;
if (options.stringencyField.getText().length() == 0) {
stringency = 0.05;
} else {
stringency = Double.parseDouble(options.stringencyField.getText());
}
if (options.minObservationsField.getText().length() == 0) {
minObservations = 10;
} else {
minObservations = Integer.parseInt(options.minObservationsField.getText());
}
if (options.minDifferenceField.getText().length() == 0) {
minPercentShift = 10;
} else {
minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
}
useCurrentQuant = options.useCurrentQuantBox.isSelected();
applyMultipleTestingCorrection = options.multiTestBox.isSelected();
ProbeList newList;
if (applyMultipleTestingCorrection) {
newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
} else {
newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
}
Probe[] probes = startingList.getAllProbes();
// We need to create a set of mean end methylation values for all starting values
// We found to the nearest percent so we'll end up with a set of 101 values (0-100)
// which are the expected end points
double[] expectedEnds = calculateEnds(probes);
// They cancelled whilst calculating.
if (expectedEnds == null)
return;
for (int i = 0; i < expectedEnds.length; i++) {
System.err.println("" + i + "\t" + expectedEnds[i]);
}
// This is where we'll store any hits
Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
BinomialTest bt = new BinomialTest();
AlternativeHypothesis hypothesis = AlternativeHypothesis.TWO_SIDED;
if (aboveOnly)
hypothesis = AlternativeHypothesis.GREATER_THAN;
if (belowOnly)
hypothesis = AlternativeHypothesis.LESS_THAN;
for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
cancel = false;
progressCancelled();
return;
}
long[] reads = fromStore.getReadsForProbe(probes[p]);
int forCount = 0;
int revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
int fromPercent = Math.round((forCount * 100f) / (forCount + revCount));
// We need to calculate the confidence range for the from reads and work
// out the most pessimistic value we could take as a starting value
WilsonScoreInterval wi = new WilsonScoreInterval();
ConfidenceInterval ci = wi.createInterval(forCount + revCount, forCount, 1 - stringency);
// System.err.println("From percent="+fromPercent+" meth="+forCount+" unmeth="+revCount+" sig="+stringency+" ci="+ci.getLowerBound()*100+" - "+ci.getUpperBound()*100);
reads = toStore.getReadsForProbe(probes[p]);
forCount = 0;
revCount = 0;
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forCount;
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++revCount;
}
}
if (forCount + revCount < minObservations)
continue;
float toPercent = (forCount * 100f) / (forCount + revCount);
// System.err.println("Observed toPercent is "+toPercent+ "from meth="+forCount+" unmeth="+revCount+" and true predicted is "+expectedEnds[Math.round(toPercent)]);
// Find the most pessimistic fromPercent such that the expected toPercent is as close
// to the observed value based on the confidence interval we calculated before.
double worseCaseExpectedPercent = 0;
double smallestTheoreticalToActualDiff = 100;
// Just taking the abs diff can still leave us with a closest value which is still
// quite far from where we are. We therefore also check if our confidence interval
// gives us a potential value range which spans the actual value, and if it does we
// fail it without even running the test.
boolean seenLower = false;
boolean seenHigher = false;
for (int m = Math.max((int) Math.floor(ci.getLowerBound() * 100), 0); m <= Math.min((int) Math.ceil(ci.getUpperBound() * 100), 100); m++) {
double expectedPercent = expectedEnds[m];
double diff = expectedPercent - toPercent;
if (diff <= 0)
seenLower = true;
if (diff >= 0)
seenHigher = true;
if (Math.abs(diff) < smallestTheoreticalToActualDiff) {
worseCaseExpectedPercent = expectedPercent;
smallestTheoreticalToActualDiff = Math.abs(diff);
}
}
// Sanity check
if (smallestTheoreticalToActualDiff > Math.abs((toPercent - expectedEnds[Math.round(fromPercent)]))) {
throw new IllegalStateException("Can't have a worst case which is better than the actual");
}
if (Math.abs(toPercent - worseCaseExpectedPercent) < minPercentShift)
continue;
// If they want to use the current quantitation as well then we can do that calculation now.
if (useCurrentQuant) {
try {
if (Math.abs(toStore.getValueForProbe(probes[p]) - expectedEnds[Math.round(fromStore.getValueForProbe(probes[p]))]) < minPercentShift)
continue;
} catch (SeqMonkException sme) {
throw new IllegalStateException(sme);
}
}
// Check the directionality
if (aboveOnly && worseCaseExpectedPercent - toPercent > 0)
continue;
if (belowOnly && worseCaseExpectedPercent - toPercent < 0)
continue;
// Now perform the Binomial test.
double pValue = bt.binomialTest(forCount + revCount, forCount, worseCaseExpectedPercent / 100d, hypothesis);
// Our confidence range spanned the actual value we had so we can't be significant
if (seenLower && seenHigher)
pValue = 0.5;
// System.err.println("P value is "+pValue);
// Store this as a potential hit (after correcting p-values later)
hits.add(new ProbeTTestValue(probes[p], pValue));
}
// Now we can correct the p-values if we need to
ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
if (applyMultipleTestingCorrection) {
// System.err.println("Correcting for "+rawHits.length+" tests");
BenjHochFDR.calculateQValues(rawHits);
}
for (int h = 0; h < rawHits.length; h++) {
if (applyMultipleTestingCorrection) {
if (rawHits[h].q < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
}
} else {
if (rawHits[h].p < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
}
}
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class ChiSquareFilterForRev method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
if (options.stringencyField.getText().length() == 0) {
stringency = 0.05;
} else {
stringency = Double.parseDouble(options.stringencyField.getText());
}
if (options.minObservationsField.getText().length() == 0) {
minObservations = 10;
} else {
minObservations = Integer.parseInt(options.minObservationsField.getText());
}
if (options.minDifferenceField.getText().length() == 0) {
minPercentShift = 10;
} else {
minPercentShift = Integer.parseInt(options.minDifferenceField.getText());
}
applyMultipleTestingCorrection = options.multiTestBox.isSelected();
resample = options.resampleBox.isSelected();
Probe[] probes = startingList.getAllProbes();
if (resample) {
// Do a sanity check that the current quantitation actually looks like
// percentage values.
progressUpdated("Checking current quantitations", 0, 1);
for (int p = 0; p < probes.length; p++) {
if (cancel) {
progressCancelled();
return;
}
try {
for (int d = 0; d < stores.length; d++) {
float value = stores[d].getValueForProbe(probes[p]);
// NaN is OK
if (Float.isNaN(value))
continue;
if (value < 0 || value > 100) {
progressExceptionReceived(new SeqMonkException("For resampling quantitations must be between 0 and 100. Probe " + probes[p].name() + " had value " + value + " in " + stores[d].name()));
return;
}
}
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
}
ProbeList newList;
if (applyMultipleTestingCorrection) {
newList = new ProbeList(startingList, "Filtered Probes", "", "Q-value");
} else {
newList = new ProbeList(startingList, "Filtered Probes", "", "P-value");
}
int[][] forRevCounts = new int[stores.length][2];
// This is where we'll store any hits
Vector<ProbeTTestValue> hits = new Vector<ProbeTTestValue>();
PROBE: for (int p = 0; p < probes.length; p++) {
if (p % 100 == 0) {
progressUpdated("Processed " + p + " probes", p, probes.length);
}
if (cancel) {
cancel = false;
progressCancelled();
return;
}
// For each dataset make up a list of forward and reverse probes under this probe
for (int d = 0; d < stores.length; d++) {
long[] reads = stores[d].getReadsForProbe(probes[p]);
forRevCounts[d][0] = 0;
forRevCounts[d][1] = 0;
if (resample) {
try {
float value = stores[d].getValueForProbe(probes[p]);
// We redistribute the total count based on the percentage value
// in the quantitation
int methCount = Math.round((reads.length * value) / 100);
int unmethCount = reads.length - methCount;
forRevCounts[d][0] = methCount;
forRevCounts[d][1] = unmethCount;
} catch (SeqMonkException sme) {
continue PROBE;
}
} else {
for (int r = 0; r < reads.length; r++) {
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++forRevCounts[d][0];
} else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
++forRevCounts[d][1];
}
}
}
}
// See if we have enough counts and difference to go on with this
double minPercent = 0;
double maxPercent = 0;
for (int d = 0; d < stores.length; d++) {
if (forRevCounts[d][0] + forRevCounts[d][1] < minObservations) {
continue PROBE;
}
double percent = (((double) forRevCounts[d][0]) / (forRevCounts[d][0] + forRevCounts[d][1])) * 100;
if (d == 0 || percent < minPercent)
minPercent = percent;
if (d == 0 || percent > maxPercent)
maxPercent = percent;
}
if (maxPercent - minPercent < minPercentShift)
continue PROBE;
// Now perform the Chi-Square test.
double pValue = ChiSquareTest.chiSquarePvalue(forRevCounts);
// Store this as a potential hit (after correcting p-values later)
hits.add(new ProbeTTestValue(probes[p], pValue));
}
// Now we can correct the p-values if we need to
ProbeTTestValue[] rawHits = hits.toArray(new ProbeTTestValue[0]);
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(rawHits);
}
for (int h = 0; h < rawHits.length; h++) {
if (applyMultipleTestingCorrection) {
if (rawHits[h].q < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].q);
}
} else {
if (rawHits[h].p < stringency) {
newList.addProbe(rawHits[h].probe, (float) rawHits[h].p);
}
}
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class CorrelationCluster method minRValue.
public double minRValue(Probe p, double cutoff) {
// We test this probe against all of the probes in our
// set and if it matches against all of them we keep it
double minR = 1;
try {
for (int s = 0; s < stores.length; s++) {
values1[s] = stores[s].getValueForProbe(p);
}
Enumeration<Probe> ep = probes.elements();
while (ep.hasMoreElements()) {
Probe p2 = ep.nextElement();
for (int s = 0; s < stores.length; s++) {
values2[s] = stores[s].getValueForProbe(p2);
}
double r = PearsonCorrelation.calculateCorrelation(values1, values2);
if (r < minR) {
minR = r;
}
// we're not keeping it anyway
if (minR < cutoff) {
return minR;
}
}
return minR;
} catch (SeqMonkException e) {
return 0;
}
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class CorrelationFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "R-value");
double[] referenceProfile = null;
try {
referenceProfile = getReferneceProfile();
} catch (SeqMonkException ex) {
progressExceptionReceived(ex);
}
double[] currentProfile = new double[stores.length];
for (int p = 0; p < probes.length; p++) {
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
try {
for (int s = 0; s < stores.length; s++) {
currentProfile[s] = stores[s].getValueForProbe(probes[p]);
}
float r = PearsonCorrelation.calculateCorrelation(referenceProfile, currentProfile);
if (correlationCutoff > 0) {
if (r >= correlationCutoff) {
newList.addProbe(probes[p], r);
}
} else {
if (r <= correlationCutoff) {
newList.addProbe(probes[p], r);
}
}
} catch (SeqMonkException ex) {
continue;
}
}
filterFinished(newList);
}
Aggregations