use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class LimmaFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
// System.err.println("Temp dir is "+tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LimmaFilter/limma_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
// Say which p value column we're filtering on
if (multiTest) {
template.setValue("CORRECTED", "adj.P.Val");
} else {
template.setValue("CORRECTED", "P.Value");
}
StringBuffer sb = new StringBuffer();
for (int i = 0; i < fromStores.length; i++) {
if (i > 0)
sb.append(",");
sb.append("\"from\"");
}
for (int i = 0; i < toStores.length; i++) {
sb.append(",");
sb.append("\"to\"");
}
template.setValue("CONDITIONS", sb.toString());
template.setValue("PVALUE", "" + cutoff);
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File countFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(countFile);
sb = new StringBuffer();
sb.append("probe");
for (int i = 0; i < fromStores.length; i++) {
sb.append("\t");
sb.append("from");
sb.append(i);
}
for (int i = 0; i < toStores.length; i++) {
sb.append("\t");
sb.append("to");
sb.append(i);
}
pr.println(sb.toString());
progressUpdated("Writing count data", 0, 1);
Probe[] probes = startingList.getAllProbes();
float value;
for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing count data", p, probes.length);
}
sb = new StringBuffer();
sb.append(p);
for (int i = 0; i < fromStores.length; i++) {
sb.append("\t");
value = fromStores[i].getValueForProbe(probes[p]);
sb.append(value);
}
for (int i = 0; i < toStores.length; i++) {
sb.append("\t");
value = toStores[i].getValueForProbe(probes[p]);
sb.append(value);
}
pr.println(sb.toString());
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(listener.exception());
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
newList = new ProbeList(startingList, "", "", "FDR");
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
int probeIndex = Integer.parseInt(sections[0]);
float pValue;
if (multiTest) {
pValue = Float.parseFloat(sections[sections.length - 2]);
} else {
pValue = Float.parseFloat(sections[sections.length - 3]);
}
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
// TODO: Show log of R session?
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
// filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class LogisticRegressionFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
if (minObservations < 3)
minObservations = 3;
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
if (resample) {
// We need to check that the data stores are quantitated
for (int i = 0; i < fromStores.length; i++) {
if (!fromStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = fromStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
for (int i = 0; i < toStores.length; i++) {
if (!toStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = toStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
}
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
PrintWriter pr = null;
String lastChr = "";
// Rather than not testing probes based on their absolute difference
// we should just post-filter them. The easiest way to do this will
// be to not test (as we do now) but explicity pass in the number of
// tests we should have performed to the multiple testing correction.
int numberOfTestsToCorrectBy = 0;
PROBE: for (int p = 0; p < probes.length; p++) {
if (!probes[p].chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + probes[p].chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = probes[p].chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromMethCounts = new int[fromStores.length];
int[] fromUnmethCounts = new int[fromStores.length];
int[] toMethCounts = new int[toStores.length];
int[] toUnmethCounts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
long[] reads = fromStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = fromStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
fromMethCounts[i] = methCount;
fromUnmethCounts[i] = totalCount - methCount;
}
for (int i = 0; i < toStores.length; i++) {
long[] reads = toStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = toStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
toMethCounts[i] = methCount;
toUnmethCounts[i] = totalCount - methCount;
}
// Check to see we meet the requirements for the min amount of information
// and the min diff.
int totalFromMeth = 0;
int totalFrom = 0;
int totalToMeth = 0;
int totalTo = 0;
int validFrom = 0;
for (int i = 0; i < fromStores.length; i++) {
totalFromMeth += fromMethCounts[i];
totalFrom += fromMethCounts[i];
totalFrom += fromUnmethCounts[i];
if (fromMethCounts[i] + fromUnmethCounts[i] >= minObservations) {
++validFrom;
}
}
int validTo = 0;
for (int i = 0; i < toStores.length; i++) {
totalToMeth += toMethCounts[i];
totalTo += toMethCounts[i];
totalTo += toUnmethCounts[i];
if (toMethCounts[i] + toUnmethCounts[i] >= minObservations) {
++validTo;
}
}
// have enough data in all stores to go ahead and do the test.
if (validFrom < fromStores.length || validTo < toStores.length) {
// We don't have enough data to measure this one
continue;
}
// At this point we have to count this probe as valid for the
// purposes of multiple testing correction
++numberOfTestsToCorrectBy;
float[] fromPercentages = new float[validFrom];
float[] toPercentages = new float[validTo];
int lastFromIndex = 0;
int lastToIndex = 0;
for (int i = 0; i < fromMethCounts.length; i++) {
if (fromMethCounts[i] + fromUnmethCounts[i] == 0)
continue;
fromPercentages[lastFromIndex] = fromMethCounts[i] * 100f / (fromMethCounts[i] + fromUnmethCounts[i]);
++lastFromIndex;
}
for (int i = 0; i < toMethCounts.length; i++) {
if (toMethCounts[i] + toUnmethCounts[i] == 0)
continue;
toPercentages[lastToIndex] = toMethCounts[i] * 100f / (toMethCounts[i] + toUnmethCounts[i]);
++lastToIndex;
}
for (int i = 0; i < fromMethCounts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromMethCounts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromUnmethCounts[i]);
}
for (int i = 0; i < toMethCounts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toMethCounts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toUnmethCounts[i]);
}
}
pr.close();
// Sanity check to make sure we have something to work with.
if (numberOfTestsToCorrectBy == 0) {
progressExceptionReceived(new IllegalStateException("No probes had enough data to test."));
}
// Now we can complete the template
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("CORRECTCOUNT", "" + numberOfTestsToCorrectBy);
template.setValue("PVALUE", "" + pValueCutoff);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ManualCorrelationFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
profiles = new double[optionsPanel.graphs.length][];
for (int g = 0; g < optionsPanel.graphs.length; g++) {
profiles[g] = optionsPanel.graphs[g].profile();
}
Probe[] probes = startingList.getAllProbes();
ProbeList parentList = null;
ProbeList[] newLists = new ProbeList[profiles.length];
if (newLists.length > 1) {
parentList = new ProbeList(startingList, "Filtered Probes", "", null);
for (int p = 0; p < newLists.length; p++) {
newLists[p] = new ProbeList(parentList, listName() + " " + (p + 1), listDescription(), "R-value");
}
} else {
// We won't have a parent list
newLists[0] = new ProbeList(startingList, listName(), listDescription(), "R-value");
}
double[] currentProfile = new double[stores.length];
for (int p = 0; p < probes.length; p++) {
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
try {
for (int s = 0; s < stores.length; s++) {
currentProfile[s] = stores[s].getValueForProbe(probes[p]);
}
float bestR = 0;
int bestRIndex = -1;
for (int pr = 0; pr < profiles.length; pr++) {
float r = PearsonCorrelation.calculateCorrelation(profiles[pr], currentProfile);
if (correlationCutoff > 0) {
if (r >= correlationCutoff && r > bestR) {
bestR = r;
bestRIndex = pr;
}
} else {
if (r <= correlationCutoff && r < bestR) {
bestR = r;
bestRIndex = pr;
}
}
}
if (bestRIndex >= 0) {
newLists[bestRIndex].addProbe(probes[p], bestR);
if (parentList != null) {
parentList.addProbe(probes[p], null);
}
}
} catch (SeqMonkException ex) {
continue;
}
}
// If we only have one cluster then we just return that list
if (newLists.length == 1) {
filterFinished(newLists[0]);
} else {
filterFinished(parentList);
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class DESeqFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[][] storeGroups = new DataStore[replicateSets.length][];
for (int r = 0; r < replicateSets.length; r++) {
storeGroups[r] = replicateSets[r].dataStores();
}
File tempDir;
try {
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
// System.err.println("Temp dir is "+tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/DESeqFilter/deseq_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
// Say which p value column we're filtering on
if (multiTest) {
template.setValue("CORRECTED", "padj");
} else {
template.setValue("CORRECTED", "pvalue");
}
if (independentFiltering) {
template.setValue("INDEPENDENT", "TRUE");
} else {
template.setValue("INDEPENDENT", "FALSE");
}
StringBuffer sb = new StringBuffer();
for (int s = 0; s < storeGroups.length; s++) {
for (int i = 0; i < storeGroups[s].length; i++) {
if (!(s == 0 && i == 0)) {
sb.append(",");
}
sb.append("\"group" + s + "\"");
}
}
template.setValue("CONDITIONS", sb.toString());
template.setValue("PVALUE", "" + cutoff);
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File countFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(countFile);
sb = new StringBuffer();
sb.append("probe");
for (int s = 0; s < storeGroups.length; s++) {
for (int i = 0; i < storeGroups[s].length; i++) {
sb.append("\t");
sb.append("group");
sb.append(s);
sb.append("_");
sb.append(i);
}
}
pr.println(sb.toString());
progressUpdated("Writing count data", 0, 1);
Probe[] probes = startingList.getAllProbes();
float value;
for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing count data", p, probes.length);
}
sb = new StringBuffer();
sb.append(p);
for (int s = 0; s < storeGroups.length; s++) {
for (int i = 0; i < storeGroups[s].length; i++) {
sb.append("\t");
value = storeGroups[s][i].getValueForProbe(probes[p]);
if (value != (int) value) {
progressExceptionReceived(new IllegalArgumentException("Inputs to the DESeq filter MUST be raw, incorrected counts, not things like " + value));
pr.close();
return;
}
sb.append(value);
}
}
pr.println(sb.toString());
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(listener.exception());
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
newList = new ProbeList(startingList, "", "", "FDR");
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
int probeIndex = Integer.parseInt(sections[0]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
// TODO: Show log of R session?
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
// filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class DifferencesFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
Probe[] probes = startingList.getAllProbes();
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Difference");
HashSet<DataStore> allStoresSet = new HashSet<DataStore>();
for (int i = 0; i < fromStores.length; i++) {
allStoresSet.add(fromStores[i]);
}
for (int i = 0; i < toStores.length; i++) {
allStoresSet.add(toStores[i]);
}
DataStore[] allStores = allStoresSet.toArray(new DataStore[0]);
PROBE: for (int p = 0; p < probes.length; p++) {
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
for (int s = 0; s < allStores.length; s++) {
if (!allStores[s].hasValueForProbe(probes[p]))
continue PROBE;
try {
if (Float.isNaN(allStores[s].getValueForProbe(probes[p])))
continue PROBE;
} catch (SeqMonkException sme) {
continue;
}
}
int count = 0;
float d = 0;
for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
if (fromStores[fromIndex] == toStores[toIndex])
continue;
switch(differenceType) {
case AVERAGE:
d += getDifferenceValue(toStores[toIndex], fromStores[fromIndex], probes[p]);
count++;
break;
case MAXIMUM:
float dt1 = getDifferenceValue(toStores[toIndex], fromStores[fromIndex], probes[p]);
if (count == 0 || dt1 > d)
d = dt1;
count++;
break;
case MINIMUM:
float dt2 = getDifferenceValue(toStores[toIndex], fromStores[fromIndex], probes[p]);
if (count == 0 || dt2 < d)
d = dt2;
count++;
break;
default:
progressExceptionReceived(new SeqMonkException("Unknown difference type " + differenceType));
}
}
}
if (differenceType == AVERAGE) {
if (count > 0) {
d /= count;
}
}
// Now we have the value we need to know if it passes the test
if (upperLimit != null)
if (d > upperLimit) {
continue;
}
if (lowerLimit != null)
if (d < lowerLimit) {
continue;
}
newList.addProbe(probes[p], new Float(d));
}
filterFinished(newList);
}
Aggregations