use of uk.ac.babraham.SeqMonk.R.RScriptRunner in project SeqMonk by s-andrews.
the class EdgeRFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
// Say which p value column we're filtering on
if (multiTest) {
template.setValue("CORRECTED", "FDR");
} else {
template.setValue("CORRECTED", "PValue");
}
StringBuffer sb = new StringBuffer();
for (int i = 0; i < fromStores.length; i++) {
if (i > 0)
sb.append(",");
sb.append("1");
}
for (int i = 0; i < toStores.length; i++) {
sb.append(",");
sb.append("2");
}
template.setValue("CONDITIONS", sb.toString());
template.setValue("PVALUE", "" + cutoff);
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File countFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(countFile);
sb = new StringBuffer();
sb.append("probe");
for (int i = 0; i < fromStores.length; i++) {
sb.append("\t");
sb.append("from");
sb.append(i);
}
for (int i = 0; i < toStores.length; i++) {
sb.append("\t");
sb.append("to");
sb.append(i);
}
pr.println(sb.toString());
progressUpdated("Writing count data", 0, 1);
Probe[] probes = startingList.getAllProbes();
float value;
for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing count data", p, probes.length);
}
sb = new StringBuffer();
sb.append(p);
for (int i = 0; i < fromStores.length; i++) {
sb.append("\t");
value = fromStores[i].getValueForProbe(probes[p]);
if (value != (int) value) {
progressExceptionReceived(new IllegalArgumentException("Inputs to the EdgeR filter MUST be raw, incorrected counts, not things like " + value));
pr.close();
return;
}
sb.append(value);
}
for (int i = 0; i < toStores.length; i++) {
sb.append("\t");
value = toStores[i].getValueForProbe(probes[p]);
if (value != (int) value) {
progressExceptionReceived(new IllegalArgumentException("Inputs to the EdgeR filter MUST be raw, incorrected counts, not things like " + value));
pr.close();
return;
}
sb.append(value);
}
pr.println(sb.toString());
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
newList = new ProbeList(startingList, "", "", "FDR");
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
int probeIndex = Integer.parseInt(sections[0]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
// filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.R.RScriptRunner in project SeqMonk by s-andrews.
the class EdgeRForRevFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
if (resample) {
// We need to check that the data stores are quantitated
for (int i = 0; i < fromStores.length; i++) {
if (!fromStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = fromStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
for (int i = 0; i < toStores.length; i++) {
if (!toStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = toStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
}
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_for_rev_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("DIFFERENCE", "" + absDiffCutoff);
template.setValue("PVALUE", "" + pValueCutoff);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
template.setValue("CORRECTED", "FDR");
} else {
template.setValue("MULTITEST", "FALSE");
template.setValue("CORRECTED", "PValue");
}
// For the conditions we just repeat "from" and "to" the number of times they occur in the
// samples (twice for each sample since we have both meth and unmeth)
StringBuffer conditions = new StringBuffer();
for (int i = 0; i < fromStores.length; i++) {
if (i > 0)
conditions.append(",");
conditions.append("\"from\",\"from\"");
}
for (int i = 0; i < toStores.length; i++) {
conditions.append(",\"to\",\"to\"");
}
template.setValue("CONDITIONS", conditions.toString());
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File outFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(outFile);
pr.print("id");
for (int i = 0; i < fromStores.length; i++) {
pr.print("\tfrom_" + i + "_meth\tfrom_" + i + "_unmeth");
}
for (int i = 0; i < toStores.length; i++) {
pr.print("\tto_" + i + "_meth\tto_" + i + "_unmeth");
}
pr.print("\n");
PROBE: for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + probes[p].chromosome().name(), p, probes.length);
}
int[] fromMethCounts = new int[fromStores.length];
int[] fromUnmethCounts = new int[fromStores.length];
int[] toMethCounts = new int[toStores.length];
int[] toUnmethCounts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
long[] reads = fromStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = fromStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
fromMethCounts[i] = methCount;
fromUnmethCounts[i] = totalCount - methCount;
}
for (int i = 0; i < toStores.length; i++) {
long[] reads = toStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = toStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
toMethCounts[i] = methCount;
toUnmethCounts[i] = totalCount - methCount;
}
// Check to see we meet the requirements for the min amount of information
// and the min diff.
int totalFromMeth = 0;
int totalFrom = 0;
int totalToMeth = 0;
int totalTo = 0;
int validFrom = 0;
for (int i = 0; i < fromStores.length; i++) {
totalFromMeth += fromMethCounts[i];
totalFrom += fromMethCounts[i];
totalFrom += fromUnmethCounts[i];
if (fromMethCounts[i] > 0 || fromUnmethCounts[i] > 0) {
++validFrom;
}
}
int validTo = 0;
for (int i = 0; i < toStores.length; i++) {
totalToMeth += toMethCounts[i];
totalTo += toMethCounts[i];
totalTo += toUnmethCounts[i];
if (toMethCounts[i] > 0 || toUnmethCounts[i] > 0) {
++validTo;
}
}
// EdgeR only requires 2 valid observations
if (validFrom < 2 || validTo < 2) {
// We don't have enough data to measure this one
continue;
}
if (Math.abs((totalFromMeth * 100f / totalFrom) - (totalToMeth * 100f / totalTo)) < absDiffCutoff) {
continue;
}
float[] fromPercentages = new float[validFrom];
float[] toPercentages = new float[validTo];
int lastFromIndex = 0;
int lastToIndex = 0;
for (int i = 0; i < fromMethCounts.length; i++) {
if (fromMethCounts[i] + fromUnmethCounts[i] == 0)
continue;
fromPercentages[lastFromIndex] = fromMethCounts[i] * 100f / (fromMethCounts[i] + fromUnmethCounts[i]);
++lastFromIndex;
}
for (int i = 0; i < toMethCounts.length; i++) {
if (toMethCounts[i] + toUnmethCounts[i] == 0)
continue;
toPercentages[lastToIndex] = toMethCounts[i] * 100f / (toMethCounts[i] + toUnmethCounts[i]);
++lastToIndex;
}
if (Math.abs(SimpleStats.mean(fromPercentages) - SimpleStats.mean(toPercentages)) < absDiffCutoff) {
continue;
}
// If we get here then we're OK to use this probe so we print out its data. We put all of the
// data for one probe onto a single line. The first value is the index of the probe. The
// rest are pairs of meth/unmeth values for the from samples then the to samples.
pr.print(p);
for (int i = 0; i < fromMethCounts.length; i++) {
pr.print("\t" + fromMethCounts[i] + "\t" + fromUnmethCounts[i]);
}
for (int i = 0; i < toMethCounts.length; i++) {
pr.print("\t" + toMethCounts[i] + "\t" + toUnmethCounts[i]);
}
pr.print("\n");
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.R.RScriptRunner in project SeqMonk by s-andrews.
the class LogisticRegressionSplicingFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
probes = filterProbesByCount(probes, fromStores, toStores);
progressUpdated("Pairing Probes", 0, 1);
// Make pairs of probes to test
ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
System.err.println("Found " + pairs.length + " pairs to test");
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("PVALUE", "" + pValueCutoff);
template.setValue("CORRECTCOUNT", "" + pairs.length);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
pr = null;
String lastChr = "";
for (int p = 0; p < pairs.length; p++) {
if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = pairs[p].probe1.chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromProbe1Counts = new int[fromStores.length];
int[] fromProbe2Counts = new int[fromStores.length];
int[] toProbe1Counts = new int[toStores.length];
int[] toProbe2Counts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < toStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < fromProbe1Counts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
}
for (int i = 0; i < toProbe1Counts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
}
}
if (pr != null)
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
// In case the same probe is found multiple times we'll cache the p-values
// we see and report the best one.
HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
// See if the pvalue we've got is better than the one we're storing
if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
}
br.close();
for (Probe probeHit : cachedHits.keySet()) {
newList.addProbe(probeHit, cachedHits.get(probeHit));
}
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.R.RScriptRunner in project SeqMonk by s-andrews.
the class PCADataCalculator method runPCA.
private void runPCA() {
File tempDir;
try {
pd.progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
// System.err.println("Temp dir is "+tempDir.getAbsolutePath());
pd.progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Displays/PCAPlot/pca_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File countFile = new File(tempDir.getAbsoluteFile() + "/data.txt");
pr = new PrintWriter(countFile);
StringBuffer sb;
pd.progressUpdated("Writing count data", 0, 1);
for (int p = 0; p < usedProbes.length; p++) {
sb = new StringBuffer();
for (int d = 0; d < stores.length; d++) {
if (d > 0)
sb.append("\t");
sb.append(stores[d].getValueForProbe(usedProbes[p]));
}
pr.println(sb.toString());
}
pr.close();
pd.progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
pd.progressCancelled();
return;
}
if (listener.exceptionReceived()) {
pd.progressExceptionReceived(listener.exception());
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse and store the results
File varianceFile = new File(tempDir.getAbsolutePath() + "/variance_data.txt");
BufferedReader br = new BufferedReader(new FileReader(varianceFile));
String line = br.readLine();
FloatVector varianceVector = new FloatVector();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
varianceVector.add(Float.parseFloat(sections[1]));
}
br.close();
variances = varianceVector.toArray();
// We want to convert the variance values into percentages
// since that's what everyone wants for PCA.
float varianceTotal = 0;
for (int v = 0; v < variances.length; v++) {
varianceTotal += variances[v];
}
// Now convert to percentages
for (int v = 0; v < variances.length; v++) {
variances[v] = (variances[v] / varianceTotal) * 100;
}
// We don't want to keep more than 10PCs
if (variances.length > 10) {
float[] shortVariances = new float[10];
for (int i = 0; i < shortVariances.length; i++) {
shortVariances[i] = variances[i];
}
variances = shortVariances;
}
File pcaFile = new File(tempDir.getAbsolutePath() + "/pca_data.txt");
br = new BufferedReader(new FileReader(pcaFile));
line = br.readLine();
pcaResults = new float[stores.length][variances.length];
int storeIndex = 0;
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
for (int i = 0; i < variances.length; i++) {
pcaResults[storeIndex][i] = Float.parseFloat(sections[i + 1]);
}
++storeIndex;
}
br.close();
File weightsFile = new File(tempDir.getAbsolutePath() + "/pca_weights.txt");
br = new BufferedReader(new FileReader(weightsFile));
line = br.readLine();
pcaRotations = new float[usedProbes.length][variances.length];
int probeIndex = 0;
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
for (int i = 0; i < variances.length; i++) {
// We multiply by 1000 to get the values on a vaguely
// sane scale
pcaRotations[probeIndex][i] = Float.parseFloat(sections[i + 1]) * 1000;
}
++probeIndex;
}
br.close();
runner.cleanUp();
} catch (Exception e) {
pd.progressExceptionReceived(e);
return;
}
pd.progressComplete("pca_analysis", this);
// TODO: Open the graph of variances and PCA results
new PCAScatterPlotDialog(this);
new PCAVarianceDialog(this);
}
Aggregations