use of uk.ac.babraham.SeqMonk.Utilities.FloatVector in project SeqMonk by s-andrews.
the class PCADataCalculator method runPCA.
private void runPCA() {
File tempDir;
try {
pd.progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
// System.err.println("Temp dir is "+tempDir.getAbsolutePath());
pd.progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Displays/PCAPlot/pca_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File countFile = new File(tempDir.getAbsoluteFile() + "/data.txt");
pr = new PrintWriter(countFile);
StringBuffer sb;
pd.progressUpdated("Writing count data", 0, 1);
for (int p = 0; p < usedProbes.length; p++) {
sb = new StringBuffer();
for (int d = 0; d < stores.length; d++) {
if (d > 0)
sb.append("\t");
sb.append(stores[d].getValueForProbe(usedProbes[p]));
}
pr.println(sb.toString());
}
pr.close();
pd.progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
pd.progressCancelled();
return;
}
if (listener.exceptionReceived()) {
pd.progressExceptionReceived(listener.exception());
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse and store the results
File varianceFile = new File(tempDir.getAbsolutePath() + "/variance_data.txt");
BufferedReader br = new BufferedReader(new FileReader(varianceFile));
String line = br.readLine();
FloatVector varianceVector = new FloatVector();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
varianceVector.add(Float.parseFloat(sections[1]));
}
br.close();
variances = varianceVector.toArray();
// We want to convert the variance values into percentages
// since that's what everyone wants for PCA.
float varianceTotal = 0;
for (int v = 0; v < variances.length; v++) {
varianceTotal += variances[v];
}
// Now convert to percentages
for (int v = 0; v < variances.length; v++) {
variances[v] = (variances[v] / varianceTotal) * 100;
}
// We don't want to keep more than 10PCs
if (variances.length > 10) {
float[] shortVariances = new float[10];
for (int i = 0; i < shortVariances.length; i++) {
shortVariances[i] = variances[i];
}
variances = shortVariances;
}
File pcaFile = new File(tempDir.getAbsolutePath() + "/pca_data.txt");
br = new BufferedReader(new FileReader(pcaFile));
line = br.readLine();
pcaResults = new float[stores.length][variances.length];
int storeIndex = 0;
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
for (int i = 0; i < variances.length; i++) {
pcaResults[storeIndex][i] = Float.parseFloat(sections[i + 1]);
}
++storeIndex;
}
br.close();
File weightsFile = new File(tempDir.getAbsolutePath() + "/pca_weights.txt");
br = new BufferedReader(new FileReader(weightsFile));
line = br.readLine();
pcaRotations = new float[usedProbes.length][variances.length];
int probeIndex = 0;
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
for (int i = 0; i < variances.length; i++) {
// We multiply by 1000 to get the values on a vaguely
// sane scale
pcaRotations[probeIndex][i] = Float.parseFloat(sections[i + 1]) * 1000;
}
++probeIndex;
}
br.close();
runner.cleanUp();
} catch (Exception e) {
pd.progressExceptionReceived(e);
return;
}
pd.progressComplete("pca_analysis", this);
// TODO: Open the graph of variances and PCA results
new PCAScatterPlotDialog(this);
new PCAVarianceDialog(this);
}
Aggregations