Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
The class EdgeRForRevFilter, method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it and collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
if (resample) {
// We need to check that the data stores are quantitated
for (int i = 0; i < fromStores.length; i++) {
if (!fromStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = fromStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
for (int i = 0; i < toStores.length; i++) {
if (!toStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = toStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
}
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_for_rev_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("DIFFERENCE", "" + absDiffCutoff);
template.setValue("PVALUE", "" + pValueCutoff);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
template.setValue("CORRECTED", "FDR");
} else {
template.setValue("MULTITEST", "FALSE");
template.setValue("CORRECTED", "PValue");
}
// For the conditions we just repeat "from" and "to" the number of times they occur in the
// samples (twice for each sample since we have both meth and unmeth)
StringBuffer conditions = new StringBuffer();
for (int i = 0; i < fromStores.length; i++) {
if (i > 0)
conditions.append(",");
conditions.append("\"from\",\"from\"");
}
for (int i = 0; i < toStores.length; i++) {
conditions.append(",\"to\",\"to\"");
}
template.setValue("CONDITIONS", conditions.toString());
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File outFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(outFile);
pr.print("id");
for (int i = 0; i < fromStores.length; i++) {
pr.print("\tfrom_" + i + "_meth\tfrom_" + i + "_unmeth");
}
for (int i = 0; i < toStores.length; i++) {
pr.print("\tto_" + i + "_meth\tto_" + i + "_unmeth");
}
pr.print("\n");
PROBE: for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + probes[p].chromosome().name(), p, probes.length);
}
int[] fromMethCounts = new int[fromStores.length];
int[] fromUnmethCounts = new int[fromStores.length];
int[] toMethCounts = new int[toStores.length];
int[] toUnmethCounts = new int[toStores.length];
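// Fill the per-replicate count pairs. In resample mode the counts are rebuilt from the stored
// percentage value; otherwise forward-strand reads go into the first ("meth") column and the
// remaining reads into the second ("unmeth") column.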
for (int i = 0; i < fromStores.length; i++) {
long[] reads = fromStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = fromStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
fromMethCounts[i] = methCount;
fromUnmethCounts[i] = totalCount - methCount;
}
for (int i = 0; i < toStores.length; i++) {
long[] reads = toStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = toStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
toMethCounts[i] = methCount;
toUnmethCounts[i] = totalCount - methCount;
}
// Check that we meet the requirements for the minimum amount of information
// and the minimum difference.
int totalFromMeth = 0;
int totalFrom = 0;
int totalToMeth = 0;
int totalTo = 0;
int validFrom = 0;
for (int i = 0; i < fromStores.length; i++) {
totalFromMeth += fromMethCounts[i];
totalFrom += fromMethCounts[i];
totalFrom += fromUnmethCounts[i];
if (fromMethCounts[i] > 0 || fromUnmethCounts[i] > 0) {
++validFrom;
}
}
int validTo = 0;
for (int i = 0; i < toStores.length; i++) {
totalToMeth += toMethCounts[i];
totalTo += toMethCounts[i];
totalTo += toUnmethCounts[i];
if (toMethCounts[i] > 0 || toUnmethCounts[i] > 0) {
++validTo;
}
}
// EdgeR only requires 2 valid observations
if (validFrom < 2 || validTo < 2) {
// We don't have enough data to measure this one
continue;
}
if (Math.abs((totalFromMeth * 100f / totalFrom) - (totalToMeth * 100f / totalTo)) < absDiffCutoff) {
continue;
}
float[] fromPercentages = new float[validFrom];
float[] toPercentages = new float[validTo];
int lastFromIndex = 0;
int lastToIndex = 0;
for (int i = 0; i < fromMethCounts.length; i++) {
if (fromMethCounts[i] + fromUnmethCounts[i] == 0)
continue;
fromPercentages[lastFromIndex] = fromMethCounts[i] * 100f / (fromMethCounts[i] + fromUnmethCounts[i]);
++lastFromIndex;
}
for (int i = 0; i < toMethCounts.length; i++) {
if (toMethCounts[i] + toUnmethCounts[i] == 0)
continue;
toPercentages[lastToIndex] = toMethCounts[i] * 100f / (toMethCounts[i] + toUnmethCounts[i]);
++lastToIndex;
}
if (Math.abs(SimpleStats.mean(fromPercentages) - SimpleStats.mean(toPercentages)) < absDiffCutoff) {
continue;
}
// If we get here then we're OK to use this probe so we print out its data. We put all of the
// data for one probe onto a single line. The first value is the index of the probe. The
// rest are pairs of meth/unmeth values for the from samples then the to samples.
pr.print(p);
for (int i = 0; i < fromMethCounts.length; i++) {
pr.print("\t" + fromMethCounts[i] + "\t" + fromUnmethCounts[i]);
}
for (int i = 0; i < toMethCounts.length; i++) {
pr.print("\t" + toMethCounts[i] + "\t" + toUnmethCounts[i]);
}
pr.print("\n");
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
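// The first readLine() discards the header row; each remaining line holds one significant probe,
// whose index is the last dot-separated token of the R row name and whose final column is the
// (possibly corrected) p-value.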
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
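The per-probe pre-filtering above can be summarised in a small standalone sketch. This is illustrative only (hypothetical class and method names, plain int arrays instead of SeqMonk's DataStore API) and shows just the coverage and pooled-difference checks; the filter above additionally compares the means of the per-replicate percentages before writing a row for edgeR.
// Illustrative only (not SeqMonk code): counts[i][0] holds the forward ("meth")
// count and counts[i][1] the reverse ("unmeth") count for replicate i.
class CountPreFilterSketch {
    static boolean passesPreFilter(int[][] fromCounts, int[][] toCounts, float absDiffCutoff) {
        int validFrom = 0, totalFrom = 0, totalFromMeth = 0;
        for (int[] c : fromCounts) {
            if (c[0] + c[1] > 0) ++validFrom;      // this replicate has some coverage
            totalFromMeth += c[0];
            totalFrom += c[0] + c[1];
        }
        int validTo = 0, totalTo = 0, totalToMeth = 0;
        for (int[] c : toCounts) {
            if (c[0] + c[1] > 0) ++validTo;
            totalToMeth += c[0];
            totalTo += c[0] + c[1];
        }
        // edgeR needs at least two usable observations per group
        if (validFrom < 2 || validTo < 2) return false;
        float fromPercent = totalFromMeth * 100f / totalFrom;
        float toPercent = totalToMeth * 100f / totalTo;
        // the pooled percentage difference must reach the requested cut-off
        return Math.abs(fromPercent - toPercent) >= absDiffCutoff;
    }
}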
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
The class GeneSetIntensityDifferenceFilter, method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
try {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
calculateCustomRegression = optionsPanel.calculateCustomRegressionBox.isSelected();
if (calculateCustomRegression == false) {
customRegressionValues = null;
}
if (calculateLinearRegression) {
simpleRegression = new SimpleRegression();
}
Probe[] allProbes = startingList.getAllProbes();
// We're not allowing multiple comparisons - this is a bit of a messy workaround so it's compatible with other methods.
fromStore = fromStores[0];
toStore = toStores[0];
ArrayList<Probe> probeArrayList = new ArrayList<Probe>();
int NaNcount = 0;
// remove the invalid probes - the ones without a value
for (int i = 0; i < allProbes.length; i++) {
if ((Float.isNaN(fromStore.getValueForProbe(allProbes[i]))) || (Float.isNaN(toStore.getValueForProbe(allProbes[i])))) {
NaNcount++;
} else {
probeArrayList.add(allProbes[i]);
}
}
System.err.println("Found " + NaNcount + " probes that were invalid.");
probes = probeArrayList.toArray(new Probe[0]);
if (calculateCustomRegression == true) {
customRegressionValues = new float[2][probes.length];
}
// We'll pull the number of probes to sample from the preferences if they've changed it
Integer updatedProbesPerSet = optionsPanel.probesPerSet();
if (updatedProbesPerSet != null)
probesPerSet = updatedProbesPerSet;
// we want a set of z-scores using the local distribution.
probeZScoreLookupTable = new Hashtable<Probe, Double>();
// Put something in the progress whilst we're ordering the probe values to make the comparison.
progressUpdated("Generating background model", 0, 1);
Comparator<Integer> comp = new AverageIntensityComparator(fromStore, toStore, probes);
// We need to generate a set of probe indices that can be ordered by their average intensity
Integer[] indices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
indices[i] = i;
/* add the data to the linear regression object */
if (calculateLinearRegression) {
simpleRegression.addData((double) fromStore.getValueForProbe(probes[i]), (double) toStore.getValueForProbe(probes[i]));
}
}
if (calculateLinearRegression) {
System.out.println("intercept = " + simpleRegression.getIntercept() + ", slope = " + simpleRegression.getSlope());
}
Arrays.sort(indices, comp);
/* This holds the indices to get the deduplicated probe from the original list of probes */
deduplicatedIndices = new Integer[probes.length];
/* The number of probes with different values */
int dedupProbeCounter = 0;
// the probes deduplicated by value
ArrayList<Probe> deduplicatedProbes = new ArrayList<Probe>();
// populate the first one so that we have something to compare to in the loop
deduplicatedIndices[0] = 0;
deduplicatedProbes.add(probes[indices[0]]);
progressUpdated("Made 0 of 1 comparisons", 0, 1);
for (int i = 1; i < indices.length; i++) {
/* indices have been sorted, now we need to check whether adjacent pair values are identical */
if ((fromStore.getValueForProbe(probes[indices[i]]) == fromStore.getValueForProbe(probes[indices[i - 1]])) && (toStore.getValueForProbe(probes[indices[i]]) == toStore.getValueForProbe(probes[indices[i - 1]]))) {
/* If they are identical, do not add the probe to the deduplicatedProbes object, but have a reference for it so we can look up which deduplicated probe and
* therefore which distribution slice to use for the duplicated probe. */
deduplicatedIndices[i] = dedupProbeCounter;
} else {
deduplicatedProbes.add(probes[indices[i]]);
dedupProbeCounter++;
deduplicatedIndices[i] = dedupProbeCounter;
}
}
Probe[] dedupProbes = deduplicatedProbes.toArray(new Probe[0]);
// make sure we're not trying to use more probes than we've got in the analysis
if (probesPerSet > dedupProbes.length) {
probesPerSet = dedupProbes.length;
}
System.out.println("total number of probe values = " + probes.length);
System.out.println("number of deduplicated probe values = " + dedupProbes.length);
System.out.println("probesPerSet = " + probesPerSet);
// I want this to contain all the differences, then from that I'm going to get the z-scores.
double[] currentDiffSet = new double[probesPerSet];
for (int i = 0; i < indices.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / indices.length;
// progress += 100*comparisonIndex;
progressUpdated("Made 0 out of 1 comparisons", progress, 100);
}
/**
* There are +1s in here because we skip over j when j == startingIndex.
*/
// We need to make up the set of differences to represent this probe
int startingIndex = deduplicatedIndices[i] - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + (probesPerSet + 1) >= dedupProbes.length)
startingIndex = dedupProbes.length - (probesPerSet + 1);
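// The window now spans probesPerSet + 1 consecutive entries of the intensity-sorted,
// deduplicated probe list; one of them is skipped below so the background ends up holding
// probesPerSet differences.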
try {
for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
if (j == startingIndex) {
// Don't include the point being tested in the background model
continue;
}
double diff;
if (calculateLinearRegression == true) {
if (j > dedupProbes.length) {
System.err.println(" j is too big, it's " + j + " and dedupProbes.length = " + dedupProbes.length + ", starting index = " + startingIndex);
}
double x = fromStore.getValueForProbe(dedupProbes[j]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(dedupProbes[j]) - expectedY;
} else {
diff = toStore.getValueForProbe(dedupProbes[j]) - fromStore.getValueForProbe(dedupProbes[j]);
}
if (j < startingIndex) {
currentDiffSet[j - startingIndex] = diff;
} else {
currentDiffSet[(j - startingIndex) - 1] = diff;
}
}
if (calculateCustomRegression == true) {
// the average of the from and to values - effectively the centre line
float z = ((fromStore.getValueForProbe(probes[indices[i]]) + toStore.getValueForProbe(probes[indices[i]])) / 2);
customRegressionValues[0][indices[i]] = z - ((float) SimpleStats.mean(currentDiffSet) / 2);
customRegressionValues[1][indices[i]] = z + ((float) SimpleStats.mean(currentDiffSet) / 2);
}
double mean = 0;
// Get the difference for this point
double diff;
if (calculateLinearRegression == true) {
double x = fromStore.getValueForProbe(probes[indices[i]]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(probes[indices[i]]) - expectedY;
} else if (calculateCustomRegression == true) {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
mean = SimpleStats.mean(currentDiffSet);
} else {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
}
double stdev;
stdev = SimpleStats.stdev(currentDiffSet, mean);
// if there are no reads in the probe for either of the datasets, should we set the zscore to 0??
double zScore = (diff - mean) / stdev;
// modified z score
// median absolute deviation
/* double[] madArray = new double[currentDiffSet.length];
double median = SimpleStats.median(currentDiffSet);
for(int d=0; d<currentDiffSet.length; d++){
madArray[d] = Math.abs(currentDiffSet[d] - median);
}
double mad = SimpleStats.median(madArray);
zScore = (0.6745 * (diff - median))/mad;
}
*/
probeZScoreLookupTable.put(probes[indices[i]], zScore);
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
// make this an array list as we're kicking out the mapped gene sets that have zscores with variance of 0.
ArrayList<MappedGeneSetTTestValue> pValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
MappedGeneSet[] mappedGeneSets = null;
/* if we're using the gene set from a file, map the gene sets to the probes */
if (optionsPanel.geneSetsFileRadioButton.isSelected()) {
GeneSetCollectionParser geneSetCollectionParser = new GeneSetCollectionParser(minGenesInSet, maxGenesInSet);
GeneSetCollection geneSetCollection = geneSetCollectionParser.parseGeneSetInformation(validGeneSetFilepath);
MappedGeneSet[] allMappedGeneSets = geneSetCollection.getMappedGeneSets(probes);
if (allMappedGeneSets == null) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes could be matched to probes.\nCheck that the gmt file is for the right species. \nTo use gene sets from a file, probe names must contain the gene name. \nTry defining new probes over genes (e.g. using the RNA-seq quantitation pipeline) or use existing probes lists instead of a gene set file.", "No gene sets identified", JOptionPane.ERROR_MESSAGE);
throw new SeqMonkException("No sets of genes could be matched to probes.\nTo use gene sets from a file, probe names must contain the gene name.\nTry defining new probes over genes or use existing probes lists instead of a gene set file.");
} else {
ArrayList<MappedGeneSet> mgsArrayList = new ArrayList<MappedGeneSet>();
/* get rid of those that have fewer probes in the set than minGenesInSet. We shouldn't exceed maxGenesInSet unless probes have been made over something other than genes */
for (int i = 0; i < allMappedGeneSets.length; i++) {
if (allMappedGeneSets[i].getProbes().length >= minGenesInSet) {
mgsArrayList.add(allMappedGeneSets[i]);
}
}
mappedGeneSets = mgsArrayList.toArray(new MappedGeneSet[0]);
}
} else /* or if we're using existing probelists, create mappedGeneSets from them */
if (optionsPanel.probeListRadioButton.isSelected() && selectedProbeLists != null) {
mappedGeneSets = new MappedGeneSet[selectedProbeLists.length];
for (int i = 0; i < selectedProbeLists.length; i++) {
mappedGeneSets[i] = new MappedGeneSet(selectedProbeLists[i]);
}
} else {
throw new SeqMonkException("Haven't got any genesets to use, shouldn't have got here without having any selected.");
}
if (mappedGeneSets == null || mappedGeneSets.length == 0) {
throw new SeqMonkException("Couldn't map gene sets to the probes, try again with a different probe set");
} else {
System.err.println("there are " + mappedGeneSets.length + " mappedGeneSets");
System.err.println("size of zScore lookup table = " + probeZScoreLookupTable.size());
}
System.err.println("total number of mapped gene sets = " + mappedGeneSets.length);
// deduplicate our mappedGeneSets
if (optionsPanel.deduplicateGeneSetBox.isSelected()) {
mappedGeneSets = deduplicateMappedGeneSets(mappedGeneSets);
}
System.err.println("deduplicated mapped gene sets = " + mappedGeneSets.length);
/*
* we need to go through the mapped gene set and get all the values for the matched probes
*
*/
for (int i = 0; i < mappedGeneSets.length; i++) {
Probe[] geneSetProbes = mappedGeneSets[i].getProbes();
// to contain all the z-scores for the gene set
double[] geneSetZscores = new double[geneSetProbes.length];
// Find the z-scores for each of the probes in the mappedGeneSet
for (int gsp = 0; gsp < geneSetProbes.length; gsp++) {
if (probeZScoreLookupTable.containsKey(geneSetProbes[gsp])) {
geneSetZscores[gsp] = probeZScoreLookupTable.get(geneSetProbes[gsp]);
}
}
// this is just temporary to check the stats - there's some duplication with this.... is there??
mappedGeneSets[i].zScores = geneSetZscores;
if (geneSetZscores.length > 1) {
// the number of probes in the mappedGeneSet should always be > 1 anyway as the mappedGeneSet shouldn't be created if there are < 2 matched probes.
double pVal;
if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("t-test")) {
pVal = TTest.calculatePValue(geneSetZscores, 0);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(geneSetZscores, allZscores);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov non-directional test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(convertToAbsoluteValues(geneSetZscores), convertToAbsoluteValues(allZscores));
} else {
throw new IllegalArgumentException("Didn't recognise statistical test " + optionsPanel.statisticalTestBox.getSelectedItem().toString());
}
mappedGeneSets[i].meanZScore = SimpleStats.mean(geneSetZscores);
// check the variance - we don't want variances of 0.
double stdev = SimpleStats.stdev(geneSetZscores);
if ((stdev * stdev) < 0.00000000000001) {
continue;
} else // if all the differences between the datasets are 0 then just get rid of them
if (Double.isNaN(pVal)) {
continue;
} else {
pValueArrayList.add(new MappedGeneSetTTestValue(mappedGeneSets[i], pVal));
}
} else {
System.err.println("Fell through the net somewhere, why does the set of zscores contain fewer than 2 values?");
continue;
}
}
MappedGeneSetTTestValue[] filterResultpValues = pValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
ArrayList<MappedGeneSetTTestValue> filteredPValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
// Now we've got all the p values they need to be corrected.
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(filterResultpValues);
}
System.err.println(filterResultpValues.length + " p-values calculated, multtest = " + applyMultipleTestingCorrection + ", pval limit = " + pValueLimit);
for (int i = 0; i < filterResultpValues.length; i++) {
double pOrQvalue;
if (applyMultipleTestingCorrection) {
pOrQvalue = filterResultpValues[i].q;
} else {
pOrQvalue = filterResultpValues[i].p;
}
// check whether it passes the p/q-value and z-score cut-offs
if (optionsPanel.reportAllResults.isSelected()) {
filteredPValueArrayList.add(filterResultpValues[i]);
} else {
if ((pOrQvalue < pValueLimit) && (Math.abs(filterResultpValues[i].mappedGeneSet.meanZScore) > zScoreThreshold)) {
filteredPValueArrayList.add(filterResultpValues[i]);
}
}
}
// convert the ArrayList to MappedGeneSetTTestValue
filterResultpValues = filteredPValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
if (filterResultpValues.length == 0) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes were identified using the selected parameters, \ntry changing the gene sets or relaxing the p-value/z-score thresholds.", "No gene sets identified", JOptionPane.INFORMATION_MESSAGE);
} else {
geneSetDisplay = new GeneSetDisplay(dataCollection, listDescription(), fromStore, toStore, probes, probeZScoreLookupTable, filterResultpValues, startingList, customRegressionValues, simpleRegression,
// was: optionsPanel.bidirectionalRadioButton.isSelected()
false);
geneSetDisplay.addWindowListener(this);
}
// We don't want to save the probe list here, we're bringing up the intermediate display from which probe lists can be saved.
progressCancelled();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
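The core of the background model above can be illustrated with a short, assumption-labelled sketch: hypothetical names, plain arrays in place of SeqMonk's Probe and DataStore objects, no deduplication or regression handling, and the window indices are assumed to be already clamped inside the array. It shows how a probe's from-to difference is turned into a z-score against the differences of its intensity-matched neighbours.
// Illustrative only: plain arrays stand in for SeqMonk's DataStore/Probe API.
class LocalZScoreSketch {
    // values[i][0] = "from" quantitation, values[i][1] = "to" quantitation,
    // rows sorted by average intensity. windowStart + windowSize is assumed
    // to stay inside the array.
    static double localZScore(double[][] values, int probeIndex, int windowStart, int windowSize) {
        double[] diffs = new double[windowSize];
        int n = 0;
        for (int j = windowStart; j <= windowStart + windowSize && n < windowSize; j++) {
            if (j == probeIndex) continue;           // leave the tested probe out of its own background
            diffs[n++] = values[j][1] - values[j][0];
        }
        double mean = 0;
        for (double d : diffs) mean += d;
        mean /= diffs.length;
        double var = 0;
        for (double d : diffs) var += (d - mean) * (d - mean);
        double stdev = Math.sqrt(var / (diffs.length - 1));
        double diff = values[probeIndex][1] - values[probeIndex][0];
        return (diff - mean) / stdev;                // standard deviations from the local background
    }
}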
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
The class ZScoreScatterPlotPanel, method calculateNonredundantSet.
/**
* This collapses individual points which are over the same
* pixel when redrawing the plot at a different scale
*/
private synchronized void calculateNonredundantSet() {
closestPoint = null;
ProbePairValue[][] grid = new ProbePairValue[getWidth()][getHeight()];
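// One grid cell per screen pixel; every probe drawn at the same pixel is collapsed into a single
// ProbePairValue whose count drives the colouring.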
try {
for (int p = 0; p < probes.length; p++) {
/* x value is the mean intensity */
float xValue = (xStore.getValueForProbe(probes[p]) + yStore.getValueForProbe(probes[p])) / 2;
/* y value is the z-score */
float yValue = probeZScoreLookupTable.get(probes[p]).floatValue();
if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
continue;
}
int x = getX(xValue);
int y = getY(yValue);
if (grid[x][y] == null) {
grid[x][y] = new ProbePairValue(xValue, yValue, x, y);
grid[x][y].setProbe(probes[p]);
} else {
// When sublists are being shown the count field records which sublist a point belongs to,
// so it is only used as a raw density counter when there are no sublists
if (subLists == null)
grid[x][y].count++;
// As we have multiple probes at this point we remove the
// specific probe annotation.
grid[x][y].setProbe(null);
}
}
if (subLists != null) {
for (int s = 0; s < subLists.length; s++) {
Probe[] subListProbes = subLists[s].getAllProbes();
for (int p = 0; p < subListProbes.length; p++) {
// TODO: this needs to be sorted out properly - it's to do with the filtering out of NA probes
if (probeZScoreLookupTable.containsKey(subListProbes[p])) {
// System.err.println("p = " + p + ", name = " + subListProbes[p].name());
/* x value is the mean intensity */
float xValue = (xStore.getValueForProbe(subListProbes[p]) + yStore.getValueForProbe(subListProbes[p])) / 2;
/* y value is the z-score */
float yValue = probeZScoreLookupTable.get(subListProbes[p]).floatValue();
// System.err.println("for " + probes[p].name() + ", xValue = " + xValue+ ", yValue = " + yValue);
int x = getX(xValue);
int y = getY(yValue);
if (grid[x][y] == null) {
// This can happen if we catch it in the middle of a redraw, so just skip the point
continue;
// throw new IllegalArgumentException("Found subList position not in main list");
}
// 1 = no list so 2 is the lowest sublist index
grid[x][y].count = s + 2;
}
}
}
}
} catch (SeqMonkException e) {
throw new IllegalStateException(e);
}
// Now we need to put all of the ProbePairValues into
// a single array;
int count = 0;
for (int x = 0; x < grid.length; x++) {
for (int y = 0; y < grid[x].length; y++) {
if (grid[x][y] != null)
count++;
}
}
ProbePairValue[] nonred = new ProbePairValue[count];
count--;
for (int x = 0; x < grid.length; x++) {
for (int y = 0; y < grid[x].length; y++) {
if (grid[x][y] != null) {
nonred[count] = grid[x][y];
count--;
}
}
}
Arrays.sort(nonred);
// Work out the 95th percentile count
int minCount = 1;
int maxCount = 2;
if (nonred.length > 0) {
minCount = nonred[0].count;
maxCount = nonred[((nonred.length - 1) * 95) / 100].count;
}
// Go through every nonred assigning a suitable colour
ColourGradient gradient = new HotColdColourGradient();
for (int i = 0; i < nonred.length; i++) {
if (subLists == null) {
nonred[i].color = gradient.getColor(nonred[i].count, minCount, maxCount);
} else {
if (nonred[i].count > subLists.length + 1) {
throw new IllegalArgumentException("Count above threshold when showing sublists");
}
if (nonred[i].count == 1) {
nonred[i].color = Color.LIGHT_GRAY;
} else {
nonred[i].color = ColourIndexSet.getColour(nonred[i].count - 2);
}
}
}
nonRedundantValues = nonred;
lastNonredWidth = getWidth();
lastNonredHeight = getHeight();
}
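A simplified, self-contained sketch of the collapse performed above (hypothetical class, plain coordinate arrays rather than Probe objects, no sublist handling): each point is mapped to a pixel cell and the per-cell count is what later drives the density colouring.
// Illustrative only: collapse (x, y) points onto a width x height pixel grid
// and count how many points land in each cell.
class PixelGridSketch {
    static int[][] collapseToGrid(double[] xs, double[] ys, int width, int height,
                                  double xMin, double xMax, double yMin, double yMax) {
        int[][] counts = new int[width][height];
        for (int i = 0; i < xs.length; i++) {
            if (Double.isNaN(xs[i]) || Double.isNaN(ys[i])) continue;   // skip unquantitated points
            int px = (int) Math.min(width - 1, Math.max(0, (xs[i] - xMin) / (xMax - xMin) * (width - 1)));
            int py = (int) Math.min(height - 1, Math.max(0, (yMax - ys[i]) / (yMax - yMin) * (height - 1))); // screen y runs downwards
            counts[px][py]++;
        }
        return counts;
    }
}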
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
The class IntensityReplicateFilter, method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
Probe[] probes = startingList.getAllProbes();
// We need to work out how many probes are going to be put into
// each sub-distribution we calculate. The rule is that
// we use 1% of the total or 100 probes, whichever is higher
probesPerSet = probes.length / 100;
if (probesPerSet < 100)
probesPerSet = 100;
if (probesPerSet > probes.length)
probesPerSet = probes.length;
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
// We'll build up a set of p-values as we go along
float[] lowestPValues = new float[probes.length];
for (int p = 0; p < lowestPValues.length; p++) {
lowestPValues[p] = 1;
}
// These arrays are going to hold the real SDs for the replicate
// sets we're going to analyse.
double[] realSDFrom = new double[probes.length];
double[] realSDTo = new double[probes.length];
// These arrays are going to hold the averaged SDs for the replicate
// sets we're going to analyse.
double[] averageSDFrom = new double[probes.length];
double[] averageSDTo = new double[probes.length];
// This is going to be the temporary array we populate with the set of
// differences we are going to analyse.
double[] localSDset = new double[probesPerSet];
// First work out the set of comparisons we're going to make
Vector<SingleComparison> comparisons = new Vector<IntensityReplicateFilter.SingleComparison>();
for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
if (fromStores[fromIndex] == toStores[toIndex])
continue;
// If we can find the fromStore in the toStores we've already done and the
// toStore anywhere in the fromStores then we can skip this.
boolean canSkip = false;
for (int i = 0; i < fromIndex; i++) {
if (fromStores[i] == toStores[toIndex]) {
for (int j = 0; j < toStores.length; j++) {
if (toStores[j] == fromStores[fromIndex]) {
canSkip = true;
break;
}
}
break;
}
}
if (canSkip)
continue;
comparisons.add(new SingleComparison(fromIndex, toIndex));
}
}
for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
// We need to generate a set of probe indices ordered by their average intensity
Integer[] fromIndices = new Integer[probes.length];
Integer[] toIndices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
fromIndices[i] = i;
toIndices[i] = i;
}
// This isn't ideal. We're sorting the probes by average intensity which puts together
// probes which should probably have different standard deviations. It would be better
// to sort on the intensities in each data store separately, but we get such a problem
// from very low values contaminating the set by giving artificially low average SDs that
// I don't think we can get away with this.
Comparator<Integer> fromComp = new AveragePairedIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
Comparator<Integer> toComp = new AveragePairedIntensityComparator(toStores[toIndex], fromStores[fromIndex], probes);
// Comparator<Integer> fromComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
// Comparator<Integer> toComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
Arrays.sort(fromIndices, fromComp);
Arrays.sort(toIndices, toComp);
// We also need to get the raw SDs for the two replicate sets
double[] fromValues = new double[fromStores[fromIndex].dataStores().length];
double[] toValues = new double[toStores[toIndex].dataStores().length];
try {
for (int p = 0; p < probes.length; p++) {
for (int f = 0; f < fromValues.length; f++) {
fromValues[f] = fromStores[fromIndex].dataStores()[f].getValueForProbe(probes[p]);
}
for (int t = 0; t < toValues.length; t++) {
toValues[t] = toStores[toIndex].dataStores()[t].getValueForProbe(probes[p]);
}
realSDFrom[p] = SimpleStats.stdev(fromValues);
realSDTo[p] = SimpleStats.stdev(toValues);
}
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
}
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
for (int i = 0; i < probes.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / probes.length;
progress += 100 * comparisonIndex;
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
}
// We need to make up the set of SDs to represent this probe
int startingIndex = i - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + probesPerSet >= probes.length)
startingIndex = probes.length - probesPerSet;
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDFrom[fromIndices[j]];
}
// averageSDFrom[fromIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDFrom[fromIndices[i]] = SimpleStats.mean(localSDset);
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDTo[toIndices[j]];
}
// averageSDTo[toIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDTo[toIndices[i]] = SimpleStats.mean(localSDset);
}
for (int p = 0; p < probes.length; p++) {
double fromValue = 0;
double toValue = 0;
try {
fromValue = fromStores[fromIndex].getValueForProbe(probes[p]);
toValue = toStores[toIndex].getValueForProbe(probes[p]);
} catch (SeqMonkException sme) {
}
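// Moderate the variance estimate: the SD used in the test is never allowed to fall below the
// locally averaged SD for probes of similar intensity.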
double fromSD = Math.max(realSDFrom[p], averageSDFrom[p]);
double toSD = Math.max(realSDTo[p], averageSDTo[p]);
// double fromSD = averageSDFrom[p];
// double toSD = averageSDTo[p];
currentPValues[p] = new IndexTTestValue(p, TTest.calculatePValue(fromValue, toValue, fromSD, toSD, fromStores[fromIndex].dataStores().length, toStores[toIndex].dataStores().length));
// System.err.println("P value was "+currentPValues[p].p+" from "+fromValue+" "+toValue+" "+fromSD+" "+toSD+" "+fromStores[fromIndex].dataStores().length+" "+toStores[toIndex].dataStores().length);
}
// We now need to correct the set of pValues
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(currentPValues);
}
// Merge these values into the combined set, keeping the lowest p (or q) value seen for each probe across all comparisons
if (applyMultipleTestingCorrection) {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
}
}
} else {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
}
}
}
}
// Finally, keep any probe whose best p-value passes the filter cut-off.
for (int i = 0; i < lowestPValues.length; i++) {
if (lowestPValues[i] < pValueLimit) {
newList.addProbe(probes[i], lowestPValues[i]);
}
}
filterFinished(newList);
}
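TTest.calculatePValue is SeqMonk's own helper, so purely as an illustration the sketch below computes a comparable two-sided Welch t-test p-value using org.apache.commons.math3, which the classes above already appear to use for SimpleRegression and KolmogorovSmirnovTest. The standard deviations passed in are the moderated ones, i.e. Math.max(observed SD, locally averaged SD), as in the loop above.
import org.apache.commons.math3.distribution.TDistribution;

// Illustrative only: a two-sided Welch t-test on two group means, given the
// (already moderated) standard deviations and the replicate counts.
class WelchTTestSketch {
    static double welchPValue(double meanFrom, double meanTo,
                              double sdFrom, double sdTo, int nFrom, int nTo) {
        double varFrom = (sdFrom * sdFrom) / nFrom;
        double varTo = (sdTo * sdTo) / nTo;
        double t = (meanFrom - meanTo) / Math.sqrt(varFrom + varTo);
        // Welch-Satterthwaite approximation of the degrees of freedom
        double df = Math.pow(varFrom + varTo, 2)
                / ((varFrom * varFrom) / (nFrom - 1) + (varTo * varTo) / (nTo - 1));
        TDistribution dist = new TDistribution(df);
        return 2 * dist.cumulativeProbability(-Math.abs(t));
    }
}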
Use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
The class LogisticRegressionSplicingFilter, method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it and collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
probes = filterProbesByCount(probes, fromStores, toStores);
progressUpdated("Pairing Probes", 0, 1);
// Make pairs of probes to test
ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
System.err.println("Found " + pairs.length + " pairs to test");
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("PVALUE", "" + pValueCutoff);
template.setValue("CORRECTCOUNT", "" + pairs.length);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
pr = null;
String lastChr = "";
for (int p = 0; p < pairs.length; p++) {
if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = pairs[p].probe1.chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromProbe1Counts = new int[fromStores.length];
int[] fromProbe2Counts = new int[fromStores.length];
int[] toProbe1Counts = new int[toStores.length];
int[] toProbe2Counts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < toStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < fromProbe1Counts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
}
for (int i = 0; i < toProbe1Counts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
}
}
if (pr != null)
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
// In case the same probe is found multiple times we'll cache the p-values
// we see and report the best one.
HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
// See if the pvalue we've got is better than the one we're storing
if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
}
br.close();
for (Probe probeHit : cachedHits.keySet()) {
newList.addProbe(probeHit, cachedHits.get(probeHit));
}
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
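The hits.txt parsing and best-p-value caching above can be reduced to a small standalone sketch (hypothetical class, pair indices instead of Probe objects; the file layout is assumed to match the output consumed above: a header row, a row name ending in the pair index, and the p-value in the final column). The smallest p-value seen for each pair is kept, mirroring the cached-hits logic.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Illustrative only: parse a hits.txt of the layout described above and keep
// the smallest p-value seen for each pair index.
class HitsParserSketch {
    static Map<Integer, Float> bestPValues(String hitsFile) throws IOException {
        Map<Integer, Float> best = new HashMap<Integer, Float>();
        try (BufferedReader br = new BufferedReader(new FileReader(hitsFile))) {
            br.readLine();                              // skip the header row
            String line;
            while ((line = br.readLine()) != null) {
                String[] sections = line.split("\t");
                String[] idParts = sections[0].split("\\.");
                int pairIndex = Integer.parseInt(idParts[idParts.length - 1]);
                float pValue = Float.parseFloat(sections[sections.length - 1]);
                Float current = best.get(pairIndex);
                if (current == null || pValue < current) {
                    best.put(pairIndex, pValue);        // keep the best (smallest) p-value
                }
            }
        }
        return best;
    }
}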