use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class IntensityReplicateFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
Probe[] probes = startingList.getAllProbes();
// We need to work out how many probes are going to be put into
// each sub-distribution we calculate. The rule is going to be that
// we use 1% of the total, or 100 probes whichever is the higher
probesPerSet = probes.length / 100;
if (probesPerSet < 100)
probesPerSet = 100;
if (probesPerSet > probes.length)
probesPerSet = probes.length;
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
// We'll build up a set of p-values as we go along
float[] lowestPValues = new float[probes.length];
for (int p = 0; p < lowestPValues.length; p++) {
lowestPValues[p] = 1;
}
// These arrays are going to hold the real SDs for the replicate
// sets we're going to analyse.
double[] realSDFrom = new double[probes.length];
double[] realSDTo = new double[probes.length];
// These arrays are going to hold the averaged SDs for the replicate
// sets we're going to analyse.
double[] averageSDFrom = new double[probes.length];
double[] averageSDTo = new double[probes.length];
// This is going to be the temporary array we populate with the set of
// differences we are going to analyse.
double[] localSDset = new double[probesPerSet];
// First work out the set of comparisons we're going to make
Vector<SingleComparison> comparisons = new Vector<IntensityReplicateFilter.SingleComparison>();
for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
if (fromStores[fromIndex] == toStores[toIndex])
continue;
// If we can find the fromStore in the toStores we've already done and the
// toStore anywhere in the fromStores then we can skip this.
boolean canSkip = false;
for (int i = 0; i < fromIndex; i++) {
if (fromStores[i] == toStores[toIndex]) {
for (int j = 0; j < toStores.length; j++) {
if (toStores[j] == fromStores[fromIndex]) {
canSkip = true;
break;
}
}
break;
}
}
if (canSkip)
continue;
comparisons.add(new SingleComparison(fromIndex, toIndex));
}
}
for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
// We need to generate a set of probe indices ordered by their average intensity
Integer[] fromIndices = new Integer[probes.length];
Integer[] toIndices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
fromIndices[i] = i;
toIndices[i] = i;
}
// This isn't ideal. We're sorting the probes by average intensity which puts together
// probes which should probably have different standard deviations. It would be better
// to sort on the intensities in each data store separately, but we get such a problem
// from very low values contaminating the set by giving artificially low average SDs that
// I don't think we can get away with this.
Comparator<Integer> fromComp = new AveragePairedIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
Comparator<Integer> toComp = new AveragePairedIntensityComparator(toStores[toIndex], fromStores[fromIndex], probes);
// Comparator<Integer> fromComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
// Comparator<Integer> toComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
Arrays.sort(fromIndices, fromComp);
Arrays.sort(toIndices, toComp);
// We also need to get the raw SDs for the two replicate sets
double[] fromValues = new double[fromStores[fromIndex].dataStores().length];
double[] toValues = new double[toStores[toIndex].dataStores().length];
try {
for (int p = 0; p < probes.length; p++) {
for (int f = 0; f < fromValues.length; f++) {
fromValues[f] = fromStores[fromIndex].dataStores()[f].getValueForProbe(probes[p]);
}
for (int t = 0; t < toValues.length; t++) {
toValues[t] = toStores[toIndex].dataStores()[t].getValueForProbe(probes[p]);
}
realSDFrom[p] = SimpleStats.stdev(fromValues);
realSDTo[p] = SimpleStats.stdev(toValues);
}
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
}
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
for (int i = 0; i < probes.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / probes.length;
progress += 100 * comparisonIndex;
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
}
// We need to make up the set of SDs to represent this probe
int startingIndex = i - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + probesPerSet >= probes.length)
startingIndex = probes.length - probesPerSet;
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDFrom[fromIndices[j]];
}
// averageSDFrom[fromIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDFrom[fromIndices[i]] = SimpleStats.mean(localSDset);
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDTo[toIndices[j]];
}
// averageSDTo[toIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDTo[toIndices[i]] = SimpleStats.mean(localSDset);
}
for (int p = 0; p < probes.length; p++) {
double fromValue = 0;
double toValue = 0;
try {
fromValue = fromStores[fromIndex].getValueForProbe(probes[p]);
toValue = toStores[toIndex].getValueForProbe(probes[p]);
} catch (SeqMonkException sme) {
}
double fromSD = Math.max(realSDFrom[p], averageSDFrom[p]);
double toSD = Math.max(realSDTo[p], averageSDTo[p]);
// double fromSD = averageSDFrom[p];
// double toSD = averageSDTo[p];
currentPValues[p] = new IndexTTestValue(p, TTest.calculatePValue(fromValue, toValue, fromSD, toSD, fromStores[fromIndex].dataStores().length, toStores[toIndex].dataStores().length));
// System.err.println("P value was "+currentPValues[p].p+" from "+fromValue+" "+toValue+" "+fromSD+" "+toSD+" "+fromStores[fromIndex].dataStores().length+" "+toStores[toIndex].dataStores().length);
}
// We now need to correct the set of pValues
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(currentPValues);
}
// the combined set
if (applyMultipleTestingCorrection) {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
}
}
} else {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
}
}
}
}
// pass the filter.
for (int i = 0; i < lowestPValues.length; i++) {
if (lowestPValues[i] < pValueLimit) {
newList.addProbe(probes[i], lowestPValues[i]);
}
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class LogisticRegressionSplicingFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
probes = filterProbesByCount(probes, fromStores, toStores);
progressUpdated("Pairing Probes", 0, 1);
// Make pairs of probes to test
ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
System.err.println("Found " + pairs.length + " pairs to test");
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("PVALUE", "" + pValueCutoff);
template.setValue("CORRECTCOUNT", "" + pairs.length);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
pr = null;
String lastChr = "";
for (int p = 0; p < pairs.length; p++) {
if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = pairs[p].probe1.chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromProbe1Counts = new int[fromStores.length];
int[] fromProbe2Counts = new int[fromStores.length];
int[] toProbe1Counts = new int[toStores.length];
int[] toProbe2Counts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < toStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < fromProbe1Counts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
}
for (int i = 0; i < toProbe1Counts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
}
}
if (pr != null)
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
// In case the same probe is found multiple times we'll cache the p-values
// we see and report the best one.
HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
// See if the pvalue we've got is better than the one we're storing
if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
}
br.close();
for (Probe probeHit : cachedHits.keySet()) {
newList.addProbe(probeHit, cachedHits.get(probeHit));
}
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class MonteCarloFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
Probe[] probes = toList.getAllProbes();
ArrayList<Probe> allProbes = new ArrayList<Probe>();
Probe[] fromProbes = startingList.getAllProbes();
try {
for (int i = 0; i < fromProbes.length; i++) {
allProbes.add(fromProbes[i]);
}
int passedCount = 0;
float targetValue = getValue(probes);
for (int iteration = 0; iteration < iterationCount; iteration++) {
if (iteration % 100 == 0) {
progressUpdated("Performed " + iteration + " iterations", iteration, iterationCount);
}
if (cancel) {
progressCancelled();
return;
}
Probe[] theseProbes = makeRandomProbes(allProbes, probes.length);
float value = getValue(theseProbes);
if (value >= targetValue) {
++passedCount;
}
}
float pValue = ((float) passedCount) / iterationCount;
ProbeList newList = new ProbeList(toList, "", "", "P-value");
for (int i = 0; i < probes.length; i++) {
newList.addProbe(probes[i], pValue);
}
filterFinished(newList);
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class PositionFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
ProbeList newList = new ProbeList(startingList, "", "", null);
Probe[] probes = startingList.getAllProbes();
// We only take probes which are completely enclosed in the selected region
for (int p = 0; p < probes.length; p++) {
if (p % 10000 == 0)
progressUpdated(p, probes.length);
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (chromosome != null) {
if (probes[p].chromosome() != chromosome)
continue;
}
if (!strandType.useProbe(probes[p]))
continue;
if (start != null && probes[p].start() < start)
continue;
if (end != null && probes[p].end() > end)
continue;
newList.addProbe(probes[p], null);
}
filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class ProbeListProbeGenerator method getOptionsPanel.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.ProbeGenerators.ProbeGenerator#getOptionsPanel(uk.ac.babraham.SeqMonk.SeqMonkApplication)
*/
public JPanel getOptionsPanel() {
if (optionPanel != null) {
// We've done this already
return optionPanel;
}
ProbeList currentActiveList = null;
if (collection.probeSet() != null) {
currentLists = collection.probeSet().getAllProbeLists();
currentActiveList = collection.probeSet().getActiveList();
} else {
currentLists = new ProbeList[0];
}
optionPanel = new JPanel();
optionPanel.setLayout(new GridBagLayout());
GridBagConstraints gbc = new GridBagConstraints();
gbc.gridx = 1;
gbc.gridy = 1;
gbc.weightx = 0.1;
gbc.weighty = 0.5;
gbc.fill = GridBagConstraints.HORIZONTAL;
optionPanel.add(new JLabel("Select List"), gbc);
gbc.gridx++;
gbc.weightx = 0.9;
selectedListBox = new JComboBox(currentLists);
selectedListBox.setPrototypeDisplayValue("No longer than this please");
for (int i = 0; i < currentLists.length; i++) {
if (currentLists[i] == currentActiveList) {
selectedListBox.setSelectedIndex(i);
}
}
selectedListBox.addItemListener(new ItemListener() {
public void itemStateChanged(ItemEvent e) {
isReady();
}
});
optionPanel.add(selectedListBox, gbc);
gbc.gridx = 1;
gbc.gridy++;
gbc.weightx = 0.1;
optionPanel.add(new JLabel("Reverse Probe Direction"), gbc);
gbc.gridx++;
gbc.weightx = 0.9;
reverseDirectionBox = new JCheckBox();
optionPanel.add(reverseDirectionBox, gbc);
return optionPanel;
}
Aggregations