use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class DistributionPositionFilter method generateProbeList.
/**
 * Builds the filtered probe list from the rank of each probe within each data store.
 *
 * <p>For every store the probes are sorted with a {@code ProbeValueSorter} for that
 * store, and each probe whose position in the sorted array lies inside the window
 * defined by {@code lowerLimit} and {@code upperLimit} (percentages of the probe
 * count, either of which may be {@code null} for "no limit") has its counter
 * incremented. A probe is then kept when the number of stores in which it fell
 * inside the window satisfies {@code limitType} against {@code chosenNumber}.
 *
 * <p>Every probe starts with a count of zero, so probes that never fall inside the
 * window are still evaluated. Without that, a NO_MORE_THAN test could never return a
 * probe which was inside the window in no store at all.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);

    // Convert the % values into an inclusive range of positions in the sorted
    // probe array. A missing limit leaves that end of the range open.
    int firstIndex = 0;
    if (lowerLimit != null) {
        firstIndex = Math.max(0, (int) ((probes.length * lowerLimit) / 100));
    }
    int lastIndex = probes.length - 1;
    if (upperLimit != null) {
        lastIndex = Math.min(lastIndex, (int) ((probes.length * upperLimit) / 100));
    }

    // Seed every probe with zero so that probes which are never inside the window
    // are still present when the counts are tested below.
    Hashtable<Probe, Integer> counts = new Hashtable<Probe, Integer>(probes.length);
    for (int p = 0; p < probes.length; p++) {
        counts.put(probes[p], 0);
    }

    for (int s = 0; s < stores.length; s++) {
        progressUpdated("Processing " + stores[s].name(), s, stores.length);
        Arrays.sort(probes, new ProbeValueSorter(stores[s]));

        // Only the positions inside the window need visiting.
        for (int p = firstIndex; p <= lastIndex; p++) {
            counts.put(probes[p], counts.get(probes[p]) + 1);
        }
    }

    for (Probe probe : counts.keySet()) {
        int storesInWindow = counts.get(probe).intValue();
        boolean passes = false;

        switch (limitType) {
            case EXACTLY:
                passes = storesInWindow == chosenNumber;
                break;
            case AT_LEAST:
                passes = storesInWindow >= chosenNumber;
                break;
            case NO_MORE_THAN:
                passes = storesInWindow <= chosenNumber;
                break;
        }

        if (passes) {
            newList.addProbe(probe, null);
        }
    }

    filterFinished(newList);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class EdgeRFilter method generateProbeList.
/**
 * Runs the EdgeR R script over the two replicate sets and reports the hits as a new
 * probe list.
 *
 * <p>The steps are: create a temporary directory, fill in the R template and write it
 * to <code>script.r</code>, write one row per probe to <code>counts.txt</code> (the
 * probe's index followed by its value in each "from" store and then each "to" store),
 * run the script, and finally read <code>hits.txt</code>. The first line of the hits
 * file is skipped; on every other line the first column is used as the index into the
 * probe array and the last column is stored as the probe's value.
 *
 * <p>Any value which is not a whole number aborts the filter with an
 * {@link IllegalArgumentException} passed to {@code progressExceptionReceived}.
 * All other failures are reported the same way. Every file handle opened here is
 * closed on every exit path.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();

    // The count table treats both groups identically ("from" columns first), so the
    // rows are written from one combined array.
    DataStore[] allStores = new DataStore[fromStores.length + toStores.length];
    System.arraycopy(fromStores, 0, allStores, 0, fromStores.length);
    System.arraycopy(toStores, 0, allStores, fromStores.length, toStores.length);

    PrintWriter scriptWriter = null;
    PrintWriter countWriter = null;
    BufferedReader hitsReader = null;

    try {
        progressUpdated("Creating temp directory", 0, 1);
        File tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());

        progressUpdated("Writing R script", 0, 1);

        // Get the template script and substitute in the variables we need to change
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_template.r"));
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));

        // Say which p value column we're filtering on
        template.setValue("CORRECTED", multiTest ? "FDR" : "PValue");

        // One condition label per sample: 1 for each "from" store, 2 for each "to" store
        StringBuffer conditions = new StringBuffer();
        for (int i = 0; i < fromStores.length; i++) {
            if (i > 0) {
                conditions.append(",");
            }
            conditions.append("1");
        }
        for (int i = 0; i < toStores.length; i++) {
            conditions.append(",");
            conditions.append("2");
        }
        template.setValue("CONDITIONS", conditions.toString());
        template.setValue("PVALUE", "" + cutoff);

        // Write the script file
        scriptWriter = new PrintWriter(new File(tempDir.getAbsoluteFile() + "/script.r"));
        scriptWriter.print(template.toString());
        scriptWriter.close();

        // Write the count data, starting with the header line
        countWriter = new PrintWriter(new File(tempDir.getAbsoluteFile() + "/counts.txt"));
        StringBuffer header = new StringBuffer("probe");
        for (int i = 0; i < fromStores.length; i++) {
            header.append("\t");
            header.append("from");
            header.append(i);
        }
        for (int i = 0; i < toStores.length; i++) {
            header.append("\t");
            header.append("to");
            header.append(i);
        }
        countWriter.println(header.toString());

        progressUpdated("Writing count data", 0, 1);
        Probe[] probes = startingList.getAllProbes();

        for (int p = 0; p < probes.length; p++) {
            if (p % 1000 == 0) {
                progressUpdated("Writing count data", p, probes.length);
            }

            // The probe's array index is its identifier in the table; the hits file
            // is mapped back to probes through the same index.
            StringBuffer row = new StringBuffer();
            row.append(p);

            for (int s = 0; s < allStores.length; s++) {
                float value = allStores[s].getValueForProbe(probes[p]);
                if (value != (int) value) {
                    // The finally block closes the half-written count file.
                    progressExceptionReceived(new IllegalArgumentException("Inputs to the EdgeR filter MUST be raw, incorrected counts, not things like " + value));
                    return;
                }
                row.append("\t");
                row.append(value);
            }

            countWriter.println(row.toString());
        }
        countWriter.close();

        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();

        // Poll the listener until the script finishes, fails or is cancelled
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                return;
            }
            if (listener.complete()) {
                break;
            }
            Thread.sleep(500);
        }

        // We can now parse the results and put the hits into a new probe list
        ProbeList newList = new ProbeList(startingList, "", "", "FDR");

        hitsReader = new BufferedReader(new FileReader(new File(tempDir.getAbsolutePath() + "/hits.txt")));

        // The first line is skipped without being parsed
        hitsReader.readLine();

        String line;
        while ((line = hitsReader.readLine()) != null) {
            String[] sections = line.split("\t");
            int probeIndex = Integer.parseInt(sections[0]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            newList.addProbe(probes[probeIndex], pValue);
        }

        // Close the reader before the runner cleans up
        hitsReader.close();
        runner.cleanUp();
        filterFinished(newList);
    } catch (InterruptedException ie) {
        // Keep the interrupt visible to whoever owns this thread
        Thread.currentThread().interrupt();
        progressExceptionReceived(ie);
    } catch (Exception e) {
        progressExceptionReceived(e);
    } finally {
        // Safety net for early returns and exceptions; closing again after a
        // successful close is harmless.
        if (scriptWriter != null) {
            scriptWriter.close();
        }
        if (countWriter != null) {
            countWriter.close();
        }
        if (hitsReader != null) {
            try {
                hitsReader.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the hits file fails
            }
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class EdgeRForRevFilter method generateProbeList.
/**
 * Runs the EdgeR "for/rev" R script over the two replicate sets and reports the hits
 * as a new probe list.
 *
 * <p>For each probe and each store a pair of counts (meth / unmeth) is derived:
 * <ul>
 * <li>when {@code resample} is set, the total is the number of reads for the probe
 * and the meth count is that total scaled by the probe's value treated as a
 * percentage (the stores must be quantitated with values in the range 0-100, which
 * is checked up front; a probe with a NaN value in any store is skipped);</li>
 * <li>otherwise the meth count is the number of reads whose strand is
 * {@code Location.FORWARD} and the unmeth count is the remainder.</li>
 * </ul>
 *
 * <p>A probe is only written to <code>counts.txt</code> if each group has at least
 * two stores with a non-zero count, and both the pooled and the mean per-store
 * percentage difference between the groups reach {@code absDiffCutoff}. The R script
 * is then run and <code>hits.txt</code> is read back: the first line is skipped, the
 * probe index is the part of the first column after its last '.', and the last column
 * is stored as the probe's value.
 *
 * <p>Failures are passed to {@code progressExceptionReceived}. Every file handle
 * opened here is closed on every exit path.
 *
 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
 */
@Override
protected void generateProbeList() {
    // Make up the list of DataStores in each replicate set
    DataStore[] fromStores = replicateSets[0].dataStores();
    DataStore[] toStores = replicateSets[1].dataStores();

    // Index 0 is the "from" group and index 1 is the "to" group throughout
    DataStore[][] groups = new DataStore[][] { fromStores, toStores };
    String[] groupLabels = new String[] { "from", "to" };

    PrintWriter scriptWriter = null;
    PrintWriter countWriter = null;
    BufferedReader hitsReader = null;

    try {
        Probe[] probes = startingList.getAllProbes();

        if (resample) {
            // Resampling scales read counts by the quantitated value, so every store
            // must be quantitated with values which work as percentages.
            for (int g = 0; g < groups.length; g++) {
                for (int i = 0; i < groups[g].length; i++) {
                    DataStore store = groups[g][i];
                    if (!store.isQuantitated()) {
                        progressExceptionReceived(new SeqMonkException("Data Store " + store.name() + " wasn't quantitated"));
                        return;
                    }
                    for (int p = 0; p < probes.length; p++) {
                        float value = store.getValueForProbe(probes[p]);
                        if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
                            progressExceptionReceived(new SeqMonkException("Data Store " + store.name() + " had a value outside the range 0-100 (" + value + ")"));
                            return;
                        }
                    }
                }
            }
        }

        progressUpdated("Creating temp directory", 0, 1);
        File tempDir = TempDirectory.createTempDirectory();
        System.err.println("Temp dir is " + tempDir.getAbsolutePath());

        progressUpdated("Writing R script", 0, 1);

        // Get the template script and substitute in the variables we need to change
        Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_for_rev_template.r"));
        template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
        template.setValue("DIFFERENCE", "" + absDiffCutoff);
        template.setValue("PVALUE", "" + pValueCutoff);
        if (multiTest) {
            template.setValue("MULTITEST", "TRUE");
            template.setValue("CORRECTED", "FDR");
        } else {
            template.setValue("MULTITEST", "FALSE");
            template.setValue("CORRECTED", "PValue");
        }

        // For the conditions we just repeat "from" and "to" the number of times they
        // occur in the samples (twice for each sample since we have both meth and unmeth)
        StringBuffer conditions = new StringBuffer();
        for (int i = 0; i < fromStores.length; i++) {
            if (i > 0) {
                conditions.append(",");
            }
            conditions.append("\"from\",\"from\"");
        }
        for (int i = 0; i < toStores.length; i++) {
            conditions.append(",\"to\",\"to\"");
        }
        template.setValue("CONDITIONS", conditions.toString());

        // Write the script file
        scriptWriter = new PrintWriter(new File(tempDir.getAbsoluteFile() + "/script.r"));
        scriptWriter.print(template.toString());
        scriptWriter.close();

        // Write the count data, starting with the header line
        countWriter = new PrintWriter(new File(tempDir.getAbsoluteFile() + "/counts.txt"));
        countWriter.print("id");
        for (int g = 0; g < groups.length; g++) {
            for (int i = 0; i < groups[g].length; i++) {
                countWriter.print("\t" + groupLabels[g] + "_" + i + "_meth\t" + groupLabels[g] + "_" + i + "_unmeth");
            }
        }
        countWriter.print("\n");

        PROBE: for (int p = 0; p < probes.length; p++) {
            if (p % 1000 == 0) {
                progressUpdated("Writing data for chr" + probes[p].chromosome().name(), p, probes.length);
            }

            // Work out the meth / unmeth pair for every store in both groups
            int[][] methCounts = new int[groups.length][];
            int[][] unmethCounts = new int[groups.length][];

            for (int g = 0; g < groups.length; g++) {
                methCounts[g] = new int[groups[g].length];
                unmethCounts[g] = new int[groups[g].length];

                for (int i = 0; i < groups[g].length; i++) {
                    long[] reads = groups[g][i].getReadsForProbe(probes[p]);
                    int totalCount = reads.length;
                    int methCount = 0;

                    if (resample) {
                        float value = groups[g][i].getValueForProbe(probes[p]);
                        if (Float.isNaN(value)) {
                            // No usable value in this store, so drop the whole probe
                            continue PROBE;
                        }
                        methCount = Math.round((totalCount * value) / 100f);
                    } else {
                        for (int r = 0; r < reads.length; r++) {
                            if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                                ++methCount;
                            }
                        }
                    }

                    methCounts[g][i] = methCount;
                    unmethCounts[g][i] = totalCount - methCount;
                }
            }

            // Check to see we meet the requirements for the min amount of information
            // and the min diff.
            int[] pooledMeth = new int[groups.length];
            int[] pooledTotal = new int[groups.length];
            int[] validStores = new int[groups.length];

            for (int g = 0; g < groups.length; g++) {
                for (int i = 0; i < methCounts[g].length; i++) {
                    pooledMeth[g] += methCounts[g][i];
                    pooledTotal[g] += methCounts[g][i];
                    pooledTotal[g] += unmethCounts[g][i];
                    if (methCounts[g][i] > 0 || unmethCounts[g][i] > 0) {
                        ++validStores[g];
                    }
                }
            }

            // EdgeR only requires 2 valid observations
            if (validStores[0] < 2 || validStores[1] < 2) {
                // We don't have enough data to measure this one
                continue;
            }

            // First test: difference between the pooled percentages of the two groups
            if (Math.abs((pooledMeth[0] * 100f / pooledTotal[0]) - (pooledMeth[1] * 100f / pooledTotal[1])) < absDiffCutoff) {
                continue;
            }

            // Second test: difference between the mean per-store percentages, using
            // only the stores which had any counts at all
            float[][] percentages = new float[groups.length][];
            for (int g = 0; g < groups.length; g++) {
                percentages[g] = new float[validStores[g]];
                int nextIndex = 0;
                for (int i = 0; i < methCounts[g].length; i++) {
                    if (methCounts[g][i] + unmethCounts[g][i] == 0) {
                        continue;
                    }
                    percentages[g][nextIndex] = methCounts[g][i] * 100f / (methCounts[g][i] + unmethCounts[g][i]);
                    ++nextIndex;
                }
            }

            if (Math.abs(SimpleStats.mean(percentages[0]) - SimpleStats.mean(percentages[1])) < absDiffCutoff) {
                continue;
            }

            // If we get here then we're OK to use this probe so we print out its data.
            // All of the data for one probe goes onto a single line. The first value is
            // the index of the probe. The rest are pairs of meth/unmeth values for the
            // from samples then the to samples.
            countWriter.print(p);
            for (int g = 0; g < groups.length; g++) {
                for (int i = 0; i < methCounts[g].length; i++) {
                    countWriter.print("\t" + methCounts[g][i] + "\t" + unmethCounts[g][i]);
                }
            }
            countWriter.print("\n");
        }
        countWriter.close();

        progressUpdated("Running R Script", 0, 1);
        RScriptRunner runner = new RScriptRunner(tempDir);
        RProgressListener listener = new RProgressListener(runner);
        runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
        runner.runScript();

        // Poll the listener until the script finishes, fails or is cancelled
        while (true) {
            if (listener.cancelled()) {
                progressCancelled();
                return;
            }
            if (listener.exceptionReceived()) {
                progressExceptionReceived(new SeqMonkException("R Script failed"));
                return;
            }
            if (listener.complete()) {
                break;
            }
            Thread.sleep(500);
        }

        // We can now parse the results and put the hits into a new probe list
        ProbeList newList;
        if (multiTest) {
            newList = new ProbeList(startingList, "", "", "FDR");
        } else {
            newList = new ProbeList(startingList, "", "", "p-value");
        }

        hitsReader = new BufferedReader(new FileReader(new File(tempDir.getAbsolutePath() + "/hits.txt")));

        // The first line is skipped without being parsed
        hitsReader.readLine();

        String line;
        while ((line = hitsReader.readLine()) != null) {
            String[] sections = line.split("\t");
            // The probe index is whatever follows the last '.' in the first column
            String[] indexSections = sections[0].split("\\.");
            int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
            float pValue = Float.parseFloat(sections[sections.length - 1]);
            newList.addProbe(probes[probeIndex], pValue);
        }

        // Close the reader before the runner cleans up
        hitsReader.close();
        runner.cleanUp();
        filterFinished(newList);
    } catch (InterruptedException ie) {
        // Keep the interrupt visible to whoever owns this thread
        Thread.currentThread().interrupt();
        progressExceptionReceived(ie);
    } catch (Exception e) {
        progressExceptionReceived(e);
    } finally {
        // Safety net for early returns and exceptions; closing again after a
        // successful close is harmless.
        if (scriptWriter != null) {
            scriptWriter.close();
        }
        if (countWriter != null) {
            countWriter.close();
        }
        if (hitsReader != null) {
            try {
                hitsReader.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the hits file fails
            }
        }
    }
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class FeatureFilter method generateProbeList.
/**
 * Filters the starting list by the position of each probe relative to a set of
 * feature positions.
 *
 * <p>The work is done one chromosome at a time. The probes and the features on that
 * chromosome are both sorted, then for each probe the features are scanned (starting
 * from the first feature which could still be within range) until one satisfies both
 * the strand restriction and the selected relationship. For the not-overlapping
 * relationship the overlapping probes are collected and the remaining probes on the
 * chromosome are passed instead.
 */
protected void generateProbeList() {
    // All of the feature positions come back as one set; they are divided up by
    // chromosome inside the loop below.
    Probe[] allFeaturePositions = options.featurePositions.getProbes();

    // The list of probes which pass the filter
    ProbeList passedProbes = new ProbeList(startingList, "", "", null);

    // How far beyond a feature we may need to look
    int annotationLimit = options.closenessLimit();

    Chromosome[] chrs = collection.genome().getAllChromosomes();

    for (int c = 0; c < chrs.length; c++) {
        Chromosome chromosome = chrs[c];

        // Not-overlapping is handled by remembering the probes which DO overlap and
        // passing everything else once the chromosome has been processed.
        HashSet<Probe> failedProbes = new HashSet<Probe>();

        progressUpdated("Processing features on Chr " + chromosome.name(), c, chrs.length);

        Probe[] probes = startingList.getProbesForChromosome(chromosome);

        Vector<Probe> featuresForThisChromosome = new Vector<Probe>();
        for (Probe candidate : allFeaturePositions) {
            if (candidate.chromosome().equals(chromosome)) {
                featuresForThisChromosome.add(candidate);
            }
        }
        Probe[] features = featuresForThisChromosome.toArray(new Probe[0]);

        Arrays.sort(probes);
        Arrays.sort(features);

        // Index of the first feature which was still in range of the previous probe;
        // the scan for the next probe resumes from here.
        int lastFoundIndex = 0;

        for (Probe probe : probes) {
            boolean foundFirst = false;

            for (int f = lastFoundIndex; f < features.length; f++) {
                if (cancel) {
                    cancel = false;
                    progressCancelled();
                    return;
                }

                Probe feature = features[f];

                // Remember the first feature reaching as far as this probe's start
                if (!foundFirst && feature.end() + annotationLimit >= probe.start()) {
                    lastFoundIndex = f;
                    foundFirst = true;
                }

                // See if we're skipping this feature for this probe based on its strand
                boolean strandAccepted = true;
                if (strand != ANY_STRAND) {
                    if (strand == FORWARD_ONLY) {
                        strandAccepted = feature.strand() == Location.FORWARD;
                    } else if (strand == REVERSE_ONLY) {
                        strandAccepted = feature.strand() == Location.REVERSE;
                    } else if (strand == SAME_STRAND) {
                        strandAccepted = feature.strand() == probe.strand();
                    } else if (strand == OPPOSING_STRAND) {
                        strandAccepted = (feature.strand() == Location.FORWARD && probe.strand() == Location.REVERSE)
                                || (feature.strand() == Location.REVERSE && probe.strand() == Location.FORWARD);
                    }
                }
                if (!strandAccepted) {
                    continue;
                }

                // Work out whether this feature satisfies the positional test for the
                // selected relationship.
                boolean matches = false;
                if (relationship == EXACTLY_MATCHING) {
                    matches = probe.start() == feature.start() && probe.end() == feature.end();
                } else if (relationship == OVERLAPPING || relationship == NOT_OVERLAPPING) {
                    matches = probe.start() < feature.end() && probe.end() > feature.start();
                } else if (relationship == CONTAINED_WITHIN) {
                    matches = probe.start() >= feature.start() && probe.end() <= feature.end();
                } else if (relationship == SURROUNDING) {
                    matches = probe.start() <= feature.start() && probe.end() >= feature.end();
                } else if (relationship == CLOSE_TO) {
                    matches = probe.start() < feature.end() + annotationLimit
                            && probe.end() > feature.start() - annotationLimit;
                }

                if (matches) {
                    if (relationship == NOT_OVERLAPPING) {
                        // An overlap means this probe is rejected for not-overlapping
                        failedProbes.add(probe);
                    } else {
                        passedProbes.addProbe(probe, null);
                    }
                    // One matching feature is enough for this probe
                    break;
                }
            }
        }

        // For not-overlapping, pass every probe on this chromosome which wasn't rejected
        if (relationship == NOT_OVERLAPPING) {
            for (Probe probe : probes) {
                if (!failedProbes.contains(probe)) {
                    passedProbes.addProbe(probe, null);
                }
            }
        }
    }

    filterFinished(passedProbes);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList in project SeqMonk by s-andrews.
the class GeneSetDisplay method saveProbeLists.
/**
 * Saves the selected gene sets as probe lists.
 *
 * <p>A parent list is created under the starting probe list, and one child list is
 * created beneath it for each selected gene set. Every probe of a gene set is added,
 * with its z score, to both the parent and that gene set's child list. The user is
 * then asked to name the parent list; an empty name repeats the prompt and
 * cancelling deletes the parent list again.
 *
 * @param selectedLists the gene set results to save
 */
private void saveProbeLists(MappedGeneSetTTestValue[] selectedLists) {
    ProbeList parentProbeList = new ProbeList(startingProbeList, "gene set filter results", description, "z score");

    for (MappedGeneSetTTestValue selected : selectedLists) {
        String childDescription = getDescription(selected);
        ProbeList childList = new ProbeList(parentProbeList, selected.mappedGeneSet.name(), childDescription, "z score");

        Probe[] members = selected.mappedGeneSet.getProbes();
        for (int j = 0; j < members.length; j++) {
            float zScore = (float) (selected.mappedGeneSet.zScores[j]);
            parentProbeList.addProbe(members[j], zScore);
            childList.addProbe(members[j], zScore);
        }
    }

    // Keep asking until we get a non-empty name or the user cancels
    String groupName;
    do {
        groupName = (String) JOptionPane.showInputDialog(this, "Enter list name", "Found " + parentProbeList.getAllProbes().length + " probes", JOptionPane.QUESTION_MESSAGE, null, null, parentProbeList.name());

        if (groupName == null) {
            // They cancelled. Since the list will automatically have been added to
            // the ProbeList tree we actively need to delete it at this point.
            parentProbeList.delete();
            return;
        }
    } while (groupName.length() == 0);

    parentProbeList.setName(groupName);
}
Aggregations