use of uk.ac.babraham.SeqMonk.DataTypes.DataStore in project SeqMonk by s-andrews.
the class EdgeRForRevFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
if (resample) {
// We need to check that the data stores are quantitated
for (int i = 0; i < fromStores.length; i++) {
if (!fromStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = fromStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + fromStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
for (int i = 0; i < toStores.length; i++) {
if (!toStores[i].isQuantitated()) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " wasn't quantitated"));
return;
}
for (int p = 0; p < probes.length; p++) {
float value = toStores[i].getValueForProbe(probes[p]);
if ((!Float.isNaN(value)) && (value < 0 || value > 100)) {
progressExceptionReceived(new SeqMonkException("Data Store " + toStores[i].name() + " had a value outside the range 0-100 (" + value + ")"));
return;
}
}
}
}
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/EdgeRFilter/edger_for_rev_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("DIFFERENCE", "" + absDiffCutoff);
template.setValue("PVALUE", "" + pValueCutoff);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
template.setValue("CORRECTED", "FDR");
} else {
template.setValue("MULTITEST", "FALSE");
template.setValue("CORRECTED", "PValue");
}
// For the conditions we just repeat "from" and "to" the number of times they occur in the
// samples (twice for each sample since we have both meth and unmeth)
StringBuffer conditions = new StringBuffer();
for (int i = 0; i < fromStores.length; i++) {
if (i > 0)
conditions.append(",");
conditions.append("\"from\",\"from\"");
}
for (int i = 0; i < toStores.length; i++) {
conditions.append(",\"to\",\"to\"");
}
template.setValue("CONDITIONS", conditions.toString());
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
File outFile = new File(tempDir.getAbsoluteFile() + "/counts.txt");
pr = new PrintWriter(outFile);
pr.print("id");
for (int i = 0; i < fromStores.length; i++) {
pr.print("\tfrom_" + i + "_meth\tfrom_" + i + "_unmeth");
}
for (int i = 0; i < toStores.length; i++) {
pr.print("\tto_" + i + "_meth\tto_" + i + "_unmeth");
}
pr.print("\n");
PROBE: for (int p = 0; p < probes.length; p++) {
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + probes[p].chromosome().name(), p, probes.length);
}
int[] fromMethCounts = new int[fromStores.length];
int[] fromUnmethCounts = new int[fromStores.length];
int[] toMethCounts = new int[toStores.length];
int[] toUnmethCounts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
long[] reads = fromStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = fromStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
fromMethCounts[i] = methCount;
fromUnmethCounts[i] = totalCount - methCount;
}
for (int i = 0; i < toStores.length; i++) {
long[] reads = toStores[i].getReadsForProbe(probes[p]);
int totalCount = 0;
int methCount = 0;
if (resample) {
float value = toStores[i].getValueForProbe(probes[p]);
if (Float.isNaN(value)) {
continue PROBE;
}
totalCount = reads.length;
methCount = Math.round((totalCount * value) / 100f);
} else {
for (int r = 0; r < reads.length; r++) {
totalCount++;
if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
++methCount;
}
}
}
toMethCounts[i] = methCount;
toUnmethCounts[i] = totalCount - methCount;
}
// Check to see we meet the requirements for the min amount of information
// and the min diff.
int totalFromMeth = 0;
int totalFrom = 0;
int totalToMeth = 0;
int totalTo = 0;
int validFrom = 0;
for (int i = 0; i < fromStores.length; i++) {
totalFromMeth += fromMethCounts[i];
totalFrom += fromMethCounts[i];
totalFrom += fromUnmethCounts[i];
if (fromMethCounts[i] > 0 || fromUnmethCounts[i] > 0) {
++validFrom;
}
}
int validTo = 0;
for (int i = 0; i < toStores.length; i++) {
totalToMeth += toMethCounts[i];
totalTo += toMethCounts[i];
totalTo += toUnmethCounts[i];
if (toMethCounts[i] > 0 || toUnmethCounts[i] > 0) {
++validTo;
}
}
// EdgeR only requires 2 valid observations
if (validFrom < 2 || validTo < 2) {
// We don't have enough data to measure this one
continue;
}
if (Math.abs((totalFromMeth * 100f / totalFrom) - (totalToMeth * 100f / totalTo)) < absDiffCutoff) {
continue;
}
float[] fromPercentages = new float[validFrom];
float[] toPercentages = new float[validTo];
int lastFromIndex = 0;
int lastToIndex = 0;
for (int i = 0; i < fromMethCounts.length; i++) {
if (fromMethCounts[i] + fromUnmethCounts[i] == 0)
continue;
fromPercentages[lastFromIndex] = fromMethCounts[i] * 100f / (fromMethCounts[i] + fromUnmethCounts[i]);
++lastFromIndex;
}
for (int i = 0; i < toMethCounts.length; i++) {
if (toMethCounts[i] + toUnmethCounts[i] == 0)
continue;
toPercentages[lastToIndex] = toMethCounts[i] * 100f / (toMethCounts[i] + toUnmethCounts[i]);
++lastToIndex;
}
if (Math.abs(SimpleStats.mean(fromPercentages) - SimpleStats.mean(toPercentages)) < absDiffCutoff) {
continue;
}
// If we get here then we're OK to use this probe so we print out its data. We put all of the
// data for one probe onto a single line. The first value is the index of the probe. The
// rest are pairs of meth/unmeth values for the from samples then the to samples.
pr.print(p);
for (int i = 0; i < fromMethCounts.length; i++) {
pr.print("\t" + fromMethCounts[i] + "\t" + fromUnmethCounts[i]);
}
for (int i = 0; i < toMethCounts.length; i++) {
pr.print("\t" + toMethCounts[i] + "\t" + toUnmethCounts[i]);
}
pr.print("\n");
}
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
newList.addProbe(probes[probeIndex], pValue);
}
br.close();
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.DataStore in project SeqMonk by s-andrews.
the class MacsPeakCaller method run.
public void run() {
// for (int i=0;i<selectedChIPStores.length;i++) {
// System.err.println("Selcted ChIP="+selectedChIPStores[i]);
// }
// for (int i=0;i<selectedInputStores.length;i++) {
// System.err.println("Selcted Input="+selectedInputStores[i]);
// }
// First find the tag offsets between the watson and crick strands
// Work out the total average coverage for all of the combined ChIP samples
long totalChIPCoverage = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
totalChIPCoverage += selectedChIPStores[i].getTotalReadLength();
}
if (cancel) {
generationCancelled();
return;
}
double averageChIPCoveragePerBase = totalChIPCoverage / (double) collection.genome().getTotalGenomeLength();
double lowerCoverage = averageChIPCoveragePerBase * minFoldEnrichment;
double upperCoverage = averageChIPCoveragePerBase * maxFoldEnrichment;
System.err.println("Coverage range for high confidence peaks is " + lowerCoverage + " - " + upperCoverage);
// Now we go through the data to find locations for our high confidence peaks so we can
// randomly select 1000 of these to use to find the offset between the two strands
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<Probe> potentialHighConfidencePeaks = new Vector<Probe>();
for (int c = 0; c < chromosomes.length; c++) {
if (cancel) {
generationCancelled();
return;
}
// Time for an update
updateGenerationProgress("Finding high confidence peaks on chromosome " + chromosomes[c].name(), c, chromosomes.length);
Probe lastValidProbe = null;
for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize; startPosition += fragmentSize / 2) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
long totalLength = 0;
Probe probe = new Probe(chromosomes[c], startPosition, startPosition + fragmentSize);
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(probe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * probe.length()) && totalLength <= upperCoverage * probe.length()) {
if (lastValidProbe != null && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
} else if (lastValidProbe != null) {
// Check that the overall density over the region falls within our limits
totalLength = 0;
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * lastValidProbe.length()) && totalLength <= upperCoverage * lastValidProbe.length()) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
lastValidProbe = probe;
} else {
lastValidProbe = probe;
}
}
}
if (lastValidProbe != null) {
long totalLength = 0;
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * lastValidProbe.length()) && totalLength <= upperCoverage * lastValidProbe.length()) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
}
}
if (potentialHighConfidencePeaks.size() == 0) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No high confidence peaks found", "Quitting generator", JOptionPane.INFORMATION_MESSAGE);
generationCancelled();
return;
}
// System.err.println("Found "+potentialHighConfidencePeaks.size()+" high confidence peaks");
// Now we select 1000 random probes from this set
Probe[] highConfidencePeaks = potentialHighConfidencePeaks.toArray(new Probe[0]);
Collections.shuffle(Arrays.asList(highConfidencePeaks));
Probe[] randomHighConfidenceProbes = new Probe[Math.min(highConfidencePeaks.length, 1000)];
for (int i = 0; i < randomHighConfidenceProbes.length; i++) {
randomHighConfidenceProbes[i] = highConfidencePeaks[i];
}
// Now find the average distance between forward / reverse reads in the candidate peaks
int[] distances = new int[highConfidencePeaks.length];
// Sort the candidates so we don't do stupid stuff with the cache
Arrays.sort(randomHighConfidenceProbes);
for (int p = 0; p < randomHighConfidenceProbes.length; p++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
distances[p] = getInterStrandDistance(randomHighConfidenceProbes[p], selectedChIPStores);
}
int medianInterStrandDistance = (int) SimpleStats.median(distances);
if (medianInterStrandDistance < 0)
medianInterStrandDistance = 0;
// System.err.println("Median inter strand difference = "+medianInterStrandDistance);
// Now we find the depth cutoff for overrepresented single tags using a binomial distribution
int totalReadCount = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
totalReadCount += selectedChIPStores[i].getTotalReadCount();
}
BinomialDistribution bin = new BinomialDistribution(totalReadCount, 1d / collection.genome().getTotalGenomeLength());
// We want to know what depth has a chance of less than 1^-5
int redundantThreshold = bin.inverseCumulativeProbability(1 - 0.00001d);
if (redundantThreshold < 1)
redundantThreshold = 1;
// System.err.println("Redundancy threshold is "+redundantThreshold);
// Now we construct a poisson distribution to work out the threshold to use for
// constructing a full candidate peak set.
updateGenerationProgress("Counting non-redundant reads", 0, 1);
// To do this we need to get the full non-redundant length from the whole set
int totalNonRedCount = getNonRedundantReadCount(selectedChIPStores, redundantThreshold);
// System.err.println("Total non-redundant sequences is "+totalNonRedCount);
// We need to know the median read length for the data
int readLength = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
readLength += selectedChIPStores[i].getTotalReadLength() / selectedChIPStores[i].getTotalReadCount();
}
readLength /= selectedChIPStores.length;
double expectedCountsPerWindow = getExpectedCountPerWindow(totalNonRedCount, collection.genome().getTotalGenomeLength(), fragmentSize, readLength);
PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);
int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);
// System.err.println("Threshold for enrichment in a window is "+readCountCutoff+" reads using a p-value of "+pValue+" and a mean of "+(totalNonRedCount/(collection.genome().getTotalGenomeLength()/(double)fragmentSize)));
// Now we go back through the whole dataset to do a search for all possible candidate probes
// We re-use the peak vector we came up with before.
potentialHighConfidencePeaks.clear();
for (int c = 0; c < chromosomes.length; c++) {
// Time for an update
updateGenerationProgress("Finding candidate peaks on chromosome " + chromosomes[c].name(), c, chromosomes.length);
Probe lastValidProbe = null;
for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize; startPosition += fragmentSize / 2) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
// We expand the region we're looking at by the inter-strand distance as we're going to
// be adjusting the read positions
Probe probe = new Probe(chromosomes[c], startPosition, (startPosition + fragmentSize - 1));
long[] mergedProbeReads = getReadsFromDataStoreCollection(probe, selectedChIPStores, medianInterStrandDistance);
mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
SequenceRead.sort(mergedProbeReads);
int thisProbeOverlapCount = 0;
for (int i = 0; i < mergedProbeReads.length; i++) {
if (SequenceRead.overlaps(mergedProbeReads[i], probe.packedPosition())) {
++thisProbeOverlapCount;
}
}
if (thisProbeOverlapCount > readCountCutoff) {
if (lastValidProbe != null && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
} else if (lastValidProbe != null) {
potentialHighConfidencePeaks.add(lastValidProbe);
lastValidProbe = probe;
} else {
lastValidProbe = probe;
}
}
}
if (lastValidProbe != null) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
}
// Finally we re-filter the peaks we have using local poisson distributions with densities taken
// from either the input samples (if there are any), or the local region. The densities are
// estimated over 1,5 and 10kb around the peak and genome wide and the max of these is taken.
// If there is no input then the 1kb region is not used.
Probe[] allCandidateProbes = potentialHighConfidencePeaks.toArray(new Probe[0]);
// Work out which stores we're using to validate against.
DataStore[] validationStores;
boolean useInput = false;
double inputCorrection = 1;
int validationNonRedCount;
if (selectedInputStores != null && selectedInputStores.length > 0) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
validationStores = selectedInputStores;
useInput = true;
// We also need to work out the total number of nonredundant seqences
// in the input so we can work out a scaling factor so that the densities
// for input and chip are comparable.
validationNonRedCount = getNonRedundantReadCount(validationStores, redundantThreshold);
inputCorrection = totalNonRedCount / (double) validationNonRedCount;
System.err.println("From chip=" + totalNonRedCount + " input=" + validationNonRedCount + " correction is " + inputCorrection);
} else {
validationStores = selectedChIPStores;
validationNonRedCount = totalNonRedCount;
}
Vector<Probe> finalValidatedProbes = new Vector<Probe>();
for (int p = 0; p < allCandidateProbes.length; p++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
if (p % 100 == 0) {
updateGenerationProgress("Validated " + p + " out of " + allCandidateProbes.length + " raw peaks", p, allCandidateProbes.length);
}
// System.err.println("Validating "+allCandidateProbes[p].chromosome()+":"+allCandidateProbes[p].start()+"-"+allCandidateProbes[p].end());
// We now need to find the maximum read density per 2*bandwidth against which
// we're going to validate this peak
// We're going to get all reads within 10kb of the peak, and then we can subselect from there
int midPoint = allCandidateProbes[p].middle();
Probe region10kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 5000, 1), Math.min(midPoint + 4999, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
Probe region5kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 2500, 1), Math.min(midPoint + 2499, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
Probe region1kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 500, 1), Math.min(midPoint + 499, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
// Get the probes for the largest region
long[] thisRegionReads = getReadsFromDataStoreCollection(region10kb, validationStores, 0);
// Deduplicate so it's a fair comparison
// Should we recalculate the redundant threshold based on the input coverage?
thisRegionReads = deduplicateReads(thisRegionReads, redundantThreshold);
int region10kbcount = thisRegionReads.length;
int region5kbcount = 0;
int region1kbcount = 0;
// Go through the reads seeing if they fit into the 5 or 1kb regions
for (int r = 0; r < thisRegionReads.length; r++) {
if (SequenceRead.overlaps(region5kb.packedPosition(), thisRegionReads[r]))
++region5kbcount;
if (SequenceRead.overlaps(region1kb.packedPosition(), thisRegionReads[r]))
++region1kbcount;
}
// System.err.println("Input counts 10kb="+region10kbcount+" 5kb="+region5kbcount+" 1kb="+region1kbcount);
// Convert to densities per window and ajdust for global coverage
double globalDensity = getExpectedCountPerWindow(validationNonRedCount, collection.genome().getTotalGenomeLength(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density10kb = getExpectedCountPerWindow(region10kbcount, region10kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density5kb = getExpectedCountPerWindow(region5kbcount, region5kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density1kb = getExpectedCountPerWindow(region1kbcount, region1kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
// Find the highest density to use for the validation
double highestDensity = globalDensity;
if (density10kb > highestDensity)
highestDensity = density10kb;
if (density5kb > highestDensity)
highestDensity = density5kb;
if (useInput && density1kb > highestDensity)
highestDensity = density1kb;
// System.err.println("Global="+globalDensity+" 10kb="+density10kb+" 5kb="+density5kb+" 1kb="+density1kb+" using="+highestDensity);
// Construct a poisson distribution with this density
PoissonDistribution localPoisson = new PoissonDistribution(highestDensity);
// System.err.println("Cutoff from global="+(new PoissonDistribution(globalDensity)).inverseCumulativeProbability(1-pValue)+" 10kb="+(new PoissonDistribution(density10kb)).inverseCumulativeProbability(1-pValue)+" 5kb="+(new PoissonDistribution(density5kb)).inverseCumulativeProbability(1-pValue)+" 1kb="+(new PoissonDistribution(density1kb)).inverseCumulativeProbability(1-pValue));
// Now check to see if the actual count from this peak is enough to still pass
long[] mergedProbeReads = getReadsFromDataStoreCollection(allCandidateProbes[p], selectedChIPStores, medianInterStrandDistance);
mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
SequenceRead.sort(mergedProbeReads);
int thisProbeOverlapCount = 0;
for (int i = 0; i < mergedProbeReads.length; i++) {
if (SequenceRead.overlaps(mergedProbeReads[i], allCandidateProbes[p].packedPosition())) {
++thisProbeOverlapCount;
}
}
if (thisProbeOverlapCount > localPoisson.inverseCumulativeProbability(1 - pValue)) {
finalValidatedProbes.add(allCandidateProbes[p]);
// System.err.println("Adding probe to final set");
}
}
// System.err.println("From "+allCandidateProbes.length+" candidates "+finalValidatedProbes.size()+" peaks were validated");
ProbeSet finalSet = new ProbeSet(getDescription(), finalValidatedProbes.toArray(new Probe[0]));
generationComplete(finalSet);
}
use of uk.ac.babraham.SeqMonk.DataTypes.DataStore in project SeqMonk by s-andrews.
the class MacsPeakCaller method getOptionsPanel.
public JPanel getOptionsPanel() {
if (optionPanel != null) {
// We've done this already
return optionPanel;
}
optionPanel = new JPanel();
optionPanel.setLayout(new GridBagLayout());
GridBagConstraints gbc = new GridBagConstraints();
gbc.gridx = 1;
gbc.gridy = 1;
gbc.weightx = 0.5;
gbc.weighty = 0.1;
gbc.fill = GridBagConstraints.BOTH;
optionPanel.add(new JLabel("ChIP Stores to use"), gbc);
gbc.gridy++;
gbc.weighty = 0.5;
DataStore[] stores = collection.getAllDataStores();
chipStoresList = new JList(stores);
chipStoresList.getSelectionModel().addListSelectionListener(this);
chipStoresList.setCellRenderer(new TypeColourRenderer());
optionPanel.add(new JScrollPane(chipStoresList), gbc);
gbc.gridy++;
gbc.weighty = 0.1;
optionPanel.add(new JLabel("Input Stores to use"), gbc);
gbc.gridy++;
gbc.weighty = 0.5;
inputStoresList = new JList(stores);
inputStoresList.getSelectionModel().addListSelectionListener(this);
inputStoresList.setCellRenderer(new TypeColourRenderer());
optionPanel.add(new JScrollPane(inputStoresList), gbc);
gbc.gridy++;
JPanel bottomPanel = new JPanel();
bottomPanel.setLayout(new GridBagLayout());
GridBagConstraints bgbc = new GridBagConstraints();
bgbc.gridx = 0;
bgbc.gridy = 0;
bgbc.weightx = 0.5;
bgbc.weighty = 0.5;
bgbc.fill = GridBagConstraints.HORIZONTAL;
bottomPanel.add(new JLabel("P-value cutoff "), bgbc);
pValueField = new JTextField("" + pValue);
pValueField.addKeyListener(new NumberKeyListener(true, false, 1));
bgbc.gridx = 1;
bottomPanel.add(pValueField, bgbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Sonicated fragment size "), bgbc);
fragmentSizeField = new JTextField("300");
fragmentSizeField.addKeyListener(new NumberKeyListener(false, false));
bgbc.gridx = 1;
bottomPanel.add(fragmentSizeField, bgbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Skip deduplication step "), bgbc);
skipDeduplicationBox = new JCheckBox();
bgbc.gridx = 1;
bottomPanel.add(skipDeduplicationBox, bgbc);
optionPanel.add(bottomPanel, gbc);
return optionPanel;
}
use of uk.ac.babraham.SeqMonk.DataTypes.DataStore in project SeqMonk by s-andrews.
the class LogisticRegressionSplicingFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it an collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
probes = filterProbesByCount(probes, fromStores, toStores);
progressUpdated("Pairing Probes", 0, 1);
// Make pairs of probes to test
ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
System.err.println("Found " + pairs.length + " pairs to test");
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("PVALUE", "" + pValueCutoff);
template.setValue("CORRECTCOUNT", "" + pairs.length);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
pr = null;
String lastChr = "";
for (int p = 0; p < pairs.length; p++) {
if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = pairs[p].probe1.chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromProbe1Counts = new int[fromStores.length];
int[] fromProbe2Counts = new int[fromStores.length];
int[] toProbe1Counts = new int[toStores.length];
int[] toProbe2Counts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < toStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < fromProbe1Counts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
}
for (int i = 0; i < toProbe1Counts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
}
}
if (pr != null)
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
// In case the same probe is found multiple times we'll cache the p-values
// we see and report the best one.
HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
// See if the pvalue we've got is better than the one we're storing
if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
}
br.close();
for (Probe probeHit : cachedHits.keySet()) {
newList.addProbe(probeHit, cachedHits.get(probeHit));
}
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
use of uk.ac.babraham.SeqMonk.DataTypes.DataStore in project SeqMonk by s-andrews.
the class ReadPositionProbeGenerator method getOptionsPanel.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.ProbeGenerators.ProbeGenerator#getOptionsPanel(uk.ac.babraham.SeqMonk.SeqMonkApplication)
*/
public JPanel getOptionsPanel() {
if (optionPanel != null) {
// We've done this already
return optionPanel;
}
optionPanel = new JPanel();
optionPanel.setLayout(new GridBagLayout());
GridBagConstraints gbc = new GridBagConstraints();
gbc.gridx = 1;
gbc.gridy = 1;
gbc.weightx = 0.5;
gbc.weighty = 0.1;
gbc.fill = GridBagConstraints.BOTH;
optionPanel.add(new JLabel("Stores to use"), gbc);
gbc.gridy++;
gbc.weighty = 0.9;
DataStore[] stores = collection.getAllDataStores();
storesList = new JList(stores);
storesList.getSelectionModel().addListSelectionListener(this);
storesList.setCellRenderer(new TypeColourRenderer());
optionPanel.add(new JScrollPane(storesList), gbc);
gbc.gridy++;
JPanel bottomPanel = new JPanel();
bottomPanel.setLayout(new GridBagLayout());
GridBagConstraints bgbc = new GridBagConstraints();
bgbc.gridx = 0;
bgbc.gridy = 0;
bgbc.weightx = 0.5;
bgbc.weighty = 0.5;
bgbc.fill = GridBagConstraints.HORIZONTAL;
bottomPanel.add(new JLabel("Use reads on strand "), bgbc);
readStrandTypeBox = new JComboBox(ReadStrandType.getTypeOptions());
bgbc.gridx = 1;
bottomPanel.add(readStrandTypeBox, bgbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Minimum read count per position"), bgbc);
bgbc.gridx = 1;
minCountField = new JTextField("" + minCount, 4);
minCountField.addKeyListener(this);
bottomPanel.add(minCountField, bgbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Valid positions per window"), bgbc);
bgbc.gridx = 1;
readsPerWindowField = new JTextField("" + readsPerWindow, 4);
readsPerWindowField.addKeyListener(new NumberKeyListener(false, false));
readsPerWindowField.addKeyListener(this);
bottomPanel.add(readsPerWindowField, bgbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Limit to region"), bgbc);
bgbc.gridx = 1;
JPanel limitPanel = new JPanel();
limitPanel.setLayout(new BorderLayout());
designWithinExistingBox = new JCheckBox();
limitPanel.add(designWithinExistingBox, BorderLayout.WEST);
if (collection.probeSet() == null) {
limitRegionBox = new JComboBox(new String[] { "Currently Visible Region" });
} else {
limitRegionBox = new JComboBox(new String[] { "Active Probe List", "Currently Visible Region" });
}
limitRegionBox.setEnabled(false);
designWithinExistingBox.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent arg0) {
limitRegionBox.setEnabled(designWithinExistingBox.isSelected());
isReady();
}
});
limitPanel.add(limitRegionBox, BorderLayout.CENTER);
bottomPanel.add(limitPanel, bgbc);
optionPanel.add(bottomPanel, gbc);
bgbc.gridx = 0;
bgbc.gridy++;
bottomPanel.add(new JLabel("Ignore Strand "), bgbc);
bgbc.gridx = 1;
ignoreStrandBox = new JCheckBox();
ignoreStrandBox.setSelected(true);
bottomPanel.add(ignoreStrandBox, bgbc);
optionPanel.add(bottomPanel, gbc);
return optionPanel;
}
Aggregations