use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class RNASeqPipeline method startPipeline.
protected void startPipeline() {
// We first need to generate probes over all of the features listed in
// the feature types. The probes should cover the whole area of the
// feature regardless of where it splices.
Vector<Probe> probes = new Vector<Probe>();
boolean mergeTranscripts = optionsPanel.mergeTranscripts();
boolean pairedEnd = optionsPanel.pairedEnd();
boolean logTransform = optionsPanel.logTransform();
boolean applyTranscriptLengthCorrection = optionsPanel.applyTranscriptLengthCorrection();
boolean rawCounts = optionsPanel.rawCounts();
boolean noValueForZeroCounts = optionsPanel.noValueForZeroCounts();
boolean correctDNAContamination = optionsPanel.correctForDNAContamination();
boolean correctDuplication = optionsPanel.correctForDNADuplication();
if (rawCounts) {
logTransform = false;
applyTranscriptLengthCorrection = false;
noValueForZeroCounts = false;
}
Chromosome[] chrs = collection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
// System.err.println("Processing chr "+chrs[c].name());
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Making features for chr" + chrs[c].name(), c, chrs.length * 2);
Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
Arrays.sort(features);
FeatureGroup[] mergedTranscripts = mergeTranscripts(features, mergeTranscripts);
for (int f = 0; f < mergedTranscripts.length; f++) {
if (cancel) {
progressCancelled();
return;
}
probes.add(new Probe(chrs[c], mergedTranscripts[f].start(), mergedTranscripts[f].end(), mergedTranscripts[f].strand(), mergedTranscripts[f].name()));
}
}
Probe[] allProbes = probes.toArray(new Probe[0]);
Arrays.sort(allProbes);
if (collection().probeSet() == null) {
collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
} else {
Probe[] existingProbes = collection().probeSet().getAllProbes();
Arrays.sort(existingProbes);
if (allProbes.length != existingProbes.length) {
collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
} else {
// Check the positions against the new ones
boolean areTheyTheSame = true;
for (int p = 0; p < allProbes.length; p++) {
if (allProbes[p].packedPosition() != existingProbes[p].packedPosition()) {
areTheyTheSame = false;
break;
}
}
if (areTheyTheSame) {
allProbes = existingProbes;
} else {
collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
}
}
}
// If we're correcting for DNA contamination we need to work out the average density of
// reads in intergenic regions
float[] dnaDensityPerKb = new float[data.length];
int[] correctedTotalCounts = new int[data.length];
if (correctDNAContamination) {
// We need to make interstitial probes to the set we already have, ignoring those at the end of chromosomes
Vector<Probe> intergenicProbes = new Vector<Probe>();
Chromosome lastChr = allProbes[0].chromosome();
for (int p = 1; p < allProbes.length; p++) {
if (allProbes[p].chromosome() != lastChr) {
lastChr = allProbes[p].chromosome();
continue;
}
// See if there's a gap back to the last probe
if (allProbes[p].start() > allProbes[p - 1].end()) {
if (allProbes[p].start() - allProbes[p - 1].end() < 1000) {
// Don't bother with really short probes
continue;
}
intergenicProbes.add(new Probe(lastChr, allProbes[p - 1].end() + 1, allProbes[p].start() - 1));
}
}
Probe[] allIntergenicProbes = intergenicProbes.toArray(new Probe[0]);
for (int d = 0; d < data.length; d++) {
progressUpdated("Quantitating DNA contamination", 1, 2);
float[] densities = new float[allIntergenicProbes.length];
for (int p = 0; p < allIntergenicProbes.length; p++) {
densities[p] = data[d].getReadsForProbe(allIntergenicProbes[p]).length / (allIntergenicProbes[p].length() / 1000f);
}
dnaDensityPerKb[d] = SimpleStats.median(densities);
}
// Work out adjusted total counts having subtracted the DNA contamination
for (int d = 0; d < data.length; d++) {
int predictedContamination = (int) (dnaDensityPerKb[d] * (SeqMonkApplication.getInstance().dataCollection().genome().getTotalGenomeLength() / 1000));
int correctedTotalReadCount = data[d].getTotalReadCount() - predictedContamination;
correctedTotalCounts[d] = correctedTotalReadCount;
}
// Halve the density if they're doing a directional quantitation
if (optionsPanel.isDirectional()) {
for (int i = 0; i < dnaDensityPerKb.length; i++) {
dnaDensityPerKb[i] /= 2;
}
}
// Halve the density if the libraries are paired end
if (pairedEnd) {
for (int i = 0; i < dnaDensityPerKb.length; i++) {
dnaDensityPerKb[i] /= 2;
}
}
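// Worked example (hypothetical numbers): a median intergenic density of 0.5 reads/kb
// across a 3,000,000 kb genome predicts ~1,500,000 contaminating reads, which is
// subtracted from that store's total read count above. For a directional, paired end
// library the per-kb density carried forward to the per-gene correction then becomes
// 0.5 / 2 / 2 = 0.125.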
}
// If we're correcting for duplication we need to work out the modal count depth in
// intergenic regions
int[] modalDuplicationLevels = new int[data.length];
if (correctDuplication) {
for (int d = 0; d < data.length; d++) {
progressUpdated("Quantitating DNA duplication", 1, 2);
// We're not going to look at depths which are > 200. If it's that duplicated
// then there's no point continuing anyway.
int[] depthCount = new int[200];
for (int p = 0; p < allProbes.length; p++) {
long[] reads = data[d].getReadsForProbe(allProbes[p]);
int currentCount = 0;
for (int r = 1; r < reads.length; r++) {
if (reads[r] == reads[r - 1]) {
++currentCount;
} else {
if (currentCount > 0 && currentCount < 200) {
++depthCount[currentCount];
}
currentCount = 1;
}
}
}
// Find the modal duplicate depth over these probes. This is the best estimate
// of duplication.
// Since unique reads turn up all over the place, even in duplicated data,
// we say that if there are more unique reads than the sum of depths 2-10
// then there is no duplication.
int twoTenSum = 0;
for (int i = 2; i <= 10; i++) {
twoTenSum += depthCount[i];
}
if (depthCount[1] > twoTenSum) {
modalDuplicationLevels[d] = 1;
} else {
int highestDepth = 0;
int bestDupGuess = 1;
for (int i = 2; i < depthCount.length; i++) {
// System.err.println("For depth "+i+" count was "+depthCount[i]);
if (depthCount[i] > highestDepth) {
bestDupGuess = i;
highestDepth = depthCount[i];
}
}
modalDuplicationLevels[d] = bestDupGuess;
}
}
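// Worked example (hypothetical counts): if depthCount[1] = 10,000 while depths 2-10 sum
// to 60,000 with a peak at depthCount[4] = 25,000, the modal duplication level is 4 and
// every probe count for that store is later divided by 4.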
}
// Having made probes we now need to quantitate them. We'll fetch the
// probes overlapping each sub-feature and then aggregate these together
// to get the final quantitation.
QuantitationStrandType readFilter = optionsPanel.readFilter();
int currentIndex = 0;
for (int c = 0; c < chrs.length; c++) {
if (cancel) {
progressCancelled();
return;
}
progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
Arrays.sort(features);
FeatureGroup[] mergedTranscripts = mergeTranscripts(features, mergeTranscripts);
int[] readLengths = new int[data.length];
for (int d = 0; d < data.length; d++) {
readLengths[d] = data[d].getMaxReadLength();
// For paired end data we double the maximum read length to approximate the
// actual length.
if (pairedEnd) {
readLengths[d] *= 2;
}
}
for (int f = 0; f < mergedTranscripts.length; f++) {
Location[] subLocations = mergedTranscripts[f].getSubLocations();
int totalLength = 0;
// Find the total length of all of the exons
for (int s = 0; s < subLocations.length; s++) {
totalLength += subLocations[s].length();
}
for (int d = 0; d < data.length; d++) {
if (cancel) {
progressCancelled();
return;
}
long totalCount = 0;
for (int s = 0; s < subLocations.length; s++) {
long[] reads = data[d].getReadsForProbe(new Probe(chrs[c], subLocations[s].start(), subLocations[s].end()));
for (int r = 0; r < reads.length; r++) {
if (!readFilter.useRead(subLocations[s], reads[r])) {
continue;
}
int overlap = (Math.min(subLocations[s].end(), SequenceRead.end(reads[r])) - Math.max(subLocations[s].start(), SequenceRead.start(reads[r]))) + 1;
totalCount += overlap;
}
}
// Now we correct the count by the total length of reads in the data and by
// the length of the split parts of the probe, and assign this to the probe.
// Since we're correcting for read length, we work out the whole number of
// reads which this count could comprise, rounding down.
totalCount /= readLengths[d];
// We can now subtract the DNA contamination prediction.
if (correctDNAContamination) {
int predictedContamination = (int) ((totalLength / 1000f) * dnaDensityPerKb[d]);
totalCount -= predictedContamination;
// Makes no sense to have negative counts
if (totalCount < 0)
totalCount = 0;
}
// ..and we can divide by the duplication level if we know it.
if (correctDuplication) {
totalCount /= modalDuplicationLevels[d];
}
// System.err.println("Total read count for "+mergedTranscripts[f].name+" is "+totalCount);
float value = totalCount;
if (value == 0 && noValueForZeroCounts) {
value = Float.NaN;
}
// If we're log transforming then we need to set zero values to 0.9
if (logTransform && value == 0 && !noValueForZeroCounts) {
value = 0.9f;
}
// Correct for the length of the transcript if we've been asked to.
if (applyTranscriptLengthCorrection) {
value /= (totalLength / 1000f);
}
// We also correct by the total read count
if (!rawCounts) {
// System.err.println("True total is "+data[d].getTotalReadCount()+" corrected total is "+correctedTotalCounts[d]);
// If these libraries are paired end then the total number of
// reads is also effectively halved.
float totalReadCount;
// If we're correcting for DNA contamination we'll have calculated this already,
// but otherwise we'll take the total count (total length/read length)
if (correctDNAContamination) {
totalReadCount = correctedTotalCounts[d];
} else {
totalReadCount = data[d].getTotalReadLength() / readLengths[d];
}
// If we're correcting for duplication we divide by the duplication level.
if (correctDuplication) {
totalReadCount /= modalDuplicationLevels[d];
}
// Finally we work out millions of reads (single end) or fragments (paired end)
if (pairedEnd) {
totalReadCount /= 2000000f;
} else {
totalReadCount /= 1000000f;
}
// Lastly we divide the value by the total millions of reads to get the globally corrected count.
value /= totalReadCount;
}
// Finally we do the log transform if we've been asked to
if (logTransform) {
value = (float) Math.log(value) / log2;
}
data[d].setValueForProbe(allProbes[currentIndex], value);
}
currentIndex++;
}
}
collection().probeSet().setCurrentQuantitation(getQuantitationDescription(mergeTranscripts, applyTranscriptLengthCorrection, correctDNAContamination, logTransform, rawCounts));
// If we estimated any parameters let's report them.
if (correctDNAContamination || correctDuplication) {
float[] dna = null;
if (correctDNAContamination) {
dna = dnaDensityPerKb;
}
int[] dup = null;
if (correctDuplication) {
dup = modalDuplicationLevels;
}
RNASeqParametersModel model = new RNASeqParametersModel(data, dna, dup);
ReportTableDialog report = new ReportTableDialog(SeqMonkApplication.getInstance(), new Report(null, null) {
@Override
public void run() {
}
@Override
public String name() {
return "RNA-Seq parameter";
}
@Override
public boolean isReady() {
return true;
}
@Override
public JPanel getOptionsPanel() {
return null;
}
@Override
public void generateReport() {
}
}, model);
}
quantitatonComplete();
}
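Taken together, the per-gene arithmetic in the loop above amounts to an RPKM-style calculation with optional contamination and duplication corrections. The standalone sketch below reproduces that arithmetic for a single gene and data store; all names and input values are hypothetical, and no SeqMonk API calls are used.
// Standalone sketch of the per-gene quantitation above (hypothetical names and
// example inputs, not the SeqMonk API).
public class RnaSeqQuantitationSketch {
    public static void main(String[] args) {
        long overlappingBases = 1_200_000;     // summed exon/read overlap for one gene
        int readLength = 200;                  // max read length, already doubled for paired end
        int exonLengthBp = 3_000;              // total exon length of the gene
        float dnaDensityPerKb = 0.125f;        // contamination density after directional/paired-end halving
        int duplicationLevel = 2;              // modal duplicate depth
        long correctedTotalReads = 20_000_000; // library total after contamination subtraction

        // Convert overlapping bases into an equivalent whole number of reads
        long count = overlappingBases / readLength;
        // Subtract the predicted genomic DNA contamination for this gene
        count -= (int) ((exonLengthBp / 1000f) * dnaDensityPerKb);
        count = Math.max(count, 0);
        // Divide out the estimated duplication level
        count /= duplicationLevel;

        float value = count;
        // Zeros are bumped to 0.9 before correction so the log transform stays finite
        if (value == 0) value = 0.9f;
        // Per-kb transcript length correction
        value /= (exonLengthBp / 1000f);
        // Per-million fragments (paired end) library size correction
        float millions = (correctedTotalReads / (float) duplicationLevel) / 2_000_000f;
        value /= millions;
        // Final log2 transform
        value = (float) (Math.log(value) / Math.log(2));

        System.out.println("log2 corrected expression = " + value);
    }
}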
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class ZScoreScatterPlotPanel method calculateNonredundantSet.
/**
* This collapses individual points which are over the same
* pixel when redrawing the plot at a different scale
*/
private synchronized void calculateNonredundantSet() {
closestPoint = null;
ProbePairValue[][] grid = new ProbePairValue[getWidth()][getHeight()];
try {
for (int p = 0; p < probes.length; p++) {
/* x value is the mean intensity */
float xValue = (xStore.getValueForProbe(probes[p]) + yStore.getValueForProbe(probes[p])) / 2;
/* y value is the z-score */
float yValue = probeZScoreLookupTable.get(probes[p]).floatValue();
if (Float.isNaN(xValue) || Float.isInfinite(xValue) || Float.isNaN(yValue) || Float.isInfinite(yValue)) {
continue;
}
int x = getX(xValue);
int y = getY(yValue);
if (grid[x][y] == null) {
grid[x][y] = new ProbePairValue(xValue, yValue, x, y);
grid[x][y].setProbe(probes[p]);
} else {
// Only increment the count when we aren't showing sublists, since with
// sublists the count records which list the points belong to
if (subLists == null)
grid[x][y].count++;
// As we have multiple probes at this point we remove the
// specific probe annotation.
grid[x][y].setProbe(null);
}
}
if (subLists != null) {
for (int s = 0; s < subLists.length; s++) {
Probe[] subListProbes = subLists[s].getAllProbes();
for (int p = 0; p < subListProbes.length; p++) {
// TODO: this needs to be sorted out properly - it's to do with the filtering out of NA probes
if (probeZScoreLookupTable.containsKey(subListProbes[p])) {
// System.err.println("p = " + p + ", name = " + subListProbes[p].name());
/* x value is the mean intensity */
float xValue = (xStore.getValueForProbe(subListProbes[p]) + yStore.getValueForProbe(subListProbes[p])) / 2;
/* y value is the z-score */
float yValue = probeZScoreLookupTable.get(subListProbes[p]).floatValue();
// System.err.println("for " + probes[p].name() + ", xValue = " + xValue+ ", yValue = " + yValue);
int x = getX(xValue);
int y = getY(yValue);
if (grid[x][y] == null) {
// This messes up where we catch it in the middle of a redraw
continue;
// throw new IllegalArgumentException("Found subList position not in main list");
}
// 1 = no list so 2 is the lowest sublist index
grid[x][y].count = s + 2;
}
}
}
}
} catch (SeqMonkException e) {
throw new IllegalStateException(e);
}
// Now we need to put all of the ProbePairValues into
// a single array;
int count = 0;
for (int x = 0; x < grid.length; x++) {
for (int y = 0; y < grid[x].length; y++) {
if (grid[x][y] != null)
count++;
}
}
ProbePairValue[] nonred = new ProbePairValue[count];
count--;
for (int x = 0; x < grid.length; x++) {
for (int y = 0; y < grid[x].length; y++) {
if (grid[x][y] != null) {
nonred[count] = grid[x][y];
count--;
}
}
}
Arrays.sort(nonred);
// Work out the 95th percentile count
int minCount = 1;
int maxCount = 2;
if (nonred.length > 0) {
minCount = nonred[0].count;
maxCount = nonred[((nonred.length - 1) * 95) / 100].count;
}
// Go through every nonred assigning a suitable colour
ColourGradient gradient = new HotColdColourGradient();
for (int i = 0; i < nonred.length; i++) {
if (subLists == null) {
nonred[i].color = gradient.getColor(nonred[i].count, minCount, maxCount);
} else {
if (nonred[i].count > subLists.length + 1) {
throw new IllegalArgumentException("Count above threshold when showing sublists");
}
if (nonred[i].count == 1) {
nonred[i].color = Color.LIGHT_GRAY;
} else {
nonred[i].color = ColourIndexSet.getColour(nonred[i].count - 2);
}
}
}
nonRedundantValues = nonred;
lastNonredWidth = getWidth();
lastNonredHeight = getHeight();
}
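The collapsing step above amounts to binning every (x, y) point to its on-screen pixel and keeping one representative value per occupied pixel, along with a count of how many points landed there. A minimal standalone sketch of that idea follows; the names, canvas size and random stand-in data are hypothetical, not the SeqMonk ProbePairValue machinery.
import java.util.Random;

// Minimal sketch of per-pixel collapsing as in calculateNonredundantSet
// (hypothetical names and simulated data).
public class PixelCollapseSketch {
    static final int WIDTH = 800, HEIGHT = 600;

    public static void main(String[] args) {
        Random rnd = new Random(42);
        int[][] counts = new int[WIDTH][HEIGHT];
        for (int p = 0; p < 100_000; p++) {
            double x = rnd.nextGaussian();   // stand-in for mean intensity
            double y = rnd.nextGaussian();   // stand-in for z-score
            int px = toPixel(x, -4, 4, WIDTH);
            int py = toPixel(y, -4, 4, HEIGHT);
            counts[px][py]++;                // many probes collapse onto one pixel
        }
        int occupied = 0;
        for (int[] col : counts) for (int c : col) if (c > 0) occupied++;
        System.out.println("Non-redundant points to draw: " + occupied);
    }

    // Map a data value into [0, size-1] given fixed axis limits
    static int toPixel(double v, double min, double max, int size) {
        int i = (int) ((v - min) / (max - min) * (size - 1));
        return Math.max(0, Math.min(size - 1, i));
    }
}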
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class IntensityReplicateFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
Probe[] probes = startingList.getAllProbes();
// We need to work out how many probes are going to be put into
// each sub-distribution we calculate. The rule is going to be that
// we use 1% of the total or 100 probes, whichever is the higher
probesPerSet = probes.length / 100;
if (probesPerSet < 100)
probesPerSet = 100;
if (probesPerSet > probes.length)
probesPerSet = probes.length;
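// e.g. 50,000 probes gives probesPerSet = 500; 5,000 probes gives 50, which is raised
// to the minimum of 100; 80 probes gives 100, which is then capped back at 80.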
ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", "Diff p-value");
// We'll build up a set of p-values as we go along
float[] lowestPValues = new float[probes.length];
for (int p = 0; p < lowestPValues.length; p++) {
lowestPValues[p] = 1;
}
// These arrays are going to hold the real SDs for the replicate
// sets we're going to analyse.
double[] realSDFrom = new double[probes.length];
double[] realSDTo = new double[probes.length];
// These arrays are going to hold the averaged SDs for the replicate
// sets we're going to analyse.
double[] averageSDFrom = new double[probes.length];
double[] averageSDTo = new double[probes.length];
// This is going to be the temporary array we populate with the set of
// differences we are going to analyse.
double[] localSDset = new double[probesPerSet];
// First work out the set of comparisons we're going to make
Vector<SingleComparison> comparisons = new Vector<IntensityReplicateFilter.SingleComparison>();
for (int fromIndex = 0; fromIndex < fromStores.length; fromIndex++) {
for (int toIndex = 0; toIndex < toStores.length; toIndex++) {
if (fromStores[fromIndex] == toStores[toIndex])
continue;
// If we can find the fromStore in the toStores we've already done and the
// toStore anywhere in the fromStores then we can skip this.
boolean canSkip = false;
for (int i = 0; i < fromIndex; i++) {
if (fromStores[i] == toStores[toIndex]) {
for (int j = 0; j < toStores.length; j++) {
if (toStores[j] == fromStores[fromIndex]) {
canSkip = true;
break;
}
}
break;
}
}
if (canSkip)
continue;
comparisons.add(new SingleComparison(fromIndex, toIndex));
}
}
for (int comparisonIndex = 0; comparisonIndex < comparisons.size(); comparisonIndex++) {
int fromIndex = comparisons.elementAt(comparisonIndex).fromIndex;
int toIndex = comparisons.elementAt(comparisonIndex).toIndex;
// We need to generate a set of probe indices ordered by their average intensity
Integer[] fromIndices = new Integer[probes.length];
Integer[] toIndices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
fromIndices[i] = i;
toIndices[i] = i;
}
// This isn't ideal. We're sorting the probes by average intensity which puts together
// probes which should probably have different standard deviations. It would be better
// to sort on the intensities in each data store separately, but we get such a problem
// from very low values contaminating the set by giving artificially low average SDs that
// I don't think we can get away with this.
Comparator<Integer> fromComp = new AveragePairedIntensityComparator(fromStores[fromIndex], toStores[toIndex], probes);
Comparator<Integer> toComp = new AveragePairedIntensityComparator(toStores[toIndex], fromStores[fromIndex], probes);
// Comparator<Integer> fromComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
// Comparator<Integer> toComp = new AverageIntensityComparator(fromStores[fromIndex], probes);
Arrays.sort(fromIndices, fromComp);
Arrays.sort(toIndices, toComp);
// We also need to get the raw SDs for the two replicate sets
double[] fromValues = new double[fromStores[fromIndex].dataStores().length];
double[] toValues = new double[toStores[toIndex].dataStores().length];
try {
for (int p = 0; p < probes.length; p++) {
for (int f = 0; f < fromValues.length; f++) {
fromValues[f] = fromStores[fromIndex].dataStores()[f].getValueForProbe(probes[p]);
}
for (int t = 0; t < toValues.length; t++) {
toValues[t] = toStores[toIndex].dataStores()[t].getValueForProbe(probes[p]);
}
realSDFrom[p] = SimpleStats.stdev(fromValues);
realSDTo[p] = SimpleStats.stdev(toValues);
}
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
}
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", comparisonIndex, comparisons.size());
IndexTTestValue[] currentPValues = new IndexTTestValue[probes.length];
for (int i = 0; i < probes.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / probes.length;
progress += 100 * comparisonIndex;
progressUpdated("Made " + comparisonIndex + " out of " + comparisons.size() + " comparisons", progress, comparisons.size() * 100);
}
// We need to make up the set of SDs to represent this probe
int startingIndex = i - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + probesPerSet >= probes.length)
startingIndex = probes.length - probesPerSet;
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDFrom[fromIndices[j]];
}
// averageSDFrom[fromIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDFrom[fromIndices[i]] = SimpleStats.mean(localSDset);
for (int j = startingIndex; j < startingIndex + probesPerSet; j++) {
localSDset[j - startingIndex] = realSDTo[toIndices[j]];
}
// averageSDTo[toIndices[i]] = SimpleStats.percentile(localSDset,90);
averageSDTo[toIndices[i]] = SimpleStats.mean(localSDset);
}
for (int p = 0; p < probes.length; p++) {
double fromValue = 0;
double toValue = 0;
try {
fromValue = fromStores[fromIndex].getValueForProbe(probes[p]);
toValue = toStores[toIndex].getValueForProbe(probes[p]);
} catch (SeqMonkException sme) {
}
double fromSD = Math.max(realSDFrom[p], averageSDFrom[p]);
double toSD = Math.max(realSDTo[p], averageSDTo[p]);
// double fromSD = averageSDFrom[p];
// double toSD = averageSDTo[p];
currentPValues[p] = new IndexTTestValue(p, TTest.calculatePValue(fromValue, toValue, fromSD, toSD, fromStores[fromIndex].dataStores().length, toStores[toIndex].dataStores().length));
// System.err.println("P value was "+currentPValues[p].p+" from "+fromValue+" "+toValue+" "+fromSD+" "+toSD+" "+fromStores[fromIndex].dataStores().length+" "+toStores[toIndex].dataStores().length);
}
// We now need to correct the set of pValues
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(currentPValues);
}
// Merge this comparison's values into the combined set, keeping the lowest
// value seen for each probe
if (applyMultipleTestingCorrection) {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].q < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].q;
}
}
} else {
for (int i = 0; i < currentPValues.length; i++) {
if (currentPValues[i].p < lowestPValues[currentPValues[i].index]) {
lowestPValues[currentPValues[i].index] = (float) currentPValues[i].p;
}
}
}
}
// Finally, see which probes have a lowest p-value below the cutoff and so
// pass the filter.
for (int i = 0; i < lowestPValues.length; i++) {
if (lowestPValues[i] < pValueLimit) {
newList.addProbe(probes[i], lowestPValues[i]);
}
}
filterFinished(newList);
}
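The key smoothing idea in the loop above is to replace each probe's raw standard deviation with the mean SD of a window of probes with similar intensity, then use whichever of the raw and smoothed SDs is larger in the t-test. A minimal sketch of the window logic follows, with hypothetical data; the probes are assumed to be pre-sorted by average intensity, as the filter arranges via fromIndices/toIndices.
import java.util.Arrays;

// Sketch of the sliding-window SD smoothing (hypothetical data, pre-sorted by intensity).
public class LocalSdSmoothingSketch {
    public static void main(String[] args) {
        double[] realSD = { 0.12, 0.30, 0.08, 0.25, 0.40, 0.10, 0.22, 0.18, 0.35, 0.15 };
        int windowSize = 4; // stands in for probesPerSet
        double[] smoothedSD = new double[realSD.length];
        for (int i = 0; i < realSD.length; i++) {
            // Centre the window on i, clamping it to the ends of the array
            int start = Math.min(Math.max(i - windowSize / 2, 0), realSD.length - windowSize);
            double sum = 0;
            for (int j = start; j < start + windowSize; j++) sum += realSD[j];
            smoothedSD[i] = sum / windowSize;
            // The filter then uses Math.max(realSD[i], smoothedSD[i]) in the t-test, so a
            // probe can never look less variable than its local neighbourhood.
        }
        System.out.println(Arrays.toString(smoothedSD));
    }
}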
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class LogisticRegressionSplicingFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
@Override
protected void generateProbeList() {
// We need to make a temporary directory, save the data into it, write out the R script
// and then run it and collect the list of results, then clean up.
// Make up the list of DataStores in each replicate set
DataStore[] fromStores = replicateSets[0].dataStores();
DataStore[] toStores = replicateSets[1].dataStores();
File tempDir;
try {
Probe[] probes = startingList.getAllProbes();
probes = filterProbesByCount(probes, fromStores, toStores);
progressUpdated("Pairing Probes", 0, 1);
// Make pairs of probes to test
ProbePair[] pairs = makeProbePairs(probes, fromStores, toStores);
System.err.println("Found " + pairs.length + " pairs to test");
progressUpdated("Creating temp directory", 0, 1);
tempDir = TempDirectory.createTempDirectory();
System.err.println("Temp dir is " + tempDir.getAbsolutePath());
progressUpdated("Writing R script", 0, 1);
// Get the template script
Template template = new Template(ClassLoader.getSystemResource("uk/ac/babraham/SeqMonk/Filters/LogisticRegressionFilter/logistic_regression_template.r"));
// Substitute in the variables we need to change
template.setValue("WORKING", tempDir.getAbsolutePath().replace("\\", "/"));
template.setValue("PVALUE", "" + pValueCutoff);
template.setValue("CORRECTCOUNT", "" + pairs.length);
if (multiTest) {
template.setValue("MULTITEST", "TRUE");
} else {
template.setValue("MULTITEST", "FALSE");
}
// Write the script file
File scriptFile = new File(tempDir.getAbsoluteFile() + "/script.r");
PrintWriter pr = new PrintWriter(scriptFile);
pr.print(template.toString());
pr.close();
// Write the count data
// Sort these so we can get probes from the same chromosome together
Arrays.sort(probes);
pr = null;
String lastChr = "";
for (int p = 0; p < pairs.length; p++) {
if (!pairs[p].probe1.chromosome().name().equals(lastChr)) {
if (pr != null)
pr.close();
File outFile = new File(tempDir.getAbsoluteFile() + "/data_chr" + pairs[p].probe1.chromosome().name() + ".txt");
pr = new PrintWriter(outFile);
lastChr = pairs[p].probe1.chromosome().name();
pr.println("id\tgroup\treplicate\tstate\tcount");
}
if (p % 1000 == 0) {
progressUpdated("Writing data for chr" + lastChr, p, probes.length);
}
int[] fromProbe1Counts = new int[fromStores.length];
int[] fromProbe2Counts = new int[fromStores.length];
int[] toProbe1Counts = new int[toStores.length];
int[] toProbe2Counts = new int[toStores.length];
for (int i = 0; i < fromStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
fromProbe1Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe1);
fromProbe2Counts[i] = (int) fromStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < toStores.length; i++) {
// TODO: For the moment we'll expect they've quantitated this themselves
toProbe1Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe1);
toProbe2Counts[i] = (int) toStores[i].getValueForProbe(pairs[p].probe2);
}
for (int i = 0; i < fromProbe1Counts.length; i++) {
pr.println(p + "\tfrom\tfrom" + i + "\tmeth\t" + fromProbe1Counts[i]);
pr.println(p + "\tfrom\tfrom" + i + "\tunmeth\t" + fromProbe2Counts[i]);
}
for (int i = 0; i < toProbe1Counts.length; i++) {
pr.println(p + "\tto\tto" + i + "\tmeth\t" + toProbe1Counts[i]);
pr.println(p + "\tto\tto" + i + "\tunmeth\t" + toProbe2Counts[i]);
}
}
if (pr != null)
pr.close();
progressUpdated("Running R Script", 0, 1);
RScriptRunner runner = new RScriptRunner(tempDir);
RProgressListener listener = new RProgressListener(runner);
runner.addProgressListener(new ProgressRecordDialog("R Session", runner));
runner.runScript();
while (true) {
if (listener.cancelled()) {
progressCancelled();
pr.close();
return;
}
if (listener.exceptionReceived()) {
progressExceptionReceived(new SeqMonkException("R Script failed"));
pr.close();
return;
}
if (listener.complete())
break;
Thread.sleep(500);
}
// We can now parse the results and put the hits into a new probe list
ProbeList newList;
if (multiTest) {
newList = new ProbeList(startingList, "", "", "FDR");
} else {
newList = new ProbeList(startingList, "", "", "p-value");
}
File hitsFile = new File(tempDir.getAbsolutePath() + "/hits.txt");
BufferedReader br = new BufferedReader(new FileReader(hitsFile));
String line = br.readLine();
// In case the same probe is found multiple times we'll cache the p-values
// we see and report the best one.
HashMap<Probe, Float> cachedHits = new HashMap<Probe, Float>();
while ((line = br.readLine()) != null) {
String[] sections = line.split("\t");
String[] indexSections = sections[0].split("\\.");
int probeIndex = Integer.parseInt(indexSections[indexSections.length - 1]);
float pValue = Float.parseFloat(sections[sections.length - 1]);
if (!cachedHits.containsKey(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (!cachedHits.containsKey(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
// See if the pvalue we've got is better than the one we're storing
if (pValue < cachedHits.get(pairs[probeIndex].probe1)) {
cachedHits.put(pairs[probeIndex].probe1, pValue);
}
if (pValue < cachedHits.get(pairs[probeIndex].probe2)) {
cachedHits.put(pairs[probeIndex].probe2, pValue);
}
}
br.close();
for (Probe probeHit : cachedHits.keySet()) {
newList.addProbe(probeHit, cachedHits.get(probeHit));
}
runner.cleanUp();
filterFinished(newList);
} catch (Exception ioe) {
progressExceptionReceived(ioe);
return;
}
}
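Because a probe can appear in more than one tested pair, the parsed hits are merged so that each probe keeps the smallest p-value seen for it. A minimal standalone sketch of that caching step follows, using hypothetical String keys and example values in place of Probe objects.
import java.util.HashMap;
import java.util.Map;

// Sketch of keeping the best (smallest) p-value per probe when a probe turns up in
// several tested pairs (hypothetical keys and values).
public class BestPValueCacheSketch {
    public static void main(String[] args) {
        String[][] hits = { { "ProbeA", "0.04" }, { "ProbeB", "0.04" }, { "ProbeA", "0.002" } };
        Map<String, Float> best = new HashMap<String, Float>();
        for (String[] hit : hits) {
            float p = Float.parseFloat(hit[1]);
            // merge() stores p if the key is absent, otherwise keeps the smaller value
            best.merge(hit[0], p, Math::min);
        }
        System.out.println(best); // ProbeA -> 0.002, ProbeB -> 0.04
    }
}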
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class MonteCarloFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
Probe[] probes = toList.getAllProbes();
ArrayList<Probe> allProbes = new ArrayList<Probe>();
Probe[] fromProbes = startingList.getAllProbes();
try {
for (int i = 0; i < fromProbes.length; i++) {
allProbes.add(fromProbes[i]);
}
int passedCount = 0;
float targetValue = getValue(probes);
for (int iteration = 0; iteration < iterationCount; iteration++) {
if (iteration % 100 == 0) {
progressUpdated("Performed " + iteration + " iterations", iteration, iterationCount);
}
if (cancel) {
progressCancelled();
return;
}
Probe[] theseProbes = makeRandomProbes(allProbes, probes.length);
float value = getValue(theseProbes);
if (value >= targetValue) {
++passedCount;
}
}
float pValue = ((float) passedCount) / iterationCount;
ProbeList newList = new ProbeList(toList, "", "", "P-value");
for (int i = 0; i < probes.length; i++) {
newList.addProbe(probes[i], pValue);
}
filterFinished(newList);
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
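The p-value computed above is simply the fraction of random selections whose score matches or beats the observed one. The self-contained sketch below shows the same idea with a hypothetical scoring function (the mean of the selected values) and simulated data, rather than the SeqMonk getValue()/makeRandomProbes() methods.
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

// Minimal Monte Carlo p-value sketch: how often does a random selection of the same
// size score at least as highly as the observed selection?
public class MonteCarloSketch {
    public static void main(String[] args) {
        Random rnd = new Random(1234);
        List<Double> population = new ArrayList<Double>();
        for (int i = 0; i < 10000; i++) population.add(rnd.nextGaussian());

        int selectionSize = 50;
        double observedScore = 0.5; // assumed score of the real probe list

        int iterations = 10000, passedCount = 0;
        for (int it = 0; it < iterations; it++) {
            Collections.shuffle(population, rnd);
            double sum = 0;
            for (int i = 0; i < selectionSize; i++) sum += population.get(i);
            if (sum / selectionSize >= observedScore) passedCount++;
        }
        System.out.println("Empirical p-value = " + passedCount / (double) iterations);
    }
}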