use of org.apache.commons.math3.stat.descriptive.rank.Median in project SeqMonk by s-andrews.
the class GeneSetIntensityDifferenceFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
try {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
calculateCustomRegression = optionsPanel.calculateCustomRegressionBox.isSelected();
if (calculateCustomRegression == false) {
customRegressionValues = null;
}
if (calculateLinearRegression) {
simpleRegression = new SimpleRegression();
}
Probe[] allProbes = startingList.getAllProbes();
// We're not allowing multiple comparisons - this is a bit of a messy workaround so it's compatible with other methods.
fromStore = fromStores[0];
toStore = toStores[0];
ArrayList<Probe> probeArrayList = new ArrayList<Probe>();
int NaNcount = 0;
// remove the invalid probes - the ones without a value
for (int i = 0; i < allProbes.length; i++) {
if ((Float.isNaN(fromStore.getValueForProbe(allProbes[i]))) || (Float.isNaN(toStore.getValueForProbe(allProbes[i])))) {
NaNcount++;
} else {
probeArrayList.add(allProbes[i]);
}
}
System.err.println("Found " + NaNcount + " probes that were invalid.");
probes = probeArrayList.toArray(new Probe[0]);
if (calculateCustomRegression == true) {
customRegressionValues = new float[2][probes.length];
}
// We'll pull the number of probes to sample from the preferences if they've changed it
Integer updatedProbesPerSet = optionsPanel.probesPerSet();
if (updatedProbesPerSet != null)
probesPerSet = updatedProbesPerSet;
// we want a set of z-scores using the local distribution.
probeZScoreLookupTable = new Hashtable<Probe, Double>();
// Put something in the progress whilst we're ordering the probe values to make the comparison.
progressUpdated("Generating background model", 0, 1);
Comparator<Integer> comp = new AverageIntensityComparator(fromStore, toStore, probes);
// We need to generate a set of probe indices that can be ordered by their average intensity
Integer[] indices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
indices[i] = i;
/* add the data to the linear regression object */
if (calculateLinearRegression) {
simpleRegression.addData((double) fromStore.getValueForProbe(probes[i]), (double) toStore.getValueForProbe(probes[i]));
}
}
if (calculateLinearRegression) {
System.out.println("intercept = " + simpleRegression.getIntercept() + ", slope = " + simpleRegression.getSlope());
}
Arrays.sort(indices, comp);
/* This holds the indices to get the deduplicated probe from the original list of probes */
deduplicatedIndices = new Integer[probes.length];
/* The number of probes with different values */
int dedupProbeCounter = 0;
// the probes deduplicated by value
ArrayList<Probe> deduplicatedProbes = new ArrayList<Probe>();
// populate the first one so that we have something to compare to in the loop
deduplicatedIndices[0] = 0;
deduplicatedProbes.add(probes[indices[0]]);
progressUpdated("Made 0 of 1 comparisons", 0, 1);
for (int i = 1; i < indices.length; i++) {
/* indices have been sorted, now we need to check whether adjacent pair values are identical */
if ((fromStore.getValueForProbe(probes[indices[i]]) == fromStore.getValueForProbe(probes[indices[i - 1]])) && (toStore.getValueForProbe(probes[indices[i]]) == toStore.getValueForProbe(probes[indices[i - 1]]))) {
/* If they are identical, do not add the probe to the deduplicatedProbes object, but have a reference for it so we can look up which deduplicated probe and
* therefore which distribution slice to use for the duplicated probe. */
deduplicatedIndices[i] = dedupProbeCounter;
} else {
deduplicatedProbes.add(probes[indices[i]]);
dedupProbeCounter++;
deduplicatedIndices[i] = dedupProbeCounter;
}
}
Probe[] dedupProbes = deduplicatedProbes.toArray(new Probe[0]);
// make sure we're not trying to use more probes than we've got in the analysis
if (probesPerSet > dedupProbes.length) {
probesPerSet = dedupProbes.length;
}
System.out.println("total number of probe values = " + probes.length);
System.out.println("number of deduplicated probe values = " + dedupProbes.length);
System.out.println("probesPerSet = " + probesPerSet);
// I want this to contain all the differences, then from that I'm going to get the z-scores.
double[] currentDiffSet = new double[probesPerSet];
for (int i = 0; i < indices.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / indices.length;
// progress += 100*comparisonIndex;
progressUpdated("Made 0 out of 1 comparisons", progress, 100);
}
/**
* There are +1s in here because we skip over j when j == startingIndex.
*/
// We need to make up the set of differences to represent this probe
int startingIndex = deduplicatedIndices[i] - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + (probesPerSet + 1) >= dedupProbes.length)
startingIndex = dedupProbes.length - (probesPerSet + 1);
try {
for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
if (j == startingIndex) {
// Don't include the point being tested in the background model
continue;
}
double diff;
if (calculateLinearRegression == true) {
if (j > dedupProbes.length) {
System.err.println(" j is too big, it's " + j + " and dedupProbes.length = " + dedupProbes.length + ", starting index = " + startingIndex);
}
double x = fromStore.getValueForProbe(dedupProbes[j]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(dedupProbes[j]) - expectedY;
} else {
diff = toStore.getValueForProbe(dedupProbes[j]) - fromStore.getValueForProbe(dedupProbes[j]);
}
if (j < startingIndex) {
currentDiffSet[j - startingIndex] = diff;
} else {
currentDiffSet[(j - startingIndex) - 1] = diff;
}
}
if (calculateCustomRegression == true) {
// the average/ kind of centre line
float z = ((fromStore.getValueForProbe(probes[indices[i]]) + toStore.getValueForProbe(probes[indices[i]])) / 2);
customRegressionValues[0][indices[i]] = z - ((float) SimpleStats.mean(currentDiffSet) / 2);
customRegressionValues[1][indices[i]] = z + ((float) SimpleStats.mean(currentDiffSet) / 2);
}
double mean = 0;
// Get the difference for this point
double diff;
if (calculateLinearRegression == true) {
double x = fromStore.getValueForProbe(probes[indices[i]]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(probes[indices[i]]) - expectedY;
} else if (calculateCustomRegression == true) {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
mean = SimpleStats.mean(currentDiffSet);
} else {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
}
double stdev;
stdev = SimpleStats.stdev(currentDiffSet, mean);
// if there are no reads in the probe for either of the datasets, should we set the zscore to 0??
double zScore = (diff - mean) / stdev;
// modified z score
// median absolute deviation
/* double[] madArray = new double[currentDiffSet.length];
double median = SimpleStats.median(currentDiffSet);
for(int d=0; d<currentDiffSet.length; d++){
madArray[d] = Math.abs(currentDiffSet[d] - median);
}
double mad = SimpleStats.median(madArray);
zScore = (0.6745 * (diff - median))/mad;
}
*/
probeZScoreLookupTable.put(probes[indices[i]], zScore);
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
// make this an array list as we're kicking out the mapped gene sets that have zscores with variance of 0.
ArrayList<MappedGeneSetTTestValue> pValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
MappedGeneSet[] mappedGeneSets = null;
/* if we're using the gene set from a file, map the gene sets to the probes */
if (optionsPanel.geneSetsFileRadioButton.isSelected()) {
GeneSetCollectionParser geneSetCollectionParser = new GeneSetCollectionParser(minGenesInSet, maxGenesInSet);
GeneSetCollection geneSetCollection = geneSetCollectionParser.parseGeneSetInformation(validGeneSetFilepath);
MappedGeneSet[] allMappedGeneSets = geneSetCollection.getMappedGeneSets(probes);
if (allMappedGeneSets == null) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes could be matched to probes.\nCheck that the gmt file is for the right species. \nTo use gene sets from a file, probe names must contain the gene name. \nTry defining new probes over genes (e.g. using the RNA-seq quantitation pipeline) or use existing probes lists instead of a gene set file.", "No gene sets identified", JOptionPane.ERROR_MESSAGE);
throw new SeqMonkException("No sets of genes could be matched to probes.\nTo use gene sets from a file, probe names must contain the gene name.\nTry defining new probes over genes or use existing probes lists instead of a gene set file.");
} else {
ArrayList<MappedGeneSet> mgsArrayList = new ArrayList<MappedGeneSet>();
/* get rid of those that have fewer probes in the set than minGenesInSet. We shouldn't exceed maxGenesInSet unless probes have been made over something other than genes */
for (int i = 0; i < allMappedGeneSets.length; i++) {
if (allMappedGeneSets[i].getProbes().length >= minGenesInSet) {
mgsArrayList.add(allMappedGeneSets[i]);
}
}
mappedGeneSets = mgsArrayList.toArray(new MappedGeneSet[0]);
}
} else /* or if we're using existing probelists, create mappedGeneSets from them */
if (optionsPanel.probeListRadioButton.isSelected() && selectedProbeLists != null) {
mappedGeneSets = new MappedGeneSet[selectedProbeLists.length];
for (int i = 0; i < selectedProbeLists.length; i++) {
mappedGeneSets[i] = new MappedGeneSet(selectedProbeLists[i]);
}
} else {
throw new SeqMonkException("Haven't got any genesets to use, shouldn't have got here without having any selected.");
}
if (mappedGeneSets == null || mappedGeneSets.length == 0) {
throw new SeqMonkException("Couldn't map gene sets to the probes, try again with a different probe set");
} else {
System.err.println("there are " + mappedGeneSets.length + " mappedGeneSets");
System.err.println("size of zScore lookup table = " + probeZScoreLookupTable.size());
}
System.err.println("total number of mapped gene sets = " + mappedGeneSets.length);
// deduplicate our mappedGeneSets
if (optionsPanel.deduplicateGeneSetBox.isSelected()) {
mappedGeneSets = deduplicateMappedGeneSets(mappedGeneSets);
}
System.err.println("deduplicated mapped gene sets = " + mappedGeneSets.length);
/*
* we need to go through the mapped gene set and get all the values for the matched probes
*
*/
for (int i = 0; i < mappedGeneSets.length; i++) {
Probe[] geneSetProbes = mappedGeneSets[i].getProbes();
// to contain all the z-scores for the gene set
double[] geneSetZscores = new double[geneSetProbes.length];
// Find the z-scores for each of the probes in the mappedGeneSet
for (int gsp = 0; gsp < geneSetProbes.length; gsp++) {
if (probeZScoreLookupTable.containsKey(geneSetProbes[gsp])) {
geneSetZscores[gsp] = probeZScoreLookupTable.get(geneSetProbes[gsp]);
}
}
// this is just temporary to check the stats - there's some duplication with this.... is there??
mappedGeneSets[i].zScores = geneSetZscores;
if (geneSetZscores.length > 1) {
// the number of probes in the mappedGeneSet should always be > 1 anyway as the mappedGeneSet shouldn't be created if there are < 2 matched probes.
double pVal;
if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("t-test")) {
pVal = TTest.calculatePValue(geneSetZscores, 0);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(geneSetZscores, allZscores);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov non-directional test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(convertToAbsoluteValues(geneSetZscores), convertToAbsoluteValues(allZscores));
} else {
throw new IllegalArgumentException("Didn't recognise statistical test " + optionsPanel.statisticalTestBox.getSelectedItem().toString());
}
mappedGeneSets[i].meanZScore = SimpleStats.mean(geneSetZscores);
// check the variance - we don't want variances of 0.
double stdev = SimpleStats.stdev(geneSetZscores);
if ((stdev * stdev) < 0.00000000000001) {
continue;
} else // if all the differences between the datasets are 0 then just get rid of them
if (Double.isNaN(pVal)) {
continue;
} else {
pValueArrayList.add(new MappedGeneSetTTestValue(mappedGeneSets[i], pVal));
}
} else {
System.err.println("Fell through the net somewhere, why does the set of zscores contain fewer than 2 values?");
continue;
}
}
MappedGeneSetTTestValue[] filterResultpValues = pValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
ArrayList<MappedGeneSetTTestValue> filteredPValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
// Now we've got all the p values they need to be corrected.
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(filterResultpValues);
}
System.err.println(filterResultpValues.length + " p-values calculated, multtest = " + applyMultipleTestingCorrection + ", pval limit = " + pValueLimit);
for (int i = 0; i < filterResultpValues.length; i++) {
double pOrQvalue;
if (applyMultipleTestingCorrection) {
pOrQvalue = filterResultpValues[i].q;
} else {
pOrQvalue = filterResultpValues[i].p;
}
// check whether it passes the p/q-value and z-score cut-offs
if (optionsPanel.reportAllResults.isSelected()) {
filteredPValueArrayList.add(filterResultpValues[i]);
} else {
if ((pOrQvalue < pValueLimit) && (Math.abs(filterResultpValues[i].mappedGeneSet.meanZScore) > zScoreThreshold)) {
filteredPValueArrayList.add(filterResultpValues[i]);
}
}
}
// convert the ArrayList to MappedGeneSetTTestValue
filterResultpValues = filteredPValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
if (filterResultpValues.length == 0) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes were identified using the selected parameters, \ntry changing the gene sets or relaxing the p-value/z-score thresholds.", "No gene sets identified", JOptionPane.INFORMATION_MESSAGE);
} else {
geneSetDisplay = new GeneSetDisplay(dataCollection, listDescription(), fromStore, toStore, probes, probeZScoreLookupTable, filterResultpValues, startingList, customRegressionValues, simpleRegression, // optionsPanel.bidirectionalRadioButton.isSelected());
false);
geneSetDisplay.addWindowListener(this);
}
// We don't want to save the probe list here, we're bringing up the intermediate display from which probe lists can be saved.
progressCancelled();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project SeqMonk by s-andrews.
the class MacsPeakCaller method run.
public void run() {
// for (int i=0;i<selectedChIPStores.length;i++) {
// System.err.println("Selcted ChIP="+selectedChIPStores[i]);
// }
// for (int i=0;i<selectedInputStores.length;i++) {
// System.err.println("Selcted Input="+selectedInputStores[i]);
// }
// First find the tag offsets between the watson and crick strands
// Work out the total average coverage for all of the combined ChIP samples
long totalChIPCoverage = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
totalChIPCoverage += selectedChIPStores[i].getTotalReadLength();
}
if (cancel) {
generationCancelled();
return;
}
double averageChIPCoveragePerBase = totalChIPCoverage / (double) collection.genome().getTotalGenomeLength();
double lowerCoverage = averageChIPCoveragePerBase * minFoldEnrichment;
double upperCoverage = averageChIPCoveragePerBase * maxFoldEnrichment;
System.err.println("Coverage range for high confidence peaks is " + lowerCoverage + " - " + upperCoverage);
// Now we go through the data to find locations for our high confidence peaks so we can
// randomly select 1000 of these to use to find the offset between the two strands
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<Probe> potentialHighConfidencePeaks = new Vector<Probe>();
for (int c = 0; c < chromosomes.length; c++) {
if (cancel) {
generationCancelled();
return;
}
// Time for an update
updateGenerationProgress("Finding high confidence peaks on chromosome " + chromosomes[c].name(), c, chromosomes.length);
Probe lastValidProbe = null;
for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize; startPosition += fragmentSize / 2) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
long totalLength = 0;
Probe probe = new Probe(chromosomes[c], startPosition, startPosition + fragmentSize);
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(probe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * probe.length()) && totalLength <= upperCoverage * probe.length()) {
if (lastValidProbe != null && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
} else if (lastValidProbe != null) {
// Check that the overall density over the region falls within our limits
totalLength = 0;
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * lastValidProbe.length()) && totalLength <= upperCoverage * lastValidProbe.length()) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
lastValidProbe = probe;
} else {
lastValidProbe = probe;
}
}
}
if (lastValidProbe != null) {
long totalLength = 0;
for (int s = 0; s < selectedChIPStores.length; s++) {
long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
for (int j = 0; j < reads.length; j++) {
totalLength += SequenceRead.length(reads[j]);
}
}
if (totalLength >= (lowerCoverage * lastValidProbe.length()) && totalLength <= upperCoverage * lastValidProbe.length()) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
}
}
if (potentialHighConfidencePeaks.size() == 0) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No high confidence peaks found", "Quitting generator", JOptionPane.INFORMATION_MESSAGE);
generationCancelled();
return;
}
// System.err.println("Found "+potentialHighConfidencePeaks.size()+" high confidence peaks");
// Now we select 1000 random probes from this set
Probe[] highConfidencePeaks = potentialHighConfidencePeaks.toArray(new Probe[0]);
Collections.shuffle(Arrays.asList(highConfidencePeaks));
Probe[] randomHighConfidenceProbes = new Probe[Math.min(highConfidencePeaks.length, 1000)];
for (int i = 0; i < randomHighConfidenceProbes.length; i++) {
randomHighConfidenceProbes[i] = highConfidencePeaks[i];
}
// Now find the average distance between forward / reverse reads in the candidate peaks
int[] distances = new int[highConfidencePeaks.length];
// Sort the candidates so we don't do stupid stuff with the cache
Arrays.sort(randomHighConfidenceProbes);
for (int p = 0; p < randomHighConfidenceProbes.length; p++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
distances[p] = getInterStrandDistance(randomHighConfidenceProbes[p], selectedChIPStores);
}
int medianInterStrandDistance = (int) SimpleStats.median(distances);
if (medianInterStrandDistance < 0)
medianInterStrandDistance = 0;
// System.err.println("Median inter strand difference = "+medianInterStrandDistance);
// Now we find the depth cutoff for overrepresented single tags using a binomial distribution
int totalReadCount = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
totalReadCount += selectedChIPStores[i].getTotalReadCount();
}
BinomialDistribution bin = new BinomialDistribution(totalReadCount, 1d / collection.genome().getTotalGenomeLength());
// We want to know what depth has a chance of less than 1^-5
int redundantThreshold = bin.inverseCumulativeProbability(1 - 0.00001d);
if (redundantThreshold < 1)
redundantThreshold = 1;
// System.err.println("Redundancy threshold is "+redundantThreshold);
// Now we construct a poisson distribution to work out the threshold to use for
// constructing a full candidate peak set.
updateGenerationProgress("Counting non-redundant reads", 0, 1);
// To do this we need to get the full non-redundant length from the whole set
int totalNonRedCount = getNonRedundantReadCount(selectedChIPStores, redundantThreshold);
// System.err.println("Total non-redundant sequences is "+totalNonRedCount);
// We need to know the median read length for the data
int readLength = 0;
for (int i = 0; i < selectedChIPStores.length; i++) {
readLength += selectedChIPStores[i].getTotalReadLength() / selectedChIPStores[i].getTotalReadCount();
}
readLength /= selectedChIPStores.length;
double expectedCountsPerWindow = getExpectedCountPerWindow(totalNonRedCount, collection.genome().getTotalGenomeLength(), fragmentSize, readLength);
PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);
int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);
// System.err.println("Threshold for enrichment in a window is "+readCountCutoff+" reads using a p-value of "+pValue+" and a mean of "+(totalNonRedCount/(collection.genome().getTotalGenomeLength()/(double)fragmentSize)));
// Now we go back through the whole dataset to do a search for all possible candidate probes
// We re-use the peak vector we came up with before.
potentialHighConfidencePeaks.clear();
for (int c = 0; c < chromosomes.length; c++) {
// Time for an update
updateGenerationProgress("Finding candidate peaks on chromosome " + chromosomes[c].name(), c, chromosomes.length);
Probe lastValidProbe = null;
for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize; startPosition += fragmentSize / 2) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
// We expand the region we're looking at by the inter-strand distance as we're going to
// be adjusting the read positions
Probe probe = new Probe(chromosomes[c], startPosition, (startPosition + fragmentSize - 1));
long[] mergedProbeReads = getReadsFromDataStoreCollection(probe, selectedChIPStores, medianInterStrandDistance);
mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
SequenceRead.sort(mergedProbeReads);
int thisProbeOverlapCount = 0;
for (int i = 0; i < mergedProbeReads.length; i++) {
if (SequenceRead.overlaps(mergedProbeReads[i], probe.packedPosition())) {
++thisProbeOverlapCount;
}
}
if (thisProbeOverlapCount > readCountCutoff) {
if (lastValidProbe != null && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
} else if (lastValidProbe != null) {
potentialHighConfidencePeaks.add(lastValidProbe);
lastValidProbe = probe;
} else {
lastValidProbe = probe;
}
}
}
if (lastValidProbe != null) {
potentialHighConfidencePeaks.add(lastValidProbe);
}
}
// Finally we re-filter the peaks we have using local poisson distributions with densities taken
// from either the input samples (if there are any), or the local region. The densities are
// estimated over 1,5 and 10kb around the peak and genome wide and the max of these is taken.
// If there is no input then the 1kb region is not used.
Probe[] allCandidateProbes = potentialHighConfidencePeaks.toArray(new Probe[0]);
// Work out which stores we're using to validate against.
DataStore[] validationStores;
boolean useInput = false;
double inputCorrection = 1;
int validationNonRedCount;
if (selectedInputStores != null && selectedInputStores.length > 0) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
validationStores = selectedInputStores;
useInput = true;
// We also need to work out the total number of nonredundant seqences
// in the input so we can work out a scaling factor so that the densities
// for input and chip are comparable.
validationNonRedCount = getNonRedundantReadCount(validationStores, redundantThreshold);
inputCorrection = totalNonRedCount / (double) validationNonRedCount;
System.err.println("From chip=" + totalNonRedCount + " input=" + validationNonRedCount + " correction is " + inputCorrection);
} else {
validationStores = selectedChIPStores;
validationNonRedCount = totalNonRedCount;
}
Vector<Probe> finalValidatedProbes = new Vector<Probe>();
for (int p = 0; p < allCandidateProbes.length; p++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
if (p % 100 == 0) {
updateGenerationProgress("Validated " + p + " out of " + allCandidateProbes.length + " raw peaks", p, allCandidateProbes.length);
}
// System.err.println("Validating "+allCandidateProbes[p].chromosome()+":"+allCandidateProbes[p].start()+"-"+allCandidateProbes[p].end());
// We now need to find the maximum read density per 2*bandwidth against which
// we're going to validate this peak
// We're going to get all reads within 10kb of the peak, and then we can subselect from there
int midPoint = allCandidateProbes[p].middle();
Probe region10kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 5000, 1), Math.min(midPoint + 4999, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
Probe region5kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 2500, 1), Math.min(midPoint + 2499, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
Probe region1kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 500, 1), Math.min(midPoint + 499, allCandidateProbes[p].chromosome().length()), allCandidateProbes[p].strand());
// Get the probes for the largest region
long[] thisRegionReads = getReadsFromDataStoreCollection(region10kb, validationStores, 0);
// Deduplicate so it's a fair comparison
// Should we recalculate the redundant threshold based on the input coverage?
thisRegionReads = deduplicateReads(thisRegionReads, redundantThreshold);
int region10kbcount = thisRegionReads.length;
int region5kbcount = 0;
int region1kbcount = 0;
// Go through the reads seeing if they fit into the 5 or 1kb regions
for (int r = 0; r < thisRegionReads.length; r++) {
if (SequenceRead.overlaps(region5kb.packedPosition(), thisRegionReads[r]))
++region5kbcount;
if (SequenceRead.overlaps(region1kb.packedPosition(), thisRegionReads[r]))
++region1kbcount;
}
// System.err.println("Input counts 10kb="+region10kbcount+" 5kb="+region5kbcount+" 1kb="+region1kbcount);
// Convert to densities per window and ajdust for global coverage
double globalDensity = getExpectedCountPerWindow(validationNonRedCount, collection.genome().getTotalGenomeLength(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density10kb = getExpectedCountPerWindow(region10kbcount, region10kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density5kb = getExpectedCountPerWindow(region5kbcount, region5kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
double density1kb = getExpectedCountPerWindow(region1kbcount, region1kb.length(), allCandidateProbes[p].length(), readLength) * inputCorrection;
// Find the highest density to use for the validation
double highestDensity = globalDensity;
if (density10kb > highestDensity)
highestDensity = density10kb;
if (density5kb > highestDensity)
highestDensity = density5kb;
if (useInput && density1kb > highestDensity)
highestDensity = density1kb;
// System.err.println("Global="+globalDensity+" 10kb="+density10kb+" 5kb="+density5kb+" 1kb="+density1kb+" using="+highestDensity);
// Construct a poisson distribution with this density
PoissonDistribution localPoisson = new PoissonDistribution(highestDensity);
// System.err.println("Cutoff from global="+(new PoissonDistribution(globalDensity)).inverseCumulativeProbability(1-pValue)+" 10kb="+(new PoissonDistribution(density10kb)).inverseCumulativeProbability(1-pValue)+" 5kb="+(new PoissonDistribution(density5kb)).inverseCumulativeProbability(1-pValue)+" 1kb="+(new PoissonDistribution(density1kb)).inverseCumulativeProbability(1-pValue));
// Now check to see if the actual count from this peak is enough to still pass
long[] mergedProbeReads = getReadsFromDataStoreCollection(allCandidateProbes[p], selectedChIPStores, medianInterStrandDistance);
mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
SequenceRead.sort(mergedProbeReads);
int thisProbeOverlapCount = 0;
for (int i = 0; i < mergedProbeReads.length; i++) {
if (SequenceRead.overlaps(mergedProbeReads[i], allCandidateProbes[p].packedPosition())) {
++thisProbeOverlapCount;
}
}
if (thisProbeOverlapCount > localPoisson.inverseCumulativeProbability(1 - pValue)) {
finalValidatedProbes.add(allCandidateProbes[p]);
// System.err.println("Adding probe to final set");
}
}
// System.err.println("From "+allCandidateProbes.length+" candidates "+finalValidatedProbes.size()+" peaks were validated");
ProbeSet finalSet = new ProbeSet(getDescription(), finalValidatedProbes.toArray(new Probe[0]));
generationComplete(finalSet);
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project presto by prestodb.
the class MathFunctions method cauchyCdf.
@Description("Cauchy cdf for a given value, median, and scale (gamma)")
@ScalarFunction
@SqlType(StandardTypes.DOUBLE)
public static double cauchyCdf(@SqlType(StandardTypes.DOUBLE) double median, @SqlType(StandardTypes.DOUBLE) double scale, @SqlType(StandardTypes.DOUBLE) double value) {
checkCondition(scale > 0, INVALID_FUNCTION_ARGUMENT, "scale must be greater than 0");
CauchyDistribution distribution = new CauchyDistribution(null, median, scale, CauchyDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
return distribution.cumulativeProbability(value);
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project GDSC-SMLM by aherbert.
the class TraceLengthAnalysis method run.
@Override
public void run(String arg) {
SmlmUsageTracker.recordPlugin(this.getClass(), arg);
if (MemoryPeakResults.isMemoryEmpty()) {
IJ.error(TITLE, "No localisations in memory");
return;
}
if (!showDialog()) {
return;
}
// Load the results
MemoryPeakResults results = ResultsManager.loadInputResults(settings.inputOption, false, null, null);
if (MemoryPeakResults.isEmpty(results)) {
IJ.error(TITLE, "No results could be loaded");
return;
}
try {
distanceConverter = results.getDistanceConverter(DistanceUnit.UM);
timeConverter = results.getTimeConverter(TimeUnit.SECOND);
} catch (final Exception ex) {
IJ.error(TITLE, "Cannot convert units to um or seconds: " + ex.getMessage());
return;
}
// Get the localisation error (4s^2) in raw units^2
double precision = 0;
try {
final PrecisionResultProcedure p = new PrecisionResultProcedure(results);
p.getPrecision();
// Precision in nm using the median
precision = new Percentile().evaluate(p.precisions, 50);
// Convert from nm to um to raw units
final double rawPrecision = distanceConverter.convertBack(precision / 1e3);
// Get the localisation error (4s^2) in units^2
error = 4 * rawPrecision * rawPrecision;
} catch (final Exception ex) {
ImageJUtils.log(TITLE + " - Unable to compute precision: " + ex.getMessage());
}
// Analyse the track lengths
results = results.copy();
results.sort(IdFramePeakResultComparator.INSTANCE);
// Ensure the first result triggers an id change
lastid = results.getFirst().getId() - 1;
results.forEach(this::processTrackLength);
// For the final track
store();
msds = msdList.toArray();
lengths = lengthList.toArray();
ids = idList.toArray();
final int[] limits = MathUtils.limits(lengths);
h1 = new int[limits[1] + 1];
h2 = new int[h1.length];
x1 = SimpleArrayUtils.newArray(h1.length, 0, 1f);
y1 = new float[x1.length];
y2 = new float[x1.length];
// Sort by MSD
final int[] indices = SimpleArrayUtils.natural(msds.length);
SortUtils.sortIndices(indices, msds, false);
final double[] msds2 = msds.clone();
final int[] lengths2 = lengths.clone();
final int[] ids2 = ids.clone();
for (int i = 0; i < indices.length; i++) {
msds[i] = msds2[indices[i]];
lengths[i] = lengths2[indices[i]];
ids[i] = ids2[indices[i]];
}
// Interactive analysis
final NonBlockingExtendedGenericDialog gd = new NonBlockingExtendedGenericDialog(TITLE);
ImageJUtils.addMessage(gd, "Split traces into fixed or moving using the track diffusion coefficient (D).\n" + "Localisation error has been subtracted from jumps (%s nm).", MathUtils.rounded(precision));
final Statistics s = Statistics.create(msds);
final double av = s.getMean();
final String msg = String.format("Average D per track = %s um^2/s", MathUtils.rounded(av));
gd.addMessage(msg);
// Histogram the diffusion coefficients
final WindowOrganiser wo = new WindowOrganiser();
final HistogramPlot histogramPlot = new HistogramPlotBuilder("Trace diffusion coefficient", StoredData.create(msds), "D (um^2/s)").setRemoveOutliersOption(1).setPlotLabel(msg).build();
histogramPlot.show(wo);
final double[] xvalues = histogramPlot.getPlotXValues();
final double min = xvalues[0];
final double max = xvalues[xvalues.length - 1];
// see if we can build a nice slider range from the histogram limits
if (max - min < 5) {
// Because sliders are used when the range is <5 and floating point
gd.addSlider("D_threshold", min, max, settings.msdThreshold);
} else {
gd.addNumericField("D_threshold", settings.msdThreshold, 2, 6, "um^2/s");
}
gd.addCheckbox("Normalise", settings.normalise);
gd.addDialogListener((gd1, event) -> {
settings.msdThreshold = gd1.getNextNumber();
settings.normalise = gd1.getNextBoolean();
update();
return true;
});
if (ImageJUtils.isShowGenericDialog()) {
draw(wo);
wo.tile();
}
gd.setOKLabel("Save datasets");
gd.setCancelLabel("Close");
gd.addHelp(HelpUrls.getUrl("trace-length-analysis"));
gd.showDialog();
if (gd.wasCanceled()) {
return;
}
// Sort by ID
final PeakResult[] list = results.toArray();
Arrays.sort(list, IdFramePeakResultComparator.INSTANCE);
createResults(results, "Fixed", 0, lastIndex, list);
createResults(results, "Moving", lastIndex, msds.length, list);
}
use of org.apache.commons.math3.stat.descriptive.rank.Median in project GDSC-SMLM by aherbert.
the class BenchmarkSpotFit method summariseResults.
private void summariseResults(BenchmarkSpotFitResult spotFitResults, long runTime, final PreprocessedPeakResult[] preprocessedPeakResults, int uniqueIdCount, CandidateData candidateData, TIntObjectHashMap<List<Coordinate>> actualCoordinates) {
// Summarise the fitting results. N fits, N failures.
// Optimal match statistics if filtering is perfect (since fitting is not perfect).
final StoredDataStatistics distanceStats = new StoredDataStatistics();
final StoredDataStatistics depthStats = new StoredDataStatistics();
// Get stats for all fitted results and those that match
// Signal, SNR, Width, xShift, yShift, Precision
createFilterCriteria();
final StoredDataStatistics[][] stats = new StoredDataStatistics[3][filterCriteria.length];
for (int i = 0; i < stats.length; i++) {
for (int j = 0; j < stats[i].length; j++) {
stats[i][j] = new StoredDataStatistics();
}
}
final double nmPerPixel = simulationParameters.pixelPitch;
double tp = 0;
double fp = 0;
int failCtp = 0;
int failCfp = 0;
int ctp = 0;
int cfp = 0;
final int[] singleStatus = new int[FitStatus.values().length];
final int[] multiStatus = new int[singleStatus.length];
final int[] doubletStatus = new int[singleStatus.length];
final int[] multiDoubletStatus = new int[singleStatus.length];
// Easier to materialise the values since we have a lot of non final variables to manipulate
final TIntObjectHashMap<FilterCandidates> fitResults = spotFitResults.fitResults;
final int[] frames = new int[fitResults.size()];
final FilterCandidates[] candidates = new FilterCandidates[fitResults.size()];
final int[] counter = new int[1];
fitResults.forEachEntry((frame, candidate) -> {
frames[counter[0]] = frame;
candidates[counter[0]] = candidate;
counter[0]++;
return true;
});
for (final FilterCandidates result : candidates) {
// Count the number of fit results that matched (tp) and did not match (fp)
tp += result.tp;
fp += result.fp;
for (int i = 0; i < result.fitResult.length; i++) {
if (result.spots[i].match) {
ctp++;
} else {
cfp++;
}
final MultiPathFitResult fitResult = result.fitResult[i];
if (singleStatus != null && result.spots[i].match) {
// Debugging reasons for fit failure
addStatus(singleStatus, fitResult.getSingleFitResult());
addStatus(multiStatus, fitResult.getMultiFitResult());
addStatus(doubletStatus, fitResult.getDoubletFitResult());
addStatus(multiDoubletStatus, fitResult.getMultiDoubletFitResult());
}
if (noMatch(fitResult)) {
if (result.spots[i].match) {
failCtp++;
} else {
failCfp++;
}
}
// We have multi-path results.
// We want statistics for:
// [0] all fitted spots
// [1] fitted spots that match a result
// [2] fitted spots that do not match a result
addToStats(fitResult.getSingleFitResult(), stats);
addToStats(fitResult.getMultiFitResult(), stats);
addToStats(fitResult.getDoubletFitResult(), stats);
addToStats(fitResult.getMultiDoubletFitResult(), stats);
}
// Statistics on spots that fit an actual result
for (int i = 0; i < result.match.length; i++) {
if (!result.match[i].isFitResult()) {
// For now just ignore the candidates that matched
continue;
}
final FitMatch fitMatch = (FitMatch) result.match[i];
distanceStats.add(fitMatch.distance * nmPerPixel);
depthStats.add(fitMatch.zdepth * nmPerPixel);
}
}
if (tp == 0) {
IJ.error(TITLE, "No fit results matched the simulation actual results");
return;
}
// Store data for computing correlation
final double[] i1 = new double[depthStats.getN()];
final double[] i2 = new double[i1.length];
final double[] is = new double[i1.length];
int ci = 0;
for (final FilterCandidates result : candidates) {
for (int i = 0; i < result.match.length; i++) {
if (!result.match[i].isFitResult()) {
// For now just ignore the candidates that matched
continue;
}
final FitMatch fitMatch = (FitMatch) result.match[i];
final ScoredSpot spot = result.spots[fitMatch.index];
i1[ci] = fitMatch.predictedSignal;
i2[ci] = fitMatch.actualSignal;
is[ci] = spot.spot.intensity;
ci++;
}
}
// We want to compute the Jaccard against the spot metric
// Filter the results using the multi-path filter
final ArrayList<MultiPathFitResults> multiPathResults = new ArrayList<>(fitResults.size());
for (int i = 0; i < frames.length; i++) {
final int frame = frames[i];
final MultiPathFitResult[] multiPathFitResults = candidates[i].fitResult;
final int totalCandidates = candidates[i].spots.length;
final List<Coordinate> list = actualCoordinates.get(frame);
final int nActual = (list == null) ? 0 : list.size();
multiPathResults.add(new MultiPathFitResults(frame, multiPathFitResults, totalCandidates, nActual));
}
// Score the results and count the number returned
final List<FractionalAssignment[]> assignments = new ArrayList<>();
final TIntHashSet set = new TIntHashSet(uniqueIdCount);
final FractionScoreStore scoreStore = set::add;
final MultiPathFitResults[] multiResults = multiPathResults.toArray(new MultiPathFitResults[0]);
// Filter with no filter
final MultiPathFilter mpf = new MultiPathFilter(new SignalFilter(0), null, multiFilter.residualsThreshold);
mpf.fractionScoreSubset(multiResults, NullFailCounter.INSTANCE, this.results.size(), assignments, scoreStore, CoordinateStoreFactory.create(0, 0, imp.getWidth(), imp.getHeight(), config.convertUsingHwhMax(config.getDuplicateDistanceParameter())));
final double[][] matchScores = new double[set.size()][];
int count = 0;
for (int i = 0; i < assignments.size(); i++) {
final FractionalAssignment[] a = assignments.get(i);
if (a == null) {
continue;
}
for (int j = 0; j < a.length; j++) {
final PreprocessedPeakResult r = ((PeakFractionalAssignment) a[j]).peakResult;
set.remove(r.getUniqueId());
final double precision = Math.sqrt(r.getLocationVariance());
final double signal = r.getSignal();
final double snr = r.getSnr();
final double width = r.getXSdFactor();
final double xShift = r.getXRelativeShift2();
final double yShift = r.getYRelativeShift2();
// Since these two are combined for filtering and the max is what matters.
final double shift = (xShift > yShift) ? Math.sqrt(xShift) : Math.sqrt(yShift);
final double eshift = Math.sqrt(xShift + yShift);
final double[] score = new double[8];
score[FILTER_SIGNAL] = signal;
score[FILTER_SNR] = snr;
score[FILTER_MIN_WIDTH] = width;
score[FILTER_MAX_WIDTH] = width;
score[FILTER_SHIFT] = shift;
score[FILTER_ESHIFT] = eshift;
score[FILTER_PRECISION] = precision;
score[FILTER_PRECISION + 1] = a[j].getScore();
matchScores[count++] = score;
}
}
// Add the rest
set.forEach(new CustomTIntProcedure(count) {
@Override
public boolean execute(int uniqueId) {
// This should not be null or something has gone wrong
final PreprocessedPeakResult r = preprocessedPeakResults[uniqueId];
if (r == null) {
throw new IllegalArgumentException("Missing result: " + uniqueId);
}
final double precision = Math.sqrt(r.getLocationVariance());
final double signal = r.getSignal();
final double snr = r.getSnr();
final double width = r.getXSdFactor();
final double xShift = r.getXRelativeShift2();
final double yShift = r.getYRelativeShift2();
// Since these two are combined for filtering and the max is what matters.
final double shift = (xShift > yShift) ? Math.sqrt(xShift) : Math.sqrt(yShift);
final double eshift = Math.sqrt(xShift + yShift);
final double[] score = new double[8];
score[FILTER_SIGNAL] = signal;
score[FILTER_SNR] = snr;
score[FILTER_MIN_WIDTH] = width;
score[FILTER_MAX_WIDTH] = width;
score[FILTER_SHIFT] = shift;
score[FILTER_ESHIFT] = eshift;
score[FILTER_PRECISION] = precision;
matchScores[count++] = score;
return true;
}
});
final FitConfiguration fitConfig = config.getFitConfiguration();
// Debug the reasons the fit failed
if (singleStatus != null) {
String name = PeakFit.getSolverName(fitConfig);
if (fitConfig.getFitSolver() == FitSolver.MLE && fitConfig.isModelCamera()) {
name += " Camera";
}
IJ.log("Failure counts: " + name);
printFailures("Single", singleStatus);
printFailures("Multi", multiStatus);
printFailures("Doublet", doubletStatus);
printFailures("Multi doublet", multiDoubletStatus);
}
final StringBuilder sb = new StringBuilder(300);
// Add information about the simulation
final double signal = simulationParameters.averageSignal;
final int n = results.size();
sb.append(imp.getStackSize()).append('\t');
final int w = imp.getWidth();
final int h = imp.getHeight();
sb.append(w).append('\t');
sb.append(h).append('\t');
sb.append(n).append('\t');
final double density = ((double) n / imp.getStackSize()) / (w * h) / (simulationParameters.pixelPitch * simulationParameters.pixelPitch / 1e6);
sb.append(MathUtils.rounded(density)).append('\t');
sb.append(MathUtils.rounded(signal)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.sd)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.pixelPitch)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.depth)).append('\t');
sb.append(simulationParameters.fixedDepth).append('\t');
sb.append(MathUtils.rounded(simulationParameters.gain)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.readNoise)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.background)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.noise)).append('\t');
if (simulationParameters.fullSimulation) {
// The total signal is spread over frames
}
sb.append(MathUtils.rounded(signal / simulationParameters.noise)).append('\t');
sb.append(MathUtils.rounded(simulationParameters.sd / simulationParameters.pixelPitch)).append('\t');
sb.append(spotFilter.getDescription());
// nP and nN is the fractional score of the spot candidates
addCount(sb, (double) candidateData.countPositive + candidateData.countNegative);
addCount(sb, candidateData.countPositive);
addCount(sb, candidateData.countNegative);
addCount(sb, candidateData.fractionPositive);
addCount(sb, candidateData.fractionNegative);
String name = PeakFit.getSolverName(fitConfig);
if (fitConfig.getFitSolver() == FitSolver.MLE && fitConfig.isModelCamera()) {
name += " Camera";
}
add(sb, name);
add(sb, config.getFitting());
spotFitResults.resultPrefix = sb.toString();
// Q. Should I add other fit configuration here?
// The fraction of positive and negative candidates that were included
add(sb, (100.0 * ctp) / candidateData.countPositive);
add(sb, (100.0 * cfp) / candidateData.countNegative);
// Score the fitting results compared to the original simulation.
// Score the candidate selection:
add(sb, ctp + cfp);
add(sb, ctp);
add(sb, cfp);
// TP are all candidates that can be matched to a spot
// FP are all candidates that cannot be matched to a spot
// FN = The number of missed spots
FractionClassificationResult match = new FractionClassificationResult(ctp, cfp, 0, simulationParameters.molecules - ctp);
add(sb, match.getRecall());
add(sb, match.getPrecision());
add(sb, match.getF1Score());
add(sb, match.getJaccard());
// Score the fitting results:
add(sb, failCtp);
add(sb, failCfp);
// TP are all fit results that can be matched to a spot
// FP are all fit results that cannot be matched to a spot
// FN = The number of missed spots
add(sb, tp);
add(sb, fp);
match = new FractionClassificationResult(tp, fp, 0, simulationParameters.molecules - tp);
add(sb, match.getRecall());
add(sb, match.getPrecision());
add(sb, match.getF1Score());
add(sb, match.getJaccard());
// Do it again but pretend we can perfectly filter all the false positives
// add(sb, tp);
match = new FractionClassificationResult(tp, 0, 0, simulationParameters.molecules - tp);
// Recall is unchanged
// Precision will be 100%
add(sb, match.getF1Score());
add(sb, match.getJaccard());
// The mean may be subject to extreme outliers so use the median
double median = distanceStats.getMedian();
add(sb, median);
final WindowOrganiser wo = new WindowOrganiser();
String label = String.format("Recall = %s. n = %d. Median = %s nm. SD = %s nm", MathUtils.rounded(match.getRecall()), distanceStats.getN(), MathUtils.rounded(median), MathUtils.rounded(distanceStats.getStandardDeviation()));
new HistogramPlotBuilder(TITLE, distanceStats, "Match Distance (nm)").setPlotLabel(label).show(wo);
median = depthStats.getMedian();
add(sb, median);
// Sort by spot intensity and produce correlation
double[] correlation = null;
double[] rankCorrelation = null;
double[] rank = null;
final FastCorrelator fastCorrelator = new FastCorrelator();
final ArrayList<Ranking> pc1 = new ArrayList<>();
final ArrayList<Ranking> pc2 = new ArrayList<>();
ci = 0;
if (settings.showCorrelation) {
final int[] indices = SimpleArrayUtils.natural(i1.length);
SortUtils.sortData(indices, is, settings.rankByIntensity, true);
correlation = new double[i1.length];
rankCorrelation = new double[i1.length];
rank = new double[i1.length];
for (final int ci2 : indices) {
fastCorrelator.add(Math.round(i1[ci2]), Math.round(i2[ci2]));
pc1.add(new Ranking(i1[ci2], ci));
pc2.add(new Ranking(i2[ci2], ci));
correlation[ci] = fastCorrelator.getCorrelation();
rankCorrelation[ci] = Correlator.correlation(rank(pc1), rank(pc2));
if (settings.rankByIntensity) {
rank[ci] = is[0] - is[ci];
} else {
rank[ci] = ci;
}
ci++;
}
} else {
for (int i = 0; i < i1.length; i++) {
fastCorrelator.add(Math.round(i1[i]), Math.round(i2[i]));
pc1.add(new Ranking(i1[i], i));
pc2.add(new Ranking(i2[i], i));
}
}
final double pearsonCorr = fastCorrelator.getCorrelation();
final double rankedCorr = Correlator.correlation(rank(pc1), rank(pc2));
// Get the regression
final SimpleRegression regression = new SimpleRegression(false);
for (int i = 0; i < pc1.size(); i++) {
regression.addData(pc1.get(i).value, pc2.get(i).value);
}
// final double intercept = regression.getIntercept();
final double slope = regression.getSlope();
if (settings.showCorrelation) {
String title = TITLE + " Intensity";
Plot plot = new Plot(title, "Candidate", "Spot");
final double[] limits1 = MathUtils.limits(i1);
final double[] limits2 = MathUtils.limits(i2);
plot.setLimits(limits1[0], limits1[1], limits2[0], limits2[1]);
label = String.format("Correlation=%s; Ranked=%s; Slope=%s", MathUtils.rounded(pearsonCorr), MathUtils.rounded(rankedCorr), MathUtils.rounded(slope));
plot.addLabel(0, 0, label);
plot.setColor(Color.red);
plot.addPoints(i1, i2, Plot.DOT);
if (slope > 1) {
plot.drawLine(limits1[0], limits1[0] * slope, limits1[1], limits1[1] * slope);
} else {
plot.drawLine(limits2[0] / slope, limits2[0], limits2[1] / slope, limits2[1]);
}
ImageJUtils.display(title, plot, wo);
title = TITLE + " Correlation";
plot = new Plot(title, "Spot Rank", "Correlation");
final double[] xlimits = MathUtils.limits(rank);
double[] ylimits = MathUtils.limits(correlation);
ylimits = MathUtils.limits(ylimits, rankCorrelation);
plot.setLimits(xlimits[0], xlimits[1], ylimits[0], ylimits[1]);
plot.setColor(Color.red);
plot.addPoints(rank, correlation, Plot.LINE);
plot.setColor(Color.blue);
plot.addPoints(rank, rankCorrelation, Plot.LINE);
plot.setColor(Color.black);
plot.addLabel(0, 0, label);
ImageJUtils.display(title, plot, wo);
}
add(sb, pearsonCorr);
add(sb, rankedCorr);
add(sb, slope);
label = String.format("n = %d. Median = %s nm", depthStats.getN(), MathUtils.rounded(median));
new HistogramPlotBuilder(TITLE, depthStats, "Match Depth (nm)").setRemoveOutliersOption(1).setPlotLabel(label).show(wo);
// Plot histograms of the stats on the same window
final double[] lower = new double[filterCriteria.length];
final double[] upper = new double[lower.length];
final double[] min = new double[lower.length];
final double[] max = new double[lower.length];
for (int i = 0; i < stats[0].length; i++) {
final double[] limits = showDoubleHistogram(stats, i, wo, matchScores);
lower[i] = limits[0];
upper[i] = limits[1];
min[i] = limits[2];
max[i] = limits[3];
}
// Reconfigure some of the range limits
// Make this a bit bigger
upper[FILTER_SIGNAL] *= 2;
// Make this a bit bigger
upper[FILTER_SNR] *= 2;
final double factor = 0.25;
if (lower[FILTER_MIN_WIDTH] != 0) {
// (assuming lower is less than 1)
upper[FILTER_MIN_WIDTH] = 1 - Math.max(0, factor * (1 - lower[FILTER_MIN_WIDTH]));
}
if (upper[FILTER_MIN_WIDTH] != 0) {
// (assuming upper is more than 1)
lower[FILTER_MAX_WIDTH] = 1 + Math.max(0, factor * (upper[FILTER_MAX_WIDTH] - 1));
}
// Round the ranges
final double[] interval = new double[stats[0].length];
interval[FILTER_SIGNAL] = SignalFilter.DEFAULT_INCREMENT;
interval[FILTER_SNR] = SnrFilter.DEFAULT_INCREMENT;
interval[FILTER_MIN_WIDTH] = WidthFilter2.DEFAULT_MIN_INCREMENT;
interval[FILTER_MAX_WIDTH] = WidthFilter.DEFAULT_INCREMENT;
interval[FILTER_SHIFT] = ShiftFilter.DEFAULT_INCREMENT;
interval[FILTER_ESHIFT] = EShiftFilter.DEFAULT_INCREMENT;
interval[FILTER_PRECISION] = PrecisionFilter.DEFAULT_INCREMENT;
interval[FILTER_ITERATIONS] = 0.1;
interval[FILTER_EVALUATIONS] = 0.1;
// Create a range increment
final double[] increment = new double[lower.length];
for (int i = 0; i < increment.length; i++) {
lower[i] = MathUtils.floor(lower[i], interval[i]);
upper[i] = MathUtils.ceil(upper[i], interval[i]);
final double range = upper[i] - lower[i];
// Allow clipping if the range is small compared to the min increment
double multiples = range / interval[i];
// Use 8 multiples for the equivalent of +/- 4 steps around the centre
if (multiples < 8) {
multiples = Math.ceil(multiples);
} else {
multiples = 8;
}
increment[i] = MathUtils.ceil(range / multiples, interval[i]);
if (i == FILTER_MIN_WIDTH) {
// Requires clipping based on the upper limit
lower[i] = upper[i] - increment[i] * multiples;
} else {
upper[i] = lower[i] + increment[i] * multiples;
}
}
for (int i = 0; i < stats[0].length; i++) {
lower[i] = MathUtils.round(lower[i]);
upper[i] = MathUtils.round(upper[i]);
min[i] = MathUtils.round(min[i]);
max[i] = MathUtils.round(max[i]);
increment[i] = MathUtils.round(increment[i]);
sb.append('\t').append(min[i]).append(':').append(lower[i]).append('-').append(upper[i]).append(':').append(max[i]);
}
// Disable some filters
increment[FILTER_SIGNAL] = Double.POSITIVE_INFINITY;
// increment[FILTER_SHIFT] = Double.POSITIVE_INFINITY;
increment[FILTER_ESHIFT] = Double.POSITIVE_INFINITY;
wo.tile();
sb.append('\t').append(TextUtils.nanosToString(runTime));
createTable().append(sb.toString());
if (settings.saveFilterRange) {
GUIFilterSettings filterSettings = SettingsManager.readGuiFilterSettings(0);
String filename = (silent) ? filterSettings.getFilterSetFilename() : ImageJUtils.getFilename("Filter_range_file", filterSettings.getFilterSetFilename());
if (filename == null) {
return;
}
// Remove extension to store the filename
filename = FileUtils.replaceExtension(filename, ".xml");
filterSettings = filterSettings.toBuilder().setFilterSetFilename(filename).build();
// Create a filter set using the ranges
final ArrayList<Filter> filters = new ArrayList<>(4);
// Create the multi-filter using the same precision type as that used during fitting.
// Currently no support for z-filter as 3D astigmatism fitting is experimental.
final PrecisionMethod precisionMethod = getPrecisionMethod((DirectFilter) multiFilter.getFilter());
Function<double[], Filter> generator;
if (precisionMethod == PrecisionMethod.POISSON_CRLB) {
generator = parameters -> new MultiFilterCrlb(parameters[FILTER_SIGNAL], (float) parameters[FILTER_SNR], parameters[FILTER_MIN_WIDTH], parameters[FILTER_MAX_WIDTH], parameters[FILTER_SHIFT], parameters[FILTER_ESHIFT], parameters[FILTER_PRECISION], 0f, 0f);
} else if (precisionMethod == PrecisionMethod.MORTENSEN) {
generator = parameters -> new MultiFilter(parameters[FILTER_SIGNAL], (float) parameters[FILTER_SNR], parameters[FILTER_MIN_WIDTH], parameters[FILTER_MAX_WIDTH], parameters[FILTER_SHIFT], parameters[FILTER_ESHIFT], parameters[FILTER_PRECISION], 0f, 0f);
} else {
// Default
generator = parameters -> new MultiFilter2(parameters[FILTER_SIGNAL], (float) parameters[FILTER_SNR], parameters[FILTER_MIN_WIDTH], parameters[FILTER_MAX_WIDTH], parameters[FILTER_SHIFT], parameters[FILTER_ESHIFT], parameters[FILTER_PRECISION], 0f, 0f);
}
filters.add(generator.apply(lower));
filters.add(generator.apply(upper));
filters.add(generator.apply(increment));
if (saveFilters(filename, filters)) {
SettingsManager.writeSettings(filterSettings);
}
// Create a filter set using the min/max and the initial bounds.
// Set sensible limits
min[FILTER_SIGNAL] = Math.max(min[FILTER_SIGNAL], 30);
max[FILTER_SNR] = Math.min(max[FILTER_SNR], 10000);
max[FILTER_PRECISION] = Math.min(max[FILTER_PRECISION], 100);
// Make the 4-set filters the same as the 3-set filters.
filters.clear();
filters.add(generator.apply(min));
filters.add(generator.apply(lower));
filters.add(generator.apply(upper));
filters.add(generator.apply(max));
saveFilters(FileUtils.replaceExtension(filename, ".4.xml"), filters);
}
spotFitResults.min = min;
spotFitResults.max = max;
}
Aggregations