Use of org.apache.commons.math3.fraction.Fraction in project GDSC-SMLM by aherbert.
The class BenchmarkSpotFilter, method summariseResults.
private BenchmarkFilterResult summariseResults(TIntObjectHashMap<FilterResult> filterResults, FitEngineConfiguration config, MaximaSpotFilter spotFilter, boolean relativeDistances, boolean batchSummary) {
BenchmarkFilterResult filterResult = new BenchmarkFilterResult(filterResults, config, spotFilter);
// Note:
// Although we can compute the TP/FP score as each additional spot is added
// using the RankedScoreCalculator, this is not applicable to the PeakFit method.
// That method relies on all spot candidates being present in order to decide
// whether to fit a candidate as a multiple. So scoring only, for example, the top 10
// filter candidates may produce a better score than scoring all candidates and
// accumulating the scores for the top 10, but that is not how the algorithm uses
// the candidate set, i.e. it does not use the top 10, then the top 20 to refine
// the fit, etc. (the method is not iterative).
// Instead we require an assessment of how a subset of the scored candidates,
// taken in ranked order, contributes to the overall score, i.e. are the candidates
// ranked in the correct order, with those contributing most to the match to the
// underlying data near the top and those contributing least at the end?
// TODO: We could add some smart filtering of candidates before ranking. This would
// allow assessment of the candidate set handed to PeakFit, e.g. threshold the image
// and only use candidates that are in the foreground region.
double[][] cumul = histogramFailures(filterResult);
// Create the overall match score
final double[] total = new double[3];
final ArrayList<ScoredSpot> allSpots = new ArrayList<BenchmarkSpotFilter.ScoredSpot>();
filterResults.forEachValue(new TObjectProcedure<FilterResult>() {
public boolean execute(FilterResult result) {
total[0] += result.result.getTP();
total[1] += result.result.getFP();
total[2] += result.result.getFN();
allSpots.addAll(Arrays.asList(result.spots));
return true;
}
});
double tp = total[0], fp = total[1], fn = total[2];
FractionClassificationResult allResult = new FractionClassificationResult(tp, fp, 0, fn);
// The number of actual results
final double n = (tp + fn);
StringBuilder sb = new StringBuilder();
double signal = (simulationParameters.minSignal + simulationParameters.maxSignal) * 0.5;
// Create the benchmark settings and the fitting settings
sb.append(imp.getStackSize()).append("\t");
final int w = lastAnalysisBorder.width;
final int h = lastAnalysisBorder.height;
sb.append(w).append("\t");
sb.append(h).append("\t");
sb.append(Utils.rounded(n)).append("\t");
double density = (n / imp.getStackSize()) / (w * h) / (simulationParameters.a * simulationParameters.a / 1e6);
sb.append(Utils.rounded(density)).append("\t");
sb.append(Utils.rounded(signal)).append("\t");
sb.append(Utils.rounded(simulationParameters.s)).append("\t");
sb.append(Utils.rounded(simulationParameters.a)).append("\t");
sb.append(Utils.rounded(simulationParameters.depth)).append("\t");
sb.append(simulationParameters.fixedDepth).append("\t");
sb.append(Utils.rounded(simulationParameters.gain)).append("\t");
sb.append(Utils.rounded(simulationParameters.readNoise)).append("\t");
sb.append(Utils.rounded(simulationParameters.b)).append("\t");
sb.append(Utils.rounded(simulationParameters.b2)).append("\t");
// Compute the noise
double noise = simulationParameters.b2;
if (simulationParameters.emCCD) {
// The b2 parameter was computed without applying the EM-CCD noise factor of 2:
//   b2 = backgroundVariance + readVariance = simulationParameters.b + readVariance
// The noise factor should be applied only to the background variance
// (a small worked sketch of this adjustment follows this method).
final double readVariance = noise - simulationParameters.b;
noise = simulationParameters.b * 2 + readVariance;
}
sb.append(Utils.rounded(signal / Math.sqrt(noise))).append("\t");
sb.append(Utils.rounded(simulationParameters.s / simulationParameters.a)).append("\t");
sb.append(config.getDataFilterType()).append("\t");
//sb.append(spotFilter.getName()).append("\t");
sb.append(spotFilter.getSearch()).append("\t");
sb.append(spotFilter.getBorder()).append("\t");
sb.append(Utils.rounded(spotFilter.getSpread())).append("\t");
sb.append(config.getDataFilter(0)).append("\t");
final double param = config.getSmooth(0);
final double hwhmMin = config.getHWHMMin();
if (relativeDistances) {
sb.append(Utils.rounded(param * hwhmMin)).append("\t");
sb.append(Utils.rounded(param)).append("\t");
} else {
sb.append(Utils.rounded(param)).append("\t");
sb.append(Utils.rounded(param / hwhmMin)).append("\t");
}
sb.append(spotFilter.getDescription()).append("\t");
sb.append(lastAnalysisBorder.x).append("\t");
sb.append(MATCHING_METHOD[matchingMethod]).append("\t");
sb.append(Utils.rounded(lowerMatchDistance)).append("\t");
sb.append(Utils.rounded(matchDistance)).append("\t");
sb.append(Utils.rounded(lowerSignalFactor)).append("\t");
sb.append(Utils.rounded(upperSignalFactor));
resultPrefix = sb.toString();
// Add the results
sb.append("\t");
// Rank the scored spots by intensity
Collections.sort(allSpots);
// Produce Recall, Precision, Jaccard for each cut of the spot candidates
// (see the ranked-scoring sketch after this method).
double[] r = new double[allSpots.size() + 1];
double[] p = new double[r.length];
double[] j = new double[r.length];
double[] c = new double[r.length];
double[] truePositives = new double[r.length];
double[] falsePositives = new double[r.length];
double[] intensity = new double[r.length];
// Note: fn = n - tp
tp = fp = 0;
int i = 1;
p[0] = 1;
FastCorrelator corr = new FastCorrelator();
double lastC = 0;
double[] i1 = new double[r.length];
double[] i2 = new double[r.length];
int ci = 0;
SimpleRegression regression = new SimpleRegression(false);
for (ScoredSpot s : allSpots) {
if (s.match) {
// Score partial matches as part true-positive and part false-positive.
// TP can be above 1 if we are allowing multiple matches.
tp += s.getScore();
fp += s.antiScore();
// Just use a rounded intensity for now
final double spotIntensity = s.getIntensity();
final long v1 = (long) Math.round(spotIntensity);
final long v2 = (long) Math.round(s.intensity);
regression.addData(spotIntensity, s.intensity);
i1[ci] = spotIntensity;
i2[ci] = s.intensity;
ci++;
corr.add(v1, v2);
lastC = corr.getCorrelation();
} else
fp++;
r[i] = (double) tp / n;
p[i] = (double) tp / (tp + fp);
// (tp+fp+fn) == (fp+n) since tp+fn=n;
j[i] = (double) tp / (fp + n);
c[i] = lastC;
truePositives[i] = tp;
falsePositives[i] = fp;
intensity[i] = s.getIntensity();
i++;
}
i1 = Arrays.copyOf(i1, ci);
i2 = Arrays.copyOf(i2, ci);
final double slope = regression.getSlope();
sb.append(Utils.rounded(slope)).append("\t");
addResult(sb, allResult, c[c.length - 1]);
// Output the match results at the point where the recall reaches the configured fraction of the maximum recall.
double target = r[r.length - 1];
if (recallFraction < 100)
target *= recallFraction / 100.0;
int fractionIndex = 0;
while (fractionIndex < r.length && r[fractionIndex] < target) {
fractionIndex++;
}
if (fractionIndex == r.length)
fractionIndex--;
addResult(sb, new FractionClassificationResult(truePositives[fractionIndex], falsePositives[fractionIndex], 0, n - truePositives[fractionIndex]), c[fractionIndex]);
// Output the match results at the maximum Jaccard score
int maxIndex = 0;
for (int ii = 1; ii < r.length; ii++) {
if (j[maxIndex] < j[ii])
maxIndex = ii;
}
addResult(sb, new FractionClassificationResult(truePositives[maxIndex], falsePositives[maxIndex], 0, n - truePositives[maxIndex]), c[maxIndex]);
sb.append(Utils.rounded(time / 1e6));
// Calculate AUC (Average precision == Area Under Precision-Recall curve)
final double auc = AUCCalculator.auc(p, r);
// Compute the AUC using the adjusted precision curve, which uses the maximum
// precision for recall >= r (see the precision-envelope sketch after this method).
final double[] maxp = new double[p.length];
double max = 0;
for (int k = maxp.length; k-- > 0; ) {
if (max < p[k])
max = p[k];
maxp[k] = max;
}
final double auc2 = AUCCalculator.auc(maxp, r);
sb.append("\t").append(Utils.rounded(auc));
sb.append("\t").append(Utils.rounded(auc2));
// Output the number of fit failures that must be processed to capture fractions of the true positives
if (cumul[0].length != 0) {
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.80)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.90)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.95)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.99)));
sb.append("\t").append(Utils.rounded(cumul[0][cumul[0].length - 1]));
} else
sb.append("\t\t\t\t\t");
BufferedTextWindow resultsTable = getTable(batchSummary);
resultsTable.append(sb.toString());
// Store results
filterResult.auc = auc;
filterResult.auc2 = auc2;
filterResult.r = r;
filterResult.p = p;
filterResult.j = j;
filterResult.c = c;
filterResult.maxIndex = maxIndex;
filterResult.fractionIndex = fractionIndex;
filterResult.cumul = cumul;
filterResult.slope = slope;
filterResult.i1 = i1;
filterResult.i2 = i2;
filterResult.intensity = intensity;
filterResult.relativeDistances = relativeDistances;
filterResult.time = time;
return filterResult;
}
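The EM-CCD noise adjustment in summariseResults separates the read variance from b2 and doubles only the background variance. A tiny worked sketch of that arithmetic is below; the b and b2 values are invented for illustration.

// Sketch of the EM-CCD noise adjustment: double only the background variance.
// The b and b2 values are made up for illustration.
public class EmCcdNoiseSketch {
    public static void main(String[] args) {
        final double b = 4.0;             // background variance
        final double b2 = 5.5;            // background variance + read variance, no EM-CCD factor
        final double readVariance = b2 - b;
        final double noise = 2 * b + readVariance; // EM-CCD noise factor of 2 on the background only
        System.out.println("adjusted noise variance = " + noise);
    }
}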
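The ranking loop in summariseResults accumulates fractional TP/FP scores over the intensity-ranked candidates and reports recall, precision and Jaccard at every cut. The following is a minimal, self-contained sketch of that bookkeeping; ScoredCandidate, its fields and the example values are hypothetical stand-ins for the project's ScoredSpot class, not the actual implementation.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

// Minimal sketch of cumulative recall/precision/Jaccard over ranked candidates.
// ScoredCandidate and its fields are hypothetical stand-ins for the project's ScoredSpot.
public class RankedScoringSketch {
    static class ScoredCandidate {
        final double intensity;  // ranking key (highest first)
        final double score;      // fractional true-positive contribution
        final double antiScore;  // fractional false-positive contribution
        ScoredCandidate(double intensity, double score, double antiScore) {
            this.intensity = intensity;
            this.score = score;
            this.antiScore = antiScore;
        }
    }

    /** Returns {recall[], precision[], jaccard[]} for each cut of the ranked list. */
    static double[][] scoreCuts(List<ScoredCandidate> candidates, double nActual) {
        candidates.sort(Comparator.comparingDouble((ScoredCandidate c) -> c.intensity).reversed());
        final int size = candidates.size() + 1;
        final double[] recall = new double[size];
        final double[] precision = new double[size];
        final double[] jaccard = new double[size];
        precision[0] = 1; // by convention the empty cut has precision 1
        double tp = 0, fp = 0;
        int i = 1;
        for (ScoredCandidate c : candidates) {
            tp += c.score;
            fp += c.antiScore;
            recall[i] = tp / nActual;
            precision[i] = tp / (tp + fp);
            // Jaccard = tp / (tp + fp + fn); since fn = nActual - tp the
            // denominator simplifies to fp + nActual.
            jaccard[i] = tp / (fp + nActual);
            i++;
        }
        return new double[][] { recall, precision, jaccard };
    }

    public static void main(String[] args) {
        List<ScoredCandidate> spots = new ArrayList<>(Arrays.asList(
                new ScoredCandidate(90, 1.0, 0.0),   // full match
                new ScoredCandidate(70, 0.6, 0.4),   // partial match
                new ScoredCandidate(50, 0.0, 1.0))); // no match
        double[][] cuts = scoreCuts(spots, 2.0);      // 2 actual spots in the ground truth
        System.out.println(Arrays.toString(cuts[2])); // Jaccard at each cut
    }
}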
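The second AUC above integrates an adjusted precision curve in which each precision value is replaced by the maximum precision at any equal or higher recall. Below is a short sketch of that envelope followed by a plain trapezoidal integration; the trapezoidal sum is a generic stand-in and is not claimed to match the project's AUCCalculator.

// Sketch: build the max-precision envelope over the precision-recall curve and
// integrate precision over recall. The trapezoidal sum is a generic stand-in for
// the project's AUCCalculator, not its actual implementation.
public class PrecisionEnvelopeSketch {
    static double envelopeAuc(double[] precision, double[] recall) {
        final double[] maxp = new double[precision.length];
        double max = 0;
        for (int k = maxp.length; k-- > 0; ) {   // sweep from high recall to low
            if (max < precision[k]) {
                max = precision[k];
            }
            maxp[k] = max;                        // max precision over recall >= recall[k]
        }
        double auc = 0;
        for (int k = 1; k < recall.length; k++) {
            auc += 0.5 * (maxp[k] + maxp[k - 1]) * (recall[k] - recall[k - 1]);
        }
        return auc;
    }

    public static void main(String[] args) {
        final double[] recall = { 0.0, 0.5, 0.5, 1.0 };      // illustrative values
        final double[] precision = { 1.0, 1.0, 0.5, 0.67 };
        System.out.println(envelopeAuc(precision, recall));
    }
}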
Use of org.apache.commons.math3.fraction.Fraction in project gatk-protected by broadinstitute.
The class AlleleFractionSegmenterUnitTest, method generateAllelicCount.
protected static AllelicCount generateAllelicCount(final double minorFraction, final SimpleInterval position, final RandomGenerator rng, final GammaDistribution biasGenerator, final double outlierProbability) {
final int numReads = 100;
final double bias = biasGenerator.sample();
//flip a coin to decide alt minor (alt fraction = minor fraction) or ref minor (alt fraction = 1 - minor fraction)
final double altFraction = rng.nextDouble() < 0.5 ? minorFraction : 1 - minorFraction;
//the probability of an alt read is the alt fraction modified by the bias or, in the case of an outlier, random
final double pAlt = rng.nextDouble() < outlierProbability ? rng.nextDouble() : altFraction / (altFraction + (1 - altFraction) * bias);
final int numAltReads = new BinomialDistribution(rng, numReads, pAlt).sample();
final int numRefReads = numReads - numAltReads;
return new AllelicCount(position, numAltReads, numRefReads);
}
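The simulated alt-read probability combines the chosen alt fraction f with the sampled allelic bias as pAlt = f / (f + (1 - f) * bias), unless the outlier branch substitutes a uniform draw. The sketch below exercises the same formula using only commons-math3 samplers; the gamma shape/scale, seed and read depth are illustrative assumptions, not values taken from the test.

import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.GammaDistribution;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;

// Sketch of the bias-modified alt-read probability used above.
// The gamma parameters, seed and read depth are illustrative, not values from the test.
public class AltFractionSketch {
    public static void main(String[] args) {
        final RandomGenerator rng = new Well19937c(13);
        final GammaDistribution biasGenerator = new GammaDistribution(rng, 40.0, 0.025); // mean ~1.0
        final double minorFraction = 0.3;
        final int numReads = 100;

        final double bias = biasGenerator.sample();
        // flip a coin: alt minor or ref minor
        final double altFraction = rng.nextDouble() < 0.5 ? minorFraction : 1 - minorFraction;
        // bias-modified probability of observing an alt read
        final double pAlt = altFraction / (altFraction + (1 - altFraction) * bias);
        final int numAltReads = new BinomialDistribution(rng, numReads, pAlt).sample();
        System.out.printf("bias=%.3f pAlt=%.3f altReads=%d%n", bias, pAlt, numAltReads);
    }
}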
Use of org.apache.commons.math3.fraction.Fraction in project gatk by broadinstitute.
The class SNPSegmenter, method writeSegmentFile.
/**
* Write segment file based on maximum-likelihood estimates of the minor allele fraction at SNP sites,
* assuming the specified allelic bias. These estimates are converted to target coverages,
* which are written to a temporary file and then passed to {@link RCBSSegmenter}.
* @param snps TargetCollection of allelic counts at SNP sites
* @param sampleName sample name
* @param outputFile segment file to write to and return
* @param allelicBias allelic bias to use in estimate of minor allele fraction
*/
public static void writeSegmentFile(final TargetCollection<AllelicCount> snps, final String sampleName, final File outputFile, final double allelicBias) {
Utils.validateArg(snps.totalSize() > 0, "Must have a positive number of SNPs to perform SNP segmentation.");
try {
final File targetsFromSNPCountsFile = File.createTempFile("targets-from-snps", ".tsv");
final List<Target> targets = snps.targets().stream().map(ac -> new Target(name(ac), ac.getInterval())).collect(Collectors.toList());
final RealMatrix minorAlleleFractions = new Array2DRowRealMatrix(snps.targetCount(), 1);
minorAlleleFractions.setColumn(0, snps.targets().stream().mapToDouble(ac -> ac.estimateMinorAlleleFraction(allelicBias)).toArray());
ReadCountCollectionUtils.write(targetsFromSNPCountsFile, new ReadCountCollection(targets, Collections.singletonList(sampleName), minorAlleleFractions));
//segment SNPs based on observed log_2 minor allele fraction (log_2 is applied in CBS.R)
RCBSSegmenter.writeSegmentFile(sampleName, targetsFromSNPCountsFile.getAbsolutePath(), outputFile.getAbsolutePath(), false);
} catch (final IOException e) {
throw new UserException.CouldNotCreateOutputFile("Could not create temporary output file during " + "SNP segmentation.", e);
}
}
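writeSegmentFile packs the per-SNP minor-allele-fraction estimates into a one-column RealMatrix before writing the temporary read-count file. The following minimal sketch shows just that packing step with commons-math3; the fraction values are invented and the GATK ReadCountCollection plumbing is omitted.

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

// Sketch of the matrix-packing step: one row per SNP, a single sample column.
// The fraction values are invented for illustration.
public class MinorFractionMatrixSketch {
    public static void main(String[] args) {
        final double[] minorAlleleFractionEstimates = { 0.48, 0.21, 0.33, 0.50 };
        final RealMatrix minorAlleleFractions =
                new Array2DRowRealMatrix(minorAlleleFractionEstimates.length, 1);
        minorAlleleFractions.setColumn(0, minorAlleleFractionEstimates);
        System.out.println(minorAlleleFractions.getRowDimension() + " rows x "
                + minorAlleleFractions.getColumnDimension() + " column");
    }
}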
Use of org.apache.commons.math3.fraction.Fraction in project gatk by broadinstitute.
The class HetPulldownCalculator, method isPileupHetCompatible.
/**
* Returns true if the distribution of major and other base-pair counts from a pileup at a locus is compatible with
* allele fraction of 0.5.
*
* <p>
* Compatibility is defined by a p-value threshold. That is, compute the two-sided p-value of observing
* a number of major read counts out of a total number of reads, assuming the given heterozygous
* allele fraction. If the p-value is less than the given threshold, then reject the null hypothesis
* that the heterozygous allele fraction is 0.5 (i.e., SNP is likely to be homozygous) and return false,
* otherwise return true.
* </p>
* @param baseCounts base-pair counts
* @param totalBaseCount total base-pair counts (excluding N, etc.)
* @param pvalThreshold p-value threshold for two-sided binomial test (should be in [0, 1], but no check is performed)
* @return boolean compatibility with heterozygous allele fraction
*/
@VisibleForTesting
protected static boolean isPileupHetCompatible(final Nucleotide.Counter baseCounts, final int totalBaseCount, final double pvalThreshold) {
final int majorReadCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).max().getAsInt();
if (majorReadCount == 0 || totalBaseCount - majorReadCount == 0) {
return false;
}
final double pval = new BinomialTest().binomialTest(totalBaseCount, majorReadCount, HET_ALLELE_FRACTION, AlternativeHypothesis.TWO_SIDED);
return pval >= pvalThreshold;
}
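The het-compatibility check reduces to a two-sided exact binomial test from commons-math3 against an expected allele fraction of 0.5. The standalone sketch below runs the same test; the counts and the threshold are illustrative values.

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

// Sketch of the two-sided binomial test used above; the counts and threshold are illustrative.
public class HetCompatibilitySketch {
    public static void main(String[] args) {
        final int totalBaseCount = 60;
        final int majorReadCount = 36;
        final double hetAlleleFraction = 0.5;
        final double pvalThreshold = 0.05;
        final double pval = new BinomialTest().binomialTest(
                totalBaseCount, majorReadCount, hetAlleleFraction, AlternativeHypothesis.TWO_SIDED);
        // compatible with het if we fail to reject the 0.5 allele-fraction hypothesis
        System.out.println("p-value = " + pval + ", het-compatible = " + (pval >= pvalThreshold));
    }
}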
Use of org.apache.commons.math3.fraction.Fraction in project gatk-protected by broadinstitute.
The class StrandArtifact, method annotate.
@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
Utils.nonNull(gb);
Utils.nonNull(vc);
Utils.nonNull(likelihoods);
// do not annotate the genotype fields for normal
if (g.isHomRef()) {
return;
}
pi.put(NO_ARTIFACT, 0.95);
pi.put(ART_FWD, 0.025);
pi.put(ART_REV, 0.025);
// We use the allele with the highest LOD score
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);
final Collection<ReadLikelihoods<Allele>.BestAllele> bestAlleles = likelihoods.bestAlleles(g.getSampleName());
final int numFwdAltReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAllele)).count();
final int numRevAltReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative() && ba.allele.equals(altAllele)).count();
final int numFwdReads = (int) bestAlleles.stream().filter(ba -> !ba.read.isReverseStrand() && ba.isInformative()).count();
final int numRevReads = (int) bestAlleles.stream().filter(ba -> ba.read.isReverseStrand() && ba.isInformative()).count();
final int numAltReads = numFwdAltReads + numRevAltReads;
final int numReads = numFwdReads + numRevReads;
final EnumMap<StrandArtifactZ, Double> unnormalized_posterior_probabilities = new EnumMap<>(StrandArtifactZ.class);
final EnumMap<StrandArtifactZ, Double> maximum_a_posteriori_allele_fraction_estimates = new EnumMap<>(StrandArtifactZ.class);
/*** Compute the posterior probability of ARTIFACT_FWD and ARTIFACT_REV; it's a double integral over f and epsilon ***/
// the integrand is a polynomial of degree n, where n is the number of reads at the locus
// thus to integrate it exactly with Gauss-Legendre quadrature we need (n/2)+1 points
// (see the quadrature sketch after this method).
final int numIntegPointsForAlleleFraction = numReads / 2 + 1;
final int numIntegPointsForEpsilon = (numReads + ALPHA + BETA - 2) / 2 + 1;
final double likelihoodForArtifactFwd = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
final double likelihoodForArtifactRev = IntegrationUtils.integrate2d((f, epsilon) -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction, 0.0, 1.0, numIntegPointsForEpsilon);
unnormalized_posterior_probabilities.put(ART_FWD, pi.get(ART_FWD) * likelihoodForArtifactFwd);
unnormalized_posterior_probabilities.put(ART_REV, pi.get(ART_REV) * likelihoodForArtifactRev);
/*** Compute the posterior probability of NO_ARTIFACT; evaluate a single integral over the allele fraction ***/
final double likelihoodForNoArtifact = IntegrationUtils.integrate(f -> getIntegrandGivenNoArtifact(f, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForAlleleFraction);
unnormalized_posterior_probabilities.put(NO_ARTIFACT, pi.get(NO_ARTIFACT) * likelihoodForNoArtifact);
final double[] posterior_probabilities = MathUtils.normalizeFromRealSpace(unnormalized_posterior_probabilities.values().stream().mapToDouble(Double::doubleValue).toArray());
/*** Compute the maximum a posteriori estimate for allele fraction given strand artifact ***/
// For a fixed f, integrate the integrand over epsilon. This gives the likelihood p(x^+, x^- | f, z)
// for that fixed f, which is proportional to the posterior p(f | x^+, x^-, z).
final int numSamplePoints = 100;
final double[] samplePoints = GATKProtectedMathUtils.createEvenlySpacedPoints(0.0, 1.0, numSamplePoints);
double[] likelihoodsGivenForwardArtifact = new double[numSamplePoints];
double[] likelihoodsGivenReverseArtifact = new double[numSamplePoints];
for (int i = 0; i < samplePoints.length; i++) {
final double f = samplePoints[i];
likelihoodsGivenForwardArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numFwdReads, numRevReads, numFwdAltReads, numRevAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
likelihoodsGivenReverseArtifact[i] = IntegrationUtils.integrate(epsilon -> getIntegrandGivenArtifact(f, epsilon, numRevReads, numFwdReads, numRevAltReads, numFwdAltReads), 0.0, 1.0, numIntegPointsForEpsilon);
}
final int maxAlleleFractionIndexFwd = MathUtils.maxElementIndex(likelihoodsGivenForwardArtifact);
final int maxAlleleFractionIndexRev = MathUtils.maxElementIndex(likelihoodsGivenReverseArtifact);
maximum_a_posteriori_allele_fraction_estimates.put(ART_FWD, samplePoints[maxAlleleFractionIndexFwd]);
maximum_a_posteriori_allele_fraction_estimates.put(ART_REV, samplePoints[maxAlleleFractionIndexRev]);
// In the absence of a strand artifact, the MAP estimate for f reduces to the sample alt allele fraction
maximum_a_posteriori_allele_fraction_estimates.put(NO_ARTIFACT, (double) numAltReads / numReads);
gb.attribute(POSTERIOR_PROBABILITIES_KEY, posterior_probabilities);
gb.attribute(MAP_ALLELE_FRACTIONS_KEY, maximum_a_posteriori_allele_fraction_estimates.values().stream().mapToDouble(Double::doubleValue).toArray());
}
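The quadrature point counts above follow from the exactness of Gauss-Legendre integration: m points integrate any polynomial of degree up to 2m - 1 exactly, so a degree-n integrand needs roughly n/2 + 1 points. The sketch below checks that property with the commons-math3 Gauss integrator; the degree-6 polynomial and the unit interval are arbitrary illustrative choices.

import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.integration.gauss.GaussIntegrator;
import org.apache.commons.math3.analysis.integration.gauss.GaussIntegratorFactory;

// Sketch: m-point Gauss-Legendre quadrature is exact for polynomials of degree <= 2m - 1.
// The degree-6 polynomial and the [0, 1] interval are arbitrary illustrative choices.
public class GaussLegendreSketch {
    public static void main(String[] args) {
        final int degree = 6;
        final UnivariateFunction poly = x -> Math.pow(x, degree);  // integral over [0, 1] is 1/7
        final int numPoints = degree / 2 + 1;                      // 4 points suffice for degree 6
        final GaussIntegrator integrator = new GaussIntegratorFactory().legendre(numPoints, 0.0, 1.0);
        System.out.println(integrator.integrate(poly) + " vs exact " + (1.0 / (degree + 1)));
    }
}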