use of org.apache.commons.math3.exception.MaxCountExceededException in project EnrichmentMapApp by BaderLab.
the class MannWhitneyUTestSided method calculateAsymptoticPValue.
/**
* @param Umin smallest of the two Mann-Whitney U statistics, min(U1, U2)
* @param U1 Mann-Whitney U statistic of the first sample
* @param U2 Mann-Whitney U statistic of the second sample
* @param n1 number of subjects in first sample
* @param n2 number of subjects in second sample
* @param side whether to compute a two-sided, less, or greater p-value
* @return asymptotic p-value for the requested side
* @throws ConvergenceException if the p-value can not be computed
* due to a convergence error
* @throws MaxCountExceededException if the maximum number of
* iterations is exceeded
*/
private double calculateAsymptoticPValue(final double Umin, final double U1, final double U2, final int n1, final int n2, final Type side) throws ConvergenceException, MaxCountExceededException {
    /* long multiplication to avoid overflow (double not used due to efficiency
     * and to avoid precision loss)
     */
    final long n1n2prod = (long) n1 * n2;
    // http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U#Normal_approximation
    final double EU = n1n2prod / 2.0;
    final double VarU = n1n2prod * (n1 + n2 + 1) / 12.0;
    final double z = (Umin - EU) / FastMath.sqrt(VarU);
    // No try-catch or advertised exception because args are valid
    final NormalDistribution standardNormal = new NormalDistribution(0, 1);
    double p = 2 * standardNormal.cumulativeProbability(z);
    if (side == Type.TWO_SIDED) {
        return p;
    }
    if (side == Type.LESS) {
        if (U1 < U2) {
            return 0.5 * p;
        } else {
            return 1.0 - (0.5 * p);
        }
    } else {
        if (U1 > U2) {
            return 0.5 * p;
        } else {
            return 1.0 - (0.5 * p);
        }
    }
}
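For reference, here is a minimal, self-contained sketch of the normal-approximation p-value computed above. The class name, U statistics, and sample sizes are hypothetical; only the Commons Math calls (NormalDistribution, FastMath) mirror the method itself.

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.util.FastMath;

public class MannWhitneyNormalApproximationDemo {
    public static void main(String[] args) {
        // Hypothetical U statistics for two samples of sizes n1 and n2.
        final int n1 = 20;
        final int n2 = 25;
        final double U1 = 130.0;
        final double U2 = (double) n1 * n2 - U1;  // U1 + U2 == n1 * n2
        final double Umin = FastMath.min(U1, U2);

        // Mean and variance of U under the null hypothesis.
        final double EU = (long) n1 * n2 / 2.0;
        final double VarU = (long) n1 * n2 * (n1 + n2 + 1) / 12.0;
        final double z = (Umin - EU) / FastMath.sqrt(VarU);

        // Since Umin <= EU, z <= 0; doubling the lower tail gives the two-sided p-value.
        final NormalDistribution standardNormal = new NormalDistribution(0, 1);
        final double p = 2 * standardNormal.cumulativeProbability(z);
        System.out.println("z = " + z + ", two-sided p = " + p);
    }
}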
use of org.apache.commons.math3.exception.MaxCountExceededException in project gatk-protected by broadinstitute.
the class AlleleFractionInitializer method initialMinorFractions.
/**
* Initialize minor fractions assuming no allelic bias <p></p>
*
* We integrate over the minor-allele fraction f to get posterior probabilities (responsibilities)
* of alt minor / ref minor; that is, the responsibility of alt minor is proportional to
* int_{0 to 1/2} f^a (1-f)^r df, and the responsibility of ref minor is proportional to
* int_{0 to 1/2} f^r (1-f)^a df. These integrals are in turn proportional to I(1/2, a + 1, r + 1)
* and I(1/2, r + 1, a + 1), respectively, where I is the regularized incomplete Beta function.
* By the symmetry I(x, a, b) = 1 - I(1 - x, b, a), these two values sum to 1, i.e. they are
* already-normalized responsibilities. <p></p>
*
* Finally, we set each minor fraction to the responsibility-weighted total count of
* minor-allele reads divided by the total read count, ignoring outliers.
*/
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                // if the special function can't be computed, give an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        // we achieve a flat prior via a single pseudocount for minor and non-minor reads, hence the +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}
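A minimal, self-contained sketch of the responsibility computation above, with made-up read counts: it evaluates the regularized incomplete Beta function via Commons Math, falls back to an all-or-nothing responsibility when the computation does not converge, and checks that the alt-minor and ref-minor responsibilities sum to 1 by the symmetry noted in the javadoc.

import org.apache.commons.math3.exception.MaxCountExceededException;
import org.apache.commons.math3.special.Beta;

public class MinorAlleleResponsibilityDemo {
    public static void main(String[] args) {
        // Hypothetical alt and ref read counts at a single het site.
        final int a = 12;
        final int r = 30;

        double altMinorResponsibility;
        try {
            // I(1/2; a + 1, r + 1): posterior probability that the alt allele is the minor one.
            altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
        } catch (final MaxCountExceededException e) {
            // fall back to an all-or-nothing responsibility if the computation does not converge
            altMinorResponsibility = a < r ? 1.0 : 0.0;
        }

        // By the symmetry I(x; a, b) = 1 - I(1 - x; b, a), the two responsibilities sum to 1.
        final double refMinorResponsibility = Beta.regularizedBeta(0.5, r + 1, a + 1);
        System.out.println("alt minor responsibility: " + altMinorResponsibility);
        System.out.println("alt + ref responsibilities: " + (altMinorResponsibility + refMinorResponsibility));
    }
}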
use of org.apache.commons.math3.exception.MaxCountExceededException in project gatk by broadinstitute.
the class AlleleFractionInitializer method initialMinorFractions.
/**
* Initialize minor fractions assuming no allelic bias <p></p>
*
* We integrate over the minor-allele fraction f to get posterior probabilities (responsibilities)
* of alt minor / ref minor; that is, the responsibility of alt minor is proportional to
* int_{0 to 1/2} f^a (1-f)^r df, and the responsibility of ref minor is proportional to
* int_{0 to 1/2} f^r (1-f)^a df. These integrals are in turn proportional to I(1/2, a + 1, r + 1)
* and I(1/2, r + 1, a + 1), respectively, where I is the regularized incomplete Beta function.
* By the symmetry I(x, a, b) = 1 - I(1 - x, b, a), these two values sum to 1, i.e. they are
* already-normalized responsibilities. <p></p>
*
* Finally, we set each minor fraction to the responsibility-weighted total count of
* minor-allele reads divided by the total read count, ignoring outliers.
*/
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                // if the special function can't be computed, give an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        // we achieve a flat prior via a single pseudocount for minor and non-minor reads, hence the +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}
use of org.apache.commons.math3.exception.MaxCountExceededException in project gatk-protected by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets down to those in segments that contain more than minProportion of all targets.</p>
* <p>Returns a fit result whose model is null if too few targets pass filtering. Note that, in the rest
* of this class, such a result is taken to mean that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    // and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
    if (filteredTargetsSegDiff.size() < numComponents) {
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }
    // Apache Commons expects data points in the first dimension. The second dimension has length 2
    // (instead of 1) to work around the Apache Commons API, which throws an exception if the length
    // of the second dimension is < 2; its entries are filled with uncorrelated Gaussian noise.
    final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }
    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
    try {
        multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        // did not converge; include the model as it was when the exception was thrown
        return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
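A minimal, runnable sketch of the EM-fit pattern above on synthetic data (the component means, variances, and counts are made up for illustration): 1-D values are padded to 2-D with low-variance Gaussian noise to satisfy the Commons Math dimension requirement, EM is seeded with MultivariateNormalMixtureExpectationMaximization.estimate, and a convergence failure is caught rather than propagated.

import java.util.Random;
import org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.fitting.MultivariateNormalMixtureExpectationMaximization;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.MaxCountExceededException;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.RandomGeneratorFactory;

public class GmmFitDemo {
    public static void main(String[] args) {
        final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(42));
        final NormalDistribution componentA = new NormalDistribution(rng, -1.0, 0.2);
        final NormalDistribution componentB = new NormalDistribution(rng, 1.0, 0.2);
        final NormalDistribution noise = new NormalDistribution(rng, 0, .1);

        // Synthetic 1-D data from two well-separated components, padded to 2-D with
        // uncorrelated noise because the Commons API requires dimension >= 2.
        final int n = 500;
        final double[][] data = new double[n][2];
        for (int i = 0; i < n; i++) {
            data[i][0] = (i % 2 == 0) ? componentA.sample() : componentB.sample();
            data[i][1] = noise.sample();
        }

        final MixtureMultivariateNormalDistribution initial =
                MultivariateNormalMixtureExpectationMaximization.estimate(data, 2);
        final MultivariateNormalMixtureExpectationMaximization em =
                new MultivariateNormalMixtureExpectationMaximization(data);
        try {
            em.fit(initial);
            System.out.println("converged; log-likelihood = " + em.getLogLikelihood());
        } catch (final MaxCountExceededException | ConvergenceException e) {
            System.out.println("EM did not converge: " + e.getMessage());
        }
    }
}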
use of org.apache.commons.math3.exception.MaxCountExceededException in project gatk by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets down to those in segments that contain more than minProportion of all targets.</p>
* <p>Returns a fit result whose model is null if too few targets pass filtering. Note that, in the rest
* of this class, such a result is taken to mean that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    // and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
    if (filteredTargetsSegDiff.size() < numComponents) {
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }
    // Apache Commons expects data points in the first dimension. The second dimension has length 2
    // (instead of 1) to work around the Apache Commons API, which throws an exception if the length
    // of the second dimension is < 2; its entries are filled with uncorrelated Gaussian noise.
    final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }
    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
    try {
        multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        // did not converge; include the model as it was when the exception was thrown
        return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}