use of org.apache.commons.math3.distribution.NormalDistribution in project gatk by broadinstitute.
the class CoverageDropoutDetectorTest method getUnivariateGaussianTargetsWithDropout.
private Object[][] getUnivariateGaussianTargetsWithDropout(final double sigma, final double dropoutRate) {
    Random rng = new Random(337);
    final RandomGenerator randomGenerator = RandomGeneratorFactory.createRandomGenerator(rng);
    NormalDistribution n = new NormalDistribution(randomGenerator, 1, sigma);
    final int numDataPoints = 10000;
    final int numEventPoints = 2000;

    // Randomly select dropoutRate of targets and reduce by 25%-75% (uniformly distributed)
    UniformRealDistribution uniformRealDistribution = new UniformRealDistribution(randomGenerator, 0, 1.0);

    final List<ReadCountRecord.SingleSampleRecord> targetList = new ArrayList<>();
    for (int i = 0; i < numDataPoints; i++) {
        // the last numEventPoints targets get an extra +0.5 of coverage to simulate an event
        double coverage = n.sample() + (i < (numDataPoints - numEventPoints) ? 0.0 : 0.5);
        if (uniformRealDistribution.sample() < dropoutRate) {
            double multiplier = .25 + uniformRealDistribution.sample() / 2;
            coverage = coverage * multiplier;
        }
        targetList.add(new ReadCountRecord.SingleSampleRecord(
                new Target("arbitrary_name", new SimpleInterval("chr1", 100 + 2 * i, 101 + 2 * i)), coverage));
    }

    HashedListTargetCollection<ReadCountRecord.SingleSampleRecord> targets = new HashedListTargetCollection<>(targetList);
    List<ModeledSegment> segments = new ArrayList<>();
    segments.add(new ModeledSegment(new SimpleInterval("chr1", 100, 16050), 8000, 1));
    segments.add(new ModeledSegment(new SimpleInterval("chr1", 16100, 20200), 2000, 1.5));
    return new Object[][] { { targets, segments } };
}
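The interesting part of this provider is the simulation recipe: Gaussian coverage, an additive +0.5 shift for the event targets, and random multiplicative dropout. Below is a minimal, self-contained sketch of that recipe using only commons-math3; the class name, the array of plain doubles, and the parameter values are my own choices for illustration, not GATK types or defaults.

import java.util.Random;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.UniformRealDistribution;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.RandomGeneratorFactory;

public class DropoutSimulationSketch {
    public static void main(String[] args) {
        final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(337));
        final NormalDistribution coverageDist = new NormalDistribution(rng, 1, 0.1);
        final UniformRealDistribution uniform = new UniformRealDistribution(rng, 0, 1.0);
        final double dropoutRate = 0.05;
        final int numDataPoints = 10000;
        final int numEventPoints = 2000;

        final double[] coverage = new double[numDataPoints];
        for (int i = 0; i < numDataPoints; i++) {
            // baseline Gaussian coverage; the last numEventPoints values carry a +0.5 event shift
            double c = coverageDist.sample() + (i < numDataPoints - numEventPoints ? 0.0 : 0.5);
            // with probability dropoutRate, scale the value down to 25%-75% of itself
            if (uniform.sample() < dropoutRate) {
                c *= 0.25 + uniform.sample() / 2;
            }
            coverage[i] = c;
        }
        System.out.println("simulated " + coverage.length + " coverage values");
    }
}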
use of org.apache.commons.math3.distribution.NormalDistribution in project gatk by broadinstitute.
the class CNLOHCaller method calculateDoubleGaussian.
/**
 * Takes two half-Gaussian distributions that share a mode and uses one for values below the mode
 * and the other for values above the mode.
 *
 * If the mode is outside the credible interval, the calculation is done as if the interval boundary were
 * {@link CNLOHCaller#MIN_DIST_FROM_MODE} above/below the mode.
 *
 * @param val value at which to evaluate the "pdf"
 * @param credibleMode mode of the presumed distribution
 * @param credibleLow low end of the 95% credible interval
 * @param credibleHigh high end of the 95% credible interval
 * @return pdf with a minimum value of {@link CNLOHCaller#MIN_L}
 */
@VisibleForTesting
static double calculateDoubleGaussian(final double val, final double credibleMode, final double credibleLow, final double credibleHigh) {
    final double hiDist = Math.max(credibleHigh - credibleMode, MIN_DIST_FROM_MODE);
    final double loDist = Math.max(credibleMode - credibleLow, MIN_DIST_FROM_MODE);
    // only the density is needed, so no RandomGenerator is supplied to NormalDistribution
    return new NormalDistribution(null, credibleMode, (val >= credibleMode ? hiDist : loDist) / NUM_STD_95_CONF_INTERVAL_GAUSSIAN).density(val);
}
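To see the effect of using a different width on each side of the mode, here is a self-contained sketch of the same idea. MIN_DIST_FROM_MODE and NUM_STD_95_CONF_INTERVAL_GAUSSIAN are CNLOHCaller constants not shown on this page; the values 1e-10 and 1.96 below are my assumptions for illustration only.

import org.apache.commons.math3.distribution.NormalDistribution;

public class DoubleGaussianSketch {
    // assumed stand-ins for the CNLOHCaller constants
    private static final double MIN_DIST_FROM_MODE = 1e-10;
    private static final double NUM_STD_95_CONF_INTERVAL_GAUSSIAN = 1.96;

    static double calculateDoubleGaussian(final double val, final double credibleMode,
                                          final double credibleLow, final double credibleHigh) {
        final double hiDist = Math.max(credibleHigh - credibleMode, MIN_DIST_FROM_MODE);
        final double loDist = Math.max(credibleMode - credibleLow, MIN_DIST_FROM_MODE);
        // pick the half-Gaussian whose width matches the side of the mode that val falls on
        return new NormalDistribution(null, credibleMode,
                (val >= credibleMode ? hiDist : loDist) / NUM_STD_95_CONF_INTERVAL_GAUSSIAN).density(val);
    }

    public static void main(String[] args) {
        // mode 0.5 with an asymmetric credible interval [0.45, 0.7]:
        // points equally far from the mode get different densities on each side
        System.out.println(calculateDoubleGaussian(0.45, 0.5, 0.45, 0.7)); // narrow (low) side
        System.out.println(calculateDoubleGaussian(0.55, 0.5, 0.45, 0.7)); // wide (high) side
    }
}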
use of org.apache.commons.math3.distribution.NormalDistribution in project gatk by broadinstitute.
the class AdaptiveMetropolisSampler method sample.
public double sample(final RandomGenerator rng, final Function<Double, Double> logPDF) {
    Utils.nonNull(rng);
    Utils.nonNull(logPDF);
    final AbstractRealDistribution normal = new NormalDistribution(rng, 0, 1);
    final double proposal = xCurrent + stepSize * normal.sample();
    final double acceptanceProbability = (proposal < lowerBound || upperBound < proposal) ? 0
            : Math.min(1, Math.exp(logPDF.apply(proposal) - logPDF.apply(xCurrent)));
    // adjust stepSize larger/smaller to decrease/increase the acceptance rate
    final double correctionFactor = (acceptanceProbability - optimalAcceptanceRate) * adjustmentRate * (timeScale / (timeScale + iteration));
    stepSize *= Math.exp(correctionFactor);
    iteration++;
    return rng.nextDouble() < acceptanceProbability ? proposal : xCurrent;
}
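The adaptation rule is the interesting part: the step size grows when proposals are accepted more often than the target rate and shrinks otherwise, with the adjustment decaying as the iteration count grows. The following self-contained sketch applies the same rule to a toy one-dimensional target without the bounds check; the tuning values (optimal rate 0.4, adjustment rate 1.0, time scale 20) are my assumptions, not the GATK defaults.

import java.util.function.Function;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;

public class AdaptiveStepSketch {
    public static void main(String[] args) {
        final RandomGenerator rng = new Well19937c(13);
        final NormalDistribution standardNormal = new NormalDistribution(rng, 0, 1);
        // target: an unnormalized log-density, here a Gaussian centered at 3 with sd 0.5
        final Function<Double, Double> logPDF = x -> -0.5 * Math.pow((x - 3) / 0.5, 2);

        final double optimalAcceptanceRate = 0.4;
        final double adjustmentRate = 1.0;
        final double timeScale = 20.0;

        double x = 0.0;
        double stepSize = 1.0;
        for (int iteration = 0; iteration < 5000; iteration++) {
            final double proposal = x + stepSize * standardNormal.sample();
            final double acceptanceProbability =
                    Math.min(1, Math.exp(logPDF.apply(proposal) - logPDF.apply(x)));
            // grow the step when accepting too often, shrink it when rejecting too often
            stepSize *= Math.exp((acceptanceProbability - optimalAcceptanceRate)
                    * adjustmentRate * (timeScale / (timeScale + iteration)));
            if (rng.nextDouble() < acceptanceProbability) {
                x = proposal;
            }
        }
        System.out.println("final state = " + x + ", adapted step size = " + stepSize);
    }
}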
use of org.apache.commons.math3.distribution.NormalDistribution in project pyramid by cheng-li.
the class MultiLabelSynthesizer method independentNoise.
/**
 * y0: w=(0,1)
 * y1: w=(1,1)
 * y2: w=(1,0)
 * y3: w=(1,-1)
 * @return a synthetic multi-label dataset whose labels are assigned from noisy linear scores
 */
public static MultiLabelClfDataSet independentNoise() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();
    // generate weights
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        weights[k] = vector;
    }
    weights[0].set(0, 0);
    weights[0].set(1, 1);
    weights[1].set(0, 1);
    weights[1].set(1, 1);
    weights[2].set(0, 1);
    weights[2].set(1, 0);
    weights[3].set(0, 1);
    weights[3].set(1, -1);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    // independent Gaussian noise for each label
    NormalDistribution[] noises = new NormalDistribution[4];
    noises[0] = new NormalDistribution(0, 0.1);
    noises[1] = new NormalDistribution(0, 0.1);
    noises[2] = new NormalDistribution(0, 0.1);
    noises[3] = new NormalDistribution(0, 0.1);
    // assign labels
    int numFlipped = 0;
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            double score = dot + noises[k].sample();
            if (score >= 0) {
                dataSet.addLabel(i, k);
            }
            // the noise flipped the label relative to the noiseless decision at 0
            if (dot * score < 0) {
                numFlipped += 1;
            }
        }
    }
    System.out.println("number of flipped = " + numFlipped);
    return dataSet;
}
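The numFlipped counter records how often the additive Gaussian noise changes the sign of the score relative to the noiseless dot product, i.e., how many labels were flipped by the noise. A self-contained sketch of that effect for a single weight vector (the class name, the plain double[] weight vector, and the sample size are my own choices, not pyramid's API) might look like:

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.UniformRealDistribution;

public class LabelNoiseFlipSketch {
    public static void main(String[] args) {
        final NormalDistribution noise = new NormalDistribution(0, 0.1);
        final UniformRealDistribution feature = new UniformRealDistribution(-1, 1);
        final double[] w = {1, -1}; // same weights as y3 above

        final int numData = 10000;
        int numFlipped = 0;
        for (int i = 0; i < numData; i++) {
            final double dot = w[0] * feature.sample() + w[1] * feature.sample();
            final double score = dot + noise.sample();
            // a flip happens when the noise pushes the score across the decision boundary at 0
            if (dot * score < 0) {
                numFlipped++;
            }
        }
        System.out.println("flipped " + numFlipped + " of " + numData + " labels");
    }
}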
use of org.apache.commons.math3.distribution.NormalDistribution in project pyramid by cheng-li.
the class RegressionSynthesizer method univarNormal.
public RegDataSet univarNormal() {
    NormalDistribution normalDistribution = new NormalDistribution(0, 1);
    RegDataSet dataSet = RegDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(1)
            .dense(true)
            .missingValue(false)
            .build();
    for (int i = 0; i < numDataPoints; i++) {
        double featureValue = Sampling.doubleUniform(-1, 1);
        // label is the standard normal density at the feature value, plus additive noise
        double label = normalDistribution.density(featureValue);
        label += noise.sample();
        dataSet.setFeatureValue(i, 0, featureValue);
        dataSet.setLabel(i, label);
    }
    return dataSet;
}
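Note the two distinct uses of NormalDistribution here: density(featureValue) evaluates the bell curve to produce the regression target, while noise.sample() (a field of RegressionSynthesizer, not shown on this page) draws additive noise. A minimal self-contained version of the same recipe, where the noise scale and point count are my assumptions, could be:

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.UniformRealDistribution;

public class UnivarNormalSketch {
    public static void main(String[] args) {
        final NormalDistribution normalDistribution = new NormalDistribution(0, 1);
        final NormalDistribution noise = new NormalDistribution(0, 0.01);
        final UniformRealDistribution featureDist = new UniformRealDistribution(-1, 1);

        final int numDataPoints = 100;
        final double[][] xy = new double[numDataPoints][2];
        for (int i = 0; i < numDataPoints; i++) {
            final double featureValue = featureDist.sample();
            // target is the standard normal pdf at the feature, plus Gaussian noise
            xy[i][0] = featureValue;
            xy[i][1] = normalDistribution.density(featureValue) + noise.sample();
        }
        System.out.println("first point: x=" + xy[0][0] + ", y=" + xy[0][1]);
    }
}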