Use of org.apache.commons.math3.util.Pair in project gatk by broadinstitute.
The class GCBiasSimulatedData, method simulatedData.
// visible for the integration test
public static Pair<ReadCountCollection, double[]> simulatedData(final int numTargets, final int numSamples) {
    final List<Target> phonyTargets = SimulatedTargets.phonyTargets(numTargets);
    final List<String> phonySamples = SimulatedSamples.phonySamples(numSamples);

    final Random random = new Random(13);
    final double[] gcContentByTarget = IntStream.range(0, numTargets)
            .mapToDouble(n -> 0.5 + 0.2 * random.nextGaussian())
            .map(x -> Math.min(x, 0.95))
            .map(x -> Math.max(x, 0.05))
            .toArray();
    final double[] gcBiasByTarget = Arrays.stream(gcContentByTarget)
            .map(QUADRATIC_GC_BIAS_CURVE::apply)
            .toArray();

    // model mainly GC bias with a small random amount of non-GC bias
    // thus noise after GC correction should be nearly zero
    final RealMatrix counts = new Array2DRowRealMatrix(numTargets, numSamples);
    counts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int target, final int column, final double value) {
            return gcBiasByTarget[target] * (1.0 + 0.01 * random.nextDouble());
        }
    });

    final ReadCountCollection rcc = new ReadCountCollection(phonyTargets, phonySamples, counts);
    return new ImmutablePair<>(rcc, gcContentByTarget);
}
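A minimal usage sketch, not taken from the GATK test suite, showing how an integration test might unpack and sanity-check the returned pair. The sizes and the TestNG-style assertions are illustrative; getKey/getValue are used because they exist both on org.apache.commons.math3.util.Pair and on the commons-lang3 ImmutablePair actually constructed above.

// Hypothetical test fragment; counts and assertions are illustrative only.
final Pair<ReadCountCollection, double[]> simulated = GCBiasSimulatedData.simulatedData(500, 10);
final ReadCountCollection readCounts = simulated.getKey();
final double[] gcContentByTarget = simulated.getValue();
Assert.assertEquals(readCounts.targets().size(), gcContentByTarget.length);
// every simulated GC fraction was clamped into [0.05, 0.95] by the min/max maps above
Assert.assertTrue(Arrays.stream(gcContentByTarget).allMatch(gc -> gc >= 0.05 && gc <= 0.95));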
Use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
The class CNLOHCaller, method calcNewRhos.
private double[] calcNewRhos(final List<ACNVModeledSegment> segments,
                             final List<double[][][]> responsibilitiesBySeg,
                             final double lambda, final double[] rhos,
                             final int[] mVals, final int[] nVals,
                             final JavaSparkContext ctx) {
    // Since we pass in the entire responsibilities matrix, we need the correct index for each rho. That, and the
    // fact that this is a univariate objective function, means we need to create an instance for each rho. And
    // then we blast across Spark.
    final List<Pair<? extends Function<Double, Double>, SearchInterval>> objectives = IntStream.range(0, rhos.length)
            .mapToObj(i -> new Pair<>(new Function<Double, Double>() {
                @Override
                public Double apply(Double rho) {
                    return calculateESmnObjective(rho, segments, responsibilitiesBySeg, mVals, nVals, lambda, i);
                }
            }, new SearchInterval(0.0, 1.0, rhos[i])))
            .collect(Collectors.toList());

    final JavaRDD<Pair<? extends Function<Double, Double>, SearchInterval>> objectivesRDD = ctx.parallelize(objectives);
    final List<Double> resultsAsDouble = objectivesRDD
            .map(objective -> optimizeIt(objective.getFirst(), objective.getSecond()))
            .collect();

    return resultsAsDouble.stream().mapToDouble(Double::doubleValue).toArray();
}
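The optimizeIt helper that each Spark task calls is not shown on this page. The following is a plausible sketch, consistent with the SearchInterval arguments built above, using the commons-math3 BrentOptimizer; the tolerances, the evaluation cap, and the choice of GoalType.MAXIMIZE are assumptions, not the GATK implementation.

import org.apache.commons.math3.optim.MaxEval;
import org.apache.commons.math3.optim.nonlinear.scalar.GoalType;
import org.apache.commons.math3.optim.univariate.BrentOptimizer;
import org.apache.commons.math3.optim.univariate.SearchInterval;
import org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction;
import java.util.function.Function;

// Hypothetical stand-in for the optimizeIt helper referenced above.
private static double optimizeIt(final Function<Double, Double> objective, final SearchInterval interval) {
    final BrentOptimizer optimizer = new BrentOptimizer(1e-8, 1e-10);   // relative/absolute tolerances (assumed)
    return optimizer.optimize(
            new UnivariateObjectiveFunction(objective::apply),          // adapt Function<Double, Double> to UnivariateFunction
            GoalType.MAXIMIZE,                                          // assumed: the objective is maximized
            interval,                                                   // bounds (0, 1) with the current rho as start value
            new MaxEval(1000))
            .getPoint();                                                // the optimal rho
}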
Use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
The class ArtifactStatisticsScorer, method calculateArtifactPValue.
/** p-value for being an artifact
 *
 * @param totalAltAlleleCount total number of alt reads
 * @param artifactAltAlleleCount alt read count in the pair orientation that supports an artifact
 * @param biasP believed bias p value for a binomial distribution in artifact variants
 * @return p-value for this being an artifact
 */
public static double calculateArtifactPValue(final int totalAltAlleleCount, final int artifactAltAlleleCount, final double biasP) {
    ParamUtils.isPositiveOrZero(biasP, "bias parameter must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount, "total alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(artifactAltAlleleCount, "artifact-supporting alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount - artifactAltAlleleCount, "total alt count must be greater than or equal to the artifact alt count.");
    return new BinomialDistribution(null, totalAltAlleleCount, biasP).cumulativeProbability(artifactAltAlleleCount);
}
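An illustrative call with made-up counts. The returned value is P(X <= artifactAltAlleleCount) under Binomial(totalAltAlleleCount, biasP), so a small p-value means fewer artifact-orientation reads were seen than the artifact model predicts. Passing null as the RandomGenerator is safe here because only cumulativeProbability is evaluated; the generator would matter only for sampling.

// Illustrative numbers only: 40 alt reads total, 20 in the artifact-supporting
// orientation, with a believed artifact bias of 0.96.
final double pValue = ArtifactStatisticsScorer.calculateArtifactPValue(40, 20, 0.96);
// 20 of 40 is far below the ~38 expected under a 0.96 bias, so pValue is tiny
// and the artifact hypothesis would be rejected at any common threshold.
System.out.println("artifact p-value = " + pValue);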
Use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
The class TargetCoverageSexGenotypeCalculator, method processReadCountsAndTargets.
/**
 * Processes raw read counts and targets:
 * <dl>
 *     <dt> If more than one sample is present in the collection, filters out fully uncovered targets
 *     from read counts and removes the uncovered targets from the target list </dt>
 *
 *     <dt> Otherwise, does nothing and warns the user </dt>
 * </dl>
 *
 * @param rawReadCounts raw read count collection
 * @param targetList user-provided target list
 * @return pair of processed read counts and targets
 */
private ImmutablePair<ReadCountCollection, List<Target>> processReadCountsAndTargets(
        @Nonnull final ReadCountCollection rawReadCounts,
        @Nonnull final List<Target> targetList) {
    final ReadCountCollection finalReadCounts;
    final List<Target> finalTargetList;

    /* remove totally uncovered targets */
    if (rawReadCounts.columnNames().size() > 1) {
        finalReadCounts = ReadCountCollectionUtils.removeTotallyUncoveredTargets(rawReadCounts, logger);
        final Set<Target> targetSetFromProcessedReadCounts = new HashSet<>(finalReadCounts.targets());
        finalTargetList = targetList.stream()
                .filter(targetSetFromProcessedReadCounts::contains)
                .collect(Collectors.toList());
    } else {
        final long numUncoveredTargets = rawReadCounts.records().stream()
                .filter(rec -> (int) rec.getDouble(0) == 0).count();
        final long numAllTargets = rawReadCounts.targets().size();
        logger.info("Since only one sample is given for genotyping, the user is responsible for asserting" +
                " the suitability of the targets. Fully uncovered (irrelevant) targets cannot be automatically" +
                " identified (total targets: " + numAllTargets + ", uncovered targets: " + numUncoveredTargets + ")");
        finalReadCounts = rawReadCounts;
        finalTargetList = targetList;
    }
    return ImmutablePair.of(finalReadCounts, finalTargetList);
}
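A standalone illustration, with hypothetical string data, of the filter pattern used in the multi-sample branch above: testing membership against a HashSet keeps the filter linear in the list size while preserving the order of the user-provided list.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

// Hypothetical data standing in for targets; only the pattern matches the code above.
final List<String> userProvided = Arrays.asList("t1", "t2", "t3", "t4");
final Set<String> survivedFiltering = new HashSet<>(Arrays.asList("t4", "t2"));
final List<String> finalList = userProvided.stream()
        .filter(survivedFiltering::contains)
        .collect(Collectors.toList());
// finalList is [t2, t4]: restricted to the survivors, in the original user-provided order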
Use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
The class HetPulldownCalculator, method isPileupHetCompatible.
/**
 * Returns true if the distribution of major and other base-pair counts from a pileup at a locus is compatible with
 * an allele fraction of 0.5.
 *
 * <p>
 * Compatibility is defined by a p-value threshold. That is, compute the two-sided p-value of observing
 * the number of major read counts out of the total number of reads, assuming the given heterozygous
 * allele fraction. If the p-value is less than the given threshold, then reject the null hypothesis
 * that the heterozygous allele fraction is 0.5 (i.e., the SNP is likely to be homozygous) and return false;
 * otherwise return true.
 * </p>
 * @param baseCounts base-pair counts
 * @param totalBaseCount total base-pair count (excluding N, etc.)
 * @param pvalThreshold p-value threshold for the two-sided binomial test (should be in [0, 1], but no check is performed)
 * @return boolean compatibility with heterozygous allele fraction
 */
@VisibleForTesting
protected static boolean isPileupHetCompatible(final Nucleotide.Counter baseCounts, final int totalBaseCount, final double pvalThreshold) {
    final int majorReadCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).max().getAsInt();

    if (majorReadCount == 0 || totalBaseCount - majorReadCount == 0) {
        return false;
    }

    final double pval = new BinomialTest().binomialTest(totalBaseCount, majorReadCount, HET_ALLELE_FRACTION,
            AlternativeHypothesis.TWO_SIDED);
    return pval >= pvalThreshold;
}
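A worked illustration, with made-up counts, of the underlying commons-math3 call: 12 major reads out of 20 total, tested two-sided against a null heterozygous allele fraction of 0.5.

import org.apache.commons.math3.stat.inference.AlternativeHypothesis;
import org.apache.commons.math3.stat.inference.BinomialTest;

// 12 of 20 reads carry the major base; under p = 0.5 the two-sided p-value is ~0.50,
// so with a typical threshold such as 0.05 this pileup would be kept as het-compatible.
final double pval = new BinomialTest().binomialTest(20, 12, 0.5, AlternativeHypothesis.TWO_SIDED);
System.out.println("two-sided binomial p-value = " + pval);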