use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method subsetReadCountsToUsableTargets.
/**
 * Subsets the targets in the input read-counts to the usable ones, based on the percentile
 * threshold indicated by the user.
 *
 * <p>
 * It returns a pair of objects: the left element is the updated read-counts containing only
 * the usable targets, and the right element is the corresponding target factors.
 * </p>
 *
 * @param readCounts the input read-counts.
 * @param targetFactorPercentileThreshold the minimum median count percentile under which targets are not considered useful.
 * @return never {@code null}.
 */
@VisibleForTesting
static Pair<ReadCountCollection, double[]> subsetReadCountsToUsableTargets(final ReadCountCollection readCounts,
        final double targetFactorPercentileThreshold, final Logger logger) {
    final double[] targetFactors = calculateTargetFactors(readCounts);
    final double threshold = new Percentile(targetFactorPercentileThreshold).evaluate(targetFactors);
    final List<Target> targetByIndex = readCounts.targets();
    // Keep the targets whose factor reaches the percentile threshold, preserving the original order.
    final Set<Target> result = IntStream.range(0, targetFactors.length)
            .filter(i -> targetFactors[i] >= threshold)
            .mapToObj(targetByIndex::get)
            .collect(Collectors.toCollection(LinkedHashSet::new));
    if (result.size() == targetByIndex.size()) {
        logger.info(String.format("All %d targets are kept", targetByIndex.size()));
        return new ImmutablePair<>(readCounts, targetFactors);
    } else {
        final int discardedCount = targetFactors.length - result.size();
        logger.info(String.format("Discarded %d target(s) out of %d with factors below %.2g (%.2f percentile)",
                discardedCount, targetFactors.length, threshold, targetFactorPercentileThreshold));
        final double[] targetFactorSubset = DoubleStream.of(targetFactors).filter(i -> i >= threshold).toArray();
        return new ImmutablePair<>(readCounts.subsetTargets(result), targetFactorSubset);
    }
}
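For illustration, a minimal sketch of how a caller might unpack the returned pair. The readCounts and logger variables are assumed to exist, the 25.0 percentile is an arbitrary example value, and the getLeft/getRight accessors assume the commons-lang3 Pair that the ImmutablePair return values suggest:

// Hypothetical call site: keep only targets at or above the 25th factor percentile.
final Pair<ReadCountCollection, double[]> usable =
        subsetReadCountsToUsableTargets(readCounts, 25.0, logger);
// Left element: read counts restricted to the usable targets.
final ReadCountCollection usableCounts = usable.getLeft();
// Right element: the factors of the retained targets, in matching order.
final double[] usableFactors = usable.getRight();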
use of org.apache.commons.math3.util.Pair in project gatk by broadinstitute.
the class CNLOHCaller method calcNewRhos.
private double[] calcNewRhos(final List<ACNVModeledSegment> segments,
        final List<double[][][]> responsibilitiesBySeg, final double lambda, final double[] rhos,
        final int[] mVals, final int[] nVals, final JavaSparkContext ctx) {
    // Since we pass in the entire responsibilities matrix, we need the correct index for each rho. That,
    // plus the fact that this is a univariate objective function, means we create an instance per rho
    // and then blast across Spark.
    final List<Pair<? extends Function<Double, Double>, SearchInterval>> objectives = IntStream.range(0, rhos.length)
            .mapToObj(i -> new Pair<>(new Function<Double, Double>() {

                @Override
                public Double apply(Double rho) {
                    return calculateESmnObjective(rho, segments, responsibilitiesBySeg, mVals, nVals, lambda, i);
                }
            }, new SearchInterval(0.0, 1.0, rhos[i])))
            .collect(Collectors.toList());
    final JavaRDD<Pair<? extends Function<Double, Double>, SearchInterval>> objectivesRDD = ctx.parallelize(objectives);
    // Optimize each objective on a Spark worker and gather the optimal rho values back on the driver.
    final List<Double> resultsAsDouble = objectivesRDD
            .map(objective -> optimizeIt(objective.getFirst(), objective.getSecond()))
            .collect();
    return resultsAsDouble.stream().mapToDouble(Double::doubleValue).toArray();
}
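The helper optimizeIt is not shown on this page. A minimal sketch of what it could look like with Commons Math's Brent optimizer follows; the goal direction, tolerances, and evaluation budget are illustrative assumptions, not the project's actual choices:

import java.util.function.Function;
import org.apache.commons.math3.optim.MaxEval;
import org.apache.commons.math3.optim.nonlinear.scalar.GoalType;
import org.apache.commons.math3.optim.univariate.BrentOptimizer;
import org.apache.commons.math3.optim.univariate.SearchInterval;
import org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction;

/** Hypothetical reconstruction: optimizes the objective over the search interval and returns the optimum. */
private static double optimizeIt(final Function<Double, Double> objective, final SearchInterval interval) {
    // Relative/absolute tolerances and the evaluation cap are illustrative choices.
    final BrentOptimizer optimizer = new BrentOptimizer(1e-10, 1e-12);
    return optimizer.optimize(
            // Adapt the java.util.function.Function to Commons Math's UnivariateFunction.
            new UnivariateObjectiveFunction(x -> objective.apply(x)),
            // Assumption: the E-step objective is maximized; the real helper may minimize instead.
            GoalType.MAXIMIZE,
            interval,
            new MaxEval(200)).getPoint();
}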
use of org.apache.commons.math3.util.Pair in project gatk by broadinstitute.
the class ArtifactStatisticsScorer method calculateArtifactPValue.
/** Calculates a p-value for a variant being an artifact.
 *
 * @param totalAltAlleleCount total number of alt reads
 * @param artifactAltAlleleCount number of alt reads in a pair orientation that supports an artifact
 * @param biasP the assumed bias parameter (binomial success probability) for artifact variants
 * @return p-value for this being an artifact
 */
public static double calculateArtifactPValue(final int totalAltAlleleCount, final int artifactAltAlleleCount,
        final double biasP) {
    ParamUtils.isPositiveOrZero(biasP, "bias parameter must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount, "total alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(artifactAltAlleleCount, "artifact supporting alt allele count must be positive or zero.");
    ParamUtils.isPositiveOrZero(totalAltAlleleCount - artifactAltAlleleCount,
            "Total alt count must be same or greater than the artifact alt count.");
    // A null RandomGenerator is acceptable here because only the CDF is evaluated, never sampling.
    return new BinomialDistribution(null, totalAltAlleleCount, biasP).cumulativeProbability(artifactAltAlleleCount);
}
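A quick worked example with illustrative numbers: with 30 alt reads, 2 of them in the artifact-supporting orientation, and an assumed bias of 0.5, the call reduces to a binomial tail probability:

// P(X <= 2) for X ~ Binomial(n = 30, p = 0.5) = (C(30,0) + C(30,1) + C(30,2)) / 2^30 ~= 4.3e-7,
// i.e. this few artifact-oriented reads would be very unlikely if the variant were an artifact with that bias.
final double pValue = ArtifactStatisticsScorer.calculateArtifactPValue(30, 2, 0.5);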
use of org.apache.commons.math3.util.Pair in project gatk by broadinstitute.
the class HDF5PCACoveragePoNCreationUtils method subsetReadCountsToUsableTargets.
(The gatk copy of this method is identical to the gatk-protected snippet shown at the top of this page.)
use of org.apache.commons.math3.util.Pair in project gatk-protected by broadinstitute.
the class CoverageModelEMWorkspace method updateCopyRatioPosteriorExpectationsLocal.
/**
* Local implementation of the E-step update of copy ratio posteriors
*
* @return a {@link SubroutineSignal} containing the update size (key: "error_norm")
*/
public SubroutineSignal updateCopyRatioPosteriorExpectationsLocal(final double admixingRatio) {
    /* step 1. fetch copy ratio emission data */
    final List<List<CoverageModelCopyRatioEmissionData>> copyRatioEmissionData = fetchCopyRatioEmissionDataLocal();
    /* step 2. run the forward-backward algorithm and calculate copy ratio posteriors */
    final INDArray sampleReadDepths = Transforms.exp(sampleMeanLogReadDepths, true);
    final List<CopyRatioExpectations> copyRatioPosteriorResults = sampleIndexStream().parallel()
            .mapToObj(si -> copyRatioExpectationsCalculator.getCopyRatioPosteriorExpectations(
                    CopyRatioCallingMetadata.builder()
                            .sampleName(processedSampleNameList.get(si))
                            .sampleSexGenotypeData(processedSampleSexGenotypeData.get(si))
                            .sampleCoverageDepth(sampleReadDepths.getDouble(si))
                            .emissionCalculationStrategy(EmissionCalculationStrategy.HYBRID_POISSON_GAUSSIAN)
                            .build(),
                    processedTargetList, copyRatioEmissionData.get(si)))
            .collect(Collectors.toList());
    /* update log chain posterior expectation */
    sampleLogChainPosteriors.assign(Nd4j.create(
            copyRatioPosteriorResults.stream()
                    .mapToDouble(CopyRatioExpectations::getLogChainPosteriorProbability)
                    .toArray(),
            new int[] { numSamples, 1 }));
    /* send the results back to the workers */
    final ImmutablePair<INDArray, INDArray> copyRatioPosteriorDataPair =
            convertCopyRatioLatentPosteriorExpectationsToNDArray(copyRatioPosteriorResults);
    final INDArray log_c_st = copyRatioPosteriorDataPair.left;
    final INDArray var_log_c_st = copyRatioPosteriorDataPair.right;
    /* partition the (log_c_st, var_log_c_st) pair and send the result to the workers via broadcast-hash-map */
    pushToWorkers(mapINDArrayPairToBlocks(log_c_st.transpose(), var_log_c_st.transpose()),
            (p, cb) -> cb.cloneWithUpdatedCopyRatioPosteriors(
                    p.get(cb.getTargetSpaceBlock()).left.transpose(),
                    p.get(cb.getTargetSpaceBlock()).right.transpose(),
                    admixingRatio));
    cacheWorkers("after E-step update of copy ratio posteriors");
    /* collect subroutine signals */
    final List<SubroutineSignal> sigs = mapWorkersAndCollect(CoverageModelEMComputeBlock::getLatestMStepSignal);
    final double errorNormInfinity = Collections.max(sigs.stream()
            .map(sig -> sig.<Double>get(StandardSubroutineSignals.RESIDUAL_ERROR_NORM))
            .collect(Collectors.toList()));
    return SubroutineSignal.builder().put(StandardSubroutineSignals.RESIDUAL_ERROR_NORM, errorNormInfinity).build();
}
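As a usage sketch, a hypothetical driver could read the update size back out of the returned signal; the workspace variable and the admixing ratio of 1.0 are illustrative, not from the project:

// Hypothetical driver step: run the local E-step update and inspect convergence.
final SubroutineSignal signal = workspace.updateCopyRatioPosteriorExpectationsLocal(1.0);
final double errorNorm = signal.<Double>get(StandardSubroutineSignals.RESIDUAL_ERROR_NORM);
// A small infinity-norm error suggests the copy ratio posteriors have stabilized.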