Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From the class CoveragePoNQCUtils, method getContigToMedianCRMap.
@VisibleForTesting
static Map<String, Double> getContigToMedianCRMap(final ReadCountCollection readCountCollection) {
    final List<String> allContigsPresent = retrieveAllContigsPresent(readCountCollection);
    final Map<String, Double> contigToMedian = new LinkedHashMap<>();
    for (final String contig : allContigsPresent) {
        // Restrict the collection to the targets on this contig.
        final ReadCountCollection oneContigReadCountCollection = readCountCollection.subsetTargets(
                readCountCollection.targets().stream()
                        .filter(t -> t.getContig().equals(contig))
                        .collect(Collectors.toSet()));
        final double[] flatCounts = Doubles.concat(oneContigReadCountCollection.counts().getData());
        // Convert from log2 space into copy-ratio (CR) space before taking the median.
        final double[] flatCountsInCRSpace = DoubleStream.of(flatCounts).map(d -> Math.pow(2, d)).toArray();
        contigToMedian.put(contig, new Median().evaluate(flatCountsInCRSpace));
    }
    return contigToMedian;
}
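The essential transformation here is the log2-to-copy-ratio conversion that precedes the median. Below is a minimal, self-contained sketch of just that step; the class name and the sample values are hypothetical, made up for illustration.

import java.util.stream.DoubleStream;

import org.apache.commons.math3.stat.descriptive.rank.Median;

public final class CRMedianSketch {
    public static void main(final String[] args) {
        // Hypothetical log2 copy-ratio values for one contig.
        final double[] log2Values = {-0.1, 0.05, 0.0, 0.2, -0.3};
        // Map each log2 value into copy-ratio (CR) space via 2^x, then take the median.
        final double[] crValues = DoubleStream.of(log2Values).map(d -> Math.pow(2, d)).toArray();
        System.out.println("Median CR: " + new Median().evaluate(crValues));
    }
}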
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk-protected by broadinstitute.
From the class PCATangentNormalizationUtils, method tangentNormalizeNonSpark.
/**
 * Tangent normalize given the raw PoN data without using Spark.
 */
private static PCATangentNormalizationResult tangentNormalizeNonSpark(final ReadCountCollection targetFactorNormalizedCounts,
                                                                      final RealMatrix reducedPanelCounts,
                                                                      final RealMatrix reducedPanelPInvCounts,
                                                                      final CaseToPoNTargetMapper targetMapper,
                                                                      final RealMatrix tangentNormalizationInputCounts) {
    // Calculate the beta-hats for the input read count columns (samples).
    logger.info("Calculating beta hats...");
    final RealMatrix tangentBetaHats = calculateBetaHats(reducedPanelPInvCounts, tangentNormalizationInputCounts, EPSILON);
    // Actual tangent normalization step.
    logger.info("Performing actual tangent normalization (" + tangentNormalizationInputCounts.getColumnDimension() + " columns)...");
    final RealMatrix tangentNormalizedCounts = tangentNormalize(reducedPanelCounts, tangentNormalizationInputCounts, tangentBetaHats);
    // Output the tangent-normalized counts.
    logger.info("Post-processing tangent normalization results...");
    final ReadCountCollection tangentNormalized = targetMapper.fromPoNtoCaseCountCollection(tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized = targetMapper.fromPoNtoCaseCountCollection(tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
    return new PCATangentNormalizationResult(tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);
}
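Conceptually, tangent normalization subtracts from each case column its projection onto the span of the reduced panel: tangentNormalized = input - panel * betaHats, with betaHats = pinv(panel) * input. The sketch below shows that linear algebra with plain Apache Commons Math; it assumes this is what the real tangentNormalize computes, and the tiny matrices are made up for illustration.

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

public final class TangentNormalizeSketch {
    // Residual after projecting the input columns onto the panel columns.
    static RealMatrix tangentNormalize(final RealMatrix panel, final RealMatrix input, final RealMatrix betaHats) {
        return input.subtract(panel.multiply(betaHats));
    }

    public static void main(final String[] args) {
        // Made-up 3-target x 1-eigensample panel and 3-target x 1-sample case matrix.
        final RealMatrix panel = new Array2DRowRealMatrix(new double[][]{{1}, {0}, {0}});
        final RealMatrix input = new Array2DRowRealMatrix(new double[][]{{2}, {1}, {1}});
        // For this orthonormal panel column, the pseudoinverse is just the transpose.
        final RealMatrix betaHats = panel.transpose().multiply(input);
        // Prints the residual {{0}, {1}, {1}}: the component along the panel is removed.
        System.out.println(tangentNormalize(panel, input, betaHats));
    }
}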
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
From the class GCCorrector, method correctCoverage.
/**
 * Applies GC-bias correction to raw coverage, using a separate correction curve per sample.
 *
 * @param inputCounts raw coverage before GC correction
 * @param gcContentByTarget array of GC contents, one per target of the input
 * @return GC-corrected coverage
 */
public static ReadCountCollection correctCoverage(final ReadCountCollection inputCounts, final double[] gcContentByTarget) {
    // Each column (sample) has its own GC-bias curve, hence its own GCCorrector.
    final List<GCCorrector> gcCorrectors = IntStream.range(0, inputCounts.columnNames().size())
            .mapToObj(n -> new GCCorrector(gcContentByTarget, inputCounts.counts().getColumnVector(n)))
            .collect(Collectors.toList());
    // GC-correct a copy of the input counts in place.
    final RealMatrix correctedCounts = inputCounts.counts().copy();
    correctedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int target, final int column, final double coverage) {
            return gcCorrectors.get(column).correctedCoverage(coverage, gcContentByTarget[target]);
        }
    });
    // We would like the average correction factor to be 1.0, in the sense that average
    // coverage before and after correction should be equal.
    final double[] columnNormalizationFactors = IntStream.range(0, inputCounts.columnNames().size())
            .mapToDouble(c -> inputCounts.counts().getColumnVector(c).getL1Norm() / correctedCounts.getColumnVector(c).getL1Norm())
            .toArray();
    correctedCounts.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(final int target, final int column, final double coverage) {
            return coverage * columnNormalizationFactors[column];
        }
    });
    return new ReadCountCollection(inputCounts.targets(), inputCounts.columnNames(), correctedCounts);
}
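The final pass rescales each column so that total (L1) coverage is unchanged by the correction, which is what it means for the average correction factor to be 1.0. A small self-contained sketch of that invariant, with made-up before/after numbers:

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;

public final class NormalizationFactorSketch {
    public static void main(final String[] args) {
        // Made-up coverage for one sample, before and after a hypothetical GC correction.
        final RealMatrix before = new Array2DRowRealMatrix(new double[][]{{10}, {20}, {30}});
        final RealMatrix after = new Array2DRowRealMatrix(new double[][]{{8}, {22}, {36}});
        // Rescale the corrected column so its L1 norm matches the original's.
        final double factor = before.getColumnVector(0).getL1Norm() / after.getColumnVector(0).getL1Norm();
        final RealMatrix rescaled = after.scalarMultiply(factor);
        // Both totals are now 60, so the average correction factor is effectively 1.0.
        System.out.println(rescaled.getColumnVector(0).getL1Norm());
    }
}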
Use of org.broadinstitute.hellbender.tools.exome.ReadCountCollection in project gatk by broadinstitute.
From the class CoveragePoNQCUtils, method hasSuspiciousContigs.
/**
 * Given a single-sample tangent normalization (or other coverage profile), determine whether any contig looks like
 * it has an arm-level event, defined as 25% or more of the contig amplified or deleted.
 *
 * @param singleSampleTangentNormalized tangent-normalized data for a single sample
 * @param contigToMedian map from contig name to its median copy ratio, e.g. as produced by getContigToMedianCRMap
 * @return never {@code null}
 */
private static Boolean hasSuspiciousContigs(final ReadCountCollection singleSampleTangentNormalized, final Map<String, Double> contigToMedian) {
    final List<String> allContigsPresent = retrieveAllContigsPresent(singleSampleTangentNormalized);
    for (final String contig : allContigsPresent) {
        final ReadCountCollection oneContigReadCountCollection = singleSampleTangentNormalized.subsetTargets(
                singleSampleTangentNormalized.targets().stream()
                        .filter(t -> t.getContig().equals(contig))
                        .collect(Collectors.toSet()));
        final RealVector counts = oneContigReadCountCollection.counts().getColumnVector(0);
        // Examine each quarter of the contig separately, so that an event spanning 25% or more of it is detected.
        for (int i = 0; i < 4; i++) {
            final RealVector partitionCounts = counts.getSubVector(i * counts.getDimension() / 4, counts.getDimension() / 4);
            // Convert the quarter from log2 space into copy-ratio (CR) space and take its median.
            final double[] partitionArray = DoubleStream.of(partitionCounts.toArray()).map(d -> Math.pow(2, d)).sorted().toArray();
            double median = new Median().evaluate(partitionArray);
            // Re-center using the provided per-contig median, so a globally shifted contig is not flagged.
            final double medianShiftInCRSpace = contigToMedian.getOrDefault(contig, 1.0) - 1.0;
            median -= medianShiftInCRSpace;
            if ((median > AMP_THRESHOLD) || (median < DEL_THRESHOLD)) {
                logger.info("Suspicious contig: " + singleSampleTangentNormalized.columnNames().get(0) + " " + contig + " (" + median + " -- " + i + ")");
                return true;
            }
        }
    }
    return false;
}
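The quarter-by-quarter median test can be demonstrated in isolation. The sketch below uses made-up log2 values in which the last quarter simulates an amplified arm; the threshold values are hypothetical, not the AMP_THRESHOLD/DEL_THRESHOLD constants GATK defines, and the medians are expressed here as deviations from a neutral copy ratio of 1.0.

import java.util.stream.DoubleStream;

import org.apache.commons.math3.stat.descriptive.rank.Median;

public final class QuarterCheckSketch {
    // Hypothetical deviation thresholds, for illustration only.
    private static final double AMP_THRESHOLD = 0.1;
    private static final double DEL_THRESHOLD = -0.1;

    public static void main(final String[] args) {
        // Made-up log2 copy ratios; the last quarter simulates an amplified arm.
        final double[] log2 = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5};
        final int quarter = log2.length / 4;
        for (int i = 0; i < 4; i++) {
            final double[] part = new double[quarter];
            System.arraycopy(log2, i * quarter, part, 0, quarter);
            // Median of the quarter in copy-ratio space, as a deviation from neutral (1.0).
            final double median = new Median().evaluate(
                    DoubleStream.of(part).map(d -> Math.pow(2, d)).toArray()) - 1.0;
            if (median > AMP_THRESHOLD || median < DEL_THRESHOLD) {
                System.out.println("Suspicious quarter " + i + " (median deviation " + median + ")");
            }
        }
    }
}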