Use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk by broadinstitute.
Class AlleleFractionInitializer, method initialMinorFractions.
/**
 * Initialize minor fractions, assuming no allelic bias. <p></p>
 *
 * We integrate over the minor-allele fraction f to get posterior probabilities (responsibilities)
 * that the alt or ref allele is the minor allele at each het site:
 * the responsibility of alt minor is proportional to int_{0 to 1/2} f^a (1-f)^r df and
 * the responsibility of ref minor is proportional to int_{0 to 1/2} f^r (1-f)^a df,
 * where a and r are the alt and ref read counts. These integrals equal
 * B(a + 1, r + 1) * I(1/2; a + 1, r + 1) and B(a + 1, r + 1) * I(1/2; r + 1, a + 1),
 * where I is the regularized incomplete beta function and the common beta-function
 * factor cancels on normalization. By the symmetry I_x(p, q) = 1 - I_{1-x}(q, p),
 * the two I values sum to 1 at x = 1/2, so the normalized responsibilities are exactly
 * I(1/2; a + 1, r + 1) and I(1/2; r + 1, a + 1). <p></p>
 *
 * Finally, we set each segment's minor fraction to the responsibility-weighted count of
 * minor-allele reads divided by the total read count, ignoring outliers.
 */
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                //if the special function can't be computed, give an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        // we achieve a flat prior via a single pseudocount for minor and non-minor reads, hence the +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}
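The normalization claim in the javadoc is easy to check numerically. The standalone sketch below (the class name and read counts are illustrative, not from GATK) evaluates both responsibilities with commons-math and confirms they sum to exactly 1:

import org.apache.commons.math3.special.Beta;

public class ResponsibilityCheck {
    public static void main(String[] args) {
        //an illustrative het site: 12 alt reads, 30 ref reads
        final int a = 12;
        final int r = 30;
        //responsibility that alt is the minor allele: I(1/2; a + 1, r + 1)
        final double altMinor = Beta.regularizedBeta(0.5, a + 1, r + 1);
        //responsibility that ref is the minor allele: I(1/2; r + 1, a + 1)
        final double refMinor = Beta.regularizedBeta(0.5, r + 1, a + 1);
        //the symmetry I_x(p, q) = 1 - I_{1-x}(q, p) at x = 1/2 forces the sum to be exactly 1
        System.out.printf("alt minor = %.6f, ref minor = %.6f, sum = %.6f%n",
                altMinor, refMinor, altMinor + refMinor);
    }
}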
Use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk by broadinstitute.
Class ReCapSegCallerUnitTest, method testMakeCalls.
@Test
public void testMakeCalls() {
    final List<Target> targets = new ArrayList<>();
    final List<String> columnNames = Arrays.asList("Sample");
    final List<Double> coverage = new ArrayList<>();
    //add amplification targets
    for (int i = 0; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 100 + 2 * i, 101 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(2.0));
    }
    //add deletion targets
    for (int i = 0; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 300 + 2 * i, 301 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.5));
    }
    //add targets that don't belong to a segment
    for (int i = 1; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 400 + 2 * i, 401 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(1.0));
    }
    //add obviously neutral targets with some small spread
    for (int i = -5; i < 6; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 500 + 2 * i, 501 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.01 * i + 1));
    }
    //add spread-out targets to a neutral segment (mean near zero)
    for (int i = -5; i < 6; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 700 + 2 * i, 701 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.1 * i + 1));
    }
    final RealMatrix coverageMatrix = new Array2DRowRealMatrix(targets.size(), 1);
    coverageMatrix.setColumn(0, coverage.stream().mapToDouble(x -> x).toArray());
    final ReadCountCollection counts = new ReadCountCollection(targets, columnNames, coverageMatrix);
    final List<ModeledSegment> segments = new ArrayList<>();
    //amplification
    segments.add(new ModeledSegment(new SimpleInterval("chr", 100, 200), 100, ParamUtils.log2(2.0)));
    //deletion
    segments.add(new ModeledSegment(new SimpleInterval("chr", 300, 400), 100, ParamUtils.log2(0.5)));
    //neutral
    segments.add(new ModeledSegment(new SimpleInterval("chr", 450, 550), 100, ParamUtils.log2(1)));
    //neutral
    segments.add(new ModeledSegment(new SimpleInterval("chr", 650, 750), 100, ParamUtils.log2(1)));
    final List<ModeledSegment> calls = ReCapSegCaller.makeCalls(counts, segments);
    Assert.assertEquals(calls.get(0).getCall(), ReCapSegCaller.AMPLIFICATION_CALL);
    Assert.assertEquals(calls.get(1).getCall(), ReCapSegCaller.DELETION_CALL);
    Assert.assertEquals(calls.get(2).getCall(), ReCapSegCaller.NEUTRAL_CALL);
    Assert.assertEquals(calls.get(3).getCall(), ReCapSegCaller.NEUTRAL_CALL);
}
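ReCapSegCaller's actual decision rule is not shown on this page. Purely to illustrate the shape of the caller this test exercises, here is a hypothetical sketch that labels a segment from its mean log2 copy ratio; the method name, thresholds, and call strings are assumptions, not GATK's:

//hypothetical sketch only, NOT ReCapSegCaller's actual rule; thresholds and call strings are assumed
private static String callFromMeanLog2CopyRatio(final double meanLog2CopyRatio) {
    final double amplificationThreshold = 0.5;  //assumed cutoff on the log2 scale
    final double deletionThreshold = -0.5;      //assumed cutoff on the log2 scale
    if (meanLog2CopyRatio > amplificationThreshold) {
        return "+";  //amplification call
    }
    if (meanLog2CopyRatio < deletionThreshold) {
        return "-";  //deletion call
    }
    return "0";      //copy-neutral call
}

Under these assumed cutoffs, the test's amplified segment (mean ParamUtils.log2(2.0) = 1.0) and deleted segment (mean ParamUtils.log2(0.5) = -1.0) would land on the "+" and "-" sides, with both neutral segments near 0.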
Use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk by broadinstitute.
Class SegmentUtilsUnitTest, method testUnionSegments.
/**
* Test for {@link SegmentUtils#unionSegments}. Expected behavior:
* <p>
* On chr1 {@link SegmentUtils#collectBreakpointsByContig} gives:
* </p>
* <p>
* 1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140.
* </p>
* <p>
* Then {@link SegmentUtils#constructUntrimmedSegments} finds the segments:
* </p>
* <p>
* [1, 4], [5, 10], [11, 19], [20, 40], [41, 41], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
* </p>
* <p>
* and returns the non-empty segments:
* </p>
* <p>
* [1, 4], [5, 10], [20, 40], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
* </p>
* <p>
* Then {@link SegmentUtils#mergeSpuriousStartsAndEnds} merges the last segment left to form [115, 140],
* and {@link SegmentMergeUtils#mergeSpuriousMiddles} randomly merges segment [92, 114] left or right.
* </p>
* <p>
* Finally, {@link SegmentUtils#trimInterval} gives:
* </p>
* <p>
* [1, 10], [20, 40], [42, 42], [90, 114], [115, 140] (if [92, 114] merged left) or
* </p>
* <p>
* [1, 10], [20, 40], [42, 42], [90, 91], [92, 140] (if [92, 114] merged right)
* </p>
* <p>
* The single segment on chr2, [10, 10] (which contains only a SNP and no targets), is retained.
*/
@Test
public void testUnionSegments() {
    final String sampleName = "placeholder_sample_name";
    final List<Target> targets = new ArrayList<>();
    targets.add(new Target("t1", new SimpleInterval("chr1", 1, 10)));
    targets.add(new Target("t2", new SimpleInterval("chr1", 20, 30)));
    targets.add(new Target("t3", new SimpleInterval("chr1", 31, 40)));
    targets.add(new Target("t4", new SimpleInterval("chr1", 90, 100)));
    targets.add(new Target("t5", new SimpleInterval("chr1", 110, 120)));
    targets.add(new Target("t6", new SimpleInterval("chr1", 130, 140)));
    final RealMatrix zeroCoverageMatrix = new Array2DRowRealMatrix(targets.size(), 1);
    final ReadCountCollection counts = new ReadCountCollection(targets, Collections.singletonList(sampleName), zeroCoverageMatrix);
    final AllelicCount snp1 = new AllelicCount(new SimpleInterval("chr1", 5, 5), 0, 1);
    final AllelicCount snp2 = new AllelicCount(new SimpleInterval("chr1", 40, 40), 0, 1);
    final AllelicCount snp3 = new AllelicCount(new SimpleInterval("chr1", 42, 42), 0, 1);
    final AllelicCount snp4 = new AllelicCount(new SimpleInterval("chr1", 91, 91), 0, 1);
    final AllelicCount snp5 = new AllelicCount(new SimpleInterval("chr1", 115, 115), 0, 1);
    final AllelicCount snp6 = new AllelicCount(new SimpleInterval("chr1", 125, 125), 0, 1);
    final AllelicCount snp7 = new AllelicCount(new SimpleInterval("chr2", 10, 10), 0, 1);
    final List<AllelicCount> snps = Arrays.asList(snp1, snp2, snp3, snp4, snp5, snp6, snp7);
    final List<SimpleInterval> targetSegments = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 90, 140));
    final List<SimpleInterval> snpSegments = Arrays.asList(
            new SimpleInterval("chr1", 5, 40),
            new SimpleInterval("chr1", 42, 91),
            new SimpleInterval("chr1", 115, 125),
            new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> unionedSegments = SegmentUtils.unionSegments(targetSegments, snpSegments, new Genome(counts, snps));
    final List<SimpleInterval> expectedLeft = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 114),
            new SimpleInterval("chr1", 115, 140),
            new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> expectedRight = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 91),
            new SimpleInterval("chr1", 92, 140),
            new SimpleInterval("chr2", 10, 10));
    Assert.assertTrue(unionedSegments.equals(expectedLeft) || unionedSegments.equals(expectedRight));
}
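The breakpoint walk described in the javadoc can be reconstructed from its worked example. The sketch below is an inference from that example, not SegmentUtils' actual implementation: each start breakpoint opens a new segment and closes the previous one just before it, each end breakpoint closes the current segment inclusively, and inverted spans from duplicate breakpoints are dropped.

import java.util.ArrayList;
import java.util.List;

public class BreakpointWalk {
    public static void main(String[] args) {
        //chr1 breakpoints from the javadoc: the starts/ends of the target segments
        //[1, 10], [20, 40], [90, 140] and snp segments [5, 40], [42, 91], [115, 125]
        final int[] positions = {1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140};
        final boolean[] isStart = {true, true, false, true, false, false, true, true, false, true, false, false};
        final List<int[]> segments = new ArrayList<>();
        int start = positions[0];
        for (int i = 1; i < positions.length; i++) {
            //a start breakpoint closes the previous segment just before it;
            //an end breakpoint is included in the segment it closes
            final int end = isStart[i] ? positions[i] - 1 : positions[i];
            if (start <= end) {  //the duplicate breakpoint at 40 yields an inverted span; skip it
                segments.add(new int[]{start, end});
            }
            start = isStart[i] ? positions[i] : positions[i] + 1;
        }
        //prints [1,4] [5,10] [11,19] [20,40] [41,41] [42,89] [90,91] [92,114] [115,125] [126,140]
        segments.forEach(s -> System.out.printf("[%d,%d] ", s[0], s[1]));
    }
}

The non-empty filter in the javadoc then removes [11, 19] and [41, 41], which contain no targets or SNPs.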
Use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk-protected by broadinstitute.
Class CopyRatioSegmenterUnitTest, method testChromosomesOnDifferentSegments.
@Test
public void testChromosomesOnDifferentSegments() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] trueLog2CopyRatios = new double[] {-2.0, 0.0, 1.7};
    final double trueMemoryLength = 1e5;
    final double trueStandardDeviation = 0.2;
    // randomly set positions
    final int chainLength = 100;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
    positions.addAll(randomPositions("chr2", chainLength, rng, trueMemoryLength / 4));
    positions.addAll(randomPositions("chr3", chainLength, rng, trueMemoryLength / 4));
    //fix everything to the same state 2
    final int trueState = 2;
    final List<Double> data = new ArrayList<>();
    for (int n = 0; n < positions.size(); n++) {
        final double copyRatio = trueLog2CopyRatios[trueState];
        final double observed = generateData(trueStandardDeviation, copyRatio, rng);
        data.add(observed);
    }
    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final ReadCountCollection rcc = new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(data.stream().mapToDouble(x -> x).toArray()));
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, rcc);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    //check that each chromosome has at least one segment
    final int numDifferentContigsInSegments = (int) segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals(numDifferentContigsInSegments, 3);
}
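The helpers generateData and randomPositions are defined elsewhere in the test class. The reconstructions below are inferred from their call sites and are assumptions about their behavior, not GATK's actual code:

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.random.RandomGenerator;

//hypothetical reconstructions of the test helpers, inferred from the call sites above
final class SegmenterTestHelpers {
    //a datum is the true log2 copy ratio plus Gaussian noise
    static double generateData(final double standardDeviation, final double log2CopyRatio, final RandomGenerator rng) {
        return log2CopyRatio + standardDeviation * rng.nextGaussian();
    }

    //lay out chainLength single-base positions along a contig with exponentially
    //distributed gaps of the given mean spacing (SimpleInterval is GATK's interval class)
    static List<SimpleInterval> randomPositions(final String contig, final int chainLength, final RandomGenerator rng, final double meanSpacing) {
        final List<SimpleInterval> positions = new ArrayList<>();
        int position = 1;
        for (int n = 0; n < chainLength; n++) {
            position += 1 + (int) (-meanSpacing * Math.log(1 - rng.nextDouble()));
            positions.add(new SimpleInterval(contig, position, position));
        }
        return positions;
    }
}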
Use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk-protected by broadinstitute.
Class AlleleFractionInitializer, method initialMinorFractions.
/**
 * Initialize minor fractions, assuming no allelic bias. <p></p>
 *
 * We integrate over the minor-allele fraction f to get posterior probabilities (responsibilities)
 * that the alt or ref allele is the minor allele at each het site:
 * the responsibility of alt minor is proportional to int_{0 to 1/2} f^a (1-f)^r df and
 * the responsibility of ref minor is proportional to int_{0 to 1/2} f^r (1-f)^a df,
 * where a and r are the alt and ref read counts. These integrals equal
 * B(a + 1, r + 1) * I(1/2; a + 1, r + 1) and B(a + 1, r + 1) * I(1/2; r + 1, a + 1),
 * where I is the regularized incomplete beta function and the common beta-function
 * factor cancels on normalization. By the symmetry I_x(p, q) = 1 - I_{1-x}(q, p),
 * the two I values sum to 1 at x = 1/2, so the normalized responsibilities are exactly
 * I(1/2; a + 1, r + 1) and I(1/2; r + 1, a + 1). <p></p>
 *
 * Finally, we set each segment's minor fraction to the responsibility-weighted count of
 * minor-allele reads divided by the total read count, ignoring outliers.
 */
private AlleleFractionState.MinorFractions initialMinorFractions(final AlleleFractionData data) {
    final int numSegments = data.getNumSegments();
    final AlleleFractionState.MinorFractions result = new AlleleFractionState.MinorFractions(numSegments);
    for (int segment = 0; segment < numSegments; segment++) {
        double responsibilityWeightedMinorAlleleReadCount = 0.0;
        double responsibilityWeightedTotalReadCount = 0.0;
        for (final AllelicCount count : data.getCountsInSegment(segment)) {
            final int a = count.getAltReadCount();
            final int r = count.getRefReadCount();
            double altMinorResponsibility;
            try {
                altMinorResponsibility = Beta.regularizedBeta(0.5, a + 1, r + 1);
            } catch (final MaxCountExceededException e) {
                //if the special function can't be computed, give an all-or-nothing responsibility
                altMinorResponsibility = a < r ? 1.0 : 0.0;
            }
            responsibilityWeightedMinorAlleleReadCount += altMinorResponsibility * a + (1 - altMinorResponsibility) * r;
            responsibilityWeightedTotalReadCount += a + r;
        }
        // we achieve a flat prior via a single pseudocount for minor and non-minor reads, hence the +1 and +2
        result.add((responsibilityWeightedMinorAlleleReadCount + 1) / (responsibilityWeightedTotalReadCount + 2));
    }
    return result;
}