Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
From class SNPSegmenterUnitTest, method testAllelicFractionBasedSegmentationNoSNPs:
@Test(expectedExceptions = IllegalArgumentException.class)
public void testAllelicFractionBasedSegmentationNoSNPs() {
    final String sampleName = "test";
    final List<AllelicCount> snpCounts = Collections.emptyList();
    final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
    final File resultFile = createTempFile("snp-segmenter-test-result", ".seg");
    SNPSegmenter.writeSegmentFile(snps, sampleName, resultFile);
}
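The exception the test expects comes from input validation on the empty SNP collection. A minimal sketch of such a guard, assuming TargetCollection exposes a targetCount() accessor (an illustrative simplification, not the actual SNPSegmenter implementation):

//Illustrative sketch of the empty-input guard this test exercises; the real
//SNPSegmenter.writeSegmentFile may validate and report differently.
public static void writeSegmentFile(final TargetCollection<AllelicCount> snps,
                                    final String sampleName,
                                    final File outFile) {
    if (snps.targetCount() == 0) {
        throw new IllegalArgumentException("Cannot segment: no SNP counts were provided.");
    }
    //... perform allelic-fraction segmentation and write the segments to outFile ...
}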
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
From class SNPSegmenterUnitTest, method testAllelicFractionBasedSegmentation:
/**
* Tests that segments are correctly determined using allelic counts from SNP sites.
* Segment-mean and target-number columns from the expected segment file are not checked.
*/
@Test
public void testAllelicFractionBasedSegmentation() {
    final String sampleName = "test";
    final File snpFile = new File(TEST_SUB_DIR, "snps-simplified-for-allelic-fraction-segmentation.tsv");
    final List<AllelicCount> snpCounts = new AllelicCountCollection(snpFile).getCounts();
    final TargetCollection<AllelicCount> snps = new HashedListTargetCollection<>(snpCounts);
    final File resultFile = createTempFile("snp-segmenter-test-result", ".seg");
    SNPSegmenter.writeSegmentFile(snps, sampleName, resultFile);
    final File expectedFile = new File(TEST_SUB_DIR, "snp-segmenter-test-expected.seg");
    Assert.assertTrue(resultFile.exists(), "SNPSegmenterTest output was not written to temp file: " + resultFile);
    final List<SimpleInterval> result = SegmentUtils.readIntervalsFromSegmentFile(resultFile);
    final List<SimpleInterval> expected = SegmentUtils.readIntervalsFromSegmentFile(expectedFile);
    Assert.assertEquals(result, expected);
}
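The comparison is interval-only because readIntervalsFromSegmentFile discards the other fields. For orientation, a segment file of the kind compared here is a tab-separated table whose columns, following the common SEG convention (shown for illustration; the test resources themselves are not reproduced here), look like:

Sample  Chromosome  Start  End  Num_Probes  Segment_Mean

Only the (Chromosome, Start, End) triples reach the assertion; Num_Probes and Segment_Mean are the "target-number" and "segment-mean" columns the Javadoc says are not checked.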
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
From class SegmentUtilsUnitTest, method testUnionSegments:
/**
* Test for {@link SegmentUtils#unionSegments}. Expected behavior:
* <p>
* On chr1 {@link SegmentUtils#collectBreakpointsByContig} gives:
* </p>
* <p>
* 1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140.
* </p>
* <p>
* Then {@link SegmentUtils#constructUntrimmedSegments} finds the segments:
* </p>
* <p>
* [1, 4], [5, 10], [11, 19], [20, 40], [41, 41], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
* </p>
* <p>
* and returns the non-empty segments:
* </p>
* <p>
* [1, 4], [5, 10], [20, 40], [42, 89], [90, 91], [92, 114], [115, 125], [126, 140].
* </p>
* <p>
* Then {@link SegmentUtils#mergeSpuriousStartsAndEnds} merges the last segment left to form [115, 140],
* and {@link SegmentMergeUtils#mergeSpuriousMiddles} randomly merges segment [92, 114] left or right.
* </p>
* <p>
* Finally, {@link SegmentUtils#trimInterval} gives:
* </p>
* <p>
* [1, 10], [20, 40], [42, 42], [90, 114], [115, 140] (if [92, 114] merged left) or
* </p>
* <p>
* [1, 10], [20, 40], [42, 42], [90, 91], [92, 140] (if [92, 114] merged right)
* </p>
* <p>
* The remaining empty segment on chr2 is retained.
* </p>
*/
@Test
public void testUnionSegments() {
    final String sampleName = "placeholder_sample_name";
    final List<Target> targets = new ArrayList<>();
    targets.add(new Target("t1", new SimpleInterval("chr1", 1, 10)));
    targets.add(new Target("t2", new SimpleInterval("chr1", 20, 30)));
    targets.add(new Target("t3", new SimpleInterval("chr1", 31, 40)));
    targets.add(new Target("t4", new SimpleInterval("chr1", 90, 100)));
    targets.add(new Target("t5", new SimpleInterval("chr1", 110, 120)));
    targets.add(new Target("t6", new SimpleInterval("chr1", 130, 140)));
    final RealMatrix zeroCoverageMatrix = new Array2DRowRealMatrix(targets.size(), 1);
    final ReadCountCollection counts =
            new ReadCountCollection(targets, Collections.singletonList(sampleName), zeroCoverageMatrix);

    final AllelicCount snp1 = new AllelicCount(new SimpleInterval("chr1", 5, 5), 0, 1);
    final AllelicCount snp2 = new AllelicCount(new SimpleInterval("chr1", 40, 40), 0, 1);
    final AllelicCount snp3 = new AllelicCount(new SimpleInterval("chr1", 42, 42), 0, 1);
    final AllelicCount snp4 = new AllelicCount(new SimpleInterval("chr1", 91, 91), 0, 1);
    final AllelicCount snp5 = new AllelicCount(new SimpleInterval("chr1", 115, 115), 0, 1);
    final AllelicCount snp6 = new AllelicCount(new SimpleInterval("chr1", 125, 125), 0, 1);
    final AllelicCount snp7 = new AllelicCount(new SimpleInterval("chr2", 10, 10), 0, 1);
    final List<AllelicCount> snps = Arrays.asList(snp1, snp2, snp3, snp4, snp5, snp6, snp7);

    final List<SimpleInterval> targetSegments = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 90, 140));
    final List<SimpleInterval> snpSegments = Arrays.asList(
            new SimpleInterval("chr1", 5, 40),
            new SimpleInterval("chr1", 42, 91),
            new SimpleInterval("chr1", 115, 125),
            new SimpleInterval("chr2", 10, 10));

    final List<SimpleInterval> unionedSegments =
            SegmentUtils.unionSegments(targetSegments, snpSegments, new Genome(counts, snps));

    final List<SimpleInterval> expectedLeft = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 114),
            new SimpleInterval("chr1", 115, 140),
            new SimpleInterval("chr2", 10, 10));
    final List<SimpleInterval> expectedRight = Arrays.asList(
            new SimpleInterval("chr1", 1, 10),
            new SimpleInterval("chr1", 20, 40),
            new SimpleInterval("chr1", 42, 42),
            new SimpleInterval("chr1", 90, 91),
            new SimpleInterval("chr1", 92, 140),
            new SimpleInterval("chr2", 10, 10));
    Assert.assertTrue(unionedSegments.equals(expectedLeft) || unionedSegments.equals(expectedRight));
}
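The breakpoint list in the Javadoc (1, 5, 10, 20, 40, 40, 42, 90, 91, 115, 125, 140) is just the sorted multiset of segment endpoints on chr1. A sketch of how breakpoints could be gathered per contig (an illustrative stand-in for SegmentUtils#collectBreakpointsByContig; the real method's signature and details may differ):

//Illustrative stand-in only: collect the start and end of every target segment and
//SNP segment, per contig, sorted with duplicates kept (e.g., 40 appears twice above,
//once as an end and once as a start).
private static Map<String, List<Integer>> collectBreakpointsByContig(final List<SimpleInterval> targetSegments,
                                                                     final List<SimpleInterval> snpSegments) {
    final Map<String, List<Integer>> breakpointsByContig = new TreeMap<>();
    for (final List<SimpleInterval> segments : Arrays.asList(targetSegments, snpSegments)) {
        for (final SimpleInterval segment : segments) {
            final List<Integer> breakpoints =
                    breakpointsByContig.computeIfAbsent(segment.getContig(), c -> new ArrayList<>());
            breakpoints.add(segment.getStart());
            breakpoints.add(segment.getEnd());
        }
    }
    breakpointsByContig.values().forEach(Collections::sort);
    return breakpointsByContig;
}

Applied to the targetSegments and snpSegments constructed in the test, this reproduces exactly the chr1 breakpoint list quoted in the Javadoc.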
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
From class AlleleFractionLikelihoodsUnitTest, method testHetLogLikelihoodMinorFractionNearZero:
//if f is very close to 0 we have an analytic result for comparison
@Test
public void testHetLogLikelihoodMinorFractionNearZero() {
    //pi is just a prefactor so we don't need to test it thoroughly here
    final double pi = 0.01;
    for (final double f : Arrays.asList(1e-6, 1e-7, 1e-8)) {
        for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
            for (final double variance : Arrays.asList(0.01, 0.005, 0.001)) {
                final double alpha = mean * mean / variance;
                final double beta = mean / variance;
                final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
                for (final int a : Arrays.asList(1, 2, 3)) {            //alt count
                    for (final int r : Arrays.asList(50, 100, 200)) {   //ref count
                        final AllelicCount count = new AllelicCount(DUMMY, r, a);
                        final double actual = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
                        final double expected = a * log(beta) + Gamma.logGamma(alpha - a) - Gamma.logGamma(alpha)
                                + log((1 - pi) / 2) + a * log(f / (1 - f));
                        Assert.assertEquals(actual, expected, 1e-3);
                    }
                }
            }
        }
    }
}
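Written out, the expected value asserted above is the small-f limit of the het log-likelihood. With the gamma-bias parameterization used in the loops (alpha = mean^2/variance, beta = mean/variance), the test checks, in LaTeX form (a direct transcription of the code's expected expression, not an independent derivation):

\ell_{\mathrm{het}}(f) \;\xrightarrow{\,f \to 0^{+}\,}\; a \log \beta \;+\; \log\Gamma(\alpha - a) \;-\; \log\Gamma(\alpha) \;+\; \log\frac{1 - \pi}{2} \;+\; a \log\frac{f}{1 - f}

where a is the alt count and pi the outlier probability. Note that the ref count r does not appear in the limiting expression, which is why the test can sweep r independently at a fixed tolerance.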
Use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
From class PlotACNVResults, method validateContigs:
private void validateContigs(final Map<String, Integer> contigLengthMap) {
    final Set<String> contigNames = contigLengthMap.keySet();

    //validate contig names and lengths in SNP counts file
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final Set<String> snpCountsContigNames = snpCounts.getCounts().stream()
            .map(AllelicCount::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(snpCountsContigNames)) {
        logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream()
            .filter(c -> contigNames.contains(c.getContig()))
            .collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
    snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(
            snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in tangent file
    final ReadCountCollection tangent;
    try {
        tangent = ReadCountCollectionUtils.parse(tangentFile);
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(tangentFile, e);
    }
    final Set<String> tangentContigNames = tangent.targets().stream()
            .map(Target::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(tangentContigNames)) {
        logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream()
            .filter(t -> contigNames.contains(t.getContig()))
            .collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
    tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(
            tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in segments file
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final Set<String> segmentsContigNames = segments.stream()
            .map(ACNVModeledSegment::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(segmentsContigNames)) {
        logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream()
            .filter(s -> contigNames.contains(s.getContig()))
            .collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
    segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(
            segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
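The method repeats one pattern three times: collect contig names from a record source, warn if any are absent from the sequence dictionary, and check that maximum positions do not exceed contig lengths. A sketch of how that pattern could be factored over htsjdk's Locatable, which AllelicCount, Target, and ACNVModeledSegment all appear to satisfy (an illustrative refactoring, not code from PlotACNVResults):

//Illustrative refactoring of the repeated pattern above; not part of the actual tool.
private static <T extends Locatable> void validateContigsAndPositions(final List<T> records,
                                                                      final Map<String, Integer> contigLengthMap,
                                                                      final String fileDescription,
                                                                      final Logger logger) {
    final Set<String> contigNames = contigLengthMap.keySet();
    final Set<String> recordContigNames = records.stream()
            .map(Locatable::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(recordContigNames)) {
        logger.warn(String.format(
                "Contigs present in the %s are missing from the sequence dictionary and will not be plotted.",
                fileDescription));
    }
    final Map<String, Integer> maxPositionByContig = records.stream()
            .filter(r -> contigNames.contains(r.getContig()))
            .collect(Collectors.toMap(Locatable::getContig, Locatable::getEnd, Integer::max));
    maxPositionByContig.forEach((contig, maxPosition) -> Utils.validateArg(
            maxPosition <= contigLengthMap.get(contig),
            String.format("Position present in the %s exceeds contig length in the sequence dictionary.", fileDescription)));
}

Each of the three blocks would then reduce to a single call, e.g. validateContigsAndPositions(snpCounts.getCounts(), contigLengthMap, "SNP counts file", logger).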