Usage example of org.broadinstitute.hellbender.tools.exome.Target in the GATK project (Broad Institute):
class XHMMSegmentGenotyperIntegrationTest, method assertVariantsAreCoveredBySegments.
/**
 * Asserts that every variant is covered by segment records and that per-sample discovery
 * flags agree with segment membership.
 * <p>
 * For each variant, at least one segment record must have a genomic interval equal to the
 * variant's interval. Then, for each genotype of the variant: if its
 * {@link XHMMSegmentGenotyper#DISCOVERY_KEY} attribute equals
 * {@link XHMMSegmentGenotyper#DISCOVERY_TRUE}, some matching segment must belong to that
 * sample; otherwise no matching segment may belong to it.
 *
 * @param variants        variants emitted by the genotyper.
 * @param variantSegments segment records the variants must be consistent with.
 */
private void assertVariantsAreCoveredBySegments(final List<VariantContext> variants, final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
    for (final VariantContext variant : variants) {
        // Hoisted out of the filter so the interval is built once per variant, not once per segment.
        final SimpleInterval variantInterval = new SimpleInterval(variant);
        final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> matches = variantSegments.stream()
                .filter(s -> variantInterval.equals(s.getSegment().getInterval()))
                .collect(Collectors.toList());
        Assert.assertFalse(matches.isEmpty(), "no segment interval matches variant " + variantInterval);
        for (final Genotype genotype : variant.getGenotypes()) {
            final Object discoveryAttribute = genotype.getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY);
            // Fail with a clear message instead of an NPE when the attribute is missing.
            Assert.assertNotNull(discoveryAttribute, "missing " + XHMMSegmentGenotyper.DISCOVERY_KEY
                    + " attribute for sample " + genotype.getSampleName());
            final boolean discovery = discoveryAttribute.toString().equals(XHMMSegmentGenotyper.DISCOVERY_TRUE);
            if (discovery) {
                Assert.assertTrue(matches.stream().anyMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            } else {
                Assert.assertTrue(matches.stream().noneMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
            }
        }
    }
}
Usage example of org.broadinstitute.hellbender.tools.exome.Target in the GATK project (Broad Institute):
class CopyRatioSegmenterUnitTest, method testSegmentation.
/**
 * End-to-end check of the copy-ratio segmenter: simulate a hidden-state chain from a known
 * HMM, add Gaussian noise, segment the noisy data, and verify that the recovered per-target
 * segment means are close to the true copy ratios on average.
 */
@Test
public void testSegmentation() {
    // Fixed seed keeps the simulated data — and therefore the assertion — deterministic.
    final RandomGenerator randomGenerator = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final List<Double> weights = Arrays.asList(0.2, 0.5, 0.3);
    final List<Double> log2CopyRatios = Arrays.asList(-2.0, 0.0, 1.4);
    final double memoryLength = 1e5;
    final double standardDeviation = 0.2;
    final CopyRatioHMM generativeModel = new CopyRatioHMM(log2CopyRatios, weights, memoryLength, standardDeviation);
    final int chainLength = 10000;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, randomGenerator, memoryLength / 4);
    final List<Integer> hiddenStates = generativeModel.generateHiddenStateChain(positions);
    final List<Double> truthSequence = hiddenStates.stream().map(log2CopyRatios::get).collect(Collectors.toList());
    final List<Double> noisyData = truthSequence.stream()
            .map(copyRatio -> generateData(standardDeviation, copyRatio, randomGenerator))
            .collect(Collectors.toList());
    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final double[] dataColumn = noisyData.stream().mapToDouble(Double::doubleValue).toArray();
    final ReadCountCollection readCounts =
            new ReadCountCollection(targets, Collections.singletonList("SAMPLE"), new Array2DRowRealMatrix(dataColumn));
    final List<ModeledSegment> segments = new CopyRatioSegmenter(10, readCounts).getModeledSegments();
    // Expand each segment's mean back out to one value per covered target.
    final double[] segmentMeans = segments.stream()
            .flatMap(segment -> Collections.nCopies((int) segment.getTargetCount(), segment.getSegmentMeanInLog2CRSpace()).stream())
            .mapToDouble(Double::doubleValue)
            .toArray();
    final double meanAbsoluteError = IntStream.range(0, truthSequence.size())
            .mapToDouble(index -> Math.abs(segmentMeans[index] - truthSequence.get(index)))
            .average()
            .getAsDouble();
    Assert.assertEquals(meanAbsoluteError, 0, 0.025);
}
Usage example of org.broadinstitute.hellbender.tools.exome.Target in the GATK project (Broad Institute):
class SparkGenomeReadCountsIntegrationTest, method testSparkGenomeReadCountsSmallBins.
/**
 * Runs SparkGenomeReadCounts with a 2 kb bin size and verifies the proportional-coverage
 * output, the raw-coverage output, and the generated target table.
 *
 * @throws IOException if parsing either coverage file fails.
 */
@Test
public void testSparkGenomeReadCountsSmallBins() throws IOException {
    final File outputFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String[] arguments = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, outputFile.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "2000"
    };
    runCommandLine(arguments);
    Assert.assertTrue(outputFile.exists());
    Assert.assertTrue(outputFile.length() > 0);

    // Proportional coverage: some bin must have non-zero coverage.
    final ReadCountCollection proportionalCoverage = ReadCountCollectionUtils.parse(outputFile);
    Assert.assertTrue(proportionalCoverage.records().stream().anyMatch(record -> Math.abs(record.getDouble(0)) > 1e-10));
    // The reads are all in three bins of contig 3 with values {.5, .25, .25},
    // so contig 3 contains a bin above .2 and its proportions sum to 1.
    Assert.assertTrue(proportionalCoverage.records().stream()
            .filter(record -> record.getContig().equals("3"))
            .anyMatch(record -> Math.abs(record.getDouble(0)) > .2));
    final double contig3Total = proportionalCoverage.records().stream()
            .filter(record -> record.getContig().equals("3"))
            .mapToDouble(record -> record.getDouble(0))
            .sum();
    Assert.assertTrue(Math.abs(contig3Total - 1.0) < 1e-10);

    // Raw coverage: same three bins of contig 3 should each hold at least one read.
    final File rawCoverageFile = new File(outputFile.getAbsolutePath() + SparkGenomeReadCounts.RAW_COV_OUTPUT_EXTENSION);
    final ReadCountCollection rawCoverage = ReadCountCollectionUtils.parse(rawCoverageFile);
    Assert.assertTrue(rawCoverage.records().stream().anyMatch(record -> Math.abs(record.getDouble(0)) > 1e-10));
    final long contig3PopulatedBins = rawCoverage.records().stream()
            .filter(record -> record.getContig().equals("3"))
            .filter(record -> Math.abs(record.getDouble(0)) >= 1)
            .count();
    Assert.assertEquals(contig3PopulatedBins, 3);

    // Target table: 16 kb per contig / 2 kb bins, times 4 contigs in the fasta file.
    final File targetsFile = new File(outputFile.getAbsolutePath() + ".targets.tsv");
    Assert.assertTrue(targetsFile.exists());
    Assert.assertTrue(targetsFile.length() > 0);
    final List<Target> targets = TargetTableReader.readTargetFile(targetsFile);
    Assert.assertEquals(targets.size(), 16000 / 2000 * 4);
    Assert.assertEquals(targets.get(1).getEnd(), 4000);
    Assert.assertEquals(targets.get(2).getName(), "target_1_4001_6000");
    Assert.assertEquals(targets.get(8).getName(), "target_2_1_2000");
    Assert.assertEquals(targets.get(17).getName(), "target_3_2001_4000");
    Assert.assertEquals(proportionalCoverage.targets().size(), targets.size());
}
Usage example of org.broadinstitute.hellbender.tools.exome.Target in the GATK project (Broad Institute):
class SparkGenomeReadCountsIntegrationTest, method testSparkGenomeReadCountsBigBins.
/**
 * Runs SparkGenomeReadCounts with a 16 kb bin size (one bin per contig) and verifies the
 * coverage output and the generated target table.
 *
 * @throws IOException if parsing the coverage file fails.
 */
@Test
public void testSparkGenomeReadCountsBigBins() throws IOException {
    final File outputFile = createTempFile(BAM_FILE.getName(), ".cov");
    final String[] arguments = {
            "--disableSequenceDictionaryValidation",
            "-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME, REFERENCE_FILE.getAbsolutePath(),
            "-" + StandardArgumentDefinitions.INPUT_SHORT_NAME, BAM_FILE.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.OUTPUT_FILE_SHORT_NAME, outputFile.getAbsolutePath(),
            "-" + SparkGenomeReadCounts.BINSIZE_SHORT_NAME, "16000"
    };
    runCommandLine(arguments);
    Assert.assertTrue(outputFile.exists());
    Assert.assertTrue(outputFile.length() > 0);
    final ReadCountCollection coverage = ReadCountCollectionUtils.parse(outputFile);

    // With 16 kb bins each of the 4 contigs is covered by exactly one target.
    final File targetsFile = new File(outputFile.getAbsolutePath() + ".targets.tsv");
    Assert.assertTrue(targetsFile.exists());
    Assert.assertTrue(targetsFile.length() > 0);
    final List<Target> targets = TargetTableReader.readTargetFile(targetsFile);
    Assert.assertEquals(targets.size(), 4);
    Assert.assertEquals(targets.get(1).getEnd(), 16000);
    Assert.assertEquals(targets.get(2).getName(), "target_3_1_16000");
    Assert.assertEquals(coverage.targets().size(), targets.size());
}
Usage example of org.broadinstitute.hellbender.tools.exome.Target in the GATK project (Broad Institute):
class CopyRatioSegmenterUnitTest, method testChromosomesOnDifferentSegments.
/**
 * Verifies that segments never span a contig boundary: all data are generated from a single
 * hidden state across three chromosomes, so the segmenter must still emit at least one
 * segment per chromosome.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    // Fixed seed keeps the simulated data deterministic.
    final RandomGenerator randomGenerator = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] log2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
    final double memoryLength = 1e5;
    final double standardDeviation = 0.2;
    // Random positions on three different contigs.
    final int chainLength = 100;
    final List<SimpleInterval> positions = randomPositions("chr1", chainLength, randomGenerator, memoryLength / 4);
    positions.addAll(randomPositions("chr2", chainLength, randomGenerator, memoryLength / 4));
    positions.addAll(randomPositions("chr3", chainLength, randomGenerator, memoryLength / 4));
    // Every datum comes from the same hidden state (state 2); the lookup is loop-invariant.
    final int fixedState = 2;
    final double fixedLog2CopyRatio = log2CopyRatios[fixedState];
    final List<Double> data = positions.stream()
            .map(position -> generateData(standardDeviation, fixedLog2CopyRatio, randomGenerator))
            .collect(Collectors.toList());
    final List<Target> targets = positions.stream().map(Target::new).collect(Collectors.toList());
    final double[] dataColumn = data.stream().mapToDouble(Double::doubleValue).toArray();
    final ReadCountCollection readCounts =
            new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(dataColumn));
    final CopyRatioSegmenter segmenter = new CopyRatioSegmenter(10, readCounts);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    // Each chromosome must contribute at least one segment.
    final long distinctContigCount = segments.stream().map(ModeledSegment::getContig).distinct().count();
    Assert.assertEquals((int) distinctContigCount, 3);
}
Aggregations