use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord in project gatk-protected by broadinstitute.
the class XHMMSegmentGenotyperIntegrationTest method assertVariantsAreCoveredBySegments.
private void assertVariantsAreCoveredBySegments(final List<VariantContext> variants, final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
for (final VariantContext variant : variants) {
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> matches = variantSegments.stream().filter(s -> new SimpleInterval(variant).equals(s.getSegment().getInterval())).collect(Collectors.toList());
Assert.assertFalse(matches.isEmpty());
for (final Genotype genotype : variant.getGenotypes()) {
final boolean discovery = genotype.getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString().equals(XHMMSegmentGenotyper.DISCOVERY_TRUE);
if (discovery) {
Assert.assertTrue(matches.stream().anyMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
} else {
Assert.assertTrue(matches.stream().noneMatch(s -> s.getSampleName().equals(genotype.getSampleName())));
}
}
}
}
use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord in project gatk-protected by broadinstitute.
the class CoverageModelEMWorkspace method getCopyRatioSegmentsSpark.
/**
* Fetch copy ratio segments from compute blocks (Spark implementation)
*
* @return a list of {@link CopyRatioHMMResults}
*/
private List<List<HiddenStateSegmentRecord<STATE, Target>>> getCopyRatioSegmentsSpark() {
/* local final member variables for lambda capture */
final List<Target> processedTargetList = new ArrayList<>();
processedTargetList.addAll(this.processedTargetList);
final List<SexGenotypeData> processedSampleSexGenotypeData = new ArrayList<>();
processedSampleSexGenotypeData.addAll(this.processedSampleSexGenotypeData);
final List<String> processedSampleNameList = new ArrayList<>();
processedSampleNameList.addAll(this.processedSampleNameList);
final INDArray sampleReadDepths = Transforms.exp(sampleMeanLogReadDepths, true);
final CopyRatioExpectationsCalculator<CoverageModelCopyRatioEmissionData, STATE> copyRatioExpectationsCalculator = this.copyRatioExpectationsCalculator;
final BiFunction<SexGenotypeData, Target, STATE> referenceStateFactory = this.referenceStateFactory;
return fetchCopyRatioEmissionDataSpark().mapPartitionsToPair(it -> {
final List<Tuple2<Integer, CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE>>> newPartitionData = new ArrayList<>();
while (it.hasNext()) {
final Tuple2<Integer, List<CoverageModelCopyRatioEmissionData>> prevDatum = it.next();
final int sampleIndex = prevDatum._1;
final CopyRatioCallingMetadata copyRatioCallingMetadata = CopyRatioCallingMetadata.builder().sampleName(processedSampleNameList.get(sampleIndex)).sampleSexGenotypeData(processedSampleSexGenotypeData.get(sampleIndex)).sampleCoverageDepth(sampleReadDepths.getDouble(sampleIndex)).emissionCalculationStrategy(EmissionCalculationStrategy.HYBRID_POISSON_GAUSSIAN).build();
newPartitionData.add(new Tuple2<>(sampleIndex, copyRatioExpectationsCalculator.getCopyRatioHMMResults(copyRatioCallingMetadata, processedTargetList, prevDatum._2)));
}
return newPartitionData.iterator();
}, true).mapPartitionsToPair(it -> {
final List<Tuple2<Integer, List<HiddenStateSegmentRecord<STATE, Target>>>> newPartitionData = new ArrayList<>();
while (it.hasNext()) {
final Tuple2<Integer, CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE>> prevDatum = it.next();
final int sampleIndex = prevDatum._1;
final CopyRatioHMMResults<CoverageModelCopyRatioEmissionData, STATE> result = prevDatum._2;
final HMMSegmentProcessor<CoverageModelCopyRatioEmissionData, STATE, Target> processor = new HMMSegmentProcessor<>(Collections.singletonList(result.getMetaData().getSampleName()), Collections.singletonList(result.getMetaData().getSampleSexGenotypeData()), referenceStateFactory, Collections.singletonList(new HashedListTargetCollection<>(processedTargetList)), Collections.singletonList(result.getForwardBackwardResult()), Collections.singletonList(result.getViterbiResult()));
newPartitionData.add(new Tuple2<>(sampleIndex, processor.getSegmentsAsList()));
}
return newPartitionData.iterator();
}).collect().stream().sorted(Comparator.comparingInt(t -> t._1)).map(t -> t._2).collect(Collectors.toList());
}
use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord in project gatk-protected by broadinstitute.
the class CoverageModelEMWorkspace method getCopyRatioSegmentsLocal.
private List<List<HiddenStateSegmentRecord<STATE, Target>>> getCopyRatioSegmentsLocal() {
final List<List<CoverageModelCopyRatioEmissionData>> copyRatioEmissionData = fetchCopyRatioEmissionDataLocal();
final INDArray sampleReadDepths = Transforms.exp(sampleMeanLogReadDepths, true);
return sampleIndexStream().mapToObj(si -> {
final CopyRatioCallingMetadata metadata = CopyRatioCallingMetadata.builder().sampleName(processedSampleNameList.get(si)).sampleSexGenotypeData(processedSampleSexGenotypeData.get(si)).sampleCoverageDepth(sampleReadDepths.getDouble(si)).emissionCalculationStrategy(EmissionCalculationStrategy.HYBRID_POISSON_GAUSSIAN).build();
return copyRatioExpectationsCalculator.getCopyRatioHMMResults(metadata, processedTargetList, copyRatioEmissionData.get(si));
}).map(result -> {
final HMMSegmentProcessor<CoverageModelCopyRatioEmissionData, STATE, Target> processor = new HMMSegmentProcessor<>(Collections.singletonList(result.getMetaData().getSampleName()), Collections.singletonList(result.getMetaData().getSampleSexGenotypeData()), referenceStateFactory, Collections.singletonList(new HashedListTargetCollection<>(processedTargetList)), Collections.singletonList(result.getForwardBackwardResult()), Collections.singletonList(result.getViterbiResult()));
return processor.getSegmentsAsList();
}).collect(Collectors.toList());
}
use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord in project gatk-protected by broadinstitute.
the class CoverageModelEMWorkspace method getViterbiAsNDArray.
/**
* Fetches the Viterbi copy ratio (or copy number) states as a target-sample matrix
*
* @return an {@link INDArray}
*/
protected INDArray getViterbiAsNDArray(final List<List<HiddenStateSegmentRecord<STATE, Target>>> segments) {
final INDArray res = Nd4j.create(numSamples, numTargets);
final TargetCollection<Target> targetCollection = new HashedListTargetCollection<>(processedTargetList);
for (int si = 0; si < numSamples; si++) {
final SexGenotypeData sampleSexGenotype = processedSampleSexGenotypeData.get(si);
/* start with all ref */
final double[] calls = IntStream.range(0, numTargets).mapToDouble(ti -> referenceStateFactory.apply(sampleSexGenotype, processedTargetList.get(ti)).getScalar()).toArray();
/* go through segments and mutate ref calls as necessary */
segments.get(si).forEach(seg -> {
final IndexRange range = targetCollection.indexRange(seg.getSegment());
final double copyRatio = seg.getSegment().getCall().getScalar();
for (int ti = range.from; ti < range.to; ti++) {
calls[ti] = copyRatio;
}
});
res.get(NDArrayIndex.point(si), NDArrayIndex.all()).assign(Nd4j.create(calls, new int[] { 1, numTargets }));
}
return res.transpose();
}
use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord in project gatk by broadinstitute.
the class XHMMSegmentGenotyperIntegrationTest method assertVariantSegmentsAreCovered.
private void assertVariantSegmentsAreCovered(final List<VariantContext> variants, final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) {
for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> variantSegment : variantSegments) {
final Optional<VariantContext> match = variants.stream().filter(vc -> new SimpleInterval(vc).equals(variantSegment.getSegment().getInterval())).findFirst();
Assert.assertTrue(match.isPresent());
final VariantContext matchedVariant = match.get();
final Genotype genotype = matchedVariant.getGenotype(variantSegment.getSampleName());
final String discovery = genotype.getAnyAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString();
Assert.assertTrue(discovery.equals(XHMMSegmentGenotyper.DISCOVERY_TRUE));
final CopyNumberTriState call = variantSegment.getSegment().getCall();
final List<Allele> gt = genotype.getAlleles();
Assert.assertEquals(gt.size(), 1);
// The call may not be the same for case where the event-quality is relatively low.
if (variantSegment.getSegment().getEventQuality() > 10) {
Assert.assertEquals(CopyNumberTriStateAllele.valueOf(gt.get(0)).state, call, genotype.toString());
}
final String[] SQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.SOME_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double someQual = variantSegment.getSegment().getSomeQuality();
Assert.assertEquals(Double.parseDouble(SQ[call == CopyNumberTriState.DELETION ? 0 : 1]), someQual, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
final String[] LQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.START_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double startQuality = variantSegment.getSegment().getStartQuality();
Assert.assertEquals(Double.parseDouble(LQ[call == CopyNumberTriState.DELETION ? 0 : 1]), startQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
final String[] RQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.END_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final double endQuality = variantSegment.getSegment().getEndQuality();
Assert.assertEquals(Double.parseDouble(RQ[call == CopyNumberTriState.DELETION ? 0 : 1]), endQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString());
// Check the PL.
final int[] PL = genotype.getPL();
final int observedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, PL[CopyNumberTriStateAllele.REF.index()] - PL[CopyNumberTriStateAllele.valueOf(call).index()]);
final double expectedCallPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleErrorRate(QualityUtils.qualToProb(variantSegment.getSegment().getExactQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
final double expectedRefPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleCorrectRate(QualityUtils.qualToProb(variantSegment.getSegment().getEventQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION);
final int expectedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, (int) Math.round(expectedRefPL - expectedCallPL));
Assert.assertTrue(Math.abs(observedGQFromPL - expectedGQFromPL) <= 1, genotype.toString() + " " + variantSegment.getSegment().getEventQuality());
}
}
Aggregations