Search in sources :

Example 1 with HiddenStateSegment

use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment in project gatk-protected by broadinstitute.

the class ConvertGSVariantsToSegments method apply.

@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    final SimpleInterval interval = new SimpleInterval(variant);
    final int targetCount = targets.indexRange(interval).size();
    final int[] callCounts = new int[CopyNumberTriState.values().length];
    for (final Genotype genotype : variant.getGenotypes().iterateInSampleNameOrder()) {
        final String sample = genotype.getSampleName();
        final double mean = doubleFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION));
        final int copyNumber = intFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FORMAT));
        final CopyNumberTriState call = copyNumber == neutralCopyNumber ? CopyNumberTriState.NEUTRAL : (copyNumber < neutralCopyNumber) ? CopyNumberTriState.DELETION : CopyNumberTriState.DUPLICATION;
        callCounts[call.ordinal()]++;
        final double[] probs = doubleArrayFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_POSTERIOR));
        final double log10PostQualCall = calculateLog10CallQuality(probs, call);
        final double log10PostQualNonRef = calculateLog10CallQualityNonRef(probs);
        final double phredProbCall = -10.0 * log10PostQualCall;
        final double phredProbNonRef = -10.0 * log10PostQualNonRef;
        final HiddenStateSegment<CopyNumberTriState, Target> segment = new HiddenStateSegment<>(interval, targetCount, mean, // GS VCF does not contain any stddev or var estimate for coverage fraction.
        0.0, call, // GS does not provide an EQ, we approximate it to be the 1 - sum of all call compatible CN corresponding posterior probs
        phredProbCall, // GS does not provide a SQ, we leave is a NaN.
        Double.NaN, // GS does not provide a START Q.
        Double.NaN, // GS does not provide a END Q.
        Double.NaN, phredProbNonRef);
        final HiddenStateSegmentRecord<CopyNumberTriState, Target> record = new HiddenStateSegmentRecord<>(sample, segment);
        try {
            outputWriter.writeRecord(record);
        } catch (final IOException ex) {
            throw new UserException.CouldNotCreateOutputFile(outputFile, ex);
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegment(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 2 with HiddenStateSegment

use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment in project gatk-protected by broadinstitute.

the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.

private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(",")).mapToDouble(Double::parseDouble).toArray();
            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()), 0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN, -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegment(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 3 with HiddenStateSegment

use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment in project gatk by broadinstitute.

the class ConvertGSVariantsToSegments method apply.

@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    final SimpleInterval interval = new SimpleInterval(variant);
    final int targetCount = targets.indexRange(interval).size();
    final int[] callCounts = new int[CopyNumberTriState.values().length];
    for (final Genotype genotype : variant.getGenotypes().iterateInSampleNameOrder()) {
        final String sample = genotype.getSampleName();
        final double mean = doubleFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION));
        final int copyNumber = intFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FORMAT));
        final CopyNumberTriState call = copyNumber == neutralCopyNumber ? CopyNumberTriState.NEUTRAL : (copyNumber < neutralCopyNumber) ? CopyNumberTriState.DELETION : CopyNumberTriState.DUPLICATION;
        callCounts[call.ordinal()]++;
        final double[] probs = doubleArrayFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_POSTERIOR));
        final double log10PostQualCall = calculateLog10CallQuality(probs, call);
        final double log10PostQualNonRef = calculateLog10CallQualityNonRef(probs);
        final double phredProbCall = -10.0 * log10PostQualCall;
        final double phredProbNonRef = -10.0 * log10PostQualNonRef;
        final HiddenStateSegment<CopyNumberTriState, Target> segment = new HiddenStateSegment<>(interval, targetCount, mean, // GS VCF does not contain any stddev or var estimate for coverage fraction.
        0.0, call, // GS does not provide an EQ, we approximate it to be the 1 - sum of all call compatible CN corresponding posterior probs
        phredProbCall, // GS does not provide a SQ, we leave is a NaN.
        Double.NaN, // GS does not provide a START Q.
        Double.NaN, // GS does not provide a END Q.
        Double.NaN, phredProbNonRef);
        final HiddenStateSegmentRecord<CopyNumberTriState, Target> record = new HiddenStateSegmentRecord<>(sample, segment);
        try {
            outputWriter.writeRecord(record);
        } catch (final IOException ex) {
            throw new UserException.CouldNotCreateOutputFile(outputFile, ex);
        }
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegment(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 4 with HiddenStateSegment

use of org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment in project gatk by broadinstitute.

the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.

private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
    final VCFFileReader reader = new VCFFileReader(vcf, false);
    final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
    reader.iterator().forEachRemaining(vc -> {
        final int targetCount = targets.indexRange(vc).size();
        for (final Genotype genotype : vc.getGenotypes()) {
            final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
            final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(",")).mapToDouble(Double::parseDouble).toArray();
            final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
            final CopyNumberTriState call = expectedCall(cn);
            final double exactLog10Prob = expectedExactLog10(call, cnp);
            final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()), 0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN, -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
            result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
        }
    });
    return result;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) Genotype(htsjdk.variant.variantcontext.Genotype) Target(org.broadinstitute.hellbender.tools.exome.Target) CopyNumberTriState(org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState) HiddenStateSegment(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment) HiddenStateSegmentRecord(org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Aggregations

Genotype (htsjdk.variant.variantcontext.Genotype)4 Target (org.broadinstitute.hellbender.tools.exome.Target)4 CopyNumberTriState (org.broadinstitute.hellbender.tools.exome.germlinehmm.CopyNumberTriState)4 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)4 HiddenStateSegment (org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegment)4 HiddenStateSegmentRecord (org.broadinstitute.hellbender.utils.hmm.segmentation.HiddenStateSegmentRecord)4 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 UserException (org.broadinstitute.hellbender.exceptions.UserException)2