Search in sources :

Example 6 with FeatureContext

use of org.broadinstitute.hellbender.engine.FeatureContext in project gatk by broadinstitute.

the class VariantAnnotatorEngineUnitTest method testCoverageAnnotationViaEngine.

@Test
public void testCoverageAnnotationViaEngine() throws Exception {
    final File file = new File(publicTestDir + "Homo_sapiens_assembly19.dbsnp135.chr1_1M.exome_intervals.vcf");
    final FeatureInput<VariantContext> dbSNPBinding = new FeatureInput<>(file.getAbsolutePath(), "dbsnp", Collections.emptyMap());
    final List<String> annotationGroupsToUse = Collections.emptyList();
    final List<String> annotationsToUse = Arrays.asList(Coverage.class.getSimpleName(), DepthPerAlleleBySample.class.getSimpleName(), SampleList.class.getSimpleName());
    final List<String> annotationsToExclude = Collections.emptyList();
    final List<FeatureInput<VariantContext>> features = Collections.emptyList();
    final VariantAnnotatorEngine vae = VariantAnnotatorEngine.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude, dbSNPBinding, features);
    final int alt = 5;
    final int ref = 3;
    final Allele refAllele = Allele.create("A", true);
    final Allele altAllele = Allele.create("T");
    final ReadLikelihoods<Allele> likelihoods = makeReadLikelihoods(ref, alt, refAllele, altAllele);
    final VariantContext resultVC = vae.annotateContext(makeVC(refAllele, altAllele), new FeatureContext(), null, likelihoods, ann -> ann instanceof Coverage || ann instanceof DepthPerAlleleBySample);
    Assert.assertEquals(resultVC.getCommonInfo().getAttribute(VCFConstants.DEPTH_KEY), String.valueOf(ref + alt));
    Assert.assertEquals(resultVC.getGenotype(0).getAD(), new int[] { ref, alt });
    //skipped because we only asked for Coverage and DepthPerAlleleBySample
    Assert.assertFalse(resultVC.getCommonInfo().hasAttribute(GATKVCFConstants.SAMPLE_LIST_KEY));
}
Also used : FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) FeatureInput(org.broadinstitute.hellbender.engine.FeatureInput) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 7 with FeatureContext

use of org.broadinstitute.hellbender.engine.FeatureContext in project gatk-protected by broadinstitute.

the class AnnotateVcfWithExpectedAlleleFraction method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
    final double[] weights = vc.getGenotypes().stream().mapToDouble(g -> weight(g)).toArray();
    final double expectedAlleleFraction = MathUtils.sum(MathArrays.ebeMultiply(weights, mixingFractionsInSampleOrder));
    vcfWriter.add(new VariantContextBuilder(vc).attribute(EXPECTED_ALLELE_FRACTION_NAME, expectedAlleleFraction).make());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) VCFHeader(htsjdk.variant.vcf.VCFHeader) MathArrays(org.apache.commons.math3.util.MathArrays) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) HashSet(java.util.HashSet) Map(java.util.Map) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) VariantWalker(org.broadinstitute.hellbender.engine.VariantWalker) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) ReadsContext(org.broadinstitute.hellbender.engine.ReadsContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 8 with FeatureContext

use of org.broadinstitute.hellbender.engine.FeatureContext in project gatk by broadinstitute.

the class GenotypingEngine method emptyCallContext.

/**
     * Produces an empty variant-call context to output when there is no enough data provided to call anything.
     *
     * @param features feature context
     * @param ref the reference context.
     * @param rawContext the read alignment at that location.
     * @return it might be null if no enough information is provided to do even an empty call. For example when
     * we have limited-context (i.e. any of the tracker, reference or alignment is {@code null}.
     */
protected final VariantCallContext emptyCallContext(final FeatureContext features, final ReferenceContext ref, final AlignmentContext rawContext, final SAMFileHeader header) {
    if (features == null || ref == null || rawContext == null || !forceSiteEmission()) {
        return null;
    }
    VariantContext vc;
    if (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) {
        final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features, rawContext.getLocation(), false, logger, configuration.alleles);
        if (ggaVc == null) {
            return null;
        }
        vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), ggaVc.getStart(), ggaVc.getEnd(), ggaVc.getAlleles()).make();
    } else {
        // deal with bad/non-standard reference bases
        if (!Allele.acceptableAlleleBases(new byte[] { ref.getBase() })) {
            return null;
        }
        final Set<Allele> alleles = new LinkedHashSet<>(Collections.singleton(Allele.create(ref.getBase(), true)));
        vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), ref.getInterval().getStart(), ref.getInterval().getStart(), alleles).make();
    }
    if (vc != null && annotationEngine != null) {
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        vc = annotationEngine.annotateContext(vc, features, ref, null, a -> true);
    }
    return new VariantCallContext(vc, false);
}
Also used : java.util(java.util) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) GATKVariantContextUtils(org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Collectors(java.util.stream.Collectors) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) GATKVCFHeaderLines(org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines) VariantAnnotatorEngine(org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) LogManager(org.apache.logging.log4j.LogManager) org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc(org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc) VCFConstants(htsjdk.variant.vcf.VCFConstants) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup)

Example 9 with FeatureContext

use of org.broadinstitute.hellbender.engine.FeatureContext in project gatk by broadinstitute.

the class GenotypingEngine method calculateGenotypes.

/**
     * Main entry function to calculate genotypes of a given VC with corresponding GL's that is shared across genotypers (namely UG and HC).
     *
     * @param features                           Features
     * @param refContext                         Reference context
     * @param rawContext                         Raw context
     * @param stratifiedContexts                 Stratified alignment contexts
     * @param vc                                 Input VC
     * @param model                              GL calculation model
     * @param inheritAttributesFromInputVC       Output VC will contain attributes inherited from input vc
     * @return                                   VC with assigned genotypes
     */
protected VariantCallContext calculateGenotypes(final FeatureContext features, final ReferenceContext refContext, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final boolean inheritAttributesFromInputVC, final ReadLikelihoods<Allele> likelihoods, final SAMFileHeader header) {
    final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null;
    // if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
    if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) {
        return emptyCallContext(features, refContext, rawContext, header);
    }
    final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
    final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
    VariantContext reducedVC = vc;
    if (maxAltAlleles < vc.getAlternateAlleles().size()) {
        final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
        final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
        reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
    }
    final AFCalculator afCalculator = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles);
    final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc, defaultPloidy, model));
    final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
    // posterior probability that at least one alt allele exists in the samples
    final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
    // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
    final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
    // Add 0.0 removes -0.0 occurrences.
    final double phredScaledConfidence = (-10.0 * log10Confidence) + 0.0;
    // skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs
    if (!passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
        // technically, at this point our confidence in a reference call isn't accurately estimated
        //  because it didn't take into account samples with no data, so let's get a better estimate
        final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
        final int INDEX_FOR_AC_EQUALS_1 = 1;
        return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
    }
    // start constructing the resulting VC
    final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
    final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);
    builder.log10PError(log10Confidence);
    if (!passesCallThreshold(phredScaledConfidence)) {
        builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
    }
    // create the genotypes
    //TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
    final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    // calculating strand bias involves overwriting data structures, so we do it last
    final Map<String, Object> attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()), genotypes, model, likelihoods);
    VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make();
    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header);
        vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true);
    }
    // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
    if (// limitedContext callers need to handle allele trimming on their own to keep their alleles in sync
    outputAlleles.size() != vc.getAlleles().size() && !limitedContext) {
        vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
    }
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele));
}
Also used : java.util(java.util) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) GATKVariantContextUtils(org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Collectors(java.util.stream.Collectors) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) GATKVCFHeaderLines(org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines) VariantAnnotatorEngine(org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) LogManager(org.apache.logging.log4j.LogManager) org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc(org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc) VCFConstants(htsjdk.variant.vcf.VCFConstants) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup)

Example 10 with FeatureContext

use of org.broadinstitute.hellbender.engine.FeatureContext in project gatk by broadinstitute.

the class AnnotateVcfWithExpectedAlleleFraction method apply.

@Override
public void apply(final VariantContext vc, final ReadsContext readsContext, final ReferenceContext refContext, final FeatureContext fc) {
    final double[] weights = vc.getGenotypes().stream().mapToDouble(g -> weight(g)).toArray();
    final double expectedAlleleFraction = MathUtils.sum(MathArrays.ebeMultiply(weights, mixingFractionsInSampleOrder));
    vcfWriter.add(new VariantContextBuilder(vc).attribute(EXPECTED_ALLELE_FRACTION_NAME, expectedAlleleFraction).make());
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) VCFHeader(htsjdk.variant.vcf.VCFHeader) MathArrays(org.apache.commons.math3.util.MathArrays) Argument(org.broadinstitute.barclay.argparser.Argument) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) VariantProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup) HashSet(java.util.HashSet) Map(java.util.Map) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) VariantWalker(org.broadinstitute.hellbender.engine.VariantWalker) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) ReadsContext(org.broadinstitute.hellbender.engine.ReadsContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Aggregations

FeatureContext (org.broadinstitute.hellbender.engine.FeatureContext)16 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)11 Test (org.testng.annotations.Test)11 File (java.io.File)9 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 FeatureInput (org.broadinstitute.hellbender.engine.FeatureInput)6 FeatureManager (org.broadinstitute.hellbender.engine.FeatureManager)5 ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext)5 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)4 Collectors (java.util.stream.Collectors)4 AlignmentContext (org.broadinstitute.hellbender.engine.AlignmentContext)3 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 SAMFileHeader (htsjdk.samtools.SAMFileHeader)2 htsjdk.variant.variantcontext (htsjdk.variant.variantcontext)2 Genotype (htsjdk.variant.variantcontext.Genotype)2 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)2 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)2 VCFConstants (htsjdk.variant.vcf.VCFConstants)2