
Example 16 with ReadLikelihoods

Use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

From the class MappingQualityZeroUnitTest, method testLikelihoods.

@Test
public void testLikelihoods() {
    final Allele REF = Allele.create("T", true);
    final Allele ALT = Allele.create("A");
    final int refDepth = 5;
    final int altDepth = 3;
    final int refMQ = 10;
    final int altMQ = 0;
    final List<GATKRead> refReads = IntStream.range(0, refDepth).mapToObj(i -> makeRead(refMQ)).collect(Collectors.toList());
    final List<GATKRead> altReads = IntStream.range(0, altDepth).mapToObj(i -> makeRead(altMQ)).collect(Collectors.toList());
    final ReadLikelihoods<Allele> likelihoods = AnnotationArtificialData.makeLikelihoods("sample1", refReads, altReads, -1.0, -1.0, REF, ALT);
    final VariantContext vc = makeVC();
    final ReferenceContext referenceContext = null;
    final Map<String, Object> annotate = new MappingQualityZero().annotate(referenceContext, vc, likelihoods);
    Assert.assertEquals(annotate.size(), 1, "size");
    Assert.assertEquals(annotate.keySet(), Collections.singleton(VCFConstants.MAPPING_QUALITY_ZERO_KEY), "annots");
    Assert.assertEquals(annotate.get(VCFConstants.MAPPING_QUALITY_ZERO_KEY), String.valueOf(altDepth));
}
Also used: IntStream(java.util.stream.IntStream), Allele(htsjdk.variant.variantcontext.Allele), Arrays(java.util.Arrays), ImmutableMap(com.google.common.collect.ImmutableMap), Test(org.testng.annotations.Test), GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead), Collectors(java.util.stream.Collectors), GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext), ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods), List(java.util.List), Assert(org.testng.Assert), Map(java.util.Map), AlleleList(org.broadinstitute.hellbender.utils.genotyper.AlleleList), IndexedAlleleList(org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList), VariantContext(htsjdk.variant.variantcontext.VariantContext), IndexedSampleList(org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList), ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext), Collections(java.util.Collections), VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder), VCFConstants(htsjdk.variant.vcf.VCFConstants)
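For orientation, the assertion above expects String.valueOf(altDepth) because only the alt reads were built with mapping quality zero; the annotation value is, in effect, a count of MQ0 reads at the site. The following is a minimal standalone sketch of that counting idea (an illustration only, not the MappingQualityZero implementation), assuming GATKRead exposes getMappingQuality() as used throughout these tests.

// Illustration only: count reads whose mapping quality is zero, which is the quantity
// the assertion above checks against (altDepth reads were created with MQ 0).
import java.util.List;
import org.broadinstitute.hellbender.utils.read.GATKRead;

final class Mq0CountSketch {
    static long countMq0Reads(final List<GATKRead> reads) {
        return reads.stream()
                .filter(read -> read.getMappingQuality() == 0)
                .count();
    }
}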

Example 17 with ReadLikelihoods

Use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

From the class GenotypingEngine, method calculateGenotypes.

/**
     * Main entry function, shared across genotypers (namely UG and HC), to calculate genotypes of a given VC with corresponding GLs.
     *
     * @param features                           Features
     * @param refContext                         Reference context
     * @param rawContext                         Raw context
     * @param stratifiedContexts                 Stratified alignment contexts
     * @param vc                                 Input VC
     * @param model                              GL calculation model
     * @param inheritAttributesFromInputVC       Output VC will contain attributes inherited from input vc
     * @return                                   VC with assigned genotypes
     */
protected VariantCallContext calculateGenotypes(final FeatureContext features, final ReferenceContext refContext, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final boolean inheritAttributesFromInputVC, final ReadLikelihoods<Allele> likelihoods, final SAMFileHeader header) {
    final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null;
    // if the input VC can't be genotyped, exit with either a null VCC or, in the case where we need to emit all sites, an empty call
    if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) {
        return emptyCallContext(features, refContext, rawContext, header);
    }
    final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
    final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
    VariantContext reducedVC = vc;
    if (maxAltAlleles < vc.getAlternateAlleles().size()) {
        final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
        final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
        reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
    }
    final AFCalculator afCalculator = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles);
    final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc, defaultPloidy, model));
    final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
    // posterior probability that at least one alt allele exists in the samples
    final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
    // note that -10 * 0.0 => -0.0, which isn't nice; the "+ 0.0" added below normalizes it to 0.0
    final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
    // Add 0.0 removes -0.0 occurrences.
    final double phredScaledConfidence = (-10.0 * log10Confidence) + 0.0;
    // skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs
    if (!passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
        // technically, at this point our confidence in a reference call isn't accurately estimated
        //  because it didn't take into account samples with no data, so let's get a better estimate
        final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
        final int INDEX_FOR_AC_EQUALS_1 = 1;
        return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
    }
    // start constructing the resulting VC
    final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
    final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);
    builder.log10PError(log10Confidence);
    if (!passesCallThreshold(phredScaledConfidence)) {
        builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
    }
    // create the genotypes
    //TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
    final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    // calculating strand bias involves overwriting data structures, so we do it last
    final Map<String, Object> attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()), genotypes, model, likelihoods);
    VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make();
    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header);
        vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true);
    }
    // We may need to trim the alleles because the original VariantContext may have had to pad at the end.
    // limitedContext callers need to handle allele trimming on their own to keep their alleles in sync.
    if (outputAlleles.size() != vc.getAlleles().size() && !limitedContext) {
        vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
    }
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele));
}
Also used: java.util(java.util), ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup), GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants), GATKVariantContextUtils(org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils), SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList), htsjdk.variant.variantcontext(htsjdk.variant.variantcontext), SAMFileHeader(htsjdk.samtools.SAMFileHeader), Collectors(java.util.stream.Collectors), ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods), Logger(org.apache.logging.log4j.Logger), Stream(java.util.stream.Stream), AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext), VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine), GATKVCFHeaderLines(org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines), VariantAnnotatorEngine(org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine), VisibleForTesting(com.google.common.annotations.VisibleForTesting), ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext), FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext), LogManager(org.apache.logging.log4j.LogManager), org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc(org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc), VCFConstants(htsjdk.variant.vcf.VCFConstants), CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException), org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils)
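One subtle detail above is the trailing + 0.0 when computing log10Confidence and phredScaledConfidence: in IEEE 754 arithmetic, scaling a log10 posterior of exactly 0.0 by -10.0 yields -0.0, which Java prints as "-0.0"; adding 0.0 normalizes it to positive zero. A tiny self-contained demonstration (illustration only, not GATK code):

// Illustration only: why the genotyping code appends "+ 0.0" after phred scaling.
public class NegativeZeroDemo {
    public static void main(final String[] args) {
        final double log10Confidence = 0.0;                          // e.g. log10 posterior of exactly zero
        final double naive = -10.0 * log10Confidence;                // IEEE 754 product is -0.0
        final double normalized = (-10.0 * log10Confidence) + 0.0;   // -0.0 + 0.0 == +0.0
        System.out.println(naive);        // prints -0.0
        System.out.println(normalized);   // prints 0.0
    }
}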

Example 18 with ReadLikelihoods

Use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

From the class HaplotypeBAMWriterUnitTest, method generateReadLikelihoods.

private ReadLikelihoods<Haplotype> generateReadLikelihoods(final int[] readCount) {
    final AlleleList<Haplotype> haplotypeList = generateHaplotypeList();
    final SampleList sampleList = generateSampleList(readCount.length);
    final Map<String, List<GATKRead>> readSamples = new LinkedHashMap<>(readCount.length);
    for (int i = 0; i < readCount.length; i++) {
        readSamples.put(sampleList.getSample(i), generateReadsList(i, readCount[i]));
    }
    return new ReadLikelihoods<>(sampleList, haplotypeList, readSamples);
}
Also used: ArrayList(java.util.ArrayList), SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList), List(java.util.List), AlleleList(org.broadinstitute.hellbender.utils.genotyper.AlleleList), IndexedAlleleList(org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList), IndexedSampleList(org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList), ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods), LinkedHashMap(java.util.LinkedHashMap)
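The three-argument constructor used above takes a SampleList, an AlleleList, and a map from sample name to that sample's reads. Below is a minimal hedged sketch of the same pattern with the simplest possible inputs: Allele instead of Haplotype, a single sample, and an empty read list so no read-creation helpers are needed. It assumes IndexedSampleList and IndexedAlleleList constructors that accept sample names and alleles directly, consistent with their use in these tests; check the constructors available in your GATK version.

// Sketch only: mirrors the ReadLikelihoods constructor call in generateReadLikelihoods
// with minimal inputs. The IndexedSampleList/IndexedAlleleList constructors used here
// are assumed; adapt them to the API of your GATK version.
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList;
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.read.GATKRead;

final class ReadLikelihoodsSketch {
    static ReadLikelihoods<Allele> emptyLikelihoodsForOneSample() {
        final IndexedSampleList samples = new IndexedSampleList(Collections.singletonList("sample1"));
        final IndexedAlleleList<Allele> alleles =
                new IndexedAlleleList<>(Allele.create("A", true), Allele.create("C"));
        final Map<String, List<GATKRead>> readsBySample = new LinkedHashMap<>();
        readsBySample.put("sample1", Collections.emptyList());   // a 2-allele x 0-read matrix
        return new ReadLikelihoods<>(samples, alleles, readsBySample);
    }
}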

Example 19 with ReadLikelihoods

Use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

From the class DepthPerAlleleBySample, method annotate.

@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");
    if (g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());
    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of ReadLikelihoods alleles " + likelihoods.alleles());
    final Map<Allele, Integer> alleleCounts = new LinkedHashMap<>();
    for (final Allele allele : vc.getAlleles()) {
        alleleCounts.put(allele, 0);
    }
    final Map<Allele, List<Allele>> alleleSubset = alleles.stream().collect(Collectors.toMap(a -> a, Arrays::asList));
    final ReadLikelihoods<Allele> subsettedLikelihoods = likelihoods.marginalize(alleleSubset);
    subsettedLikelihoods.bestAlleles(g.getSampleName()).stream().filter(ba -> ba.isInformative()).forEach(ba -> alleleCounts.compute(ba.allele, (allele, prevCount) -> prevCount + 1));
    final int[] counts = new int[alleleCounts.size()];
    //first one in AD is always ref
    counts[0] = alleleCounts.get(vc.getReference());
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
        counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i));
    }
    gb.AD(counts);
}
Also used: Genotype(htsjdk.variant.variantcontext.Genotype), Allele(htsjdk.variant.variantcontext.Allele), ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods), java.util(java.util), GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder), VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines), VariantContext(htsjdk.variant.variantcontext.VariantContext), VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine), Utils(org.broadinstitute.hellbender.utils.Utils), ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext), Collectors(java.util.stream.Collectors), VCFConstants(htsjdk.variant.vcf.VCFConstants)
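The per-sample AD logic above boils down to: start every VariantContext allele at zero, add one for each informative read's best allele, and emit the counts with the reference first followed by the alternates in order. A standalone sketch of that tally-then-order pattern with Strings standing in for Allele objects (illustration only):

// Illustration only: the tally-then-order pattern behind DepthPerAlleleBySample#annotate,
// using Strings in place of Allele objects and a plain list in place of bestAlleles().
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class AlleleDepthSketch {
    // orderedAlleles: reference allele first, then alternates in VariantContext order.
    // bestAllelePerInformativeRead: one entry per informative read.
    static int[] alleleDepths(final List<String> orderedAlleles,
                              final List<String> bestAllelePerInformativeRead) {
        final Map<String, Integer> counts = new LinkedHashMap<>();
        orderedAlleles.forEach(allele -> counts.put(allele, 0));                    // start at zero
        bestAllelePerInformativeRead.forEach(
                allele -> counts.computeIfPresent(allele, (a, prev) -> prev + 1));  // one vote per read
        return orderedAlleles.stream().mapToInt(counts::get).toArray();             // ref first, then alts
    }
}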

Example 20 with ReadLikelihoods

Use of org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods in project gatk by broadinstitute.

From the class StrandBiasTest, method getContingencyTable.

/**
     * Allocate and fill a 2x2 strand contingency table.  In the end, it'll look something like this:
     *             fw      rc
     *   allele1   #       #
     *   allele2   #       #
     * @return a 2x2 contingency table
     */
public static int[][] getContingencyTable(final ReadLikelihoods<Allele> likelihoods, final VariantContext vc, final int minCount, final Collection<String> samples) {
    if (likelihoods == null || vc == null) {
        return null;
    }
    final Allele ref = vc.getReference();
    final List<Allele> allAlts = vc.getAlternateAlleles();
    final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
    for (final String sample : samples) {
        final int[] sampleTable = new int[ARRAY_SIZE];
        likelihoods.bestAlleles(sample).stream().filter(ba -> ba.isInformative()).forEach(ba -> updateTable(sampleTable, ba.allele, ba.read, ref, allAlts));
        if (passesMinimumThreshold(sampleTable, minCount)) {
            copyToMainTable(sampleTable, table);
        }
    }
    return table;
}
Also used: Genotype(htsjdk.variant.variantcontext.Genotype), Allele(htsjdk.variant.variantcontext.Allele), GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext), ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods), java.util(java.util), GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants), VariantContext(htsjdk.variant.variantcontext.VariantContext), Utils(org.broadinstitute.hellbender.utils.Utils), ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext), GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead), GATKException(org.broadinstitute.hellbender.exceptions.GATKException)
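The helpers updateTable, passesMinimumThreshold and copyToMainTable are not shown on this page, so the 2x2 layout is easiest to see in a hedged sketch: one plausible convention is row 0 for the reference allele, row 1 for any alternate, column 0 for forward-strand reads and column 1 for reverse. The actual indexing in StrandBiasTest may differ; treat this as an illustration of the table's meaning only.

// Illustration only: fill a 2x2 strand contingency table one read at a time,
// assuming rows = allele class (0 = ref, 1 = alt) and columns = strand (0 = fwd, 1 = rev).
// The real StrandBiasTest helpers may use a different or flattened layout.
final class StrandTableSketch {
    private static final int REF_ROW = 0, ALT_ROW = 1, FWD_COL = 0, REV_COL = 1;

    static int[][] newTable() {
        return new int[2][2];   // corresponds to the ARRAY_DIM x ARRAY_DIM allocation above
    }

    // Call once per informative read supporting the sample's best allele.
    static void update(final int[][] table, final boolean supportsAlt, final boolean isReverseStrand) {
        final int row = supportsAlt ? ALT_ROW : REF_ROW;
        final int col = isReverseStrand ? REV_COL : FWD_COL;
        table[row][col]++;
    }
}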

Aggregations

ReadLikelihoods (org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods): 22 usages
ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext): 17 usages
GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 17 usages
Allele (htsjdk.variant.variantcontext.Allele): 11 usages
Test (org.testng.annotations.Test): 11 usages
GATKVCFConstants (org.broadinstitute.hellbender.utils.variant.GATKVCFConstants): 10 usages
VariantContext (htsjdk.variant.variantcontext.VariantContext): 9 usages
java.util (java.util): 9 usages
Collectors (java.util.stream.Collectors): 9 usages
AlleleList (org.broadinstitute.hellbender.utils.genotyper.AlleleList): 9 usages
IndexedAlleleList (org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList): 9 usages
IndexedSampleList (org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList): 9 usages
Assert (org.testng.Assert): 7 usages
htsjdk.variant.variantcontext (htsjdk.variant.variantcontext): 6 usages
VCFConstants (htsjdk.variant.vcf.VCFConstants): 6 usages
List (java.util.List): 6 usages
GenotypesContext (htsjdk.variant.variantcontext.GenotypesContext): 5 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 4 usages
TextCigarCodec (htsjdk.samtools.TextCigarCodec): 4 usages
Genotype (htsjdk.variant.variantcontext.Genotype): 4 usages