Example 26 with ReadPileup

Use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

From the class GenotypingEngine, method calculateGenotypes.

/**
     * Main entry function, shared across genotypers (namely UG and HC), to calculate genotypes of a given VC with its corresponding GLs.
     *
     * @param features                           Features
     * @param refContext                         Reference context
     * @param rawContext                         Raw context
     * @param stratifiedContexts                 Stratified alignment contexts
     * @param vc                                 Input VC
     * @param model                              GL calculation model
     * @param inheritAttributesFromInputVC       Whether the output VC should inherit attributes from the input VC
     * @return                                   VC with assigned genotypes
     */
protected VariantCallContext calculateGenotypes(final FeatureContext features, final ReferenceContext refContext, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final boolean inheritAttributesFromInputVC, final ReadLikelihoods<Allele> likelihoods, final SAMFileHeader header) {
    final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null;
    // if the input VC can't be genotyped, exit with either a null VCC or, when we need to emit all sites, an empty call
    if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) {
        return emptyCallContext(features, refContext, rawContext, header);
    }
    final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
    final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
    VariantContext reducedVC = vc;
    if (maxAltAlleles < vc.getAlternateAlleles().size()) {
        final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
        final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
        reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
    }
    final AFCalculator afCalculator = configuration.genotypeArgs.USE_NEW_AF_CALCULATOR ? newAFCalculator : afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles);
    final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc, defaultPloidy, model));
    final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
    // posterior probability that at least one alt allele exists in the samples
    final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
    // note: the trailing "+ 0.0" terms are needed because -10.0 * 0.0 => -0.0, which isn't nice; adding 0.0 normalizes any -0.0 to 0.0
    final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
    final double phredScaledConfidence = (-10.0 * log10Confidence) + 0.0;
    // skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs
    if (!passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) {
        // technically, at this point our confidence in a reference call isn't accurately estimated
        //  because it didn't take into account samples with no data, so let's get a better estimate
        final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model);
        final int INDEX_FOR_AC_EQUALS_1 = 1;
        return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele);
    }
    // start constructing the resulting VC
    final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
    final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), vc.getContig(), vc.getStart(), vc.getEnd(), outputAlleles);
    builder.log10PError(log10Confidence);
    if (!passesCallThreshold(phredScaledConfidence)) {
        builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
    }
    // create the genotypes
    //TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
    final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) : AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));
    // calculating strand bias involves overwriting data structures, so we do it last
    final Map<String, Object> attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()), genotypes, model, likelihoods);
    VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make();
    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header);
        vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true);
    }
    // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
    // limitedContext callers need to handle allele trimming on their own to keep their alleles in sync
    if (outputAlleles.size() != vc.getAlleles().size() && !limitedContext) {
        vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
    }
    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele));
}
Also used : java.util(java.util) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) GATKVCFConstants(org.broadinstitute.hellbender.utils.variant.GATKVCFConstants) GATKVariantContextUtils(org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils) SampleList(org.broadinstitute.hellbender.utils.genotyper.SampleList) htsjdk.variant.variantcontext(htsjdk.variant.variantcontext) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Collectors(java.util.stream.Collectors) ReadLikelihoods(org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) GATKVCFHeaderLines(org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines) VariantAnnotatorEngine(org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) LogManager(org.apache.logging.log4j.LogManager) org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc(org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc) VCFConstants(htsjdk.variant.vcf.VCFConstants) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException) org.broadinstitute.hellbender.utils(org.broadinstitute.hellbender.utils)
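
The "+ 0.0" idiom in calculateGenotypes above is easy to miss. Here is a minimal standalone Java sketch (not GATK code) of why it matters when turning a log10 confidence into a phred-scaled score: when log10Confidence is exactly 0.0, multiplying by -10.0 produces IEEE-754 negative zero, and adding 0.0 normalizes it.

public class NegativeZeroDemo {
    public static void main(String[] args) {
        final double log10Confidence = 0.0;
        // -10.0 * 0.0 yields IEEE-754 negative zero, which prints as "-0.0"
        final double raw = -10.0 * log10Confidence;
        System.out.println(raw);                 // -0.0
        // adding 0.0 turns -0.0 into 0.0 (the sum of opposite-signed zeros is +0.0)
        final double phredScaled = raw + 0.0;
        System.out.println(phredScaled);         // 0.0
        // the two values still compare equal; only their printed form differs
        System.out.println(raw == phredScaled);  // true
    }
}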

Example 27 with ReadPileup

Use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

From the class PileupUnitTest, method testInsertLengthOutput.

@Test
public void testInsertLengthOutput() throws Exception {
    // create two artificial reads
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final SimpleInterval loc = new SimpleInterval("1:1");
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setFragmentLength(100);
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setFragmentLength(50);
    // generate the pileups
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1, read2), 1);
    // test one read
    Assert.assertEquals(Pileup.insertLengthOutput(pileup.makeFilteredPileup(p -> p.getRead().getName().equals("read1"))), "100");
    Assert.assertEquals(Pileup.insertLengthOutput(pileup.makeFilteredPileup(p -> p.getRead().getName().equals("read2"))), "50");
    // test two reads
    Assert.assertEquals(Pileup.insertLengthOutput(pileup), "100,50");
    // test an empty pileup
    Assert.assertEquals(Pileup.insertLengthOutput(new ReadPileup(loc)), "");
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
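
The assertions above pin down the output format exactly: one fragment length per read, comma-separated, and "" for an empty pileup. Below is a minimal sketch of a formatter consistent with those assertions, using only the public ReadPileup/GATKRead accessors seen in the test; the real Pileup.insertLengthOutput in GATK's Pileup walker may be implemented differently.

import java.util.stream.Collectors;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;

public final class InsertLengthFormatter {
    private InsertLengthFormatter() {}

    // joins each read's fragment length with commas; an empty pileup yields ""
    public static String insertLengthOutput(final ReadPileup pileup) {
        return pileup.getReads().stream()
                .map(read -> String.valueOf(read.getFragmentLength()))
                .collect(Collectors.joining(","));
    }
}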

Example 28 with ReadPileup

Use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

From the class LocusIteratorByState, method lazyLoadNextAlignmentContext.

/**
     * Creates the next alignment context from the given state.  Note that this is implemented as a
     * lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
     * next entry.
     */
private void lazyLoadNextAlignmentContext() {
    while (nextAlignmentContext == null && readStates.hasNext()) {
        readStates.collectPendingReads();
        final Locatable location = getLocation();
        final Map<String, ReadPileup> fullPileupPerSample = new LinkedHashMap<>();
        for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates) {
            final String sample = sampleStatePair.getKey();
            final PerSampleReadStateManager readState = sampleStatePair.getValue();
            final Iterator<AlignmentStateMachine> iterator = readState.iterator();
            final List<PileupElement> pile = new ArrayList<>(readState.size());
            while (iterator.hasNext()) {
                // state object with the read/offset information
                final AlignmentStateMachine state = iterator.next();
                final GATKRead read = state.getRead();
                final CigarOperator op = state.getCigarOperator();
                if (!includeReadsWithNsAtLoci && op == CigarOperator.N) {
                    continue;
                }
                if (!dontIncludeReadInPileup(read, location.getStart())) {
                    if (!includeReadsWithDeletionAtLoci && op == CigarOperator.D) {
                        continue;
                    }
                    pile.add(state.makePileupElement());
                }
            }
            if (!pile.isEmpty()) {
                // if this pileup added at least one base, add it to the full pileup
                fullPileupPerSample.put(sample, new ReadPileup(location, pile));
            }
        }
        // critical - must be called after we get the current state offsets and location
        readStates.updateReadStates();
        if (!fullPileupPerSample.isEmpty()) {
            // if we got reads with non-D/N over the current position, we are done
            nextAlignmentContext = new AlignmentContext(location, new ReadPileup(location, fullPileupPerSample));
        }
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) PileupElement(org.broadinstitute.hellbender.utils.pileup.PileupElement) CigarOperator(htsjdk.samtools.CigarOperator) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) Locatable(htsjdk.samtools.util.Locatable)
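
The javadoc's contract (advance only while nextAlignmentContext is null) is the standard lazy-iterator pattern. Below is a minimal sketch of how hasNext/next typically wrap such a method; this is an assumed shape for illustration, not the actual LocusIteratorByState source.

@Override
public boolean hasNext() {
    // a no-op unless nextAlignmentContext is null, per the contract above
    lazyLoadNextAlignmentContext();
    return nextAlignmentContext != null;
}

@Override
public AlignmentContext next() {
    if (!hasNext()) {
        // java.util.NoSuchElementException
        throw new NoSuchElementException("No more alignment contexts");
    }
    final AlignmentContext current = nextAlignmentContext;
    // clearing the field is what lets the next call advance
    nextAlignmentContext = null;
    return current;
}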

Example 29 with ReadPileup

Use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

From the class AlignmentContext, method splitContextBySampleName.

/**
     * Splits the given AlignmentContext into one AlignmentContext per sample, keyed by sample name rather
     * than by sample object.
     *
     * @param assumedSingleSample If non-null, assume this is the only sample in our pileup and return immediately.
     *                            If null, get the list of samples from the provided header and do the work of splitting by sample.
     * @return a Map of sample name to StratifiedAlignmentContext; samples without coverage are not included
     **/
public Map<String, AlignmentContext> splitContextBySampleName(final String assumedSingleSample, final SAMFileHeader header) {
    if (assumedSingleSample != null) {
        return Collections.singletonMap(assumedSingleSample, this);
    }
    final Locatable loc = this.getLocation();
    // this will throw a user error if any sample lacks a read group/sample name
    final Map<String, ReadPileup> pileups = this.getBasePileup().splitBySample(header, assumedSingleSample);
    final Map<String, AlignmentContext> contexts = new LinkedHashMap<>(pileups.size());
    for (final Map.Entry<String, ReadPileup> entry : pileups.entrySet()) {
        // Don't add empty pileups to the split context.
        if (entry.getValue().isEmpty()) {
            continue;
        }
        contexts.put(entry.getKey(), new AlignmentContext(loc, entry.getValue()));
    }
    return contexts;
}
Also used : ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Locatable(htsjdk.samtools.util.Locatable) LinkedHashMap(java.util.LinkedHashMap)
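
A hypothetical usage sketch for the method above; the context and header variables (and the java.util.Map import, as in the list above) are assumptions for illustration. Passing null for assumedSingleSample makes the method derive the sample list from the header, as the javadoc documents.

// context: an AlignmentContext over a multi-sample pileup; header: the matching SAMFileHeader
final Map<String, AlignmentContext> bySample = context.splitContextBySampleName(null, header);
for (final Map.Entry<String, AlignmentContext> entry : bySample.entrySet()) {
    // samples without coverage were skipped, so every pileup here is non-empty
    System.out.printf("%s: %d reads at %s%n",
            entry.getKey(), entry.getValue().getBasePileup().size(), entry.getValue().getLocation());
}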

Example 30 with ReadPileup

Use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

From the class AlignmentContextUnitTest, method testNoSample.

@Test(expectedExceptions = UserException.ReadMissingReadGroup.class)
public void testNoSample() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setReadGroup(readGroupOne.getId());
    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1), 1);
    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    ac.splitContextBySampleName(header);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Locatable(htsjdk.samtools.util.Locatable) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
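
For contrast, here is a minimal sketch of the change that lets the split succeed instead of throwing: attach a sample name to the read group via htsjdk's SAMReadGroupRecord.setSample. The sample name "sample1" is an assumption for illustration.

final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
readGroupOne.setSample("sample1");  // the missing piece in testNoSample above
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
header.addReadGroup(readGroupOne);
final Locatable loc = new SimpleInterval("chr1", 1, 1);
final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, 10);
read1.setReadGroup(readGroupOne.getId());
final AlignmentContext ac = new AlignmentContext(loc, new ReadPileup(loc, Arrays.asList(read1), 1));
// with a sample on the read group, this returns {"sample1" -> context} instead of throwing
final Map<String, AlignmentContext> split = ac.splitContextBySampleName(header);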

Aggregations

Classes most frequently used together with ReadPileup across these examples (usage counts):

ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup) 36
GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead) 24
Test (org.testng.annotations.Test) 19
AlignmentContext (org.broadinstitute.hellbender.engine.AlignmentContext) 14
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 13
SAMFileHeader (htsjdk.samtools.SAMFileHeader) 11
Locatable (htsjdk.samtools.util.Locatable) 11
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 9
PileupElement (org.broadinstitute.hellbender.utils.pileup.PileupElement) 7
java.util (java.util) 5
VariantContext (htsjdk.variant.variantcontext.VariantContext) 4
Collectors (java.util.stream.Collectors) 4
SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord) 3
FeatureContext (org.broadinstitute.hellbender.engine.FeatureContext) 3
ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext) 3
VisibleForTesting (com.google.common.annotations.VisibleForTesting) 2
CigarOperator (htsjdk.samtools.CigarOperator) 2
htsjdk.variant.variantcontext (htsjdk.variant.variantcontext) 2
htsjdk.variant.vcf (htsjdk.variant.vcf) 2
VCFConstants (htsjdk.variant.vcf.VCFConstants) 2