Search in sources :

Example 16 with ReadPileup

use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

In the class ASEReadCounter, the method apply:

/**
 * Processes one locus of the traversal: if exactly one biallelic heterozygous
 * variant overlaps this position, filters the pileup by the configured count
 * type and writes a single allele-specific-expression output line.
 *
 * Skips the site (with a warning) when the variant is non-biallelic or has no
 * het genotypes; throws when multiple variant contexts overlap the position or
 * when the variant looks like a GVCF &lt;NON_REF&gt; record.
 *
 * @param alignmentContext pileup and location for the current position
 * @param referenceContext reference base at the current position
 * @param featureContext   source of the overlapping variant records
 */
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) {
    final String contig = alignmentContext.getContig();
    final long position = alignmentContext.getPosition();
    final char refAllele = (char) referenceContext.getBase();
    final List<VariantContext> VCs = featureContext.getValues(variants);
    if (VCs != null && VCs.size() > 1) {
        // Fixed message typo: "More then" -> "More than".
        throw new UserException("More than one variant context at position: " + contig + ":" + position);
    }
    if (VCs == null || VCs.isEmpty()) {
        // No variant at this position: nothing to count.
        return;
    }
    final VariantContext vc = VCs.get(0);
    if (!vc.isBiallelic()) {
        logger.warn("Ignoring site: cannot run ASE on non-biallelic sites: " + vc.toString());
        return;
    }
    if (vc.getHetCount() < 1) {
        // Fixed message typo: "postion" -> "position".
        logger.warn("Ignoring site: variant is not het at position: " + contig + ":" + position);
        return;
    }
    if (vc.getNAlleles() == 1 || vc.getAlternateAllele(0).getBases().length == 0) {
        // A lone symbolic alt (e.g. <NON_REF>) has no concrete bases.
        throw new UserException("The file of variant sites must contain heterozygous sites and cannot be a GVCF file containing <NON_REF> alleles.");
    }
    final char altAllele = (char) vc.getAlternateAllele(0).getBases()[0];
    final String siteID = vc.getID();
    final ReadPileup pileup = filterPileup(alignmentContext.getBasePileup(), countType);
    // count up the depths of all and QC+ bases
    final String line = calculateLineForSite(pileup, siteID, refAllele, altAllele);
    if (line != null) {
        outputStream.println(line);
    }
}
Also used : ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) VariantContext(htsjdk.variant.variantcontext.VariantContext) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 17 with ReadPileup

use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.

In the class ReferenceConfidenceModelUnitTest, the method testCalcNIndelInformativeReads:

/**
 * For each offset i in the read, builds a single-read pileup at position i+1
 * on contig "20" and checks calcNIndelInformativeReads against the expected
 * count from the data provider.
 */
@Test(dataProvider = "CalcNIndelInformativeReadsData")
public void testCalcNIndelInformativeReads(final String readBases, final String ref, final int maxIndelSize, final List<Integer> expected) {
    final byte qual = (byte) 30;
    final byte[] quals = Utils.dupBytes(qual, readBases.length());
    // Hoist loop-invariant conversions: the original re-ran getBytes() on every
    // iteration (including in the loop condition), allocating a new array each time.
    final byte[] readBytes = readBases.getBytes();
    final byte[] refBytes = ref.getBytes();
    final String cigar = readBases.length() + "M";
    for (int i = 0; i < readBytes.length; i++) {
        final GATKRead read = ArtificialReadUtils.createArtificialRead(readBytes, quals, cigar);
        final SimpleInterval loc = new SimpleInterval("20", i + 1, i + 1);
        final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), i);
        final int actual = model.calcNIndelInformativeReads(pileup, i, refBytes, maxIndelSize);
        Assert.assertEquals(actual, (int) expected.get(i), "failed at position " + i);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 18 with ReadPileup

use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk by broadinstitute.

In the class HaplotypeCallerGenotypingEngineUnitTest, the method testAddMiscellaneousAllele:

// NOTE(review): this test is disabled (enabled = false) and, as written, would
// throw a NullPointerException if enabled: updatedVc is hard-coded to null
// because the call that should produce it is commented out below.
@Test(dataProvider = "AddMiscellaneousDataProvider", enabled = false)
public void testAddMiscellaneousAllele(final String readBases, final int readOffset, final String ref, final int refOffset, final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
    final byte baseQual = (byte) 30;
    // Uniform base qualities across the whole synthetic read.
    final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
    // Artificial read fully matching the reference (cigar "<len>M").
    final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
    final Locatable loc = new SimpleInterval("20", refOffset, refOffset);
    final ReadPileup pileup = new ReadPileup(loc, Collections.singletonList(read), readOffset);
    // Build a single-genotype VariantContext: reference allele followed by the
    // provided alternative alleles, with AD/DP/PL set on the one genotype.
    final VariantContextBuilder vcb = new VariantContextBuilder();
    final GenotypeBuilder gb = new GenotypeBuilder();
    final List<String> alleleStrings = new ArrayList<>(1 + alternatives.length);
    alleleStrings.add(referenceAllele);
    alleleStrings.addAll(Arrays.asList(alternatives));
    gb.AD(new int[] { 1 });
    gb.DP(1);
    gb.PL(likelihoods);
    vcb.alleles(alleleStrings);
    vcb.loc("20", refOffset, refOffset + referenceAllele.length() - 1);
    vcb.genotypes(gb.make());
    final VariantContext vc = vcb.make();
    // GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);
    // NOTE(review): the intended call above is commented out, so updatedVc never
    // receives a real value; every dereference below would NPE if this test ran.
    final VariantContext updatedVc = null;
    final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
    Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
    final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
    // Compare each likelihood entry against the expectation within tolerance.
    for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
        Assert.assertEquals(updatedLikelihoodsArray[i], expected[i], 0.0001);
    }
    // Finally, verify a symbolic <NON_REF> allele was added to the result.
    Allele altAllele = null;
    for (final Allele allele : updatedVc.getAlleles()) if (allele.isSymbolic() && allele.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME))
        altAllele = allele;
    Assert.assertNotNull(altAllele);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 19 with ReadPileup

use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.

In the class ReferenceConfidenceModel, the method getPileupsOverReference:

/**
 * Get a list of pileups that span the entire active region span, in order, one
 * for each reference position; positions with no covering reads get an empty
 * pileup placeholder so the result always has one entry per position.
 *
 * @param refHaplotype       the reference haplotype (must be among calledHaplotypes)
 * @param calledHaplotypes   haplotypes segregating in this region
 * @param paddedReferenceLoc location of the padded reference (currently unused here)
 * @param activeRegion       region whose reads are piled up
 * @param activeRegionSpan   inclusive genomic span to produce pileups over
 * @param readLikelihoods    single-sample read likelihoods (validated, not read)
 * @return one ReadPileup per position of activeRegionSpan, in genomic order
 */
private List<ReadPileup> getPileupsOverReference(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final SimpleInterval activeRegionSpan, final ReadLikelihoods<Haplotype> readLikelihoods) {
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    final List<GATKRead> reads = activeRegion.getReads();
    final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, samples.asSetOfSamples(), activeRegion.getHeader(), true);
    final int startPos = activeRegionSpan.getStart();
    // The loop below is inclusive of both endpoints, so it adds end - start + 1
    // pileups; presize accordingly (the original capacity was short by one).
    final List<ReadPileup> pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos + 1);
    AlignmentContext next = libs.advanceToLocus(startPos, true);
    for (int curPos = startPos; curPos <= activeRegionSpan.getEnd(); curPos++) {
        if (next != null && next.getLocation().getStart() == curPos) {
            pileups.add(next.getBasePileup());
            next = libs.hasNext() ? libs.next() : null;
        } else {
            // no data, so we create empty pileups
            pileups.add(new ReadPileup(new SimpleInterval(activeRegionSpan.getContig(), curPos, curPos)));
        }
    }
    return pileups;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) AlignmentContext(org.broadinstitute.hellbender.engine.AlignmentContext) ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 20 with ReadPileup

use of org.broadinstitute.hellbender.utils.pileup.ReadPileup in project gatk-protected by broadinstitute.

In the class ReferenceConfidenceModel, the method calculateRefConfidence:

/**
     * Calculate the reference confidence for a single sample given its read data.
     *
     * Returns a list of variant contexts, one for each position in the {@code activeRegion.getLoc()}, each containing
     * detailed information about the certainty that the sample is hom-ref for each base in the region.
     *
     * @param refHaplotype the reference haplotype, used to get the reference bases across activeRegion.getLoc()
     * @param calledHaplotypes a list of haplotypes that segregate in this region, for realignment of the reads in the
     *                         readLikelihoods, corresponding to each reads best haplotype.  Must contain the refHaplotype.
     * @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
     * @param activeRegion the active region we want to get the reference confidence over
     * @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
     * @param ploidyModel indicate the ploidy of each sample in {@code stratifiedReadMap}.
     * @param variantCalls calls made in this region.  The return result will contain any variant call in this list in the
     *                     correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
     *                     under any position it covers (for snps and insertions that is 1 bp, but for deletions its the entire ref span)
     * @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
     *         contexts as well as calls from variantCalls if any were provided
     */
public List<VariantContext> calculateRefConfidence(final Haplotype refHaplotype, final Collection<Haplotype> calledHaplotypes, final SimpleInterval paddedReferenceLoc, final AssemblyRegion activeRegion, final ReadLikelihoods<Haplotype> readLikelihoods, final PloidyModel ploidyModel, final List<VariantContext> variantCalls) {
    Utils.nonNull(refHaplotype, "refHaplotype cannot be null");
    Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
    Utils.validateArg(calledHaplotypes.contains(refHaplotype), "calledHaplotypes must contain the refHaplotype");
    Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
    Utils.nonNull(activeRegion, "activeRegion cannot be null");
    Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
    Utils.validateArg(readLikelihoods.numberOfSamples() == 1, () -> "readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.numberOfSamples());
    // Fixed misleading diagnostic: the predicate compares against the extended
    // span, so the failure message must report getExtendedSpan().size() too
    // (the original reported getSpan().size(), a different value).
    Utils.validateArg(refHaplotype.length() == activeRegion.getExtendedSpan().size(), () -> "refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getExtendedSpan().size() + " are different");
    Utils.nonNull(ploidyModel, "the ploidy model cannot be null");
    // the first sample = the only sample in reference-confidence mode.
    final int ploidy = ploidyModel.samplePloidy(0);
    final SimpleInterval refSpan = activeRegion.getSpan();
    final List<ReadPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
    final byte[] ref = refHaplotype.getBases();
    final List<VariantContext> results = new ArrayList<>(refSpan.size());
    final String sampleName = readLikelihoods.getSample(0);
    // Offset of the (unextended) span start within the extended reference bases.
    final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedSpan().getStart();
    for (final ReadPileup pileup : refPileups) {
        final Locatable curPos = pileup.getLocation();
        final int offset = curPos.getStart() - refSpan.getStart();
        final VariantContext overlappingSite = getOverlappingVariantContext(curPos, variantCalls);
        if (overlappingSite != null && overlappingSite.getStart() == curPos.getStart()) {
            // An explicit call starts here: emit it instead of a ref-confidence record.
            results.add(overlappingSite);
        } else {
            // otherwise emit a reference confidence variant context
            results.add(makeReferenceConfidenceVariantContext(ploidy, ref, sampleName, globalRefOffset, pileup, curPos, offset));
        }
    }
    return results;
}
Also used : ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)36 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)24 Test (org.testng.annotations.Test)19 AlignmentContext (org.broadinstitute.hellbender.engine.AlignmentContext)14 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 SAMFileHeader (htsjdk.samtools.SAMFileHeader)11 Locatable (htsjdk.samtools.util.Locatable)11 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)9 PileupElement (org.broadinstitute.hellbender.utils.pileup.PileupElement)7 java.util (java.util)5 VariantContext (htsjdk.variant.variantcontext.VariantContext)4 Collectors (java.util.stream.Collectors)4 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)3 FeatureContext (org.broadinstitute.hellbender.engine.FeatureContext)3 ReferenceContext (org.broadinstitute.hellbender.engine.ReferenceContext)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 CigarOperator (htsjdk.samtools.CigarOperator)2 htsjdk.variant.variantcontext (htsjdk.variant.variantcontext)2 htsjdk.variant.vcf (htsjdk.variant.vcf)2 VCFConstants (htsjdk.variant.vcf.VCFConstants)2