Search in sources :

Example 21 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class ReadThreadingAssembler method createGraph.

/**
     * Creates the sequence graph for the given kmerSize
     *
     * @param reads            reads to use
     * @param refHaplotype     reference haplotype
     * @param kmerSize         kmer size
     * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
     * @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
     * @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers
     * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
     */
private AssemblyResult createGraph(final Iterable<GATKRead> reads, final Haplotype refHaplotype, final int kmerSize, final Iterable<Haplotype> activeAlleleHaplotypes, final boolean allowLowComplexityGraphs, final boolean allowNonUniqueKmersInRef, final SAMFileHeader header) {
    if (refHaplotype.length() < kmerSize) {
        // happens in cases where the assembled region is just too small
        return new AssemblyResult(AssemblyResult.Status.FAILED, null, null);
    }
    if (!allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new ReadThreadingGraph.SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty()) {
        if (debug) {
            logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
        }
        return null;
    }
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, MIN_BASE_QUALITY_TO_USE_IN_ASSEMBLY, numPruningSamples);
    rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches);
    // add the reference sequence to the graph
    rtgraph.addSequence("ref", refHaplotype.getBases(), true);
    // add the artificial GGA haplotypes to the graph
    int hapCount = 0;
    for (final Haplotype h : activeAlleleHaplotypes) {
        rtgraph.addSequence("activeAllele" + hapCount++, h.getBases(), GGA_MODE_ARTIFICIAL_COUNTS, false);
    }
    // Next pull kmers out of every read and throw them on the graph
    for (final GATKRead read : reads) {
        rtgraph.addRead(read, header);
    }
    // actually build the read threading graph
    rtgraph.buildGraphIfNecessary();
    // sanity check: make sure there are no cycles in the graph
    if (rtgraph.hasCycles()) {
        if (debug) {
            logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it contains a cycle");
        }
        return null;
    }
    // sanity check: make sure the graph had enough complexity with the given kmer
    if (!allowLowComplexityGraphs && rtgraph.isLowComplexity()) {
        if (debug) {
            logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it does not produce a graph with enough complexity");
        }
        return null;
    }
    return getAssemblyResult(refHaplotype, kmerSize, rtgraph);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 22 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class ReadThreadingAssembler method composeGivenHaplotypes.

/**
     * Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
     *
     * @param refHaplotype the reference haplotype
     * @param givenHaplotypes the list of alternate alleles in VariantContexts
     * @param activeRegionWindow the window containing the reference haplotype
     *
     * @return a non-null list of haplotypes
     */
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
    Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
    Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
    Utils.nonNull(activeRegionWindow, "active region window cannot be null");
    Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
    final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    for (final VariantContext compVC : givenHaplotypes) {
        Utils.validateArg(GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow), " some variant provided does not overlap with active region window");
        for (final Allele compAltAllele : compVC.getAlternateAlleles()) {
            final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
            if (insertedRefHaplotype != null) {
                // can be null if the requested allele can't be inserted into the haplotype
                returnHaplotypes.add(insertedRefHaplotype);
            }
        }
    }
    return new ArrayList<>(returnHaplotypes);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContext(htsjdk.variant.variantcontext.VariantContext) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 23 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class HaplotypeCallerGenotypingEngine method cleanUpSymbolicUnassembledEvents.

/**
     * Removes symbolic events from list of haplotypes
     * @param haplotypes       Input/output list of haplotypes, before/after removal
     */
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
protected static void cleanUpSymbolicUnassembledEvents(final List<Haplotype> haplotypes) {
    Utils.nonNull(haplotypes);
    final List<Haplotype> haplotypesToRemove = new ArrayList<>();
    for (final Haplotype h : haplotypes) {
        for (final VariantContext vc : h.getEventMap().getVariantContexts()) {
            if (vc.isSymbolic()) {
                for (final Haplotype h2 : haplotypes) {
                    for (final VariantContext vc2 : h2.getEventMap().getVariantContexts()) {
                        if (vc.getStart() == vc2.getStart() && (vc2.isIndel() || vc2.isMNP())) {
                            // unfortunately symbolic alleles can't currently be combined with non-point events
                            haplotypesToRemove.add(h);
                            break;
                        }
                    }
                }
            }
        }
    }
    haplotypes.removeAll(haplotypesToRemove);
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 24 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class ReadThreadingAssembler method findBestPaths.

private List<Haplotype> findBestPaths(final Collection<SeqGraph> graphs, final Haplotype refHaplotype, final SimpleInterval refLoc, final SimpleInterval activeRegionWindow, final Map<SeqGraph, AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
    // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
    final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    final Collection<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
    int failedCigars = 0;
    for (final SeqGraph graph : graphs) {
        final SeqVertex source = graph.getReferenceSourceVertex();
        final SeqVertex sink = graph.getReferenceSinkVertex();
        Utils.validateArg(source != null && sink != null, () -> "Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + graph);
        final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph, source, sink);
        finders.add(haplotypeFinder);
        final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
        while (bestHaplotypes.hasNext()) {
            final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
            final Haplotype h = kBestHaplotype.haplotype();
            if (!returnHaplotypes.contains(h)) {
                final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases());
                if (cigar == null) {
                    // couldn't produce a meaningful alignment of haplotype to reference, fail quietly
                    failedCigars++;
                    continue;
                } else if (cigar.isEmpty()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
                } else if (pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH) {
                    // N cigar elements means that a bubble was too divergent from the reference so skip over this path
                    continue;
                } else if (cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength()) {
                    // SW failure
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + " ref = " + refHaplotype + " path " + new String(h.getBases()));
                }
                h.setCigar(cigar);
                h.setAlignmentStartHapwrtRef(activeRegionStart);
                h.setGenomeLocation(activeRegionWindow);
                returnHaplotypes.add(h);
                assemblyResultSet.add(h, assemblyResultByGraph.get(graph));
                if (debug) {
                    logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }
    }
    // the first returned by any finder.
    if (!returnHaplotypes.contains(refHaplotype)) {
        double refScore = Double.NaN;
        for (final KBestHaplotypeFinder finder : finders) {
            final double candidate = finder.score(refHaplotype);
            if (Double.isNaN(candidate)) {
                continue;
            }
            refScore = candidate;
            break;
        }
        refHaplotype.setScore(refScore);
        returnHaplotypes.add(refHaplotype);
    }
    if (failedCigars != 0) {
        logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, refLoc.toString()));
    }
    if (debug) {
        if (returnHaplotypes.size() > 1) {
            logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
        } else {
            logger.info("Found only the reference haplotype in the assembly graph.");
        }
        for (final Haplotype h : returnHaplotypes) {
            logger.info(h.toString());
            logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
        }
    }
    return new ArrayList<>(returnHaplotypes);
}
Also used : Cigar(htsjdk.samtools.Cigar) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 25 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class ReferenceConfidenceModelUnitTest method testRefConfidencePartialReads.

@Test
public void testRefConfidencePartialReads() {
    final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples, 2);
    final IndependentSampleGenotypesModel genotypingModel = new IndependentSampleGenotypesModel();
    final String ref = "ACGTAACCGGTT";
    for (int readLen = 3; readLen < ref.length(); readLen++) {
        for (int start = 0; start < ref.length() - readLen; start++) {
            final RefConfData data = new RefConfData(ref, 0);
            final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
            final List<VariantContext> calls = Collections.emptyList();
            data.getActiveRegion().add(data.makeRead(start, readLen));
            final ReadLikelihoods<Haplotype> likelihoods = createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
            final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), 0));
            for (int i = start; i < readLen + start; i++) expectedDPs.set(i, 1);
            final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, calls);
            checkReferenceModelResult(data, contexts, expectedDPs, calls);
        }
    }
}
Also used : IndependentSampleGenotypesModel(org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel) VariantContext(htsjdk.variant.variantcontext.VariantContext) HomogeneousPloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel) PloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel) HomogeneousPloidyModel(org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)88 Test (org.testng.annotations.Test)31 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 DataProvider (org.testng.annotations.DataProvider)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)8 ArrayList (java.util.ArrayList)7 Cigar (htsjdk.samtools.Cigar)6 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)6 HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel)6 IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel)6 PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel)6 ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph)6 SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList)6 EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap)6 CigarElement (htsjdk.samtools.CigarElement)4 Allele (htsjdk.variant.variantcontext.Allele)4 File (java.io.File)4