Search in sources :

Example 51 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class ReadThreadingAssembler method composeGivenHaplotypes.

/**
     * Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
     *
     * @param refHaplotype the reference haplotype
     * @param givenHaplotypes the list of alternate alleles in VariantContexts
     * @param activeRegionWindow the window containing the reference haplotype
     *
     * @return a non-null list of haplotypes
     */
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
    Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
    Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
    Utils.nonNull(activeRegionWindow, "active region window cannot be null");
    Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
    final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    for (final VariantContext compVC : givenHaplotypes) {
        Utils.validateArg(GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow), " some variant provided does not overlap with active region window");
        for (final Allele compAltAllele : compVC.getAlternateAlleles()) {
            final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
            if (insertedRefHaplotype != null) {
                // can be null if the requested allele can't be inserted into the haplotype
                returnHaplotypes.add(insertedRefHaplotype);
            }
        }
    }
    return new ArrayList<>(returnHaplotypes);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContext(htsjdk.variant.variantcontext.VariantContext) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 52 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class HaplotypeCallerGenotypingEngine method cleanUpSymbolicUnassembledEvents.

/**
     * Removes symbolic events from list of haplotypes
     * @param haplotypes       Input/output list of haplotypes, before/after removal
     */
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
protected static void cleanUpSymbolicUnassembledEvents(final List<Haplotype> haplotypes) {
    Utils.nonNull(haplotypes);
    final List<Haplotype> haplotypesToRemove = new ArrayList<>();
    for (final Haplotype h : haplotypes) {
        for (final VariantContext vc : h.getEventMap().getVariantContexts()) {
            if (vc.isSymbolic()) {
                for (final Haplotype h2 : haplotypes) {
                    for (final VariantContext vc2 : h2.getEventMap().getVariantContexts()) {
                        if (vc.getStart() == vc2.getStart() && (vc2.isIndel() || vc2.isMNP())) {
                            // unfortunately symbolic alleles can't currently be combined with non-point events
                            haplotypesToRemove.add(h);
                            break;
                        }
                    }
                }
            }
        }
    }
    haplotypes.removeAll(haplotypesToRemove);
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 53 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class ReadThreadingAssembler method runLocalAssembly.

/**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
     * @param assemblyRegion              AssemblyRegion object holding the reads which are to be used during assembly
     * @param refHaplotype              reference haplotype object
     * @param fullReferenceWithPadding  byte array holding the reference sequence with padding
     * @param refLoc                    GenomeLoc object corresponding to the reference sequence with padding
     * @param givenAlleles   the alleles to inject into the haplotypes during GGA mode
     * @param readErrorCorrector        a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
     * @return                          the resulting assembly-result-set
     */
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
    Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
    Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
    Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
    Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
    Utils.nonNull(refLoc, "refLoc");
    Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
    ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
    // create the list of artificial haplotypes that should be added to the graph for GGA mode
    final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
    // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
    final List<GATKRead> correctedReads;
    if (readErrorCorrector != null) {
        // now correct all reads in active region after filtering/downsampling
        // Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
        // and we only want the read-error corrected reads for graph building.
        readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
        correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
    } else {
        correctedReads = assemblyRegion.getReads();
    }
    final List<SeqGraph> nonRefGraphs = new LinkedList<>();
    final AssemblyResultSet resultSet = new AssemblyResultSet();
    resultSet.setRegionForGenotyping(assemblyRegion);
    resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
    resultSet.setPaddedReferenceLoc(refLoc);
    final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
    refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
    resultSet.add(refHaplotype);
    final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
    // create the graphs by calling our subclass assemble method
    for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
        if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
            // do some QC on the graph
            sanityCheckGraph(result.getGraph(), refHaplotype);
            // add it to graphs with meaningful non-reference features
            assemblyResultByGraph.put(result.getGraph(), result);
            nonRefGraphs.add(result.getGraph());
        }
    }
    findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
    // print the graphs if the appropriate debug option has been turned on
    if (graphOutputPath != null) {
        printGraphs(nonRefGraphs);
    }
    return resultSet;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 54 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AlignmentUtilsUnitTest method makeReadAlignedToRefData.

@DataProvider(name = "ReadAlignedToRefData")
public Object[][] makeReadAlignedToRefData() {
    List<Object[]> tests = new ArrayList<>();
    final String hapBases = "ACTGAAGGTTCC";
    final Haplotype allM = makeHaplotypeForAlignedToRefTest(hapBases, hapBases.length() + "M");
    // make sure we get back a cigar of the right length
    for (int i = -1; i < hapBases.length(); i++) {
        final GATKRead read = makeReadForAlignedToRefTest(hapBases);
        final byte[] bases = read.getBases();
        if (i != -1) {
            bases[i] = (byte) 'A';
        }
        read.setBases(bases);
        tests.add(new Object[] { read, allM, 10, 10, allM.getCigar().toString() });
    }
    // make sure insertions at the front are correctly handled
    for (int padFront = 1; padFront < 10; padFront++) {
        final GATKRead read = makeReadForAlignedToRefTest(Utils.dupString("N", padFront) + hapBases);
        tests.add(new Object[] { read, allM, 10, 10, padFront + "I" + allM.getCigar().toString() });
    }
    // make sure insertions at the back are correctly handled
    for (int padBack = 1; padBack < 10; padBack++) {
        final GATKRead read = makeReadForAlignedToRefTest(hapBases + Utils.dupString("N", padBack));
        tests.add(new Object[] { read, allM, 10, 10, allM.getCigar().toString() + padBack + "I" });
    }
    // make sure refStart and hapStart are respected
    for (int refStart = 1; refStart < 10; refStart++) {
        for (int hapStart = refStart; hapStart < 10 + refStart; hapStart++) {
            final Haplotype hap = new Haplotype(allM.getBases());
            hap.setCigar(allM.getCigar());
            hap.setAlignmentStartHapwrtRef(hapStart);
            final GATKRead read = makeReadForAlignedToRefTest(new String(hap.getBases()));
            tests.add(new Object[] { read, hap, refStart, refStart + hapStart, allM.getCigar().toString() });
        }
    }
    // example case of bad alignment because SW doesn't necessarily left-align indels
    {
        final String hap = "ACTGTGGGTTCCTCTTATTTTATTTCTACATCAATGTTCATATTTAACTTATTATTTTATCTTATTTTTAAATTTCTTTTATGTTGAGCCTTGATGAAAGCCATAGGTTCTCTCATATAATTGTATGTGTATGTATGTATATGTACATAATATATACATATATGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTGTATTACATAATATATACATATATGTATATATTATGTATATGTACATAATATATACATATATG";
        final String hapCigar = "399M";
        final String readBases = "ATGTACATAATATATACATATATGTATATGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTGTATTACATAATATATACATATATGTATATATTATGTATATGTACATAATAT";
        final GATKRead read = makeReadForAlignedToRefTest(readBases);
        final int refStart = 10130100;
        final int hapStart = 500;
        final String badCigar = "31M6D211M";
        final String goodCigar = "28M6D214M";
        final Haplotype badHap = new Haplotype(hap.getBytes());
        badHap.setCigar(TextCigarCodec.decode(hapCigar));
        badHap.setAlignmentStartHapwrtRef(hapStart);
        final int expectedPos = 10130740;
        tests.add(new Object[] { read, badHap, refStart, expectedPos, goodCigar });
    }
    return tests.toArray(new Object[][] {});
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 55 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AlignmentUtilsUnitTest method makeHaplotypeForAlignedToRefTest.

/******************************************************
     * Tests for AlignmentUtils.createReadAlignedToRef()
     ******************************************************/
private Haplotype makeHaplotypeForAlignedToRefTest(final String bases, final String cigar) {
    final Haplotype hap = new Haplotype(bases.getBytes());
    hap.setCigar(TextCigarCodec.decode(cigar));
    return hap;
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)88 Test (org.testng.annotations.Test)31 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 DataProvider (org.testng.annotations.DataProvider)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)8 ArrayList (java.util.ArrayList)7 Cigar (htsjdk.samtools.Cigar)6 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)6 HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel)6 IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel)6 PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel)6 ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph)6 SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList)6 EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap)6 CigarElement (htsjdk.samtools.CigarElement)4 Allele (htsjdk.variant.variantcontext.Allele)4 File (java.io.File)4