Search in sources :

Example 36 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class HaplotypeCallerGenotypingEngineUnitTest method makeConstructPhaseSetMappingData.

@DataProvider(name = "ConstructPhaseSetMappingProvider")
public Object[][] makeConstructPhaseSetMappingData() {
    List<Object[]> tests = new ArrayList<Object[]>();
    final Allele ref = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final Allele altT = Allele.create("T", false);
    final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
    final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
    final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
    final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
    final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
    final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
    pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
    pos1.getEventMap().put(1, vc1);
    final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
    pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
    pos2.getEventMap().put(2, vc2);
    final Haplotype pos3 = new Haplotype("AACAA".getBytes());
    pos3.setEventMap(new EventMap(Arrays.asList(vc3)));
    pos3.getEventMap().put(3, vc3);
    final Haplotype pos4 = new Haplotype("AAACA".getBytes());
    pos4.setEventMap(new EventMap(Arrays.asList(vc4)));
    pos4.getEventMap().put(4, vc4);
    final Haplotype pos24 = new Haplotype("ACACA".getBytes());
    pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4)));
    pos24.getEventMap().put(2, vc2);
    pos24.getEventMap().put(4, vc4);
    final Haplotype pos34 = new Haplotype("AACCA".getBytes());
    pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4)));
    pos34.getEventMap().put(3, vc3);
    pos34.getEventMap().put(4, vc4);
    final Haplotype pos234 = new Haplotype("ACCCA".getBytes());
    pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4)));
    pos234.getEventMap().put(2, vc2);
    pos234.getEventMap().put(3, vc3);
    pos234.getEventMap().put(4, vc4);
    final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
    // test no phased variants #1
    final Set<Haplotype> haplotypes2 = new HashSet<>();
    haplotypes2.add(pos2);
    haplotypeMap.put(vc2, haplotypes2);
    tests.add(new Object[] { Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0 });
    // test no phased variants #2
    final Set<Haplotype> haplotypes3 = new HashSet<>();
    haplotypes3.add(pos3);
    haplotypeMap.put(vc3, haplotypes3);
    tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
    // test opposite phase
    tests.add(new Object[] { Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1 });
    // test no phased variants #3
    final Set<Haplotype> haplotypes4 = new HashSet<>();
    haplotypes4.add(pos4);
    haplotypeMap.put(vc4, haplotypes4);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0 });
    // test mixture
    final Set<Haplotype> haplotypes24 = new HashSet<>();
    haplotypes24.add(pos24);
    haplotypeMap.put(vc2, haplotypes24);
    haplotypeMap.put(vc4, haplotypes24);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1 });
    // test 2 hets
    haplotypeMap.remove(vc3);
    tests.add(new Object[] { Arrays.asList(vc2, vc4), new HashMap<>(haplotypeMap), 1, 2, 1, 2, 0 });
    // test 2 with opposite phase
    final Set<Haplotype> haplotypes1 = new HashSet<>();
    haplotypes1.add(pos1);
    haplotypeMap.put(vc1, haplotypes1);
    tests.add(new Object[] { Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2 });
    // test homs around a het
    final Set<Haplotype> haplotypes2hom = new HashSet<>();
    haplotypes2hom.add(pos24);
    haplotypes2hom.add(pos234);
    final Set<Haplotype> haplotypes4hom = new HashSet<>();
    haplotypes4hom.add(pos24);
    haplotypes4hom.add(pos234);
    final Set<Haplotype> haplotypes3het = new HashSet<>();
    haplotypes3het.add(pos234);
    haplotypeMap.put(vc2, haplotypes2hom);
    haplotypeMap.put(vc3, haplotypes3het);
    haplotypeMap.put(vc4, haplotypes4hom);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
    // test hets around a hom
    final Set<Haplotype> haplotypes2het = new HashSet<>();
    haplotypes2het.add(pos234);
    final Set<Haplotype> haplotypes4het = new HashSet<>();
    haplotypes4het.add(pos234);
    final Set<Haplotype> haplotypes3hom = new HashSet<>();
    haplotypes3hom.add(pos3);
    haplotypes3hom.add(pos234);
    haplotypeMap.put(vc2, haplotypes2het);
    haplotypeMap.put(vc3, haplotypes3hom);
    haplotypeMap.put(vc4, haplotypes4het);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0 });
    // test no phased variants around a hom
    final Set<Haplotype> haplotypes2incomplete = new HashSet<>();
    haplotypes2incomplete.add(pos24);
    final Set<Haplotype> haplotypes3incomplete = new HashSet<>();
    haplotypes3incomplete.add(pos34);
    final Set<Haplotype> haplotypes4complete = new HashSet<>();
    haplotypes4complete.add(pos24);
    haplotypes4complete.add(pos34);
    haplotypes4complete.add(pos234);
    haplotypeMap.put(vc2, haplotypes2incomplete);
    haplotypeMap.put(vc3, haplotypes3incomplete);
    haplotypeMap.put(vc4, haplotypes4complete);
    tests.add(new Object[] { calls, new HashMap<>(haplotypeMap), 0, 0, 0, 0, 0 });
    return tests.toArray(new Object[][] {});
}
Also used : EventMap(org.broadinstitute.hellbender.utils.haplotype.EventMap) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 37 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class HaplotypeCallerGenotypingEngineUnitTest method makeCreateHaplotypeMappingData.

@DataProvider(name = "CreateHaplotypeMappingProvider")
public Object[][] makeCreateHaplotypeMappingData() {
    List<Object[]> tests = new ArrayList<Object[]>();
    final Set<Haplotype> haplotypes = new HashSet<>();
    final Allele ref = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final Allele altT = Allele.create("T", false);
    final Haplotype AtoC1 = new Haplotype("AACAA".getBytes());
    final VariantContext vc1 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altC)).make();
    AtoC1.setEventMap(new EventMap(Arrays.asList(vc1)));
    AtoC1.getEventMap().put(3, vc1);
    haplotypes.add(AtoC1);
    final Haplotype AtoC2 = new Haplotype("AAACA".getBytes());
    final VariantContext vc2 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altT)).make();
    AtoC2.setEventMap(new EventMap(Arrays.asList(vc2)));
    AtoC2.getEventMap().put(4, vc2);
    haplotypes.add(AtoC2);
    tests.add(new Object[] { vc1, haplotypes, AtoC1 });
    tests.add(new Object[] { vc2, haplotypes, AtoC2 });
    tests.add(new Object[] { new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altT)).make(), haplotypes, null });
    return tests.toArray(new Object[][] {});
}
Also used : Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) EventMap(org.broadinstitute.hellbender.utils.haplotype.EventMap) DataProvider(org.testng.annotations.DataProvider)

Example 38 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.

the class AlignmentUtilsUnitTest method makeComplexReadAlignedToRef.

@DataProvider(name = "ComplexReadAlignedToRef")
public Object[][] makeComplexReadAlignedToRef() {
    List<Object[]> tests = new ArrayList<>();
    final List<Mutation> allMutations = Arrays.asList(new Mutation(1, 1, CigarOperator.M), new Mutation(2, 1, CigarOperator.M), new Mutation(3, 1, CigarOperator.I), new Mutation(7, 1, CigarOperator.D));
    int i = 0;
    final String referenceBases = "ACTGACTGACTG";
    final String paddedReference = "NNNN" + referenceBases + "NNNN";
    for (final List<Mutation> mutations : Utils.makePermutations(allMutations, 3, false)) {
        final MutatedSequence hap = mutateSequence(referenceBases, mutations);
        final Haplotype haplotype = new Haplotype(hap.seq.getBytes());
        final SWPairwiseAlignment align = new SWPairwiseAlignment(paddedReference.getBytes(), hap.seq.getBytes());
        haplotype.setAlignmentStartHapwrtRef(align.getAlignmentStart2wrt1());
        haplotype.setCigar(align.getCigar());
        for (final List<Mutation> readMutations : Utils.makePermutations(allMutations, 3, false)) {
            final MutatedSequence readBases = mutateSequence(hap.seq, readMutations);
            final GATKRead read = makeReadForAlignedToRefTest(readBases.seq);
            tests.add(new Object[] { i++, read, paddedReference, haplotype, hap.numMismatches + readBases.numMismatches });
        }
    }
    return tests.toArray(new Object[][] {});
}
Also used : SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 39 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class ReadThreadingAssembler method runLocalAssembly.

/**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
     * @param assemblyRegion              AssemblyRegion object holding the reads which are to be used during assembly
     * @param refHaplotype              reference haplotype object
     * @param fullReferenceWithPadding  byte array holding the reference sequence with padding
     * @param refLoc                    GenomeLoc object corresponding to the reference sequence with padding
     * @param givenAlleles   the alleles to inject into the haplotypes during GGA mode
     * @param readErrorCorrector        a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
     * @return                          the resulting assembly-result-set
     */
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
    Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
    Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
    Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
    Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
    Utils.nonNull(refLoc, "refLoc");
    Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
    ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
    // create the list of artificial haplotypes that should be added to the graph for GGA mode
    final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
    // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
    final List<GATKRead> correctedReads;
    if (readErrorCorrector != null) {
        // now correct all reads in active region after filtering/downsampling
        // Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
        // and we only want the read-error corrected reads for graph building.
        readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
        correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
    } else {
        correctedReads = assemblyRegion.getReads();
    }
    final List<SeqGraph> nonRefGraphs = new LinkedList<>();
    final AssemblyResultSet resultSet = new AssemblyResultSet();
    resultSet.setRegionForGenotyping(assemblyRegion);
    resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
    resultSet.setPaddedReferenceLoc(refLoc);
    final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
    refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
    resultSet.add(refHaplotype);
    final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
    // create the graphs by calling our subclass assemble method
    for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
        if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
            // do some QC on the graph
            sanityCheckGraph(result.getGraph(), refHaplotype);
            // add it to graphs with meaningful non-reference features
            assemblyResultByGraph.put(result.getGraph(), result);
            nonRefGraphs.add(result.getGraph());
        }
    }
    findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
    // print the graphs if the appropriate debug option has been turned on
    if (graphOutputPath != null) {
        printGraphs(nonRefGraphs);
    }
    return resultSet;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 40 with Haplotype

use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.

the class ReadThreadingAssembler method findBestPaths.

private List<Haplotype> findBestPaths(final Collection<SeqGraph> graphs, final Haplotype refHaplotype, final SimpleInterval refLoc, final SimpleInterval activeRegionWindow, final Map<SeqGraph, AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
    // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
    final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    final Collection<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
    int failedCigars = 0;
    for (final SeqGraph graph : graphs) {
        final SeqVertex source = graph.getReferenceSourceVertex();
        final SeqVertex sink = graph.getReferenceSinkVertex();
        Utils.validateArg(source != null && sink != null, () -> "Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + graph);
        final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph, source, sink);
        finders.add(haplotypeFinder);
        final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
        while (bestHaplotypes.hasNext()) {
            final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
            final Haplotype h = kBestHaplotype.haplotype();
            if (!returnHaplotypes.contains(h)) {
                final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases());
                if (cigar == null) {
                    // couldn't produce a meaningful alignment of haplotype to reference, fail quietly
                    failedCigars++;
                    continue;
                } else if (cigar.isEmpty()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
                } else if (pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH) {
                    // N cigar elements means that a bubble was too divergent from the reference so skip over this path
                    continue;
                } else if (cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength()) {
                    // SW failure
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + " ref = " + refHaplotype + " path " + new String(h.getBases()));
                }
                h.setCigar(cigar);
                h.setAlignmentStartHapwrtRef(activeRegionStart);
                h.setGenomeLocation(activeRegionWindow);
                returnHaplotypes.add(h);
                assemblyResultSet.add(h, assemblyResultByGraph.get(graph));
                if (debug) {
                    logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }
    }
    // the first returned by any finder.
    if (!returnHaplotypes.contains(refHaplotype)) {
        double refScore = Double.NaN;
        for (final KBestHaplotypeFinder finder : finders) {
            final double candidate = finder.score(refHaplotype);
            if (Double.isNaN(candidate)) {
                continue;
            }
            refScore = candidate;
            break;
        }
        refHaplotype.setScore(refScore);
        returnHaplotypes.add(refHaplotype);
    }
    if (failedCigars != 0) {
        logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, refLoc.toString()));
    }
    if (debug) {
        if (returnHaplotypes.size() > 1) {
            logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
        } else {
            logger.info("Found only the reference haplotype in the assembly graph.");
        }
        for (final Haplotype h : returnHaplotypes) {
            logger.info(h.toString());
            logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
        }
    }
    return new ArrayList<>(returnHaplotypes);
}
Also used : Cigar(htsjdk.samtools.Cigar) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Aggregations

Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)88 Test (org.testng.annotations.Test)31 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)28 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)28 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)12 DataProvider (org.testng.annotations.DataProvider)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)9 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)8 ArrayList (java.util.ArrayList)7 Cigar (htsjdk.samtools.Cigar)6 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)6 HomogeneousPloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel)6 IndependentSampleGenotypesModel (org.broadinstitute.hellbender.tools.walkers.genotyper.IndependentSampleGenotypesModel)6 PloidyModel (org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel)6 ReadThreadingGraph (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph)6 SampleList (org.broadinstitute.hellbender.utils.genotyper.SampleList)6 EventMap (org.broadinstitute.hellbender.utils.haplotype.EventMap)6 CigarElement (htsjdk.samtools.CigarElement)4 Allele (htsjdk.variant.variantcontext.Allele)4 File (java.io.File)4