Search in sources :

Example 1 with AssemblyResultSet

use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk-protected by broadinstitute.

the class ReadThreadingAssemblerUnitTest method assemble.

private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
    activeRegion.addAll(reads);
    //        logger.warn("Assembling " + activeRegion + " with " + engine);
    final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
    return assemblyResultSet.getHaplotypeList();
}
Also used : Cigar(htsjdk.samtools.Cigar) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) KBestHaplotype(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype) CigarElement(htsjdk.samtools.CigarElement)

Example 2 with AssemblyResultSet

use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk-protected by broadinstitute.

the class ReadThreadingAssembler method runLocalAssembly.

/**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
     * @param assemblyRegion              AssemblyRegion object holding the reads which are to be used during assembly
     * @param refHaplotype              reference haplotype object
     * @param fullReferenceWithPadding  byte array holding the reference sequence with padding
     * @param refLoc                    GenomeLoc object corresponding to the reference sequence with padding
     * @param givenAlleles   the alleles to inject into the haplotypes during GGA mode
     * @param readErrorCorrector        a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
     * @return                          the resulting assembly-result-set
     */
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
    Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
    Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
    Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
    Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
    Utils.nonNull(refLoc, "refLoc");
    Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
    ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
    // create the list of artificial haplotypes that should be added to the graph for GGA mode
    final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
    // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
    final List<GATKRead> correctedReads;
    if (readErrorCorrector != null) {
        // now correct all reads in active region after filtering/downsampling
        // Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
        // and we only want the read-error corrected reads for graph building.
        readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
        correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
    } else {
        correctedReads = assemblyRegion.getReads();
    }
    final List<SeqGraph> nonRefGraphs = new LinkedList<>();
    final AssemblyResultSet resultSet = new AssemblyResultSet();
    resultSet.setRegionForGenotyping(assemblyRegion);
    resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
    resultSet.setPaddedReferenceLoc(refLoc);
    final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
    refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
    resultSet.add(refHaplotype);
    final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
    // create the graphs by calling our subclass assemble method
    for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
        if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
            // do some QC on the graph
            sanityCheckGraph(result.getGraph(), refHaplotype);
            // add it to graphs with meaningful non-reference features
            assemblyResultByGraph.put(result.getGraph(), result);
            nonRefGraphs.add(result.getGraph());
        }
    }
    findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
    // print the graphs if the appropriate debug option has been turned on
    if (graphOutputPath != null) {
        printGraphs(nonRefGraphs);
    }
    return resultSet;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 3 with AssemblyResultSet

use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk by broadinstitute.

the class ReadThreadingAssembler method runLocalAssembly.

/**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
     * @param assemblyRegion              AssemblyRegion object holding the reads which are to be used during assembly
     * @param refHaplotype              reference haplotype object
     * @param fullReferenceWithPadding  byte array holding the reference sequence with padding
     * @param refLoc                    GenomeLoc object corresponding to the reference sequence with padding
     * @param givenAlleles   the alleles to inject into the haplotypes during GGA mode
     * @param readErrorCorrector        a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
     * @return                          the resulting assembly-result-set
     */
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
    Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
    Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
    Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
    Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
    Utils.nonNull(refLoc, "refLoc");
    Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
    ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
    // create the list of artificial haplotypes that should be added to the graph for GGA mode
    final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
    // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
    final List<GATKRead> correctedReads;
    if (readErrorCorrector != null) {
        // now correct all reads in active region after filtering/downsampling
        // Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
        // and we only want the read-error corrected reads for graph building.
        readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
        correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
    } else {
        correctedReads = assemblyRegion.getReads();
    }
    final List<SeqGraph> nonRefGraphs = new LinkedList<>();
    final AssemblyResultSet resultSet = new AssemblyResultSet();
    resultSet.setRegionForGenotyping(assemblyRegion);
    resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
    resultSet.setPaddedReferenceLoc(refLoc);
    final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
    refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
    resultSet.add(refHaplotype);
    final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
    // create the graphs by calling our subclass assemble method
    for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
        if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
            // do some QC on the graph
            sanityCheckGraph(result.getGraph(), refHaplotype);
            // add it to graphs with meaningful non-reference features
            assemblyResultByGraph.put(result.getGraph(), result);
            nonRefGraphs.add(result.getGraph());
        }
    }
    findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
    // print the graphs if the appropriate debug option has been turned on
    if (graphOutputPath != null) {
        printGraphs(nonRefGraphs);
    }
    return resultSet;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Example 4 with AssemblyResultSet

use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk by broadinstitute.

the class ReadThreadingAssemblerUnitTest method assemble.

private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
    activeRegion.addAll(reads);
    //        logger.warn("Assembling " + activeRegion + " with " + engine);
    final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
    return assemblyResultSet.getHaplotypeList();
}
Also used : Cigar(htsjdk.samtools.Cigar) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) KBestHaplotype(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

AssemblyResultSet (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet)4 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)4 Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)2 AssemblyRegion (org.broadinstitute.hellbender.engine.AssemblyRegion)2 AssemblyResult (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult)2 KBestHaplotype (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype)2 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)2 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)2