Search in sources :

Example 6 with AssemblyResult

use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult in project gatk by broadinstitute.

the class ReadThreadingAssembler method runLocalAssembly.

/**
     * Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
     * @param assemblyRegion              AssemblyRegion object holding the reads which are to be used during assembly
     * @param refHaplotype              reference haplotype object
     * @param fullReferenceWithPadding  byte array holding the reference sequence with padding
     * @param refLoc                    GenomeLoc object corresponding to the reference sequence with padding
     * @param givenAlleles   the alleles to inject into the haplotypes during GGA mode
     * @param readErrorCorrector        a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
     * @return                          the resulting assembly-result-set
     */
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
    Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
    Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
    Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
    Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
    Utils.nonNull(refLoc, "refLoc");
    Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
    ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
    // create the list of artificial haplotypes that should be added to the graph for GGA mode
    final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
    // error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
    final List<GATKRead> correctedReads;
    if (readErrorCorrector != null) {
        // now correct all reads in active region after filtering/downsampling
        // Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
        // and we only want the read-error corrected reads for graph building.
        readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
        correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
    } else {
        correctedReads = assemblyRegion.getReads();
    }
    final List<SeqGraph> nonRefGraphs = new LinkedList<>();
    final AssemblyResultSet resultSet = new AssemblyResultSet();
    resultSet.setRegionForGenotyping(assemblyRegion);
    resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
    resultSet.setPaddedReferenceLoc(refLoc);
    final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
    refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
    resultSet.add(refHaplotype);
    final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
    // create the graphs by calling our subclass assemble method
    for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
        if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
            // do some QC on the graph
            sanityCheckGraph(result.getGraph(), refHaplotype);
            // add it to graphs with meaningful non-reference features
            assemblyResultByGraph.put(result.getGraph(), result);
            nonRefGraphs.add(result.getGraph());
        }
    }
    findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
    // print the graphs if the appropriate debug option has been turned on
    if (graphOutputPath != null) {
        printGraphs(nonRefGraphs);
    }
    return resultSet;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) AssemblyResult(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype)

Aggregations

AssemblyResult (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResult)6 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)4 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)4 File (java.io.File)2 AssemblyResultSet (org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet)2 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)2