use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk-protected by broadinstitute.
the class ReadThreadingAssemblerUnitTest method assemble.
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
final Haplotype refHaplotype = new Haplotype(refBases, true);
final Cigar c = new Cigar();
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
refHaplotype.setCigar(c);
final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
activeRegion.addAll(reads);
// logger.warn("Assembling " + activeRegion + " with " + engine);
final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
return assemblyResultSet.getHaplotypeList();
}
use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk-protected by broadinstitute.
the class ReadThreadingAssembler method runLocalAssembly.
/**
* Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
* @param assemblyRegion AssemblyRegion object holding the reads which are to be used during assembly
* @param refHaplotype reference haplotype object
* @param fullReferenceWithPadding byte array holding the reference sequence with padding
* @param refLoc GenomeLoc object corresponding to the reference sequence with padding
* @param givenAlleles the alleles to inject into the haplotypes during GGA mode
* @param readErrorCorrector a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
* @return the resulting assembly-result-set
*/
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
Utils.nonNull(refLoc, "refLoc");
Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
// create the list of artificial haplotypes that should be added to the graph for GGA mode
final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
// error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
final List<GATKRead> correctedReads;
if (readErrorCorrector != null) {
// now correct all reads in active region after filtering/downsampling
// Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
// and we only want the read-error corrected reads for graph building.
readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
} else {
correctedReads = assemblyRegion.getReads();
}
final List<SeqGraph> nonRefGraphs = new LinkedList<>();
final AssemblyResultSet resultSet = new AssemblyResultSet();
resultSet.setRegionForGenotyping(assemblyRegion);
resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
resultSet.setPaddedReferenceLoc(refLoc);
final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
resultSet.add(refHaplotype);
final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
// create the graphs by calling our subclass assemble method
for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
// do some QC on the graph
sanityCheckGraph(result.getGraph(), refHaplotype);
// add it to graphs with meaningful non-reference features
assemblyResultByGraph.put(result.getGraph(), result);
nonRefGraphs.add(result.getGraph());
}
}
findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
// print the graphs if the appropriate debug option has been turned on
if (graphOutputPath != null) {
printGraphs(nonRefGraphs);
}
return resultSet;
}
use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk by broadinstitute.
the class ReadThreadingAssembler method runLocalAssembly.
/**
* Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
* @param assemblyRegion AssemblyRegion object holding the reads which are to be used during assembly
* @param refHaplotype reference haplotype object
* @param fullReferenceWithPadding byte array holding the reference sequence with padding
* @param refLoc GenomeLoc object corresponding to the reference sequence with padding
* @param givenAlleles the alleles to inject into the haplotypes during GGA mode
* @param readErrorCorrector a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
* @return the resulting assembly-result-set
*/
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
Utils.nonNull(refLoc, "refLoc");
Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
// create the list of artificial haplotypes that should be added to the graph for GGA mode
final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
// error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
final List<GATKRead> correctedReads;
if (readErrorCorrector != null) {
// now correct all reads in active region after filtering/downsampling
// Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
// and we only want the read-error corrected reads for graph building.
readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
} else {
correctedReads = assemblyRegion.getReads();
}
final List<SeqGraph> nonRefGraphs = new LinkedList<>();
final AssemblyResultSet resultSet = new AssemblyResultSet();
resultSet.setRegionForGenotyping(assemblyRegion);
resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
resultSet.setPaddedReferenceLoc(refLoc);
final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
resultSet.add(refHaplotype);
final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
// create the graphs by calling our subclass assemble method
for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
// do some QC on the graph
sanityCheckGraph(result.getGraph(), refHaplotype);
// add it to graphs with meaningful non-reference features
assemblyResultByGraph.put(result.getGraph(), result);
nonRefGraphs.add(result.getGraph());
}
}
findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
// print the graphs if the appropriate debug option has been turned on
if (graphOutputPath != null) {
printGraphs(nonRefGraphs);
}
return resultSet;
}
use of org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet in project gatk by broadinstitute.
the class ReadThreadingAssemblerUnitTest method assemble.
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
final Haplotype refHaplotype = new Haplotype(refBases, true);
final Cigar c = new Cigar();
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
refHaplotype.setCigar(c);
final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
activeRegion.addAll(reads);
// logger.warn("Assembling " + activeRegion + " with " + engine);
final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
return assemblyResultSet.getHaplotypeList();
}
Aggregations