use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class ReadThreadingAssembler method composeGivenHaplotypes.
/**
* Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
*
* @param refHaplotype the reference haplotype
* @param givenHaplotypes the list of alternate alleles in VariantContexts
* @param activeRegionWindow the window containing the reference haplotype
*
* @return a non-null list of haplotypes
*/
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
Utils.nonNull(activeRegionWindow, "active region window cannot be null");
Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
for (final VariantContext compVC : givenHaplotypes) {
Utils.validateArg(GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow), " some variant provided does not overlap with active region window");
for (final Allele compAltAllele : compVC.getAlternateAlleles()) {
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
if (insertedRefHaplotype != null) {
// can be null if the requested allele can't be inserted into the haplotype
returnHaplotypes.add(insertedRefHaplotype);
}
}
}
return new ArrayList<>(returnHaplotypes);
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk-protected by broadinstitute.
the class HaplotypeCallerGenotypingEngine method cleanUpSymbolicUnassembledEvents.
/**
* Removes symbolic events from list of haplotypes
* @param haplotypes Input/output list of haplotypes, before/after removal
*/
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
protected static void cleanUpSymbolicUnassembledEvents(final List<Haplotype> haplotypes) {
Utils.nonNull(haplotypes);
final List<Haplotype> haplotypesToRemove = new ArrayList<>();
for (final Haplotype h : haplotypes) {
for (final VariantContext vc : h.getEventMap().getVariantContexts()) {
if (vc.isSymbolic()) {
for (final Haplotype h2 : haplotypes) {
for (final VariantContext vc2 : h2.getEventMap().getVariantContexts()) {
if (vc.getStart() == vc2.getStart() && (vc2.isIndel() || vc2.isMNP())) {
// unfortunately symbolic alleles can't currently be combined with non-point events
haplotypesToRemove.add(h);
break;
}
}
}
}
}
}
haplotypes.removeAll(haplotypesToRemove);
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.
the class ReadThreadingAssembler method runLocalAssembly.
/**
* Main entry point into the assembly engine. Build a set of deBruijn graphs out of the provided reference sequence and list of reads
* @param assemblyRegion AssemblyRegion object holding the reads which are to be used during assembly
* @param refHaplotype reference haplotype object
* @param fullReferenceWithPadding byte array holding the reference sequence with padding
* @param refLoc GenomeLoc object corresponding to the reference sequence with padding
* @param givenAlleles the alleles to inject into the haplotypes during GGA mode
* @param readErrorCorrector a ReadErrorCorrector object, if read are to be corrected before assembly. Can be null if no error corrector is to be used.
* @return the resulting assembly-result-set
*/
public AssemblyResultSet runLocalAssembly(final AssemblyRegion assemblyRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final SimpleInterval refLoc, final List<VariantContext> givenAlleles, final ReadErrorCorrector readErrorCorrector, final SAMFileHeader header) {
Utils.nonNull(assemblyRegion, "Assembly engine cannot be used with a null AssemblyRegion.");
Utils.nonNull(assemblyRegion.getExtendedSpan(), "Active region must have an extended location.");
Utils.nonNull(refHaplotype, "Reference haplotype cannot be null.");
Utils.nonNull(fullReferenceWithPadding, "fullReferenceWithPadding");
Utils.nonNull(refLoc, "refLoc");
Utils.validateArg(fullReferenceWithPadding.length == refLoc.size(), "Reference bases and reference loc must be the same size.");
ParamUtils.isPositiveOrZero(pruneFactor, "Pruning factor cannot be negative");
// create the list of artificial haplotypes that should be added to the graph for GGA mode
final List<Haplotype> givenHaplotypes = composeGivenHaplotypes(refHaplotype, givenAlleles, assemblyRegion.getExtendedSpan());
// error-correct reads before clipping low-quality tails: some low quality bases might be good and we want to recover them
final List<GATKRead> correctedReads;
if (readErrorCorrector != null) {
// now correct all reads in active region after filtering/downsampling
// Note that original reads in active region are NOT modified by default, since they will be used later for GL computation,
// and we only want the read-error corrected reads for graph building.
readErrorCorrector.addReadsToKmers(assemblyRegion.getReads());
correctedReads = new ArrayList<>(readErrorCorrector.correctReads(assemblyRegion.getReads()));
} else {
correctedReads = assemblyRegion.getReads();
}
final List<SeqGraph> nonRefGraphs = new LinkedList<>();
final AssemblyResultSet resultSet = new AssemblyResultSet();
resultSet.setRegionForGenotyping(assemblyRegion);
resultSet.setFullReferenceWithPadding(fullReferenceWithPadding);
resultSet.setPaddedReferenceLoc(refLoc);
final SimpleInterval activeRegionExtendedLocation = assemblyRegion.getExtendedSpan();
refHaplotype.setGenomeLocation(activeRegionExtendedLocation);
resultSet.add(refHaplotype);
final Map<SeqGraph, AssemblyResult> assemblyResultByGraph = new HashMap<>();
// create the graphs by calling our subclass assemble method
for (final AssemblyResult result : assemble(correctedReads, refHaplotype, givenHaplotypes, header)) {
if (result.getStatus() == AssemblyResult.Status.ASSEMBLED_SOME_VARIATION) {
// do some QC on the graph
sanityCheckGraph(result.getGraph(), refHaplotype);
// add it to graphs with meaningful non-reference features
assemblyResultByGraph.put(result.getGraph(), result);
nonRefGraphs.add(result.getGraph());
}
}
findBestPaths(nonRefGraphs, refHaplotype, refLoc, activeRegionExtendedLocation, assemblyResultByGraph, resultSet);
// print the graphs if the appropriate debug option has been turned on
if (graphOutputPath != null) {
printGraphs(nonRefGraphs);
}
return resultSet;
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.
the class AlignmentUtilsUnitTest method makeReadAlignedToRefData.
@DataProvider(name = "ReadAlignedToRefData")
public Object[][] makeReadAlignedToRefData() {
List<Object[]> tests = new ArrayList<>();
final String hapBases = "ACTGAAGGTTCC";
final Haplotype allM = makeHaplotypeForAlignedToRefTest(hapBases, hapBases.length() + "M");
// make sure we get back a cigar of the right length
for (int i = -1; i < hapBases.length(); i++) {
final GATKRead read = makeReadForAlignedToRefTest(hapBases);
final byte[] bases = read.getBases();
if (i != -1) {
bases[i] = (byte) 'A';
}
read.setBases(bases);
tests.add(new Object[] { read, allM, 10, 10, allM.getCigar().toString() });
}
// make sure insertions at the front are correctly handled
for (int padFront = 1; padFront < 10; padFront++) {
final GATKRead read = makeReadForAlignedToRefTest(Utils.dupString("N", padFront) + hapBases);
tests.add(new Object[] { read, allM, 10, 10, padFront + "I" + allM.getCigar().toString() });
}
// make sure insertions at the back are correctly handled
for (int padBack = 1; padBack < 10; padBack++) {
final GATKRead read = makeReadForAlignedToRefTest(hapBases + Utils.dupString("N", padBack));
tests.add(new Object[] { read, allM, 10, 10, allM.getCigar().toString() + padBack + "I" });
}
// make sure refStart and hapStart are respected
for (int refStart = 1; refStart < 10; refStart++) {
for (int hapStart = refStart; hapStart < 10 + refStart; hapStart++) {
final Haplotype hap = new Haplotype(allM.getBases());
hap.setCigar(allM.getCigar());
hap.setAlignmentStartHapwrtRef(hapStart);
final GATKRead read = makeReadForAlignedToRefTest(new String(hap.getBases()));
tests.add(new Object[] { read, hap, refStart, refStart + hapStart, allM.getCigar().toString() });
}
}
// example case of bad alignment because SW doesn't necessarily left-align indels
{
final String hap = "ACTGTGGGTTCCTCTTATTTTATTTCTACATCAATGTTCATATTTAACTTATTATTTTATCTTATTTTTAAATTTCTTTTATGTTGAGCCTTGATGAAAGCCATAGGTTCTCTCATATAATTGTATGTGTATGTATGTATATGTACATAATATATACATATATGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTGTATTACATAATATATACATATATGTATATATTATGTATATGTACATAATATATACATATATG";
final String hapCigar = "399M";
final String readBases = "ATGTACATAATATATACATATATGTATATGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTACATAATATATACGTATATGTATGTGTATGTGTATTACATAATATATACATATATGTATATATTATGTATATGTACATAATAT";
final GATKRead read = makeReadForAlignedToRefTest(readBases);
final int refStart = 10130100;
final int hapStart = 500;
final String badCigar = "31M6D211M";
final String goodCigar = "28M6D214M";
final Haplotype badHap = new Haplotype(hap.getBytes());
badHap.setCigar(TextCigarCodec.decode(hapCigar));
badHap.setAlignmentStartHapwrtRef(hapStart);
final int expectedPos = 10130740;
tests.add(new Object[] { read, badHap, refStart, expectedPos, goodCigar });
}
return tests.toArray(new Object[][] {});
}
use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in project gatk by broadinstitute.
the class AlignmentUtilsUnitTest method makeHaplotypeForAlignedToRefTest.
/******************************************************
* Tests for AlignmentUtils.createReadAlignedToRef()
******************************************************/
private Haplotype makeHaplotypeForAlignedToRefTest(final String bases, final String cigar) {
final Haplotype hap = new Haplotype(bases.getBytes());
hap.setCigar(TextCigarCodec.decode(cigar));
return hap;
}
Aggregations