Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the gatk project by broadinstitute: the class ReadThreadingAssembler, method createGraph.
/**
 * Builds the read-threading assembly graph for a single kmer size, threading the reference,
 * any GGA haplotypes, and all reads onto it, then sanity-checking the result.
 *
 * @param reads reads whose kmers are threaded onto the graph
 * @param refHaplotype reference haplotype
 * @param kmerSize kmer size
 * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
 * @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
 * @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers
 * @param header SAM header used when threading reads onto the graph
 * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
 */
private AssemblyResult createGraph(final Iterable<GATKRead> reads, final Haplotype refHaplotype, final int kmerSize, final Iterable<Haplotype> activeAlleleHaplotypes, final boolean allowLowComplexityGraphs, final boolean allowNonUniqueKmersInRef, final SAMFileHeader header) {
    // the assembled region may simply be shorter than a single kmer
    if (refHaplotype.length() < kmerSize) {
        return new AssemblyResult(AssemblyResult.Status.FAILED, null, null);
    }
    // unless explicitly allowed, refuse kmer sizes at which the reference is not uniquely kmerizable
    if (!allowNonUniqueKmersInRef) {
        final ReadThreadingGraph.SequenceForKmers refSeqForKmers = new ReadThreadingGraph.SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true);
        if (!ReadThreadingGraph.determineNonUniqueKmers(refSeqForKmers, kmerSize).isEmpty()) {
            if (debug) {
                logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
            }
            return null;
        }
    }
    final ReadThreadingGraph threadingGraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, MIN_BASE_QUALITY_TO_USE_IN_ASSEMBLY, numPruningSamples);
    threadingGraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches);
    // thread the reference first so it anchors the graph
    threadingGraph.addSequence("ref", refHaplotype.getBases(), true);
    // inject the artificial GGA haplotypes, each under a distinct sequence name
    int alleleIndex = 0;
    for (final Haplotype alleleHaplotype : activeAlleleHaplotypes) {
        threadingGraph.addSequence("activeAllele" + alleleIndex, alleleHaplotype.getBases(), GGA_MODE_ARTIFICIAL_COUNTS, false);
        alleleIndex++;
    }
    // thread every read's kmers onto the graph
    for (final GATKRead read : reads) {
        threadingGraph.addRead(read, header);
    }
    // actually build the read threading graph
    threadingGraph.buildGraphIfNecessary();
    // sanity check: cyclic graphs cannot be enumerated for haplotype paths
    if (threadingGraph.hasCycles()) {
        if (debug) {
            logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it contains a cycle");
        }
        return null;
    }
    // sanity check: make sure the graph had enough complexity with the given kmer
    if (!allowLowComplexityGraphs && threadingGraph.isLowComplexity()) {
        if (debug) {
            logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because it does not produce a graph with enough complexity");
        }
        return null;
    }
    return getAssemblyResult(refHaplotype, kmerSize, threadingGraph);
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the gatk project by broadinstitute: the class ReadThreadingAssembler, method composeGivenHaplotypes.
/**
 * Builds the list of artificial GGA-mode haplotypes by injecting each of the provided
 * alternate alleles into the reference haplotype.
 *
 * @param refHaplotype the reference haplotype
 * @param givenHaplotypes the list of alternate alleles in VariantContexts
 * @param activeRegionWindow the window containing the reference haplotype
 *
 * @return a non-null list of haplotypes
 */
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
    Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
    Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
    Utils.nonNull(activeRegionWindow, "active region window cannot be null");
    Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
    // LinkedHashSet de-duplicates identical haplotypes while preserving insertion order
    final Set<Haplotype> uniqueHaplotypes = new LinkedHashSet<>();
    final int refStart = refHaplotype.getAlignmentStartHapwrtRef();
    for (final VariantContext vc : givenHaplotypes) {
        Utils.validateArg(GATKVariantContextUtils.overlapsRegion(vc, activeRegionWindow), " some variant provided does not overlap with active region window");
        // offset of this variant within the haplotype's coordinate system
        final int insertLocation = refStart + vc.getStart() - activeRegionWindow.getStart();
        for (final Allele altAllele : vc.getAlternateAlleles()) {
            final Haplotype withAllele = refHaplotype.insertAllele(vc.getReference(), altAllele, insertLocation, vc.getStart());
            // insertAllele returns null if the requested allele can't be inserted into the haplotype
            if (withAllele != null) {
                uniqueHaplotypes.add(withAllele);
            }
        }
    }
    return new ArrayList<>(uniqueHaplotypes);
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the gatk project by broadinstitute: the class HaplotypeCallerGenotypingEngine, method cleanUpSymbolicUnassembledEvents.
/**
 * Removes from the list every haplotype carrying a symbolic event that starts at the same
 * position as an indel or MNP in any haplotype (symbolic alleles can't currently be combined
 * with non-point events).
 *
 * <p>Fix over the previous version: the inner {@code break} only exited the innermost loop, so
 * once a haplotype was flagged for removal the method kept scanning all remaining events and
 * haplotypes, appending the same haplotype to the removal list repeatedly. The clash check is
 * now a helper with an early return, doing the minimum work and adding each haplotype at most
 * once; the removed set is unchanged.
 *
 * @param haplotypes Input/output list of haplotypes, before/after removal
 */
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
protected static void cleanUpSymbolicUnassembledEvents(final List<Haplotype> haplotypes) {
    Utils.nonNull(haplotypes);
    final List<Haplotype> haplotypesToRemove = new ArrayList<>();
    for (final Haplotype h : haplotypes) {
        if (hasSymbolicEventClashingWithNonPointEvent(h, haplotypes)) {
            haplotypesToRemove.add(h);
        }
    }
    haplotypes.removeAll(haplotypesToRemove);
}

/**
 * Returns true if {@code h} carries a symbolic event starting at the same position as an
 * indel or MNP event in any haplotype of {@code haplotypes} (including {@code h} itself,
 * matching the original scan).
 */
private static boolean hasSymbolicEventClashingWithNonPointEvent(final Haplotype h, final List<Haplotype> haplotypes) {
    for (final VariantContext vc : h.getEventMap().getVariantContexts()) {
        if (!vc.isSymbolic()) {
            continue;
        }
        for (final Haplotype other : haplotypes) {
            for (final VariantContext otherVc : other.getEventMap().getVariantContexts()) {
                // unfortunately symbolic alleles can't currently be combined with non-point events
                if (vc.getStart() == otherVc.getStart() && (otherVc.isIndel() || otherVc.isMNP())) {
                    return true;
                }
            }
        }
    }
    return false;
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the gatk project by broadinstitute: the class ReadThreadingAssembler, method findBestPaths.
/**
 * Enumerates the best haplotype paths through each assembly graph, aligns each novel
 * haplotype back to the reference with Smith-Waterman, and registers the survivors in the
 * assembly result set. The reference haplotype is always present in the returned list.
 *
 * @param graphs the assembled sequence graphs (one per kmer size that succeeded)
 * @param refHaplotype the reference haplotype
 * @param refLoc location of the reference, used only in the failed-cigar log message
 * @param activeRegionWindow window assigned as the genome location of each kept haplotype
 * @param assemblyResultByGraph maps each graph back to its assembly result for registration
 * @param assemblyResultSet output set; every kept haplotype is added here with its assembly result
 * @return a non-null list of unique haplotypes, always containing the reference haplotype
 */
private List<Haplotype> findBestPaths(final Collection<SeqGraph> graphs, final Haplotype refHaplotype, final SimpleInterval refLoc, final SimpleInterval activeRegionWindow, final Map<SeqGraph, AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
    // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
    final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
    final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
    // keep every finder so the reference haplotype can be scored below if it wasn't assembled
    final Collection<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
    int failedCigars = 0;
    for (final SeqGraph graph : graphs) {
        final SeqVertex source = graph.getReferenceSourceVertex();
        final SeqVertex sink = graph.getReferenceSinkVertex();
        Utils.validateArg(source != null && sink != null, () -> "Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph " + graph);
        final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph, source, sink);
        finders.add(haplotypeFinder);
        final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
        while (bestHaplotypes.hasNext()) {
            final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
            final Haplotype h = kBestHaplotype.haplotype();
            // skip haplotypes already discovered via another graph or an earlier path
            if (!returnHaplotypes.contains(h)) {
                final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), h.getBases());
                if (cigar == null) {
                    // couldn't produce a meaningful alignment of haplotype to reference, fail quietly
                    failedCigars++;
                    continue;
                } else if (cigar.isEmpty()) {
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength());
                } else if (pathIsTooDivergentFromReference(cigar) || cigar.getReferenceLength() < MIN_HAPLOTYPE_REFERENCE_LENGTH) {
                    // N cigar elements means that a bubble was too divergent from the reference so skip over this path
                    continue;
                } else if (cigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength()) {
                    // SW failure: alignment doesn't span the full reference haplotype
                    throw new IllegalStateException("Smith-Waterman alignment failure. Cigar = " + cigar + " with reference length " + cigar.getReferenceLength() + " but expecting reference length of " + refHaplotype.getCigar().getReferenceLength() + " ref = " + refHaplotype + " path " + new String(h.getBases()));
                }
                // annotate the haplotype with its alignment and register it
                h.setCigar(cigar);
                h.setAlignmentStartHapwrtRef(activeRegionStart);
                h.setGenomeLocation(activeRegionWindow);
                returnHaplotypes.add(h);
                assemblyResultSet.add(h, assemblyResultByGraph.get(graph));
                if (debug) {
                    logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }
    }
    // if the reference haplotype wasn't one of the assembled paths, add it explicitly, scored
    // by the first finder that yields a non-NaN score for it
    if (!returnHaplotypes.contains(refHaplotype)) {
        double refScore = Double.NaN;
        for (final KBestHaplotypeFinder finder : finders) {
            final double candidate = finder.score(refHaplotype);
            if (Double.isNaN(candidate)) {
                continue;
            }
            refScore = candidate;
            break;
        }
        refHaplotype.setScore(refScore);
        returnHaplotypes.add(refHaplotype);
    }
    if (failedCigars != 0) {
        logger.debug(String.format("failed to align some haplotypes (%d) back to the reference (loc=%s); these will be ignored.", failedCigars, refLoc.toString()));
    }
    if (debug) {
        if (returnHaplotypes.size() > 1) {
            // NOTE(review): both numbers in this message are returnHaplotypes.size(); the second
            // was presumably meant to be the number of possible combinations -- confirm upstream
            logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
        } else {
            logger.info("Found only the reference haplotype in the assembly graph.");
        }
        for (final Haplotype h : returnHaplotypes) {
            logger.info(h.toString());
            logger.info("> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() + " ref " + h.isReference());
        }
    }
    return new ArrayList<>(returnHaplotypes);
}
Use of org.broadinstitute.hellbender.utils.haplotype.Haplotype in the gatk-protected project by broadinstitute: the class ReferenceConfidenceModelUnitTest, method testRefConfidencePartialReads.
@Test
public void testRefConfidencePartialReads() {
    final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples, 2);
    final IndependentSampleGenotypesModel genotypingModel = new IndependentSampleGenotypesModel();
    final String ref = "ACGTAACCGGTT";
    // slide reads of every length >= 3 across every start position that fits in the reference
    for (int readLength = 3; readLength < ref.length(); readLength++) {
        for (int readStart = 0; readStart + readLength < ref.length(); readStart++) {
            final RefConfData data = new RefConfData(ref, 0);
            final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
            final List<VariantContext> calls = Collections.emptyList();
            data.getActiveRegion().add(data.makeRead(readStart, readLength));
            final ReadLikelihoods<Haplotype> likelihoods = createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
            // expected depth is 1 exactly over the read's footprint and 0 everywhere else
            final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), 0));
            for (int pos = readStart; pos < readStart + readLength; pos++) {
                expectedDPs.set(pos, 1);
            }
            final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, calls);
            checkReferenceModelResult(data, contexts, expectedDPs, calls);
        }
    }
}
Aggregations