use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.
the class AlignmentUtilsUnitTest method makeComplexReadAlignedToRef.
@DataProvider(name = "ComplexReadAlignedToRef")
public Object[][] makeComplexReadAlignedToRef() {
List<Object[]> tests = new ArrayList<>();
final List<Mutation> allMutations = Arrays.asList(new Mutation(1, 1, CigarOperator.M), new Mutation(2, 1, CigarOperator.M), new Mutation(3, 1, CigarOperator.I), new Mutation(7, 1, CigarOperator.D));
int i = 0;
final String referenceBases = "ACTGACTGACTG";
final String paddedReference = "NNNN" + referenceBases + "NNNN";
for (final List<Mutation> mutations : Utils.makePermutations(allMutations, 3, false)) {
final MutatedSequence hap = mutateSequence(referenceBases, mutations);
final Haplotype haplotype = new Haplotype(hap.seq.getBytes());
final SWPairwiseAlignment align = new SWPairwiseAlignment(paddedReference.getBytes(), hap.seq.getBytes());
haplotype.setAlignmentStartHapwrtRef(align.getAlignmentStart2wrt1());
haplotype.setCigar(align.getCigar());
for (final List<Mutation> readMutations : Utils.makePermutations(allMutations, 3, false)) {
final MutatedSequence readBases = mutateSequence(hap.seq, readMutations);
final GATKRead read = makeReadForAlignedToRefTest(readBases.seq);
tests.add(new Object[] { i++, read, paddedReference, haplotype, hap.numMismatches + readBases.numMismatches });
}
}
return tests.toArray(new Object[][] {});
}
use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk-protected by broadinstitute.
the class ReadThreadingGraph method generateCigarAgainstUpwardsReferencePath.
/**
* Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
* provided vertex is the source) and the reference path.
*
* @param vertex the source of the dangling head
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
@VisibleForTesting
final DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
// find the highest common descendant path between vertex and the reference source if available
final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
if (// add 1 to include the LCA
altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1) {
return null;
}
// now get the reference path from the LCA
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Optional.empty());
// create the Smith-Waterman strings to use
final byte[] refBases = getBasesForPath(refPath, true);
final byte[] altBases = getBasesForPath(altPath, true);
// run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(refBases, altBases, SWPairwiseAlignment.STANDARD_NGS, SWPairwiseAlignment.OverhangStrategy.LEADING_INDEL);
return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk-protected by broadinstitute.
the class ReadThreadingGraph method generateCigarAgainstDownwardsReferencePath.
/**
* Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
* provided vertex is the sink) and the reference path.
*
* @param vertex the sink of the dangling chain
* @param pruneFactor the prune factor to use in ignoring chain pieces
* @return a SmithWaterman object which can be null if no proper alignment could be generated
*/
@VisibleForTesting
final DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
// while heads can be 0, tails absolutely cannot
final int minTailPathLength = Math.max(1, minDanglingBranchLength);
// find the lowest common ancestor path between this vertex and the diverging master path if available
final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
if (// add 1 to include the LCA
altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1) {
return null;
}
// now get the reference path from the LCA
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Optional.ofNullable(incomingEdgeOf(altPath.get(1))));
// create the Smith-Waterman strings to use
final byte[] refBases = getBasesForPath(refPath, false);
final byte[] altBases = getBasesForPath(altPath, false);
// run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(refBases, altBases, SWPairwiseAlignment.STANDARD_NGS, SWPairwiseAlignment.OverhangStrategy.LEADING_INDEL);
return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.
the class AlignmentUtils method createReadAlignedToRef.
/**
* Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
* via the alignment of haplotype (via its getCigar) method.
*
* @param originalRead the read we want to write aligned to the reference genome
* @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
* @param referenceStart the start of the reference that haplotype is aligned to. Provides global coordinate frame.
* @param isInformative true if the read is differentially informative for one of the haplotypes
*
* @throws IllegalArgumentException if {@code originalRead} is {@code null} or {@code haplotype} is {@code null} or it
* does not have a Cigar or the {@code referenceStart} is invalid (less than 1).
*
* @return a GATKRead aligned to reference. Never {@code null}.
*/
public static GATKRead createReadAlignedToRef(final GATKRead originalRead, final Haplotype haplotype, final Haplotype refHaplotype, final int referenceStart, final boolean isInformative) {
Utils.nonNull(originalRead);
Utils.nonNull(haplotype);
Utils.nonNull(refHaplotype);
Utils.nonNull(haplotype.getCigar());
if (referenceStart < 1) {
throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
}
// compute the smith-waterman alignment of read -> haplotype
final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getBases(), CigarUtils.NEW_SW_PARAMETERS);
if (swPairwiseAlignment.getAlignmentStart2wrt1() == -1) {
// sw can fail (reasons not clear) so if it happens just don't realign the read
return originalRead;
}
final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar());
// since we're modifying the read we need to clone it
final GATKRead read = originalRead.copy();
// only informative reads are given the haplotype tag to enhance visualization
if (isInformative) {
read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
}
// compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
read.setPosition(read.getContig(), readStartOnReference);
// compute the read -> ref alignment by mapping read -> hap -> ref from the
// SW of read -> hap mapped through the given by hap -> ref
final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(), originalRead.getBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
read.setCigar(readToRefCigar);
if (readToRefCigar.getReadLength() != read.getLength()) {
throw new GATKException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength() + " != read length " + read.getLength() + " for read " + read.toString() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength() + "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
}
return read;
}
use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.
the class CigarUtils method calculateCigar.
/**
* Calculate the cigar elements for this path against the reference sequence
*
* @param refSeq the reference sequence that all of the bases in this path should align to
* @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found
*/
public static Cigar calculateCigar(final byte[] refSeq, final byte[] altSeq) {
Utils.nonNull(refSeq, "refSeq");
Utils.nonNull(altSeq, "altSeq");
if (altSeq.length == 0) {
// horrible edge case from the unit tests, where this path has no bases
return new Cigar(Arrays.asList(new CigarElement(refSeq.length, CigarOperator.D)));
}
// If two strings are equal (a O(n) check) then it's trivial to get CIGAR for them.
if (Arrays.equals(refSeq, altSeq)) {
final Cigar matching = new Cigar();
matching.add(new CigarElement(refSeq.length, CigarOperator.MATCH_OR_MISMATCH));
return matching;
}
final Cigar nonStandard;
final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD;
final String paddedPath = SW_PAD + new String(altSeq) + SW_PAD;
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS);
if (isSWFailure(alignment)) {
return null;
}
// cut off the padding bases
final int baseStart = SW_PAD.length();
// -1 because it's inclusive
final int baseEnd = paddedPath.length() - SW_PAD.length() - 1;
nonStandard = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd);
if (nonStandard.getReferenceLength() != refSeq.length) {
nonStandard.add(new CigarElement(refSeq.length - nonStandard.getReferenceLength(), CigarOperator.D));
}
// finally, return the cigar with all indels left aligned
return leftAlignCigarSequentially(nonStandard, refSeq, altSeq, 0, 0);
}
Aggregations