Search in sources :

Example 1 with SWPairwiseAlignment

use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.

the class AlignmentUtilsUnitTest method makeComplexReadAlignedToRef.

@DataProvider(name = "ComplexReadAlignedToRef")
public Object[][] makeComplexReadAlignedToRef() {
    List<Object[]> tests = new ArrayList<>();
    final List<Mutation> allMutations = Arrays.asList(new Mutation(1, 1, CigarOperator.M), new Mutation(2, 1, CigarOperator.M), new Mutation(3, 1, CigarOperator.I), new Mutation(7, 1, CigarOperator.D));
    int i = 0;
    final String referenceBases = "ACTGACTGACTG";
    final String paddedReference = "NNNN" + referenceBases + "NNNN";
    for (final List<Mutation> mutations : Utils.makePermutations(allMutations, 3, false)) {
        final MutatedSequence hap = mutateSequence(referenceBases, mutations);
        final Haplotype haplotype = new Haplotype(hap.seq.getBytes());
        final SWPairwiseAlignment align = new SWPairwiseAlignment(paddedReference.getBytes(), hap.seq.getBytes());
        haplotype.setAlignmentStartHapwrtRef(align.getAlignmentStart2wrt1());
        haplotype.setCigar(align.getCigar());
        for (final List<Mutation> readMutations : Utils.makePermutations(allMutations, 3, false)) {
            final MutatedSequence readBases = mutateSequence(hap.seq, readMutations);
            final GATKRead read = makeReadForAlignedToRefTest(readBases.seq);
            tests.add(new Object[] { i++, read, paddedReference, haplotype, hap.numMismatches + readBases.numMismatches });
        }
    }
    return tests.toArray(new Object[][] {});
}
Also used : SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) DataProvider(org.testng.annotations.DataProvider)

Example 2 with SWPairwiseAlignment

use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk-protected by broadinstitute.

the class ReadThreadingGraph method generateCigarAgainstUpwardsReferencePath.

/**
     * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
     * provided vertex is the source) and the reference path.
     *
     * @param vertex   the source of the dangling head
     * @param pruneFactor  the prune factor to use in ignoring chain pieces
     * @return a SmithWaterman object which can be null if no proper alignment could be generated
     */
@VisibleForTesting
final DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
    // find the highest common descendant path between vertex and the reference source if available
    final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
    if (// add 1 to include the LCA
    altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1) {
        return null;
    }
    // now get the reference path from the LCA
    final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Optional.empty());
    // create the Smith-Waterman strings to use
    final byte[] refBases = getBasesForPath(refPath, true);
    final byte[] altBases = getBasesForPath(altPath, true);
    // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
    final SWPairwiseAlignment alignment = new SWPairwiseAlignment(refBases, altBases, SWPairwiseAlignment.STANDARD_NGS, SWPairwiseAlignment.OverhangStrategy.LEADING_INDEL);
    return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
Also used : SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with SWPairwiseAlignment

use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk-protected by broadinstitute.

the class ReadThreadingGraph method generateCigarAgainstDownwardsReferencePath.

/**
     * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
     * provided vertex is the sink) and the reference path.
     *
     * @param vertex   the sink of the dangling chain
     * @param pruneFactor  the prune factor to use in ignoring chain pieces
     * @return a SmithWaterman object which can be null if no proper alignment could be generated
     */
@VisibleForTesting
final DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) {
    // while heads can be 0, tails absolutely cannot
    final int minTailPathLength = Math.max(1, minDanglingBranchLength);
    // find the lowest common ancestor path between this vertex and the diverging master path if available
    final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
    if (// add 1 to include the LCA
    altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1) {
        return null;
    }
    // now get the reference path from the LCA
    final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Optional.ofNullable(incomingEdgeOf(altPath.get(1))));
    // create the Smith-Waterman strings to use
    final byte[] refBases = getBasesForPath(refPath, false);
    final byte[] altBases = getBasesForPath(altPath, false);
    // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
    final SWPairwiseAlignment alignment = new SWPairwiseAlignment(refBases, altBases, SWPairwiseAlignment.STANDARD_NGS, SWPairwiseAlignment.OverhangStrategy.LEADING_INDEL);
    return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
Also used : SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with SWPairwiseAlignment

use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.

the class AlignmentUtils method createReadAlignedToRef.

/**
     * Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
     * via the alignment of haplotype (via its getCigar) method.
     *
     * @param originalRead the read we want to write aligned to the reference genome
     * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
     * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
     * @param isInformative true if the read is differentially informative for one of the haplotypes
     *
     * @throws IllegalArgumentException if {@code originalRead} is {@code null} or {@code haplotype} is {@code null} or it
     *   does not have a Cigar or the {@code referenceStart} is invalid (less than 1).
     *
     * @return a GATKRead aligned to reference. Never {@code null}.
     */
public static GATKRead createReadAlignedToRef(final GATKRead originalRead, final Haplotype haplotype, final Haplotype refHaplotype, final int referenceStart, final boolean isInformative) {
    Utils.nonNull(originalRead);
    Utils.nonNull(haplotype);
    Utils.nonNull(refHaplotype);
    Utils.nonNull(haplotype.getCigar());
    if (referenceStart < 1) {
        throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart);
    }
    // compute the smith-waterman alignment of read -> haplotype
    final SWPairwiseAlignment swPairwiseAlignment = new SWPairwiseAlignment(haplotype.getBases(), originalRead.getBases(), CigarUtils.NEW_SW_PARAMETERS);
    if (swPairwiseAlignment.getAlignmentStart2wrt1() == -1) {
        // sw can fail (reasons not clear) so if it happens just don't realign the read
        return originalRead;
    }
    final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar());
    // since we're modifying the read we need to clone it
    final GATKRead read = originalRead.copy();
    // only informative reads are given the haplotype tag to enhance visualization
    if (isInformative) {
        read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
    }
    // compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
    final Cigar extendedHaplotypeCigar = haplotype.getConsolidatedPaddedCigar(1000);
    final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1());
    final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;
    read.setPosition(read.getContig(), readStartOnReference);
    // compute the read -> ref alignment by mapping read -> hap -> ref from the
    // SW of read -> hap mapped through the given by hap -> ref
    final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1);
    final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef);
    final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw);
    final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(), originalRead.getBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
    read.setCigar(readToRefCigar);
    if (readToRefCigar.getReadLength() != read.getLength()) {
        throw new GATKException("Cigar " + readToRefCigar + " with read length " + readToRefCigar.getReadLength() + " != read length " + read.getLength() + " for read " + read.toString() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength() + "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
    }
    return read;
}
Also used : Cigar(htsjdk.samtools.Cigar) SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 5 with SWPairwiseAlignment

use of org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment in project gatk by broadinstitute.

the class CigarUtils method calculateCigar.

/**
     * Calculate the cigar elements for this path against the reference sequence
     *
     * @param refSeq the reference sequence that all of the bases in this path should align to
     * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found
     */
public static Cigar calculateCigar(final byte[] refSeq, final byte[] altSeq) {
    Utils.nonNull(refSeq, "refSeq");
    Utils.nonNull(altSeq, "altSeq");
    if (altSeq.length == 0) {
        // horrible edge case from the unit tests, where this path has no bases
        return new Cigar(Arrays.asList(new CigarElement(refSeq.length, CigarOperator.D)));
    }
    // If two strings are equal (a O(n) check) then it's trivial to get CIGAR for them.
    if (Arrays.equals(refSeq, altSeq)) {
        final Cigar matching = new Cigar();
        matching.add(new CigarElement(refSeq.length, CigarOperator.MATCH_OR_MISMATCH));
        return matching;
    }
    final Cigar nonStandard;
    final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD;
    final String paddedPath = SW_PAD + new String(altSeq) + SW_PAD;
    final SWPairwiseAlignment alignment = new SWPairwiseAlignment(paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS);
    if (isSWFailure(alignment)) {
        return null;
    }
    // cut off the padding bases
    final int baseStart = SW_PAD.length();
    // -1 because it's inclusive
    final int baseEnd = paddedPath.length() - SW_PAD.length() - 1;
    nonStandard = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd);
    if (nonStandard.getReferenceLength() != refSeq.length) {
        nonStandard.add(new CigarElement(refSeq.length - nonStandard.getReferenceLength(), CigarOperator.D));
    }
    // finally, return the cigar with all indels left aligned
    return leftAlignCigarSequentially(nonStandard, refSeq, altSeq, 0, 0);
}
Also used : Cigar(htsjdk.samtools.Cigar) SWPairwiseAlignment(org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

SWPairwiseAlignment (org.broadinstitute.hellbender.utils.smithwaterman.SWPairwiseAlignment)7 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Cigar (htsjdk.samtools.Cigar)2 CigarElement (htsjdk.samtools.CigarElement)1 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)1 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)1 DataProvider (org.testng.annotations.DataProvider)1