Search in sources :

Example 91 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class PileupElementUnitTest method testPileupElementTest.

@Test(dataProvider = "PileupElementTest")
public void testPileupElementTest(final LIBSTest params) {
    final GATKRead read = params.makeRead();
    final AlignmentStateMachine state = new AlignmentStateMachine(read);
    final LIBS_position tester = new LIBS_position(read);
    while (state.stepForwardOnGenome() != null) {
        tester.stepForwardOnGenome();
        final PileupElement pe = state.makePileupElement();
        Assert.assertEquals(pe.getRead(), read);
        Assert.assertEquals(pe.getMappingQual(), read.getMappingQuality());
        Assert.assertEquals(pe.getOffset(), state.getReadOffset());
        Assert.assertEquals(pe.isDeletion(), state.getCigarOperator() == D);
        Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
        Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
        Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
        if (!hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset())) {
            Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
            Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
        }
        Assert.assertEquals(pe.getOffsetInCurrentCigar(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure");
        Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small");
        Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big");
        Assert.assertNotNull(pe.toString());
        Assert.assertEquals(pe.atEndOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == state.getCurrentCigarElement().getLength() - 1, "atEndOfCurrentCigar failed");
        Assert.assertEquals(pe.atStartOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == 0, "atStartOfCurrentCigar failed");
        Assert.assertEquals(pe.getBase(), pe.isDeletion() ? PileupElement.DELETION_BASE : read.getBases()[state.getReadOffset()]);
        Assert.assertEquals(pe.getQual(), pe.isDeletion() ? PileupElement.DELETION_QUAL : read.getBaseQualities()[state.getReadOffset()]);
        Assert.assertEquals(pe.getCurrentCigarElement(), state.getCurrentCigarElement());
        Assert.assertEquals(pe.getCurrentCigarOffset(), state.getCurrentCigarElementOffset());
        final int lengthOfImmediatelyFollowingIndel = pe.getLengthOfImmediatelyFollowingIndel();
        final String basesOfImmediatelyFollowingInsertion = pe.getBasesOfImmediatelyFollowingInsertion();
        Assert.assertTrue(lengthOfImmediatelyFollowingIndel != 0 || basesOfImmediatelyFollowingInsertion == null);
        Assert.assertTrue(basesOfImmediatelyFollowingInsertion == null || basesOfImmediatelyFollowingInsertion.length() == lengthOfImmediatelyFollowingIndel);
        // Don't test -- pe.getBaseIndex();
        if (pe.atEndOfCurrentCigar() && state.getCurrentCigarElementOffset() < read.numCigarElements() - 1) {
            final CigarElement nextElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() + 1);
            if (nextElement.getOperator() == CigarOperator.I) {
                Assert.assertTrue(pe.getBetweenNextPosition().size() >= 1);
                Assert.assertEquals(pe.getBetweenNextPosition().get(0), nextElement);
            }
            if (nextElement.getOperator() == M) {
                Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
            }
        } else {
            Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
        }
        if (pe.atStartOfCurrentCigar() && state.getCurrentCigarElementOffset() > 0) {
            final CigarElement prevElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() - 1);
            if (prevElement.getOperator() == CigarOperator.I) {
                Assert.assertTrue(pe.getBetweenPrevPosition().size() >= 1);
                Assert.assertEquals(pe.getBetweenPrevPosition().get(pe.getBetweenPrevPosition().size() - 1), prevElement);
            }
            if (prevElement.getOperator() == M) {
                Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
            }
        } else {
            Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
        }
        // TODO -- add meaningful tests
        pe.getBaseInsertionQual();
        pe.getBaseDeletionQual();
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LIBS_position(org.broadinstitute.hellbender.utils.locusiterator.LIBS_position) CigarElement(htsjdk.samtools.CigarElement) AlignmentStateMachine(org.broadinstitute.hellbender.utils.locusiterator.AlignmentStateMachine) LocusIteratorByStateBaseTest(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByStateBaseTest) Test(org.testng.annotations.Test) LIBSTest(org.broadinstitute.hellbender.utils.locusiterator.LIBSTest)

Example 92 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class SWPairwiseAlignmentUnitTest method testForIdenticalAlignmentsWithDifferingFlankLengths.

@Test(enabled = true)
public void testForIdenticalAlignmentsWithDifferingFlankLengths() {
    //This test is designed to ensure that the indels are correctly placed
    //if the region flanking these indels is extended by a varying amount.
    //It checks for problems caused by floating point rounding leading to different
    //paths being selected.
    //Create two versions of the same sequence with different flanking regions.
    byte[] paddedRef = "GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".getBytes();
    byte[] paddedHap = "GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA--GGGCC---------------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".replace("-", "").getBytes();
    byte[] notPaddedRef = "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".getBytes();
    byte[] notPaddedHap = "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA---------GGGCC--------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".replace("-", "").getBytes();
    //a simplified version of the getCigar routine in the haplotype caller to align these
    final String SW_PAD = "NNNNNNNNNN";
    final String paddedsRef = SW_PAD + new String(paddedRef) + SW_PAD;
    final String paddedsHap = SW_PAD + new String(paddedHap) + SW_PAD;
    final String notPaddedsRef = SW_PAD + new String(notPaddedRef) + SW_PAD;
    final String notpaddedsHap = SW_PAD + new String(notPaddedHap) + SW_PAD;
    final SWPairwiseAlignment paddedAlignment = new SWPairwiseAlignment(paddedsRef.getBytes(), paddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS);
    final SWPairwiseAlignment notPaddedAlignment = new SWPairwiseAlignment(notPaddedsRef.getBytes(), notpaddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS);
    //Now verify that the two sequences have the same alignment and not match positions.
    Cigar rawPadded = paddedAlignment.getCigar();
    Cigar notPadded = notPaddedAlignment.getCigar();
    List<CigarElement> paddedC = rawPadded.getCigarElements();
    List<CigarElement> notPaddedC = notPadded.getCigarElements();
    Assert.assertEquals(paddedC.size(), notPaddedC.size());
    for (int i = 0; i < notPaddedC.size(); i++) {
        CigarElement pc = paddedC.get(i);
        CigarElement npc = notPaddedC.get(i);
        if (pc.getOperator() == CigarOperator.M && npc.getOperator() == CigarOperator.M) {
            continue;
        }
        int l1 = pc.getLength();
        int l2 = npc.getLength();
        Assert.assertEquals(l1, l2);
        Assert.assertEquals(pc.getOperator(), npc.getOperator());
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 93 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testBasicBubbleData.

@Test(dataProvider = "BasicBubbleDataProvider")
public void testBasicBubbleData(final int refBubbleLength, final int altBubbleLength) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(3);
    final String preRef = "ATGG";
    final String postRef = "GGGGC";
    SeqVertex v = new SeqVertex(preRef);
    SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
    SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
    SeqVertex v3 = new SeqVertex(postRef);
    graph.addVertex(v);
    graph.addVertex(v2Ref);
    graph.addVertex(v2Alt);
    graph.addVertex(v3);
    graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
    graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
    graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
    graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
    // Construct the test path
    Path<SeqVertex, BaseEdge> path = new Path<>(v, graph);
    path = new Path<>(path, graph.getEdge(v, v2Alt));
    path = new Path<>(path, graph.getEdge(v2Alt, v3));
    // Construct the actual cigar string implied by the test path
    Cigar expectedCigar = new Cigar();
    expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
    if (refBubbleLength > altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if (refBubbleLength < altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
    final String ref = preRef + v2Ref.getSequenceString() + postRef;
    Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch");
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 94 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class BaseRecalibrationEngine method calculateIsSNPOrIndel.

/**
     * Locates all SNP and indel events, storing them in the provided snp, isIns, and isDel arrays, and returns
     * the total number of SNP/indel events.
     *
     * @param read read to inspect
     * @param ref source of reference bses
     * @param snp storage for snp events (must be of length read.getBases().length and initialized to all 0's)
     * @param isIns storage for insertion events (must be of length read.getBases().length and initialized to all 0's)
     * @param isDel storage for deletion events (must be of length read.getBases().length and initialized to all 0's)
     * @return the total number of SNP and indel events
     */
protected static int calculateIsSNPOrIndel(final GATKRead read, final ReferenceDataSource ref, int[] snp, int[] isIns, int[] isDel) {
    final byte[] refBases = ref.queryAndPrefetch(read.getContig(), read.getStart(), read.getEnd()).getBases();
    int readPos = 0;
    int refPos = 0;
    int nEvents = 0;
    for (final CigarElement ce : read.getCigarElements()) {
        final int elementLength = ce.getLength();
        switch(ce.getOperator()) {
            case M:
            case EQ:
            case X:
                for (int i = 0; i < elementLength; i++) {
                    int snpInt = (BaseUtils.basesAreEqual(read.getBase(readPos), refBases[refPos]) ? 0 : 1);
                    snp[readPos] = snpInt;
                    nEvents += snpInt;
                    readPos++;
                    refPos++;
                }
                break;
            case D:
                {
                    final int index = (read.isReverseStrand() ? readPos : readPos - 1);
                    updateIndel(isDel, index);
                    refPos += elementLength;
                    break;
                }
            case N:
                refPos += elementLength;
                break;
            case I:
                {
                    final boolean forwardStrandRead = !read.isReverseStrand();
                    if (forwardStrandRead) {
                        updateIndel(isIns, readPos - 1);
                    }
                    readPos += elementLength;
                    if (!forwardStrandRead) {
                        updateIndel(isIns, readPos);
                    }
                    break;
                }
            case // ReferenceContext doesn't have the soft clipped bases!
            S:
                readPos += elementLength;
                break;
            case H:
            case P:
                break;
            default:
                throw new GATKException("Unsupported cigar operator: " + ce.getOperator());
        }
    }
    // we don't sum those as we go because they might set the same place to 1 twice
    nEvents += MathUtils.sum(isDel) + MathUtils.sum(isIns);
    return nEvents;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 95 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class SWPairwiseAlignment method align.

/**
     * Aligns the alternate sequence to the reference sequence
     *
     * @param reference  ref sequence
     * @param alternate  alt sequence
     */
private void align(final byte[] reference, final byte[] alternate) {
    if (reference == null || reference.length == 0 || alternate == null || alternate.length == 0)
        throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation");
    // avoid running full Smith-Waterman if there is an exact match of alternate in reference
    int matchIndex = -1;
    if (overhangStrategy == OverhangStrategy.SOFTCLIP || overhangStrategy == OverhangStrategy.IGNORE) {
        // Use a substring search to find an exact match of the alternate in the reference
        // NOTE: This approach only works for SOFTCLIP and IGNORE overhang strategies
        matchIndex = Utils.lastIndexOf(reference, alternate);
    }
    if (matchIndex != -1) {
        // generate the alignment result when the substring search was successful
        final List<CigarElement> lce = new ArrayList<>(alternate.length);
        lce.add(makeElement(State.MATCH, alternate.length));
        alignmentResult = new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), matchIndex);
    } else {
        // run full Smith-Waterman
        final int n = reference.length + 1;
        final int m = alternate.length + 1;
        final int[][] sw = new int[n][m];
        if (keepScoringMatrix) {
            SW = sw;
        }
        final int[][] btrack = new int[n][m];
        calculateMatrix(reference, alternate, sw, btrack);
        // length of the segment (continuous matches, insertions or deletions)
        alignmentResult = calculateCigar(sw, btrack, overhangStrategy);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13