Search in sources :

Example 36 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testLeftAlignCigarSequentially.

@Test
public void testLeftAlignCigarSequentially() {
    String preRefString = "GATCGATCGATC";
    String postRefString = "TTT";
    String refString = "ATCGAGGAGAGCGCCCCG";
    String indelString1 = "X";
    String indelString2 = "YZ";
    int refIndel1 = 10;
    int refIndel2 = 12;
    for (final int indelSize1 : Arrays.asList(1, 2, 3, 4)) {
        for (final int indelOp1 : Arrays.asList(1, -1)) {
            for (final int indelSize2 : Arrays.asList(1, 2, 3, 4)) {
                for (final int indelOp2 : Arrays.asList(1, -1)) {
                    Cigar expectedCigar = new Cigar();
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    Cigar givenCigar = new Cigar();
                    givenCigar.add(new CigarElement(refString.length() + refIndel1 / 2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1 / 2 - indelSize1) : refIndel1 / 2) + refString.length() + refIndel2 / 2 * 2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 / 2 - indelSize2) * 2 : refIndel2 / 2 * 2) + refString.length(), CigarOperator.M));
                    String theRef = preRefString + refString + Strings.repeat(indelString1, refIndel1) + refString + Strings.repeat(indelString2, refIndel2) + refString + postRefString;
                    String theRead = refString + Strings.repeat(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Strings.repeat(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;
                    Cigar calculatedCigar = CigarUtils.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
                    Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
                }
            }
        }
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 37 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testRefAltSW.

@Test(dataProvider = "SystematicRefAltSWTestData")
public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(11);
    final int padSize = 0;
    SeqVertex top = new SeqVertex(Strings.repeat("N", padSize));
    SeqVertex ref = new SeqVertex(prefix + refMid + end);
    SeqVertex alt = new SeqVertex(prefix + altMid + end);
    SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize));
    graph.addVertices(top, ref, alt, bot);
    graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot);
    graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot);
    // Construct the test path
    Path<SeqVertex, BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph);
    Cigar expected = new Cigar();
    expected.add(new CigarElement(padSize, CigarOperator.M));
    if (!prefix.equals(""))
        expected.add(new CigarElement(prefix.length(), CigarOperator.M));
    for (final CigarElement elt : TextCigarCodec.decode(midCigar).getCigarElements()) expected.add(elt);
    if (!end.equals(""))
        expected.add(new CigarElement(end.length(), CigarOperator.M));
    expected.add(new CigarElement(padSize, CigarOperator.M));
    expected = AlignmentUtils.consolidateCigar(expected);
    final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString();
    final Cigar pathCigar = path.calculateCigar(refString.getBytes());
    logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar);
    logger.warn("Path " + path + " with cigar " + pathCigar);
    logger.warn("Expected cigar " + expected);
    Assert.assertEquals(pathCigar, expected, "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases()));
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 38 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testTripleBubbleData.

@Test(dataProvider = "TripleBubbleDataProvider")
public void testTripleBubbleData(final int refBubbleLength, final int altBubbleLength, final boolean offRefBeginning, final boolean offRefEnding) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(11);
    final String preAltOption = "ATCGATCGATCGATCGATCG";
    final String postAltOption = "CCCC";
    final String preRef = "ATGG";
    final String postRef = "GGCCG";
    final String midRef1 = "TTCCT";
    final String midRef2 = "CCCAAAAAAAAAAAA";
    SeqVertex preV = new SeqVertex(preAltOption);
    SeqVertex v = new SeqVertex(preRef);
    SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
    SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
    SeqVertex v4Ref = new SeqVertex(Strings.repeat("C", refBubbleLength));
    SeqVertex v4Alt = new SeqVertex(Strings.repeat("C", altBubbleLength - 1) + "T");
    SeqVertex v6Ref = new SeqVertex(Strings.repeat("G", refBubbleLength));
    SeqVertex v6Alt = new SeqVertex(Strings.repeat("G", altBubbleLength - 1) + "T");
    SeqVertex v3 = new SeqVertex(midRef1);
    SeqVertex v5 = new SeqVertex(midRef2);
    SeqVertex v7 = new SeqVertex(postRef);
    SeqVertex postV = new SeqVertex(postAltOption);
    final String ref = preRef + v2Ref.getSequenceString() + midRef1 + v4Ref.getSequenceString() + midRef2 + v6Ref.getSequenceString() + postRef;
    graph.addVertex(preV);
    graph.addVertex(v);
    graph.addVertex(v2Ref);
    graph.addVertex(v2Alt);
    graph.addVertex(v3);
    graph.addVertex(v4Ref);
    graph.addVertex(v4Alt);
    graph.addVertex(v5);
    graph.addVertex(v6Ref);
    graph.addVertex(v6Alt);
    graph.addVertex(v7);
    graph.addVertex(postV);
    graph.addEdge(preV, v, new BaseEdge(false, 1));
    graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
    graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
    graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
    graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
    graph.addEdge(v3, v4Ref, new BaseEdge(true, 10));
    graph.addEdge(v4Ref, v5, new BaseEdge(true, 10));
    graph.addEdge(v3, v4Alt, new BaseEdge(false, 5));
    graph.addEdge(v4Alt, v5, new BaseEdge(false, 5));
    graph.addEdge(v5, v6Ref, new BaseEdge(true, 11));
    graph.addEdge(v6Ref, v7, new BaseEdge(true, 11));
    graph.addEdge(v5, v6Alt, new BaseEdge(false, 55));
    graph.addEdge(v6Alt, v7, new BaseEdge(false, 55));
    graph.addEdge(v7, postV, new BaseEdge(false, 1));
    // Construct the test path
    Path<SeqVertex, BaseEdge> path = new Path<>((offRefBeginning ? preV : v), graph);
    if (offRefBeginning)
        path = new Path<>(path, graph.getEdge(preV, v));
    path = new Path<>(path, graph.getEdge(v, v2Alt));
    path = new Path<>(path, graph.getEdge(v2Alt, v3));
    path = new Path<>(path, graph.getEdge(v3, v4Ref));
    path = new Path<>(path, graph.getEdge(v4Ref, v5));
    path = new Path<>(path, graph.getEdge(v5, v6Alt));
    path = new Path<>(path, graph.getEdge(v6Alt, v7));
    if (offRefEnding)
        path = new Path<>(path, graph.getEdge(v7, postV));
    // Construct the actual cigar string implied by the test path
    Cigar expectedCigar = new Cigar();
    if (offRefBeginning) {
        expectedCigar.add(new CigarElement(preAltOption.length(), CigarOperator.I));
    }
    expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
    // first bubble
    if (refBubbleLength > altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if (refBubbleLength < altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(midRef1.length(), CigarOperator.M));
    // second bubble is ref path
    expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    expectedCigar.add(new CigarElement(midRef2.length(), CigarOperator.M));
    // third bubble
    if (refBubbleLength > altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if (refBubbleLength < altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
    if (offRefEnding) {
        expectedCigar.add(new CigarElement(postAltOption.length(), CigarOperator.I));
    }
    Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch: ref = " + ref + " alt " + new String(path.getBases()));
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 39 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.

the class ReadThreadingAssemblerUnitTest method assemble.

private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
    activeRegion.addAll(reads);
    //        logger.warn("Assembling " + activeRegion + " with " + engine);
    final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
    return assemblyResultSet.getHaplotypeList();
}
Also used : Cigar(htsjdk.samtools.Cigar) AssemblyRegion(org.broadinstitute.hellbender.engine.AssemblyRegion) AssemblyResultSet(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyResultSet) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) KBestHaplotype(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotype) CigarElement(htsjdk.samtools.CigarElement)

Example 40 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class NDNCigarReadTransformer method refactorNDNtoN.

/**
     * Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements).
     */
@VisibleForTesting
protected static Cigar refactorNDNtoN(final Cigar originalCigar) {
    final Cigar refactoredCigar = new Cigar();
    final int cigarLength = originalCigar.numCigarElements();
    for (int i = 0; i < cigarLength; i++) {
        final CigarElement element = originalCigar.getCigarElement(i);
        if (element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i, cigarLength)) {
            final CigarElement nextElement = originalCigar.getCigarElement(i + 1);
            final CigarElement nextNextElement = originalCigar.getCigarElement(i + 2);
            // if it is N-D-N replace with N (with the total length) otherwise just add the first N.
            if (nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N) {
                final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength();
                final CigarElement refactoredElement = new CigarElement(threeElementsLength, CigarOperator.N);
                refactoredCigar.add(refactoredElement);
                //skip the elements that were refactored
                i += 2;
            } else {
                // add only the first N
                refactoredCigar.add(element);
            }
        } else {
            // add any non-N element
            refactoredCigar.add(element);
        }
    }
    return refactoredCigar;
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13