Search in sources :

Example 41 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testRefAltSW.

@Test(dataProvider = "SystematicRefAltSWTestData")
public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(11);
    final int padSize = 0;
    SeqVertex top = new SeqVertex(Strings.repeat("N", padSize));
    SeqVertex ref = new SeqVertex(prefix + refMid + end);
    SeqVertex alt = new SeqVertex(prefix + altMid + end);
    SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize));
    graph.addVertices(top, ref, alt, bot);
    graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot);
    graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot);
    // Construct the test path
    Path<SeqVertex, BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph);
    Cigar expected = new Cigar();
    expected.add(new CigarElement(padSize, CigarOperator.M));
    if (!prefix.equals(""))
        expected.add(new CigarElement(prefix.length(), CigarOperator.M));
    for (final CigarElement elt : TextCigarCodec.decode(midCigar).getCigarElements()) expected.add(elt);
    if (!end.equals(""))
        expected.add(new CigarElement(end.length(), CigarOperator.M));
    expected.add(new CigarElement(padSize, CigarOperator.M));
    expected = AlignmentUtils.consolidateCigar(expected);
    final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString();
    final Cigar pathCigar = path.calculateCigar(refString.getBytes());
    logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar);
    logger.warn("Path " + path + " with cigar " + pathCigar);
    logger.warn("Expected cigar " + expected);
    Assert.assertEquals(pathCigar, expected, "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases()));
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 42 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testLeftAlignCigarSequentially.

@Test
public void testLeftAlignCigarSequentially() {
    String preRefString = "GATCGATCGATC";
    String postRefString = "TTT";
    String refString = "ATCGAGGAGAGCGCCCCG";
    String indelString1 = "X";
    String indelString2 = "YZ";
    int refIndel1 = 10;
    int refIndel2 = 12;
    for (final int indelSize1 : Arrays.asList(1, 2, 3, 4)) {
        for (final int indelOp1 : Arrays.asList(1, -1)) {
            for (final int indelSize2 : Arrays.asList(1, 2, 3, 4)) {
                for (final int indelOp2 : Arrays.asList(1, -1)) {
                    Cigar expectedCigar = new Cigar();
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    Cigar givenCigar = new Cigar();
                    givenCigar.add(new CigarElement(refString.length() + refIndel1 / 2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1 / 2 - indelSize1) : refIndel1 / 2) + refString.length() + refIndel2 / 2 * 2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 / 2 - indelSize2) * 2 : refIndel2 / 2 * 2) + refString.length(), CigarOperator.M));
                    String theRef = preRefString + refString + Strings.repeat(indelString1, refIndel1) + refString + Strings.repeat(indelString2, refIndel2) + refString + postRefString;
                    String theRead = refString + Strings.repeat(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Strings.repeat(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;
                    Cigar calculatedCigar = CigarUtils.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
                    Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
                }
            }
        }
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 43 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class KBestHaplotypeFinderUnitTest method testTripleBubbleData.

@Test(dataProvider = "TripleBubbleDataProvider")
public void testTripleBubbleData(final int refBubbleLength, final int altBubbleLength, final boolean offRefBeginning, final boolean offRefEnding) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(11);
    final String preAltOption = "ATCGATCGATCGATCGATCG";
    final String postAltOption = "CCCC";
    final String preRef = "ATGG";
    final String postRef = "GGCCG";
    final String midRef1 = "TTCCT";
    final String midRef2 = "CCCAAAAAAAAAAAA";
    SeqVertex preV = new SeqVertex(preAltOption);
    SeqVertex v = new SeqVertex(preRef);
    SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
    SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
    SeqVertex v4Ref = new SeqVertex(Strings.repeat("C", refBubbleLength));
    SeqVertex v4Alt = new SeqVertex(Strings.repeat("C", altBubbleLength - 1) + "T");
    SeqVertex v6Ref = new SeqVertex(Strings.repeat("G", refBubbleLength));
    SeqVertex v6Alt = new SeqVertex(Strings.repeat("G", altBubbleLength - 1) + "T");
    SeqVertex v3 = new SeqVertex(midRef1);
    SeqVertex v5 = new SeqVertex(midRef2);
    SeqVertex v7 = new SeqVertex(postRef);
    SeqVertex postV = new SeqVertex(postAltOption);
    final String ref = preRef + v2Ref.getSequenceString() + midRef1 + v4Ref.getSequenceString() + midRef2 + v6Ref.getSequenceString() + postRef;
    graph.addVertex(preV);
    graph.addVertex(v);
    graph.addVertex(v2Ref);
    graph.addVertex(v2Alt);
    graph.addVertex(v3);
    graph.addVertex(v4Ref);
    graph.addVertex(v4Alt);
    graph.addVertex(v5);
    graph.addVertex(v6Ref);
    graph.addVertex(v6Alt);
    graph.addVertex(v7);
    graph.addVertex(postV);
    graph.addEdge(preV, v, new BaseEdge(false, 1));
    graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
    graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
    graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
    graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
    graph.addEdge(v3, v4Ref, new BaseEdge(true, 10));
    graph.addEdge(v4Ref, v5, new BaseEdge(true, 10));
    graph.addEdge(v3, v4Alt, new BaseEdge(false, 5));
    graph.addEdge(v4Alt, v5, new BaseEdge(false, 5));
    graph.addEdge(v5, v6Ref, new BaseEdge(true, 11));
    graph.addEdge(v6Ref, v7, new BaseEdge(true, 11));
    graph.addEdge(v5, v6Alt, new BaseEdge(false, 55));
    graph.addEdge(v6Alt, v7, new BaseEdge(false, 55));
    graph.addEdge(v7, postV, new BaseEdge(false, 1));
    // Construct the test path
    Path<SeqVertex, BaseEdge> path = new Path<>((offRefBeginning ? preV : v), graph);
    if (offRefBeginning)
        path = new Path<>(path, graph.getEdge(preV, v));
    path = new Path<>(path, graph.getEdge(v, v2Alt));
    path = new Path<>(path, graph.getEdge(v2Alt, v3));
    path = new Path<>(path, graph.getEdge(v3, v4Ref));
    path = new Path<>(path, graph.getEdge(v4Ref, v5));
    path = new Path<>(path, graph.getEdge(v5, v6Alt));
    path = new Path<>(path, graph.getEdge(v6Alt, v7));
    if (offRefEnding)
        path = new Path<>(path, graph.getEdge(v7, postV));
    // Construct the actual cigar string implied by the test path
    Cigar expectedCigar = new Cigar();
    if (offRefBeginning) {
        expectedCigar.add(new CigarElement(preAltOption.length(), CigarOperator.I));
    }
    expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
    // first bubble
    if (refBubbleLength > altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if (refBubbleLength < altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(midRef1.length(), CigarOperator.M));
    // second bubble is ref path
    expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    expectedCigar.add(new CigarElement(midRef2.length(), CigarOperator.M));
    // third bubble
    if (refBubbleLength > altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if (refBubbleLength < altBubbleLength) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
    if (offRefEnding) {
        expectedCigar.add(new CigarElement(postAltOption.length(), CigarOperator.I));
    }
    Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch: ref = " + ref + " alt " + new String(path.getBases()));
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 44 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class Civar method toCigar.

/**
     * Transforms a civar into the equivalent Cigar.
     * @return never {@code null}.
     */
public Cigar toCigar(final int templateLength) {
    int minSeqLen = minimumTemplateSequenceSize();
    Utils.validateArg(expands() || templateLength == minSeqLen, "the sequence provided does not match this Civar size " + templateLength + " != " + minSeqLen);
    Utils.validateArg(templateLength >= minSeqLen, "the sequence provided is too small for this Civar " + templateLength + " < " + minSeqLen);
    int starCount = starCount();
    int paddingTotal = templateLength - minSeqLen;
    int starPadding = starCount == 0 ? 0 : paddingTotal / starCount;
    int excessPadding = starCount == 0 ? paddingTotal : paddingTotal % starCount;
    // We first get the equivalent cigar elements for the elements in the Civar.
    final List<CigarElement> cigarElements = new LinkedList<>();
    for (final Element e : elements()) {
        final int size = e.size(starPadding, excessPadding);
        excessPadding -= e.excessPaddingUsed(excessPadding);
        switch(e.operator()) {
            case EMBEDDED:
                cigarElements.addAll(e.embedded.toCigar(size).getCigarElements());
                break;
            case MATCH:
            case TRANSITION:
            case COMPLEMENT:
            case TRANSVERSION:
                cigarElements.add(new CigarElement(size, CigarOperator.M));
                break;
            case INSERTION:
                cigarElements.add(new CigarElement(size, CigarOperator.I));
                break;
            case DELETION:
                cigarElements.add(new CigarElement(size, CigarOperator.D));
                break;
            default:
        }
    }
    // No we look for consequitive elements with the same operator and we merge them.
    final ListIterator<CigarElement> it = cigarElements.listIterator();
    while (it.hasNext()) {
        final CigarElement thisElement = it.next();
        if (!it.hasNext())
            continue;
        final CigarElement nextElement = it.next();
        if (thisElement.getOperator() == nextElement.getOperator()) {
            final int nextLength = nextElement.getLength();
            it.remove();
            it.previous();
            it.set(new CigarElement(thisElement.getLength() + nextLength, thisElement.getOperator()));
        } else
            it.previous();
    }
    return new Cigar(cigarElements);
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement) CigarElement(htsjdk.samtools.CigarElement)

Example 45 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class CigarConversionUtilsUnitTest method testConvertCigarUnitToSAMCigarElement.

@Test(dataProvider = "MatchingCigarUnitsAndElementsData")
public void testConvertCigarUnitToSAMCigarElement(final CigarUnit cigarUnit, final CigarElement cigarElement) {
    final CigarElement convertedElement = CigarConversionUtils.convertCigarUnitToSAMCigarElement(cigarUnit);
    Assert.assertEquals(convertedElement, cigarElement, "CigarUnit -> CigarElement conversion failed");
}
Also used : CigarElement(htsjdk.samtools.CigarElement) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13