use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testRefAltSW.
@Test(dataProvider = "SystematicRefAltSWTestData")
public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) {
// Construct the assembly graph
SeqGraph graph = new SeqGraph(11);
final int padSize = 0;
SeqVertex top = new SeqVertex(Strings.repeat("N", padSize));
SeqVertex ref = new SeqVertex(prefix + refMid + end);
SeqVertex alt = new SeqVertex(prefix + altMid + end);
SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize));
graph.addVertices(top, ref, alt, bot);
graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot);
graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot);
// Construct the test path
Path<SeqVertex, BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph);
Cigar expected = new Cigar();
expected.add(new CigarElement(padSize, CigarOperator.M));
if (!prefix.equals(""))
expected.add(new CigarElement(prefix.length(), CigarOperator.M));
for (final CigarElement elt : TextCigarCodec.decode(midCigar).getCigarElements()) expected.add(elt);
if (!end.equals(""))
expected.add(new CigarElement(end.length(), CigarOperator.M));
expected.add(new CigarElement(padSize, CigarOperator.M));
expected = AlignmentUtils.consolidateCigar(expected);
final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString();
final Cigar pathCigar = path.calculateCigar(refString.getBytes());
logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar);
logger.warn("Path " + path + " with cigar " + pathCigar);
logger.warn("Expected cigar " + expected);
Assert.assertEquals(pathCigar, expected, "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases()));
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testLeftAlignCigarSequentially.
@Test
public void testLeftAlignCigarSequentially() {
String preRefString = "GATCGATCGATC";
String postRefString = "TTT";
String refString = "ATCGAGGAGAGCGCCCCG";
String indelString1 = "X";
String indelString2 = "YZ";
int refIndel1 = 10;
int refIndel2 = 12;
for (final int indelSize1 : Arrays.asList(1, 2, 3, 4)) {
for (final int indelOp1 : Arrays.asList(1, -1)) {
for (final int indelSize2 : Arrays.asList(1, 2, 3, 4)) {
for (final int indelOp2 : Arrays.asList(1, -1)) {
Cigar expectedCigar = new Cigar();
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
Cigar givenCigar = new Cigar();
givenCigar.add(new CigarElement(refString.length() + refIndel1 / 2, CigarOperator.M));
givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1 / 2 - indelSize1) : refIndel1 / 2) + refString.length() + refIndel2 / 2 * 2, CigarOperator.M));
givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 / 2 - indelSize2) * 2 : refIndel2 / 2 * 2) + refString.length(), CigarOperator.M));
String theRef = preRefString + refString + Strings.repeat(indelString1, refIndel1) + refString + Strings.repeat(indelString2, refIndel2) + refString + postRefString;
String theRead = refString + Strings.repeat(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Strings.repeat(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;
Cigar calculatedCigar = CigarUtils.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
}
}
}
}
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testTripleBubbleData.
@Test(dataProvider = "TripleBubbleDataProvider")
public void testTripleBubbleData(final int refBubbleLength, final int altBubbleLength, final boolean offRefBeginning, final boolean offRefEnding) {
// Construct the assembly graph
SeqGraph graph = new SeqGraph(11);
final String preAltOption = "ATCGATCGATCGATCGATCG";
final String postAltOption = "CCCC";
final String preRef = "ATGG";
final String postRef = "GGCCG";
final String midRef1 = "TTCCT";
final String midRef2 = "CCCAAAAAAAAAAAA";
SeqVertex preV = new SeqVertex(preAltOption);
SeqVertex v = new SeqVertex(preRef);
SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
SeqVertex v4Ref = new SeqVertex(Strings.repeat("C", refBubbleLength));
SeqVertex v4Alt = new SeqVertex(Strings.repeat("C", altBubbleLength - 1) + "T");
SeqVertex v6Ref = new SeqVertex(Strings.repeat("G", refBubbleLength));
SeqVertex v6Alt = new SeqVertex(Strings.repeat("G", altBubbleLength - 1) + "T");
SeqVertex v3 = new SeqVertex(midRef1);
SeqVertex v5 = new SeqVertex(midRef2);
SeqVertex v7 = new SeqVertex(postRef);
SeqVertex postV = new SeqVertex(postAltOption);
final String ref = preRef + v2Ref.getSequenceString() + midRef1 + v4Ref.getSequenceString() + midRef2 + v6Ref.getSequenceString() + postRef;
graph.addVertex(preV);
graph.addVertex(v);
graph.addVertex(v2Ref);
graph.addVertex(v2Alt);
graph.addVertex(v3);
graph.addVertex(v4Ref);
graph.addVertex(v4Alt);
graph.addVertex(v5);
graph.addVertex(v6Ref);
graph.addVertex(v6Alt);
graph.addVertex(v7);
graph.addVertex(postV);
graph.addEdge(preV, v, new BaseEdge(false, 1));
graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
graph.addEdge(v3, v4Ref, new BaseEdge(true, 10));
graph.addEdge(v4Ref, v5, new BaseEdge(true, 10));
graph.addEdge(v3, v4Alt, new BaseEdge(false, 5));
graph.addEdge(v4Alt, v5, new BaseEdge(false, 5));
graph.addEdge(v5, v6Ref, new BaseEdge(true, 11));
graph.addEdge(v6Ref, v7, new BaseEdge(true, 11));
graph.addEdge(v5, v6Alt, new BaseEdge(false, 55));
graph.addEdge(v6Alt, v7, new BaseEdge(false, 55));
graph.addEdge(v7, postV, new BaseEdge(false, 1));
// Construct the test path
Path<SeqVertex, BaseEdge> path = new Path<>((offRefBeginning ? preV : v), graph);
if (offRefBeginning)
path = new Path<>(path, graph.getEdge(preV, v));
path = new Path<>(path, graph.getEdge(v, v2Alt));
path = new Path<>(path, graph.getEdge(v2Alt, v3));
path = new Path<>(path, graph.getEdge(v3, v4Ref));
path = new Path<>(path, graph.getEdge(v4Ref, v5));
path = new Path<>(path, graph.getEdge(v5, v6Alt));
path = new Path<>(path, graph.getEdge(v6Alt, v7));
if (offRefEnding)
path = new Path<>(path, graph.getEdge(v7, postV));
// Construct the actual cigar string implied by the test path
Cigar expectedCigar = new Cigar();
if (offRefBeginning) {
expectedCigar.add(new CigarElement(preAltOption.length(), CigarOperator.I));
}
expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
// first bubble
if (refBubbleLength > altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
} else if (refBubbleLength < altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
} else {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
}
expectedCigar.add(new CigarElement(midRef1.length(), CigarOperator.M));
// second bubble is ref path
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(midRef2.length(), CigarOperator.M));
// third bubble
if (refBubbleLength > altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
} else if (refBubbleLength < altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
} else {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
}
expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
if (offRefEnding) {
expectedCigar.add(new CigarElement(postAltOption.length(), CigarOperator.I));
}
Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch: ref = " + ref + " alt " + new String(path.getBases()));
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class Civar method toCigar.
/**
* Transforms a civar into the equivalent Cigar.
* @return never {@code null}.
*/
public Cigar toCigar(final int templateLength) {
int minSeqLen = minimumTemplateSequenceSize();
Utils.validateArg(expands() || templateLength == minSeqLen, "the sequence provided does not match this Civar size " + templateLength + " != " + minSeqLen);
Utils.validateArg(templateLength >= minSeqLen, "the sequence provided is too small for this Civar " + templateLength + " < " + minSeqLen);
int starCount = starCount();
int paddingTotal = templateLength - minSeqLen;
int starPadding = starCount == 0 ? 0 : paddingTotal / starCount;
int excessPadding = starCount == 0 ? paddingTotal : paddingTotal % starCount;
// We first get the equivalent cigar elements for the elements in the Civar.
final List<CigarElement> cigarElements = new LinkedList<>();
for (final Element e : elements()) {
final int size = e.size(starPadding, excessPadding);
excessPadding -= e.excessPaddingUsed(excessPadding);
switch(e.operator()) {
case EMBEDDED:
cigarElements.addAll(e.embedded.toCigar(size).getCigarElements());
break;
case MATCH:
case TRANSITION:
case COMPLEMENT:
case TRANSVERSION:
cigarElements.add(new CigarElement(size, CigarOperator.M));
break;
case INSERTION:
cigarElements.add(new CigarElement(size, CigarOperator.I));
break;
case DELETION:
cigarElements.add(new CigarElement(size, CigarOperator.D));
break;
default:
}
}
// No we look for consequitive elements with the same operator and we merge them.
final ListIterator<CigarElement> it = cigarElements.listIterator();
while (it.hasNext()) {
final CigarElement thisElement = it.next();
if (!it.hasNext())
continue;
final CigarElement nextElement = it.next();
if (thisElement.getOperator() == nextElement.getOperator()) {
final int nextLength = nextElement.getLength();
it.remove();
it.previous();
it.set(new CigarElement(thisElement.getLength() + nextLength, thisElement.getOperator()));
} else
it.previous();
}
return new Cigar(cigarElements);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class CigarConversionUtilsUnitTest method testConvertCigarUnitToSAMCigarElement.
@Test(dataProvider = "MatchingCigarUnitsAndElementsData")
public void testConvertCigarUnitToSAMCigarElement(final CigarUnit cigarUnit, final CigarElement cigarElement) {
final CigarElement convertedElement = CigarConversionUtils.convertCigarUnitToSAMCigarElement(cigarUnit);
Assert.assertEquals(convertedElement, cigarElement, "CigarUnit -> CigarElement conversion failed");
}
Aggregations