use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testLeftAlignCigarSequentially.
@Test
public void testLeftAlignCigarSequentially() {
String preRefString = "GATCGATCGATC";
String postRefString = "TTT";
String refString = "ATCGAGGAGAGCGCCCCG";
String indelString1 = "X";
String indelString2 = "YZ";
int refIndel1 = 10;
int refIndel2 = 12;
for (final int indelSize1 : Arrays.asList(1, 2, 3, 4)) {
for (final int indelOp1 : Arrays.asList(1, -1)) {
for (final int indelSize2 : Arrays.asList(1, 2, 3, 4)) {
for (final int indelOp2 : Arrays.asList(1, -1)) {
Cigar expectedCigar = new Cigar();
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
Cigar givenCigar = new Cigar();
givenCigar.add(new CigarElement(refString.length() + refIndel1 / 2, CigarOperator.M));
givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1 / 2 - indelSize1) : refIndel1 / 2) + refString.length() + refIndel2 / 2 * 2, CigarOperator.M));
givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 / 2 - indelSize2) * 2 : refIndel2 / 2 * 2) + refString.length(), CigarOperator.M));
String theRef = preRefString + refString + Strings.repeat(indelString1, refIndel1) + refString + Strings.repeat(indelString2, refIndel2) + refString + postRefString;
String theRead = refString + Strings.repeat(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Strings.repeat(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;
Cigar calculatedCigar = CigarUtils.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
}
}
}
}
}
use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testRefAltSW.
@Test(dataProvider = "SystematicRefAltSWTestData")
public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) {
// Construct the assembly graph
SeqGraph graph = new SeqGraph(11);
final int padSize = 0;
SeqVertex top = new SeqVertex(Strings.repeat("N", padSize));
SeqVertex ref = new SeqVertex(prefix + refMid + end);
SeqVertex alt = new SeqVertex(prefix + altMid + end);
SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize));
graph.addVertices(top, ref, alt, bot);
graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot);
graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot);
// Construct the test path
Path<SeqVertex, BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph);
Cigar expected = new Cigar();
expected.add(new CigarElement(padSize, CigarOperator.M));
if (!prefix.equals(""))
expected.add(new CigarElement(prefix.length(), CigarOperator.M));
for (final CigarElement elt : TextCigarCodec.decode(midCigar).getCigarElements()) expected.add(elt);
if (!end.equals(""))
expected.add(new CigarElement(end.length(), CigarOperator.M));
expected.add(new CigarElement(padSize, CigarOperator.M));
expected = AlignmentUtils.consolidateCigar(expected);
final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString();
final Cigar pathCigar = path.calculateCigar(refString.getBytes());
logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar);
logger.warn("Path " + path + " with cigar " + pathCigar);
logger.warn("Expected cigar " + expected);
Assert.assertEquals(pathCigar, expected, "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases()));
}
use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testTripleBubbleData.
@Test(dataProvider = "TripleBubbleDataProvider")
public void testTripleBubbleData(final int refBubbleLength, final int altBubbleLength, final boolean offRefBeginning, final boolean offRefEnding) {
// Construct the assembly graph
SeqGraph graph = new SeqGraph(11);
final String preAltOption = "ATCGATCGATCGATCGATCG";
final String postAltOption = "CCCC";
final String preRef = "ATGG";
final String postRef = "GGCCG";
final String midRef1 = "TTCCT";
final String midRef2 = "CCCAAAAAAAAAAAA";
SeqVertex preV = new SeqVertex(preAltOption);
SeqVertex v = new SeqVertex(preRef);
SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
SeqVertex v4Ref = new SeqVertex(Strings.repeat("C", refBubbleLength));
SeqVertex v4Alt = new SeqVertex(Strings.repeat("C", altBubbleLength - 1) + "T");
SeqVertex v6Ref = new SeqVertex(Strings.repeat("G", refBubbleLength));
SeqVertex v6Alt = new SeqVertex(Strings.repeat("G", altBubbleLength - 1) + "T");
SeqVertex v3 = new SeqVertex(midRef1);
SeqVertex v5 = new SeqVertex(midRef2);
SeqVertex v7 = new SeqVertex(postRef);
SeqVertex postV = new SeqVertex(postAltOption);
final String ref = preRef + v2Ref.getSequenceString() + midRef1 + v4Ref.getSequenceString() + midRef2 + v6Ref.getSequenceString() + postRef;
graph.addVertex(preV);
graph.addVertex(v);
graph.addVertex(v2Ref);
graph.addVertex(v2Alt);
graph.addVertex(v3);
graph.addVertex(v4Ref);
graph.addVertex(v4Alt);
graph.addVertex(v5);
graph.addVertex(v6Ref);
graph.addVertex(v6Alt);
graph.addVertex(v7);
graph.addVertex(postV);
graph.addEdge(preV, v, new BaseEdge(false, 1));
graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
graph.addEdge(v3, v4Ref, new BaseEdge(true, 10));
graph.addEdge(v4Ref, v5, new BaseEdge(true, 10));
graph.addEdge(v3, v4Alt, new BaseEdge(false, 5));
graph.addEdge(v4Alt, v5, new BaseEdge(false, 5));
graph.addEdge(v5, v6Ref, new BaseEdge(true, 11));
graph.addEdge(v6Ref, v7, new BaseEdge(true, 11));
graph.addEdge(v5, v6Alt, new BaseEdge(false, 55));
graph.addEdge(v6Alt, v7, new BaseEdge(false, 55));
graph.addEdge(v7, postV, new BaseEdge(false, 1));
// Construct the test path
Path<SeqVertex, BaseEdge> path = new Path<>((offRefBeginning ? preV : v), graph);
if (offRefBeginning)
path = new Path<>(path, graph.getEdge(preV, v));
path = new Path<>(path, graph.getEdge(v, v2Alt));
path = new Path<>(path, graph.getEdge(v2Alt, v3));
path = new Path<>(path, graph.getEdge(v3, v4Ref));
path = new Path<>(path, graph.getEdge(v4Ref, v5));
path = new Path<>(path, graph.getEdge(v5, v6Alt));
path = new Path<>(path, graph.getEdge(v6Alt, v7));
if (offRefEnding)
path = new Path<>(path, graph.getEdge(v7, postV));
// Construct the actual cigar string implied by the test path
Cigar expectedCigar = new Cigar();
if (offRefBeginning) {
expectedCigar.add(new CigarElement(preAltOption.length(), CigarOperator.I));
}
expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
// first bubble
if (refBubbleLength > altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
} else if (refBubbleLength < altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
} else {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
}
expectedCigar.add(new CigarElement(midRef1.length(), CigarOperator.M));
// second bubble is ref path
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(midRef2.length(), CigarOperator.M));
// third bubble
if (refBubbleLength > altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
} else if (refBubbleLength < altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
} else {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
}
expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
if (offRefEnding) {
expectedCigar.add(new CigarElement(postAltOption.length(), CigarOperator.I));
}
Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch: ref = " + ref + " alt " + new String(path.getBases()));
}
use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.
the class ReadThreadingAssemblerUnitTest method assemble.
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
final Haplotype refHaplotype = new Haplotype(refBases, true);
final Cigar c = new Cigar();
c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
refHaplotype.setCigar(c);
final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
activeRegion.addAll(reads);
// logger.warn("Assembling " + activeRegion + " with " + engine);
final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
return assemblyResultSet.getHaplotypeList();
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class NDNCigarReadTransformer method refactorNDNtoN.
/**
* Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements).
*/
@VisibleForTesting
protected static Cigar refactorNDNtoN(final Cigar originalCigar) {
final Cigar refactoredCigar = new Cigar();
final int cigarLength = originalCigar.numCigarElements();
for (int i = 0; i < cigarLength; i++) {
final CigarElement element = originalCigar.getCigarElement(i);
if (element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i, cigarLength)) {
final CigarElement nextElement = originalCigar.getCigarElement(i + 1);
final CigarElement nextNextElement = originalCigar.getCigarElement(i + 2);
// if it is N-D-N replace with N (with the total length) otherwise just add the first N.
if (nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N) {
final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength();
final CigarElement refactoredElement = new CigarElement(threeElementsLength, CigarOperator.N);
refactoredCigar.add(refactoredElement);
//skip the elements that were refactored
i += 2;
} else {
// add only the first N
refactoredCigar.add(element);
}
} else {
// add any non-N element
refactoredCigar.add(element);
}
}
return refactoredCigar;
}
Aggregations