use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class PileupElementUnitTest method testPileupElementTest.
@Test(dataProvider = "PileupElementTest")
public void testPileupElementTest(final LIBSTest params) {
final GATKRead read = params.makeRead();
final AlignmentStateMachine state = new AlignmentStateMachine(read);
final LIBS_position tester = new LIBS_position(read);
while (state.stepForwardOnGenome() != null) {
tester.stepForwardOnGenome();
final PileupElement pe = state.makePileupElement();
Assert.assertEquals(pe.getRead(), read);
Assert.assertEquals(pe.getMappingQual(), read.getMappingQuality());
Assert.assertEquals(pe.getOffset(), state.getReadOffset());
Assert.assertEquals(pe.isDeletion(), state.getCigarOperator() == D);
Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
if (!hasNeighboringPaddedOps(params.getElements(), pe.getCurrentCigarOffset())) {
Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
}
Assert.assertEquals(pe.getOffsetInCurrentCigar(), tester.getCurrentPositionOnOperatorBase0(), "CigarElement index failure");
Assert.assertTrue(pe.getOffsetInCurrentCigar() >= 0, "Offset into current cigar too small");
Assert.assertTrue(pe.getOffsetInCurrentCigar() < pe.getCurrentCigarElement().getLength(), "Offset into current cigar too big");
Assert.assertNotNull(pe.toString());
Assert.assertEquals(pe.atEndOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == state.getCurrentCigarElement().getLength() - 1, "atEndOfCurrentCigar failed");
Assert.assertEquals(pe.atStartOfCurrentCigar(), state.getOffsetIntoCurrentCigarElement() == 0, "atStartOfCurrentCigar failed");
Assert.assertEquals(pe.getBase(), pe.isDeletion() ? PileupElement.DELETION_BASE : read.getBases()[state.getReadOffset()]);
Assert.assertEquals(pe.getQual(), pe.isDeletion() ? PileupElement.DELETION_QUAL : read.getBaseQualities()[state.getReadOffset()]);
Assert.assertEquals(pe.getCurrentCigarElement(), state.getCurrentCigarElement());
Assert.assertEquals(pe.getCurrentCigarOffset(), state.getCurrentCigarElementOffset());
final int lengthOfImmediatelyFollowingIndel = pe.getLengthOfImmediatelyFollowingIndel();
final String basesOfImmediatelyFollowingInsertion = pe.getBasesOfImmediatelyFollowingInsertion();
Assert.assertTrue(lengthOfImmediatelyFollowingIndel != 0 || basesOfImmediatelyFollowingInsertion == null);
Assert.assertTrue(basesOfImmediatelyFollowingInsertion == null || basesOfImmediatelyFollowingInsertion.length() == lengthOfImmediatelyFollowingIndel);
// Don't test -- pe.getBaseIndex();
if (pe.atEndOfCurrentCigar() && state.getCurrentCigarElementOffset() < read.numCigarElements() - 1) {
final CigarElement nextElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() + 1);
if (nextElement.getOperator() == CigarOperator.I) {
Assert.assertTrue(pe.getBetweenNextPosition().size() >= 1);
Assert.assertEquals(pe.getBetweenNextPosition().get(0), nextElement);
}
if (nextElement.getOperator() == M) {
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
}
} else {
Assert.assertTrue(pe.getBetweenNextPosition().isEmpty());
}
if (pe.atStartOfCurrentCigar() && state.getCurrentCigarElementOffset() > 0) {
final CigarElement prevElement = read.getCigar().getCigarElement(state.getCurrentCigarElementOffset() - 1);
if (prevElement.getOperator() == CigarOperator.I) {
Assert.assertTrue(pe.getBetweenPrevPosition().size() >= 1);
Assert.assertEquals(pe.getBetweenPrevPosition().get(pe.getBetweenPrevPosition().size() - 1), prevElement);
}
if (prevElement.getOperator() == M) {
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
}
} else {
Assert.assertTrue(pe.getBetweenPrevPosition().isEmpty());
}
// TODO -- add meaningful tests
pe.getBaseInsertionQual();
pe.getBaseDeletionQual();
}
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class SWPairwiseAlignmentUnitTest method testForIdenticalAlignmentsWithDifferingFlankLengths.
@Test(enabled = true)
public void testForIdenticalAlignmentsWithDifferingFlankLengths() {
//This test is designed to ensure that the indels are correctly placed
//if the region flanking these indels is extended by a varying amount.
//It checks for problems caused by floating point rounding leading to different
//paths being selected.
//Create two versions of the same sequence with different flanking regions.
byte[] paddedRef = "GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".getBytes();
byte[] paddedHap = "GCGTCGCAGTCTTAAGGCCCCGCCTTTTCAGACAGCTTCCGCTGGGCCTGGGCCGCTGCGGGGCGGTCACGGCCCCTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA--GGGCC---------------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGACCGGGCCGAGCCGGGGGAAGGGCTCCGGTGACT".replace("-", "").getBytes();
byte[] notPaddedRef = "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGAGGGGGCCCGGGGCCGCGTCCCTGGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".getBytes();
byte[] notPaddedHap = "CTTTAAGCCTGAGCCCCGCCCCCTGGCTCCCCGCCCCCTCTTCTCCCCTCCCCCAAGCCAGCACCTGGTGCCCCGGCGGGTCGTGCGGCGCGGCGCTCCGCGGTGAGCGCCTGACCCCGA---------GGGCC--------GGGCCCTCCCCACCCTTGCGGTGGCCTCGCGGGTCCCAGGGGCGGGGCTGGAGCGGCAGCAGGGCCGGGGAGATGGGCGGTGGGGAGCGCGGGAGGGA".replace("-", "").getBytes();
//a simplified version of the getCigar routine in the haplotype caller to align these
final String SW_PAD = "NNNNNNNNNN";
final String paddedsRef = SW_PAD + new String(paddedRef) + SW_PAD;
final String paddedsHap = SW_PAD + new String(paddedHap) + SW_PAD;
final String notPaddedsRef = SW_PAD + new String(notPaddedRef) + SW_PAD;
final String notpaddedsHap = SW_PAD + new String(notPaddedHap) + SW_PAD;
final SWPairwiseAlignment paddedAlignment = new SWPairwiseAlignment(paddedsRef.getBytes(), paddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS);
final SWPairwiseAlignment notPaddedAlignment = new SWPairwiseAlignment(notPaddedsRef.getBytes(), notpaddedsHap.getBytes(), CigarUtils.NEW_SW_PARAMETERS);
//Now verify that the two sequences have the same alignment and not match positions.
Cigar rawPadded = paddedAlignment.getCigar();
Cigar notPadded = notPaddedAlignment.getCigar();
List<CigarElement> paddedC = rawPadded.getCigarElements();
List<CigarElement> notPaddedC = notPadded.getCigarElements();
Assert.assertEquals(paddedC.size(), notPaddedC.size());
for (int i = 0; i < notPaddedC.size(); i++) {
CigarElement pc = paddedC.get(i);
CigarElement npc = notPaddedC.get(i);
if (pc.getOperator() == CigarOperator.M && npc.getOperator() == CigarOperator.M) {
continue;
}
int l1 = pc.getLength();
int l2 = npc.getLength();
Assert.assertEquals(l1, l2);
Assert.assertEquals(pc.getOperator(), npc.getOperator());
}
}
use of htsjdk.samtools.CigarElement in project gatk-protected by broadinstitute.
the class KBestHaplotypeFinderUnitTest method testBasicBubbleData.
@Test(dataProvider = "BasicBubbleDataProvider")
public void testBasicBubbleData(final int refBubbleLength, final int altBubbleLength) {
// Construct the assembly graph
SeqGraph graph = new SeqGraph(3);
final String preRef = "ATGG";
final String postRef = "GGGGC";
SeqVertex v = new SeqVertex(preRef);
SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength - 1) + "T");
SeqVertex v3 = new SeqVertex(postRef);
graph.addVertex(v);
graph.addVertex(v2Ref);
graph.addVertex(v2Alt);
graph.addVertex(v3);
graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));
// Construct the test path
Path<SeqVertex, BaseEdge> path = new Path<>(v, graph);
path = new Path<>(path, graph.getEdge(v, v2Alt));
path = new Path<>(path, graph.getEdge(v2Alt, v3));
// Construct the actual cigar string implied by the test path
Cigar expectedCigar = new Cigar();
expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
if (refBubbleLength > altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
} else if (refBubbleLength < altBubbleLength) {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
} else {
expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
}
expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));
final String ref = preRef + v2Ref.getSequenceString() + postRef;
Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch");
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class BaseRecalibrationEngine method calculateIsSNPOrIndel.
/**
* Locates all SNP and indel events, storing them in the provided snp, isIns, and isDel arrays, and returns
* the total number of SNP/indel events.
*
* @param read read to inspect
* @param ref source of reference bses
* @param snp storage for snp events (must be of length read.getBases().length and initialized to all 0's)
* @param isIns storage for insertion events (must be of length read.getBases().length and initialized to all 0's)
* @param isDel storage for deletion events (must be of length read.getBases().length and initialized to all 0's)
* @return the total number of SNP and indel events
*/
protected static int calculateIsSNPOrIndel(final GATKRead read, final ReferenceDataSource ref, int[] snp, int[] isIns, int[] isDel) {
final byte[] refBases = ref.queryAndPrefetch(read.getContig(), read.getStart(), read.getEnd()).getBases();
int readPos = 0;
int refPos = 0;
int nEvents = 0;
for (final CigarElement ce : read.getCigarElements()) {
final int elementLength = ce.getLength();
switch(ce.getOperator()) {
case M:
case EQ:
case X:
for (int i = 0; i < elementLength; i++) {
int snpInt = (BaseUtils.basesAreEqual(read.getBase(readPos), refBases[refPos]) ? 0 : 1);
snp[readPos] = snpInt;
nEvents += snpInt;
readPos++;
refPos++;
}
break;
case D:
{
final int index = (read.isReverseStrand() ? readPos : readPos - 1);
updateIndel(isDel, index);
refPos += elementLength;
break;
}
case N:
refPos += elementLength;
break;
case I:
{
final boolean forwardStrandRead = !read.isReverseStrand();
if (forwardStrandRead) {
updateIndel(isIns, readPos - 1);
}
readPos += elementLength;
if (!forwardStrandRead) {
updateIndel(isIns, readPos);
}
break;
}
case // ReferenceContext doesn't have the soft clipped bases!
S:
readPos += elementLength;
break;
case H:
case P:
break;
default:
throw new GATKException("Unsupported cigar operator: " + ce.getOperator());
}
}
// we don't sum those as we go because they might set the same place to 1 twice
nEvents += MathUtils.sum(isDel) + MathUtils.sum(isIns);
return nEvents;
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class SWPairwiseAlignment method align.
/**
* Aligns the alternate sequence to the reference sequence
*
* @param reference ref sequence
* @param alternate alt sequence
*/
private void align(final byte[] reference, final byte[] alternate) {
if (reference == null || reference.length == 0 || alternate == null || alternate.length == 0)
throw new IllegalArgumentException("Non-null, non-empty sequences are required for the Smith-Waterman calculation");
// avoid running full Smith-Waterman if there is an exact match of alternate in reference
int matchIndex = -1;
if (overhangStrategy == OverhangStrategy.SOFTCLIP || overhangStrategy == OverhangStrategy.IGNORE) {
// Use a substring search to find an exact match of the alternate in the reference
// NOTE: This approach only works for SOFTCLIP and IGNORE overhang strategies
matchIndex = Utils.lastIndexOf(reference, alternate);
}
if (matchIndex != -1) {
// generate the alignment result when the substring search was successful
final List<CigarElement> lce = new ArrayList<>(alternate.length);
lce.add(makeElement(State.MATCH, alternate.length));
alignmentResult = new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), matchIndex);
} else {
// run full Smith-Waterman
final int n = reference.length + 1;
final int m = alternate.length + 1;
final int[][] sw = new int[n][m];
if (keepScoringMatrix) {
SW = sw;
}
final int[][] btrack = new int[n][m];
calculateMatrix(reference, alternate, sw, btrack);
// length of the segment (continuous matches, insertions or deletions)
alignmentResult = calculateCigar(sw, btrack, overhangStrategy);
}
}
Aggregations