use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class BAQ method calculateQueryRange.
/**
* Determine the appropriate start and stop offsets in the reads for the bases given the cigar string
* @param read
* @return
*/
private final Pair<Integer, Integer> calculateQueryRange(final GATKRead read) {
int queryStart = -1, queryStop = -1;
int readI = 0;
// iterate over the cigar elements to determine the start and stop of the read bases for the BAQ calculation
for (CigarElement elt : read.getCigarElements()) {
switch(elt.getOperator()) {
// cannot handle these
case N:
return null;
// ignore pads, hard clips, and deletions
case H:
// ignore pads, hard clips, and deletions
case P:
// ignore pads, hard clips, and deletions
case D:
break;
case I:
case S:
case M:
case EQ:
case X:
int prev = readI;
readI += elt.getLength();
if (elt.getOperator() != CigarOperator.S) {
if (queryStart == -1) {
queryStart = prev;
}
queryStop = readI;
}
// queryStart or queryStop
break;
default:
throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
}
}
if (queryStop == queryStart) {
//System.err.printf("WARNING -- read is completely clipped away: " + read.format());
return null;
}
return new MutablePair<>(queryStart, queryStop);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ClippingOp method getNewAlignmentStartOffset.
/**
* Given two cigar strings corresponding to read before and after soft-clipping, returns an integer
* corresponding to the number of reference bases that the new string must be offset by in order to have the
* correct start according to the reference.
*
* @param clippedCigar the new cigar string after clipping
* @param oldCigar the cigar string of the read before it was soft clipped
*/
@VisibleForTesting
static int getNewAlignmentStartOffset(final Cigar clippedCigar, final Cigar oldCigar) {
// The number of read bases consumed on the new cigar before reference bases are consumed
int readBasesBeforeReference = 0;
// The number of read bases consumed on the old cigar before reference bases are consumed
int basesBeforeReferenceOld = 0;
// A measure of the reference offset between the oldCigar and the clippedCigar
int curReadCounter = 0;
for (final CigarElement e : clippedCigar.getCigarElements()) {
if (!e.getOperator().consumesReferenceBases()) {
if (e.getOperator().consumesReadBases()) {
readBasesBeforeReference += e.getLength();
}
} else {
if (!e.getOperator().consumesReadBases()) {
// Accounting for any D or N cigar operators at the front of the string
basesBeforeReferenceOld -= e.getLength();
} else {
break;
}
}
}
for (final CigarElement e : oldCigar.getCigarElements()) {
int curRefLength = e.getLength();
int curReadLength = e.getLength();
if (!e.getOperator().consumesReadBases()) {
curReadLength = 0;
}
boolean truncated = false;
if (curReadCounter + curReadLength > readBasesBeforeReference) {
curReadLength = readBasesBeforeReference - curReadCounter;
curRefLength = readBasesBeforeReference - curReadCounter;
truncated = true;
}
if (!e.getOperator().consumesReferenceBases()) {
curRefLength = 0;
}
curReadCounter += curReadLength;
basesBeforeReferenceOld += curRefLength;
if (curReadCounter > readBasesBeforeReference || truncated) {
break;
}
}
// if oldNum is negative it means some of the preceding N/Ds were trimmed but not all so we take absolute value
return Math.abs(basesBeforeReferenceOld);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ClippingOp method hardClipCigar.
private CigarShift hardClipCigar(final Cigar cigar, final int start, final int stop) {
final Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
// caused by hard clipping deletions
int alignmentShift = 0;
// hard clip the beginning of the cigar string
if (start == 0) {
final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
CigarElement cigarElement = cigarElementIterator.next();
// Skip all leading hard clips
while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += cigarElement.getLength();
if (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
throw new GATKException("Read is entirely hardclipped, shouldn't be trying to clip it's cigar string");
}
}
// keep clipping until we hit stop
while (index <= stop) {
int shift = 0;
if (cigarElement.getOperator().consumesReadBases()) {
shift = cigarElement.getLength();
}
// we're still clipping or just finished perfectly
if (index + shift == stop + 1) {
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} else // element goes beyond what we need to clip
if (index + shift > stop + 1) {
final int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
index += shift;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
if (index <= stop && cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
break;
}
}
// add the remaining cigar elements
while (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
}
} else // hard clip the end of the cigar string
{
final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
CigarElement cigarElement = cigarElementIterator.next();
// Keep marching on until we find the start
while (index < start) {
int shift = 0;
if (cigarElement.getOperator().consumesReadBases()) {
shift = cigarElement.getLength();
}
// we haven't gotten to the start yet, keep everything as is.
if (index + shift < start) {
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
} else // element goes beyond our clip starting position
{
final int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += elementLengthAfterChopping;
} else // otherwise, maintain what's left of this last operator
{
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
}
index += shift;
if (index < start && cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
break;
}
}
// check if we are hard clipping indels
while (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
// if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += cigarElement.getLength();
}
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
return cleanHardClippedCigar(newCigar);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ReadClipper method hardClipSoftClippedBases.
/**
* Will hard clip every soft clipped bases in the read.
*
* @return a new read without the soft clipped bases
*/
private GATKRead hardClipSoftClippedBases() {
if (read.isEmpty()) {
return read;
}
int readIndex = 0;
// first position to hard clip (inclusive)
int cutLeft = -1;
// first position to hard clip (inclusive)
int cutRight = -1;
// trigger to stop clipping the left tail and start cutting the right tail
boolean rightTail = false;
for (final CigarElement cigarElement : read.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
if (rightTail) {
cutRight = readIndex;
} else {
cutLeft = readIndex + cigarElement.getLength() - 1;
}
} else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
rightTail = true;
}
if (cigarElement.getOperator().consumesReadBases()) {
readIndex += cigarElement.getLength();
}
}
// It is extremely important that we cut the end first otherwise the read coordinates change.
if (cutRight >= 0) {
this.addOp(new ClippingOp(cutRight, read.getLength() - 1));
}
if (cutLeft >= 0) {
this.addOp(new ClippingOp(0, cutLeft));
}
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class PileupElementUnitTest method testCreatePileupForReadAndOffset1.
@Test
public void testCreatePileupForReadAndOffset1() {
final GATKRead read = ArtificialReadUtils.createArtificialRead("100M");
final PileupElement pe0 = PileupElement.createPileupForReadAndOffset(read, 0);
final List<CigarElement> betweenNextPosition = pe0.getBetweenNextPosition();
Assert.assertTrue(betweenNextPosition.isEmpty());
Assert.assertEquals(pe0.getOffset(), 0);
final PileupElement pe10 = PileupElement.createPileupForReadAndOffset(read, 10);
Assert.assertEquals(pe10.getOffset(), 10);
}
Aggregations