use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class HostAlignmentReadFilter method test.
public boolean test(final Cigar cigar, final int numMismatches) {
int numMatches = -numMismatches;
int numCov = 0;
final List<CigarElement> cigarElements = cigar.getCigarElements();
for (final CigarElement e : cigarElements) {
if (e.getOperator().equals(CigarOperator.MATCH_OR_MISMATCH)) {
numMatches += e.getLength();
numCov += e.getLength();
} else if (e.getOperator().equals(CigarOperator.INSERTION)) {
numCov += e.getLength();
} else if (e.getOperator().equals(CigarOperator.DELETION)) {
numMatches -= e.getLength();
}
}
return numCov < minCoverage || numMatches < minIdentity;
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class BAQ method calcBAQFromHMM.
// we need to pad ref by at least the bandwidth / 2 on either side
@SuppressWarnings("fallthrough")
public BAQCalculationResult calcBAQFromHMM(GATKRead read, byte[] ref, int refOffset) {
// todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
Pair<Integer, Integer> queryRange = calculateQueryRange(read);
// read has Ns, or is completely clipped away
if (queryRange == null)
return null;
int queryStart = queryRange.getLeft();
int queryEnd = queryRange.getRight();
BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getBases(), read.getBaseQualities(), queryStart, queryEnd);
// cap quals
int readI = 0, refI = 0;
for (CigarElement elt : read.getCigarElements()) {
int l = elt.getLength();
switch(elt.getOperator()) {
case // cannot handle these
N:
return null;
case H:
case // ignore pads and hard clips
P:
break;
// move the reference too, in addition to I
case S:
refI += l;
case I:
// todo -- is it really the case that we want to treat I and S the same?
for (int i = readI; i < readI + l; i++) baqResult.bq[i] = baqResult.rawQuals[i];
readI += l;
break;
case D:
refI += l;
break;
case M:
case EQ:
case //all three operators are equivalent here.
X:
for (int i = readI; i < readI + l; i++) {
int expectedPos = refI - refOffset + (i - readI);
baqResult.bq[i] = capBaseByBAQ(baqResult.rawQuals[i], baqResult.bq[i], baqResult.state[i], expectedPos);
}
readI += l;
refI += l;
break;
default:
throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
}
}
if (// odd cigar string
readI != read.getLength())
System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length);
return baqResult;
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ClippingOp method softClip.
/**
* Given a cigar string, soft clip up to startClipEnd and soft clip starting at endClipBegin
*/
private Cigar softClip(final Cigar __cigar, final int __startClipEnd, final int __endClipBegin, final boolean runAsserts) {
if (__endClipBegin <= __startClipEnd) {
//whole thing should be soft clipped
int cigarLength = 0;
for (final CigarElement e : __cigar.getCigarElements()) {
cigarLength += e.getLength();
}
final Cigar newCigar = new Cigar();
newCigar.add(new CigarElement(cigarLength, CigarOperator.SOFT_CLIP));
if (runAsserts) {
assert newCigar.isValid(null, -1) == null;
}
return newCigar;
}
int curLength = 0;
final Vector<CigarElement> newElements = new Vector<>();
for (final CigarElement curElem : __cigar.getCigarElements()) {
if (!curElem.getOperator().consumesReadBases()) {
if (curElem.getOperator() == CigarOperator.HARD_CLIP || curLength > __startClipEnd && curLength < __endClipBegin) {
newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
}
continue;
}
final int s = curLength;
final int e = curLength + curElem.getLength();
if (e <= __startClipEnd || s >= __endClipBegin) {
//must turn this entire thing into a clip
newElements.add(new CigarElement(curElem.getLength(), CigarOperator.SOFT_CLIP));
} else if (s >= __startClipEnd && e <= __endClipBegin) {
//same thing
newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
} else {
//we are clipping in the middle of this guy
CigarElement newStart = null;
CigarElement newMid = null;
CigarElement newEnd = null;
int midLength = curElem.getLength();
if (s < __startClipEnd) {
newStart = new CigarElement(__startClipEnd - s, CigarOperator.SOFT_CLIP);
midLength -= newStart.getLength();
}
if (e > __endClipBegin) {
newEnd = new CigarElement(e - __endClipBegin, CigarOperator.SOFT_CLIP);
midLength -= newEnd.getLength();
}
if (runAsserts) {
assert midLength >= 0;
}
if (midLength > 0) {
newMid = new CigarElement(midLength, curElem.getOperator());
}
if (newStart != null) {
newElements.add(newStart);
}
if (newMid != null) {
newElements.add(newMid);
}
if (newEnd != null) {
newElements.add(newEnd);
}
}
curLength += curElem.getLength();
}
final Vector<CigarElement> finalNewElements = new Vector<>();
CigarElement lastElement = null;
for (final CigarElement elem : newElements) {
if (lastElement == null || lastElement.getOperator() != elem.getOperator()) {
if (lastElement != null) {
finalNewElements.add(lastElement);
}
lastElement = elem;
} else {
lastElement = new CigarElement(lastElement.getLength() + elem.getLength(), lastElement.getOperator());
}
}
if (lastElement != null) {
finalNewElements.add(lastElement);
}
final Cigar newCigar = new Cigar(finalNewElements);
if (runAsserts) {
assert newCigar.isValid(null, -1) == null;
}
return newCigar;
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ClippingOp method applyREVERT_SOFTCLIPPED_BASES.
private GATKRead applyREVERT_SOFTCLIPPED_BASES(final GATKRead read) {
GATKRead unclipped = read.copy();
final Cigar unclippedCigar = new Cigar();
int matchesCount = 0;
for (final CigarElement element : read.getCigarElements()) {
if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) {
matchesCount += element.getLength();
} else if (matchesCount > 0) {
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
matchesCount = 0;
unclippedCigar.add(element);
} else {
unclippedCigar.add(element);
}
}
if (matchesCount > 0) {
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
}
unclipped.setCigar(unclippedCigar);
final int newStart = read.getStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
if (newStart <= 0) {
// if the start of the unclipped read occurs before the contig,
// we must hard clip away the bases since we cannot represent reads with
// negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned)
// We cannot set the read to temporarily have a negative start position, as our Read
// interface will not allow it. Instead, since we know that the final start position will
// be 1 after the hard clip operation, set it to 1 explicitly. We have to set it twice:
// once before the hard clip (to reset the alignment stop / read length in read implementations
// that cache these values, such as SAMRecord), and again after the hard clip.
unclipped.setPosition(unclipped.getContig(), 1);
unclipped = applyHARDCLIP_BASES(unclipped, 0, -newStart);
unclipped.setPosition(unclipped.getContig(), 1);
return unclipped;
} else {
unclipped.setPosition(unclipped.getContig(), newStart);
return unclipped;
}
}
use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.
the class ClippingOp method cleanHardClippedCigar.
/**
* Checks if a hard clipped cigar left a read starting or ending with deletions or gap (N)
* and cleans it up accordingly.
*
* @param cigar the original cigar
* @return an object with the shifts (see CigarShift class)
*/
private CigarShift cleanHardClippedCigar(final Cigar cigar) {
final Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<>();
final Stack<CigarElement> inverseCigarStack = new Stack<>();
for (final CigarElement cigarElement : cigar.getCigarElements()) {
cigarStack.push(cigarElement);
}
for (int i = 1; i <= 2; i++) {
final int shift = 0;
int totalHardClip = 0;
boolean readHasStarted = false;
boolean addedHardClips = false;
while (!cigarStack.empty()) {
final CigarElement cigarElement = cigarStack.pop();
if (!readHasStarted && cigarElement.getOperator() != CigarOperator.DELETION && cigarElement.getOperator() != CigarOperator.SKIPPED_REGION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
readHasStarted = true;
} else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClip += cigarElement.getLength();
} else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION) {
totalHardClip += cigarElement.getLength();
} else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.SKIPPED_REGION) {
totalHardClip += cigarElement.getLength();
}
if (readHasStarted) {
if (i == 1) {
if (!addedHardClips) {
if (totalHardClip > 0) {
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
}
addedHardClips = true;
}
inverseCigarStack.push(cigarElement);
} else {
if (!addedHardClips) {
if (totalHardClip > 0) {
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
}
addedHardClips = true;
}
cleanCigar.add(cigarElement);
}
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
} else // second pass (i=2) is from start to end with the end already cleaned
{
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
}
Aggregations