Search in sources :

Example 1 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class HostAlignmentReadFilter method test.

public boolean test(final Cigar cigar, final int numMismatches) {
    int numMatches = -numMismatches;
    int numCov = 0;
    final List<CigarElement> cigarElements = cigar.getCigarElements();
    for (final CigarElement e : cigarElements) {
        if (e.getOperator().equals(CigarOperator.MATCH_OR_MISMATCH)) {
            numMatches += e.getLength();
            numCov += e.getLength();
        } else if (e.getOperator().equals(CigarOperator.INSERTION)) {
            numCov += e.getLength();
        } else if (e.getOperator().equals(CigarOperator.DELETION)) {
            numMatches -= e.getLength();
        }
    }
    return numCov < minCoverage || numMatches < minIdentity;
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 2 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class BAQ method calcBAQFromHMM.

// we need to pad ref by at least the bandwidth / 2 on either side
@SuppressWarnings("fallthrough")
public BAQCalculationResult calcBAQFromHMM(GATKRead read, byte[] ref, int refOffset) {
    // todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read
    Pair<Integer, Integer> queryRange = calculateQueryRange(read);
    // read has Ns, or is completely clipped away
    if (queryRange == null)
        return null;
    int queryStart = queryRange.getLeft();
    int queryEnd = queryRange.getRight();
    BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getBases(), read.getBaseQualities(), queryStart, queryEnd);
    // cap quals
    int readI = 0, refI = 0;
    for (CigarElement elt : read.getCigarElements()) {
        int l = elt.getLength();
        switch(elt.getOperator()) {
            case // cannot handle these
            N:
                return null;
            case H:
            case // ignore pads and hard clips
            P:
                break;
            // move the reference too, in addition to I
            case S:
                refI += l;
            case I:
                // todo -- is it really the case that we want to treat I and S the same?
                for (int i = readI; i < readI + l; i++) baqResult.bq[i] = baqResult.rawQuals[i];
                readI += l;
                break;
            case D:
                refI += l;
                break;
            case M:
            case EQ:
            case //all three operators are equivalent here.
            X:
                for (int i = readI; i < readI + l; i++) {
                    int expectedPos = refI - refOffset + (i - readI);
                    baqResult.bq[i] = capBaseByBAQ(baqResult.rawQuals[i], baqResult.bq[i], baqResult.state[i], expectedPos);
                }
                readI += l;
                refI += l;
                break;
            default:
                throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
        }
    }
    if (// odd cigar string
    readI != read.getLength())
        System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length);
    return baqResult;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 3 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ClippingOp method softClip.

/**
     * Given a cigar string, soft clip up to startClipEnd and soft clip starting at endClipBegin
     */
private Cigar softClip(final Cigar __cigar, final int __startClipEnd, final int __endClipBegin, final boolean runAsserts) {
    if (__endClipBegin <= __startClipEnd) {
        //whole thing should be soft clipped
        int cigarLength = 0;
        for (final CigarElement e : __cigar.getCigarElements()) {
            cigarLength += e.getLength();
        }
        final Cigar newCigar = new Cigar();
        newCigar.add(new CigarElement(cigarLength, CigarOperator.SOFT_CLIP));
        if (runAsserts) {
            assert newCigar.isValid(null, -1) == null;
        }
        return newCigar;
    }
    int curLength = 0;
    final Vector<CigarElement> newElements = new Vector<>();
    for (final CigarElement curElem : __cigar.getCigarElements()) {
        if (!curElem.getOperator().consumesReadBases()) {
            if (curElem.getOperator() == CigarOperator.HARD_CLIP || curLength > __startClipEnd && curLength < __endClipBegin) {
                newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
            }
            continue;
        }
        final int s = curLength;
        final int e = curLength + curElem.getLength();
        if (e <= __startClipEnd || s >= __endClipBegin) {
            //must turn this entire thing into a clip
            newElements.add(new CigarElement(curElem.getLength(), CigarOperator.SOFT_CLIP));
        } else if (s >= __startClipEnd && e <= __endClipBegin) {
            //same thing
            newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
        } else {
            //we are clipping in the middle of this guy
            CigarElement newStart = null;
            CigarElement newMid = null;
            CigarElement newEnd = null;
            int midLength = curElem.getLength();
            if (s < __startClipEnd) {
                newStart = new CigarElement(__startClipEnd - s, CigarOperator.SOFT_CLIP);
                midLength -= newStart.getLength();
            }
            if (e > __endClipBegin) {
                newEnd = new CigarElement(e - __endClipBegin, CigarOperator.SOFT_CLIP);
                midLength -= newEnd.getLength();
            }
            if (runAsserts) {
                assert midLength >= 0;
            }
            if (midLength > 0) {
                newMid = new CigarElement(midLength, curElem.getOperator());
            }
            if (newStart != null) {
                newElements.add(newStart);
            }
            if (newMid != null) {
                newElements.add(newMid);
            }
            if (newEnd != null) {
                newElements.add(newEnd);
            }
        }
        curLength += curElem.getLength();
    }
    final Vector<CigarElement> finalNewElements = new Vector<>();
    CigarElement lastElement = null;
    for (final CigarElement elem : newElements) {
        if (lastElement == null || lastElement.getOperator() != elem.getOperator()) {
            if (lastElement != null) {
                finalNewElements.add(lastElement);
            }
            lastElement = elem;
        } else {
            lastElement = new CigarElement(lastElement.getLength() + elem.getLength(), lastElement.getOperator());
        }
    }
    if (lastElement != null) {
        finalNewElements.add(lastElement);
    }
    final Cigar newCigar = new Cigar(finalNewElements);
    if (runAsserts) {
        assert newCigar.isValid(null, -1) == null;
    }
    return newCigar;
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement)

Example 4 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ClippingOp method applyREVERT_SOFTCLIPPED_BASES.

private GATKRead applyREVERT_SOFTCLIPPED_BASES(final GATKRead read) {
    GATKRead unclipped = read.copy();
    final Cigar unclippedCigar = new Cigar();
    int matchesCount = 0;
    for (final CigarElement element : read.getCigarElements()) {
        if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) {
            matchesCount += element.getLength();
        } else if (matchesCount > 0) {
            unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
            matchesCount = 0;
            unclippedCigar.add(element);
        } else {
            unclippedCigar.add(element);
        }
    }
    if (matchesCount > 0) {
        unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
    }
    unclipped.setCigar(unclippedCigar);
    final int newStart = read.getStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
    if (newStart <= 0) {
        // if the start of the unclipped read occurs before the contig,
        // we must hard clip away the bases since we cannot represent reads with
        // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned)
        // We cannot set the read to temporarily have a negative start position, as our Read
        // interface will not allow it. Instead, since we know that the final start position will
        // be 1 after the hard clip operation, set it to 1 explicitly. We have to set it twice:
        // once before the hard clip (to reset the alignment stop / read length in read implementations
        // that cache these values, such as SAMRecord), and again after the hard clip.
        unclipped.setPosition(unclipped.getContig(), 1);
        unclipped = applyHARDCLIP_BASES(unclipped, 0, -newStart);
        unclipped.setPosition(unclipped.getContig(), 1);
        return unclipped;
    } else {
        unclipped.setPosition(unclipped.getContig(), newStart);
        return unclipped;
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement)

Example 5 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ClippingOp method cleanHardClippedCigar.

/**
     * Checks if a hard clipped cigar left a read starting or ending with deletions or gap (N)
     * and cleans it up accordingly.
     *
     * @param cigar the original cigar
     * @return an object with the shifts (see CigarShift class)
     */
private CigarShift cleanHardClippedCigar(final Cigar cigar) {
    final Cigar cleanCigar = new Cigar();
    int shiftFromStart = 0;
    int shiftFromEnd = 0;
    Stack<CigarElement> cigarStack = new Stack<>();
    final Stack<CigarElement> inverseCigarStack = new Stack<>();
    for (final CigarElement cigarElement : cigar.getCigarElements()) {
        cigarStack.push(cigarElement);
    }
    for (int i = 1; i <= 2; i++) {
        final int shift = 0;
        int totalHardClip = 0;
        boolean readHasStarted = false;
        boolean addedHardClips = false;
        while (!cigarStack.empty()) {
            final CigarElement cigarElement = cigarStack.pop();
            if (!readHasStarted && cigarElement.getOperator() != CigarOperator.DELETION && cigarElement.getOperator() != CigarOperator.SKIPPED_REGION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
                readHasStarted = true;
            } else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
                totalHardClip += cigarElement.getLength();
            } else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION) {
                totalHardClip += cigarElement.getLength();
            } else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.SKIPPED_REGION) {
                totalHardClip += cigarElement.getLength();
            }
            if (readHasStarted) {
                if (i == 1) {
                    if (!addedHardClips) {
                        if (totalHardClip > 0) {
                            inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
                        }
                        addedHardClips = true;
                    }
                    inverseCigarStack.push(cigarElement);
                } else {
                    if (!addedHardClips) {
                        if (totalHardClip > 0) {
                            cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
                        }
                        addedHardClips = true;
                    }
                    cleanCigar.add(cigarElement);
                }
            }
        }
        // first pass  (i=1) is from end to start of the cigar elements
        if (i == 1) {
            shiftFromEnd = shift;
            cigarStack = inverseCigarStack;
        } else // second pass (i=2) is from start to end with the end already cleaned
        {
            shiftFromStart = shift;
        }
    }
    return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
}
Also used : Cigar(htsjdk.samtools.Cigar) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13