Search in sources :

Example 86 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class BAQ method calculateQueryRange.

/**
     * Determine the appropriate start and stop offsets in the reads for the bases given the cigar string
     * @param read
     * @return
     */
private final Pair<Integer, Integer> calculateQueryRange(final GATKRead read) {
    int queryStart = -1, queryStop = -1;
    int readI = 0;
    // iterate over the cigar elements to determine the start and stop of the read bases for the BAQ calculation
    for (CigarElement elt : read.getCigarElements()) {
        switch(elt.getOperator()) {
            // cannot handle these
            case N:
                return null;
            // ignore pads, hard clips, and deletions
            case H:
            // ignore pads, hard clips, and deletions
            case P:
            // ignore pads, hard clips, and deletions
            case D:
                break;
            case I:
            case S:
            case M:
            case EQ:
            case X:
                int prev = readI;
                readI += elt.getLength();
                if (elt.getOperator() != CigarOperator.S) {
                    if (queryStart == -1) {
                        queryStart = prev;
                    }
                    queryStop = readI;
                }
                // queryStart or queryStop
                break;
            default:
                throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
        }
    }
    if (queryStop == queryStart) {
        //System.err.printf("WARNING -- read is completely clipped away: " + read.format());
        return null;
    }
    return new MutablePair<>(queryStart, queryStop);
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 87 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ClippingOp method getNewAlignmentStartOffset.

/**
     * Given two cigar strings corresponding to read before and after soft-clipping, returns an integer
     * corresponding to the number of reference bases that the new string must be offset by in order to have the
     * correct start according to the reference.
     *
     * @param clippedCigar the new cigar string after clipping
     * @param oldCigar the cigar string of the read before it was soft clipped
     */
@VisibleForTesting
static int getNewAlignmentStartOffset(final Cigar clippedCigar, final Cigar oldCigar) {
    // The number of read bases consumed on the new cigar before reference bases are consumed
    int readBasesBeforeReference = 0;
    // The number of read bases consumed on the old cigar before reference bases are consumed
    int basesBeforeReferenceOld = 0;
    // A measure of the reference offset between the oldCigar and the clippedCigar
    int curReadCounter = 0;
    for (final CigarElement e : clippedCigar.getCigarElements()) {
        if (!e.getOperator().consumesReferenceBases()) {
            if (e.getOperator().consumesReadBases()) {
                readBasesBeforeReference += e.getLength();
            }
        } else {
            if (!e.getOperator().consumesReadBases()) {
                // Accounting for any D or N cigar operators at the front of the string
                basesBeforeReferenceOld -= e.getLength();
            } else {
                break;
            }
        }
    }
    for (final CigarElement e : oldCigar.getCigarElements()) {
        int curRefLength = e.getLength();
        int curReadLength = e.getLength();
        if (!e.getOperator().consumesReadBases()) {
            curReadLength = 0;
        }
        boolean truncated = false;
        if (curReadCounter + curReadLength > readBasesBeforeReference) {
            curReadLength = readBasesBeforeReference - curReadCounter;
            curRefLength = readBasesBeforeReference - curReadCounter;
            truncated = true;
        }
        if (!e.getOperator().consumesReferenceBases()) {
            curRefLength = 0;
        }
        curReadCounter += curReadLength;
        basesBeforeReferenceOld += curRefLength;
        if (curReadCounter > readBasesBeforeReference || truncated) {
            break;
        }
    }
    // if oldNum is negative it means some of the preceding N/Ds were trimmed but not all so we take absolute value
    return Math.abs(basesBeforeReferenceOld);
}
Also used : CigarElement(htsjdk.samtools.CigarElement) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 88 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ClippingOp method hardClipCigar.

private CigarShift hardClipCigar(final Cigar cigar, final int start, final int stop) {
    final Cigar newCigar = new Cigar();
    int index = 0;
    int totalHardClipCount = stop - start + 1;
    // caused by hard clipping deletions
    int alignmentShift = 0;
    // hard clip the beginning of the cigar string
    if (start == 0) {
        final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
        CigarElement cigarElement = cigarElementIterator.next();
        // Skip all leading hard clips
        while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
            totalHardClipCount += cigarElement.getLength();
            if (cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                throw new GATKException("Read is entirely hardclipped, shouldn't be trying to clip it's cigar string");
            }
        }
        // keep clipping until we hit stop
        while (index <= stop) {
            int shift = 0;
            if (cigarElement.getOperator().consumesReadBases()) {
                shift = cigarElement.getLength();
            }
            // we're still clipping or just finished perfectly
            if (index + shift == stop + 1) {
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
                newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
            } else // element goes beyond what we need to clip
            if (index + shift > stop + 1) {
                final int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
                newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
                newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
            }
            index += shift;
            alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
            if (index <= stop && cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                break;
            }
        }
        // add the remaining cigar elements
        while (cigarElementIterator.hasNext()) {
            cigarElement = cigarElementIterator.next();
            newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
        }
    } else // hard clip the end of the cigar string
    {
        final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
        CigarElement cigarElement = cigarElementIterator.next();
        // Keep marching on until we find the start
        while (index < start) {
            int shift = 0;
            if (cigarElement.getOperator().consumesReadBases()) {
                shift = cigarElement.getLength();
            }
            // we haven't gotten to the start yet, keep everything as is.
            if (index + shift < start) {
                newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
            } else // element goes beyond our clip starting position
            {
                final int elementLengthAfterChopping = start - index;
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
                // if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
                if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
                    totalHardClipCount += elementLengthAfterChopping;
                } else // otherwise, maintain what's left of this last operator
                {
                    newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
                }
            }
            index += shift;
            if (index < start && cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                break;
            }
        }
        // check if we are hard clipping indels
        while (cigarElementIterator.hasNext()) {
            cigarElement = cigarElementIterator.next();
            alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
            // if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
            if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
                totalHardClipCount += cigarElement.getLength();
            }
        }
        newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
    }
    return cleanHardClippedCigar(newCigar);
}
Also used : Cigar(htsjdk.samtools.Cigar) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 89 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ReadClipper method hardClipSoftClippedBases.

/**
     * Will hard clip every soft clipped bases in the read.
     *
     * @return a new read without the soft clipped bases
     */
private GATKRead hardClipSoftClippedBases() {
    if (read.isEmpty()) {
        return read;
    }
    int readIndex = 0;
    // first position to hard clip (inclusive)
    int cutLeft = -1;
    // first position to hard clip (inclusive)
    int cutRight = -1;
    // trigger to stop clipping the left tail and start cutting the right tail
    boolean rightTail = false;
    for (final CigarElement cigarElement : read.getCigarElements()) {
        if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
            if (rightTail) {
                cutRight = readIndex;
            } else {
                cutLeft = readIndex + cigarElement.getLength() - 1;
            }
        } else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
            rightTail = true;
        }
        if (cigarElement.getOperator().consumesReadBases()) {
            readIndex += cigarElement.getLength();
        }
    }
    // It is extremely important that we cut the end first otherwise the read coordinates change.
    if (cutRight >= 0) {
        this.addOp(new ClippingOp(cutRight, read.getLength() - 1));
    }
    if (cutLeft >= 0) {
        this.addOp(new ClippingOp(0, cutLeft));
    }
    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 90 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class PileupElementUnitTest method testCreatePileupForReadAndOffset1.

@Test
public void testCreatePileupForReadAndOffset1() {
    final GATKRead read = ArtificialReadUtils.createArtificialRead("100M");
    final PileupElement pe0 = PileupElement.createPileupForReadAndOffset(read, 0);
    final List<CigarElement> betweenNextPosition = pe0.getBetweenNextPosition();
    Assert.assertTrue(betweenNextPosition.isEmpty());
    Assert.assertEquals(pe0.getOffset(), 0);
    final PileupElement pe10 = PileupElement.createPileupForReadAndOffset(read, 10);
    Assert.assertEquals(pe10.getOffset(), 10);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) CigarElement(htsjdk.samtools.CigarElement) LocusIteratorByStateBaseTest(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByStateBaseTest) Test(org.testng.annotations.Test) LIBSTest(org.broadinstitute.hellbender.utils.locusiterator.LIBSTest)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13