Search in sources :

Example 21 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class SVVariantDiscoveryUtils method getNumSoftClippingBases.

/**
     * @return the number of soft clipped bases as indicated in the input {@code cigarElements}, either from the beginning
     * of the list ({@code fromStart==true}) or from the end of the list ({@code fromStart==false}).
     *
     * @throws IllegalArgumentException if fails check by {@link #validateCigar(List)}
     */
@VisibleForTesting
public static int getNumSoftClippingBases(final boolean fromStart, final List<CigarElement> cigarElements) {
    validateCigar(cigarElements);
    // because 'H' can only be the 1st/last operation according to the spec, and also
    // "S may only have H operations between them and the ends of the CIGAR string",
    // 'S' could only be the 1st operation or the 2nd operation next to the 'H' sitting at the end
    // no two 'S' operations should sit next to each other
    final int endIndex = fromStart ? 0 : cigarElements.size() - 1;
    CigarElement element = cigarElements.get(endIndex);
    if (element.getOperator().isClipping()) {
        if (element.getOperator() == CigarOperator.S) {
            return element.getLength();
        } else {
            final CigarElement mayBeSoftClipping = cigarElements.get(fromStart ? 1 : cigarElements.size() - 2);
            return mayBeSoftClipping.getOperator() == CigarOperator.S ? mayBeSoftClipping.getLength() : 0;
        }
    } else {
        return 0;
    }
}
Also used : CigarElement(htsjdk.samtools.CigarElement) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 22 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class ReadClassifier method hasInitialSoftClip.

private static boolean hasInitialSoftClip(final List<CigarElement> cigarElements, final GATKRead read) {
    final ListIterator<CigarElement> itr = cigarElements.listIterator();
    if (!itr.hasNext())
        return false;
    CigarElement firstEle = itr.next();
    if (firstEle.getOperator() == CigarOperator.HARD_CLIP && itr.hasNext()) {
        firstEle = itr.next();
    }
    final int clipStart = firstEle.getLength() - MIN_SOFT_CLIP_LEN;
    return firstEle.getOperator() == CigarOperator.SOFT_CLIP && clipStart >= 0 && isHighQualityRegion(read.getBaseQualities(), clipStart);
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 23 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class AlignmentUtils method createIndelString.

/**
     * Create the string (really a byte array) representation of an indel-containing cigar against the reference.
     *
     * @param cigar             the indel-containing cigar
     * @param indexOfIndel      the index of the indel cigar element
     * @param refSeq            the reference sequence
     * @param readSeq           the read sequence for the cigar
     * @param refIndex          the starting reference index into refSeq
     * @param readIndex         the starting read index into readSeq
     * @return non-null byte array which is the indel representation against the reference
     */
private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) {
    CigarElement indel = cigar.getCigarElement(indexOfIndel);
    int indelLength = indel.getLength();
    int totalRefBases = 0;
    for (int i = 0; i < indexOfIndel; i++) {
        CigarElement ce = cigar.getCigarElement(i);
        int length = ce.getLength();
        switch(ce.getOperator()) {
            case M:
            case EQ:
            case X:
                readIndex += length;
                refIndex += length;
                totalRefBases += length;
                break;
            case S:
                readIndex += length;
                break;
            case N:
                refIndex += length;
                totalRefBases += length;
                break;
            default:
                break;
        }
    }
    // sometimes, when there are very large known indels, we won't have enough reference sequence to cover them
    if (totalRefBases + indelLength > refSeq.length)
        indelLength -= (totalRefBases + indelLength - refSeq.length);
    // the indel-based reference string
    byte[] alt = new byte[refSeq.length + (indelLength * (indel.getOperator() == CigarOperator.D ? -1 : 1))];
    // add the bases before the indel, making sure it's not aligned off the end of the reference
    if (refIndex > alt.length || refIndex > refSeq.length)
        return null;
    System.arraycopy(refSeq, 0, alt, 0, refIndex);
    int currentPos = refIndex;
    // take care of the indel
    if (indel.getOperator() == CigarOperator.D) {
        refIndex += indelLength;
    } else {
        System.arraycopy(readSeq, readIndex, alt, currentPos, indelLength);
        currentPos += indelLength;
    }
    // add the bases after the indel, making sure it's not aligned off the end of the reference
    if (refSeq.length - refIndex > alt.length - currentPos)
        return null;
    System.arraycopy(refSeq, refIndex, alt, currentPos, refSeq.length - refIndex);
    return alt;
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 24 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class AlignmentUtils method calcFirstBaseMatchingReferenceInCigar.

/**
     * Get the offset (base 0) of the first reference aligned base in Cigar that occurs after readStartByBaseOfCigar base of the cigar
     *
     * The main purpose of this routine is to find a good start position for a read given it's cigar.  The real
     * challenge is that the starting base might be inside an insertion, in which case the read actually starts
     * at the next M/EQ/X operator.
     *
     * @param cigar a non-null cigar
     * @param readStartByBaseOfCigar finds the first base after this (0 indexed) that aligns to the reference genome (M, EQ, X)
     * @throws IllegalStateException if no such base can be found
     * @return an offset into cigar
     */
public static int calcFirstBaseMatchingReferenceInCigar(final Cigar cigar, int readStartByBaseOfCigar) {
    if (cigar == null)
        throw new IllegalArgumentException("cigar cannot be null");
    if (readStartByBaseOfCigar >= cigar.getReadLength())
        throw new IllegalArgumentException("readStartByBaseOfCigar " + readStartByBaseOfCigar + " must be <= readLength " + cigar.getReadLength());
    int hapOffset = 0, refOffset = 0;
    for (final CigarElement ce : cigar.getCigarElements()) {
        for (int i = 0; i < ce.getLength(); i++) {
            switch(ce.getOperator()) {
                case M:
                case EQ:
                case X:
                    if (hapOffset >= readStartByBaseOfCigar)
                        return refOffset;
                    hapOffset++;
                    refOffset++;
                    break;
                case I:
                case S:
                    hapOffset++;
                    break;
                case D:
                    refOffset++;
                    break;
                default:
                    throw new IllegalStateException("calcFirstBaseMatchingReferenceInCigar does not support cigar " + ce.getOperator() + " in cigar " + cigar);
            }
        }
    }
    throw new IllegalStateException("Never found appropriate matching state for cigar " + cigar + " given start of " + readStartByBaseOfCigar);
}
Also used : CigarElement(htsjdk.samtools.CigarElement)

Example 25 with CigarElement

use of htsjdk.samtools.CigarElement in project gatk by broadinstitute.

the class AlignmentUtils method getMismatchCount.

/**
     * Count how many bases mismatch the reference.  Indels are not considered mismatching.
     *
     * @param r                   the sam record to check against
     * @param refSeq              the byte array representing the reference sequence
     * @param refIndex            the index in the reference byte array of the read's first base (the reference index
     *                            is matching the alignment start, there may be tons of soft-clipped bases before/after
     *                            that so it's wrong to compare with getLength() here.).  Note that refIndex is
     *                            zero based, not 1 based
     * @param startOnRead         the index in the read's bases from which we start counting
     * @param nReadBases          the number of bases after (but including) startOnRead that we check
     * @return non-null object representing the mismatch count
     */
@SuppressWarnings("fallthrough")
public static MismatchCount getMismatchCount(GATKRead r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) {
    Utils.nonNull(r);
    Utils.nonNull(refSeq);
    if (refIndex < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference index that is negative");
    if (startOnRead < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count with a read start that is negative");
    if (nReadBases < 0)
        throw new IllegalArgumentException("attempting to calculate the mismatch count for a negative number of read bases");
    if (refSeq.length - refIndex < (r.getEnd() - r.getStart()))
        throw new IllegalArgumentException("attempting to calculate the mismatch count against a reference string that is smaller than the read");
    MismatchCount mc = new MismatchCount();
    int readIdx = 0;
    // index of the last base on read we want to count (note we are including soft-clipped bases with this math)
    final int endOnRead = startOnRead + nReadBases - 1;
    final byte[] readSeq = r.getBases();
    final Cigar c = r.getCigar();
    final byte[] readQuals = r.getBaseQualities();
    for (final CigarElement ce : c.getCigarElements()) {
        if (readIdx > endOnRead)
            break;
        final int elementLength = ce.getLength();
        switch(ce.getOperator()) {
            case X:
                mc.numMismatches += elementLength;
                for (int j = 0; j < elementLength; j++) mc.mismatchQualities += readQuals[readIdx + j];
            case EQ:
                refIndex += elementLength;
                readIdx += elementLength;
                break;
            case M:
                for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) {
                    if (refIndex >= refSeq.length)
                        // TODO : It should never happen, we should throw exception here
                        continue;
                    if (readIdx < startOnRead)
                        continue;
                    if (readIdx > endOnRead)
                        break;
                    byte refChr = refSeq[refIndex];
                    byte readChr = readSeq[readIdx];
                    //    continue; // do not count Ns/Xs/etc ?
                    if (readChr != refChr) {
                        mc.numMismatches++;
                        mc.mismatchQualities += readQuals[readIdx];
                    }
                }
                break;
            case I:
            case S:
                readIdx += elementLength;
                break;
            case D:
            case N:
                refIndex += elementLength;
                break;
            case H:
            case P:
                break;
            default:
                throw new GATKException("The " + ce.getOperator() + " cigar element is not currently supported");
        }
    }
    return mc;
}
Also used : Cigar(htsjdk.samtools.Cigar) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

CigarElement (htsjdk.samtools.CigarElement)164 Cigar (htsjdk.samtools.Cigar)97 ArrayList (java.util.ArrayList)50 SAMRecord (htsjdk.samtools.SAMRecord)49 CigarOperator (htsjdk.samtools.CigarOperator)34 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)32 SamReader (htsjdk.samtools.SamReader)31 SAMFileHeader (htsjdk.samtools.SAMFileHeader)24 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)22 Test (org.testng.annotations.Test)19 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)17 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)17 File (java.io.File)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Interval (htsjdk.samtools.util.Interval)14 IOException (java.io.IOException)14 VisibleForTesting (com.google.common.annotations.VisibleForTesting)13 List (java.util.List)13 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)13