Search in sources :

Example 56 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class BAQ method calcBAQFromHMM.

public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd) {
    if (queryStart < 0)
        throw new GATKException("BUG: queryStart < 0: " + queryStart);
    if (queryEnd < 0)
        throw new GATKException("BUG: queryEnd < 0: " + queryEnd);
    if (queryEnd < queryStart)
        throw new GATKException("BUG: queryStart < queryEnd : " + queryStart + " end =" + queryEnd);
    // note -- assumes ref is offset from the *CLIPPED* start
    BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
    int queryLen = queryEnd - queryStart;
    hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
    return baqResult;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 57 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class BAQ method calculateQueryRange.

/**
     * Determine the appropriate start and stop offsets in the reads for the bases given the cigar string
     * @param read
     * @return
     */
private final Pair<Integer, Integer> calculateQueryRange(final GATKRead read) {
    int queryStart = -1, queryStop = -1;
    int readI = 0;
    // iterate over the cigar elements to determine the start and stop of the read bases for the BAQ calculation
    for (CigarElement elt : read.getCigarElements()) {
        switch(elt.getOperator()) {
            // cannot handle these
            case N:
                return null;
            // ignore pads, hard clips, and deletions
            case H:
            // ignore pads, hard clips, and deletions
            case P:
            // ignore pads, hard clips, and deletions
            case D:
                break;
            case I:
            case S:
            case M:
            case EQ:
            case X:
                int prev = readI;
                readI += elt.getLength();
                if (elt.getOperator() != CigarOperator.S) {
                    if (queryStart == -1) {
                        queryStart = prev;
                    }
                    queryStop = readI;
                }
                // queryStart or queryStop
                break;
            default:
                throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
        }
    }
    if (queryStop == queryStart) {
        //System.err.printf("WARNING -- read is completely clipped away: " + read.format());
        return null;
    }
    return new MutablePair<>(queryStart, queryStop);
}
Also used : MutablePair(org.apache.commons.lang3.tuple.MutablePair) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Example 58 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class BAQ method hmm_glocal.

// ####################################################################################################
//
// NOTE -- THIS CODE IS SYNCHRONIZED WITH CODE IN THE SAMTOOLS REPOSITORY.  CHANGES TO THIS CODE SHOULD BE
// NOTE -- PUSHED BACK TO HENG LI
//
// ####################################################################################################
public int hmm_glocal(final byte[] ref, final byte[] query, int qstart, int l_query, final byte[] _iqual, int[] state, byte[] q) {
    if (ref == null)
        throw new GATKException("BUG: ref sequence is null");
    if (query == null)
        throw new GATKException("BUG: query sequence is null");
    if (_iqual == null)
        throw new GATKException("BUG: query quality vector is null");
    if (query.length != _iqual.length)
        throw new GATKException("BUG: read sequence length != qual length");
    if (l_query < 1)
        throw new GATKException("BUG: length of query sequence < 0: " + l_query);
    if (qstart < 0)
        throw new GATKException("BUG: query sequence start < 0: " + qstart);
    //if ( q != null && q.length != state.length ) throw new GATKException("BUG: BAQ quality length != read sequence length");
    //if ( state != null && state.length != l_query ) throw new GATKException("BUG: state length != read sequence length");
    int i, k;
    /*** initialization ***/
    // change coordinates
    final int l_ref = ref.length;
    // set band width
    int bw2, bw = l_ref > l_query ? l_ref : l_query;
    if (cb < Math.abs(l_ref - l_query)) {
        bw = Math.abs(l_ref - l_query) + 3;
    //System.out.printf("SC  cb=%d, bw=%d%n", cb, bw);
    }
    if (bw > cb)
        bw = cb;
    if (bw < Math.abs(l_ref - l_query)) {
        //int bwOld = bw;
        bw = Math.abs(l_ref - l_query);
    //System.out.printf("old bw is %d, new is %d%n", bwOld, bw);
    }
    //System.out.printf("c->bw = %d, bw = %d, l_ref = %d, l_query = %d\n", cb, bw, l_ref, l_query);
    bw2 = bw * 2 + 1;
    // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
    double[][] f = new double[l_query + 1][bw2 * 3 + 6];
    double[][] b = new double[l_query + 1][bw2 * 3 + 6];
    double[] s = new double[l_query + 2];
    // initialize transition probabilities
    double sM, sI, bM, bI;
    sM = sI = 1. / (2 * l_query + 2);
    // (bM+bI)*l_ref==1
    bM = (1 - cd) / l_ref;
    // (bM+bI)*l_ref==1
    bI = cd / l_ref;
    double[] m = new double[9];
    m[0 * 3 + 0] = (1 - cd - cd) * (1 - sM);
    m[0 * 3 + 1] = m[0 * 3 + 2] = cd * (1 - sM);
    m[1 * 3 + 0] = (1 - ce) * (1 - sI);
    m[1 * 3 + 1] = ce * (1 - sI);
    m[1 * 3 + 2] = 0.;
    m[2 * 3 + 0] = 1 - ce;
    m[2 * 3 + 1] = 0.;
    m[2 * 3 + 2] = ce;
    /*** forward ***/
    // f[0]
    f[0][set_u(bw, 0, 0)] = s[0] = 1.;
    {
        // f[1]
        double[] fi = f[1];
        double sum;
        int beg = 1, end = l_ref < bw + 1 ? l_ref : bw + 1, _beg, _end;
        for (k = beg, sum = 0.; k <= end; ++k) {
            int u;
            double e = calcEpsilon(ref[k - 1], query[qstart], _iqual[qstart]);
            u = set_u(bw, 1, k);
            fi[u + 0] = e * bM;
            fi[u + 1] = EI * bI;
            sum += fi[u] + fi[u + 1];
        }
        // rescale
        s[1] = sum;
        _beg = set_u(bw, 1, beg);
        _end = set_u(bw, 1, end);
        _end += 2;
        for (k = _beg; k <= _end; ++k) fi[k] /= sum;
    }
    // f[2..l_query]
    for (i = 2; i <= l_query; ++i) {
        double[] fi = f[i], fi1 = f[i - 1];
        double sum;
        int beg = 1, end = l_ref, x, _beg, _end;
        byte qyi = query[qstart + i - 1];
        // band start
        x = i - bw;
        // band start
        beg = beg > x ? beg : x;
        // band end
        x = i + bw;
        // band end
        end = end < x ? end : x;
        for (k = beg, sum = 0.; k <= end; ++k) {
            int u, v11, v01, v10;
            double e = calcEpsilon(ref[k - 1], qyi, _iqual[qstart + i - 1]);
            u = set_u(bw, i, k);
            v11 = set_u(bw, i - 1, k - 1);
            v10 = set_u(bw, i - 1, k);
            v01 = set_u(bw, i, k - 1);
            fi[u + 0] = e * (m[0] * fi1[v11 + 0] + m[3] * fi1[v11 + 1] + m[6] * fi1[v11 + 2]);
            fi[u + 1] = EI * (m[1] * fi1[v10 + 0] + m[4] * fi1[v10 + 1]);
            fi[u + 2] = m[2] * fi[v01 + 0] + m[8] * fi[v01 + 2];
            sum += fi[u] + fi[u + 1] + fi[u + 2];
        //System.out.println("("+i+","+k+";"+u+"): "+fi[u]+","+fi[u+1]+","+fi[u+2]);
        }
        // rescale
        s[i] = sum;
        _beg = set_u(bw, i, beg);
        _end = set_u(bw, i, end);
        _end += 2;
        for (k = _beg, sum = 1. / sum; k <= _end; ++k) fi[k] *= sum;
    }
    {
        // f[l_query+1]
        double sum;
        for (k = 1, sum = 0.; k <= l_ref; ++k) {
            int u = set_u(bw, l_query, k);
            if (u < 3 || u >= bw2 * 3 + 3)
                continue;
            sum += f[l_query][u + 0] * sM + f[l_query][u + 1] * sI;
        }
        // the last scaling factor
        s[l_query + 1] = sum;
    }
    // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
    for (k = 1; k <= l_ref; ++k) {
        int u = set_u(bw, l_query, k);
        double[] bi = b[l_query];
        if (u < 3 || u >= bw2 * 3 + 3)
            continue;
        bi[u + 0] = sM / s[l_query] / s[l_query + 1];
        bi[u + 1] = sI / s[l_query] / s[l_query + 1];
    }
    // b[l_query-1..1]
    for (i = l_query - 1; i >= 1; --i) {
        int beg = 1, end = l_ref, x, _beg, _end;
        double[] bi = b[i], bi1 = b[i + 1];
        double y = (i > 1) ? 1. : 0.;
        byte qyi1 = query[qstart + i];
        x = i - bw;
        beg = beg > x ? beg : x;
        x = i + bw;
        end = end < x ? end : x;
        for (k = end; k >= beg; --k) {
            int u, v11, v01, v10;
            u = set_u(bw, i, k);
            v11 = set_u(bw, i + 1, k + 1);
            v10 = set_u(bw, i + 1, k);
            v01 = set_u(bw, i, k + 1);
            final double e = (k >= l_ref ? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart + i])) * bi1[v11];
            // bi1[v11] has been folded into e.
            bi[u + 0] = e * m[0] + EI * m[1] * bi1[v10 + 1] + m[2] * bi[v01 + 2];
            bi[u + 1] = e * m[3] + EI * m[4] * bi1[v10 + 1];
            bi[u + 2] = (e * m[6] + m[8] * bi[v01 + 2]) * y;
        }
        // rescale
        _beg = set_u(bw, i, beg);
        _end = set_u(bw, i, end);
        _end += 2;
        for (k = _beg, y = 1. / s[i]; k <= _end; ++k) bi[k] *= y;
    }
    double pb;
    {
        // b[0]
        int beg = 1, end = l_ref < bw + 1 ? l_ref : bw + 1;
        double sum = 0.;
        for (k = end; k >= beg; --k) {
            int u = set_u(bw, 1, k);
            double e = calcEpsilon(ref[k - 1], query[qstart], _iqual[qstart]);
            if (u < 3 || u >= bw2 * 3 + 3)
                continue;
            sum += e * b[1][u + 0] * bM + EI * b[1][u + 1] * bI;
        }
        // if everything works as is expected, pb == 1.0
        pb = b[0][set_u(bw, 0, 0)] = sum / s[0];
    }
    /*** MAP ***/
    for (i = 1; i <= l_query; ++i) {
        double sum = 0., max = 0.;
        final double[] fi = f[i], bi = b[i];
        int beg = 1, end = l_ref, x, max_k = -1;
        x = i - bw;
        beg = beg > x ? beg : x;
        x = i + bw;
        end = end < x ? end : x;
        for (k = beg; k <= end; ++k) {
            final int u = set_u(bw, i, k);
            double z;
            sum += (z = fi[u + 0] * bi[u + 0]);
            if (z > max) {
                max = z;
                max_k = (k - 1) << 2 | 0;
            }
            sum += (z = fi[u + 1] * bi[u + 1]);
            if (z > max) {
                max = z;
                max_k = (k - 1) << 2 | 1;
            }
        }
        // if everything works as is expected, sum == 1.0
        max /= sum;
        // if everything works as is expected, sum == 1.0
        sum *= s[i];
        if (state != null)
            state[qstart + i - 1] = max_k;
        if (q != null) {
            // = 10*log10(1-max)
            k = (int) (-4.343 * Math.log(1. - max) + .499);
            q[qstart + i - 1] = (byte) (k > 100 ? 99 : (k < minBaseQual ? minBaseQual : k));
        }
    //System.out.println("("+pb+","+sum+")"+" ("+(i-1)+","+(max_k>>2)+","+(max_k&3)+","+max+")");
    }
    return 0;
}
Also used : GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 59 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class Utils method transformParallel.

/**
     * Like Guava's {@link Iterators#transform(Iterator, com.google.common.base.Function)}, but runs a fixed number
     * ({@code numThreads}) of transformations in parallel, while maintaining ordering of the output iterator.
     * This is useful if the transformations are CPU intensive.
     */
public static <F, T> Iterator<T> transformParallel(final Iterator<F> fromIterator, final Function<F, T> function, final int numThreads) {
    Utils.nonNull(fromIterator, "fromIterator");
    Utils.nonNull(function, "function");
    Utils.validateArg(numThreads >= 1, "numThreads must be at least 1");
    if (numThreads == 1) {
        // defer to Guava for single-threaded case
        return Iterators.transform(fromIterator, new com.google.common.base.Function<F, T>() {

            @Nullable
            @Override
            public T apply(@Nullable final F input) {
                return function.apply(input);
            }
        });
    }
    // use an executor service for the multi-threaded case
    final ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
    final Queue<Future<T>> futures = new LinkedList<>();
    return new AbstractIterator<T>() {

        @Override
        protected T computeNext() {
            try {
                while (fromIterator.hasNext()) {
                    if (futures.size() == numThreads) {
                        return futures.remove().get();
                    }
                    final F next = fromIterator.next();
                    final Future<T> future = executorService.submit(() -> function.apply(next));
                    futures.add(future);
                }
                if (!futures.isEmpty()) {
                    return futures.remove().get();
                }
                executorService.shutdown();
                return endOfData();
            } catch (InterruptedException | ExecutionException e) {
                throw new GATKException("Problem running task", e);
            }
        }
    };
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) AbstractIterator(com.google.common.collect.AbstractIterator) ExecutionException(java.util.concurrent.ExecutionException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Nullable(javax.annotation.Nullable)

Example 60 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class ClippingOp method hardClipCigar.

private CigarShift hardClipCigar(final Cigar cigar, final int start, final int stop) {
    final Cigar newCigar = new Cigar();
    int index = 0;
    int totalHardClipCount = stop - start + 1;
    // caused by hard clipping deletions
    int alignmentShift = 0;
    // hard clip the beginning of the cigar string
    if (start == 0) {
        final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
        CigarElement cigarElement = cigarElementIterator.next();
        // Skip all leading hard clips
        while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
            totalHardClipCount += cigarElement.getLength();
            if (cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                throw new GATKException("Read is entirely hardclipped, shouldn't be trying to clip it's cigar string");
            }
        }
        // keep clipping until we hit stop
        while (index <= stop) {
            int shift = 0;
            if (cigarElement.getOperator().consumesReadBases()) {
                shift = cigarElement.getLength();
            }
            // we're still clipping or just finished perfectly
            if (index + shift == stop + 1) {
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
                newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
            } else // element goes beyond what we need to clip
            if (index + shift > stop + 1) {
                final int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
                newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
                newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
            }
            index += shift;
            alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
            if (index <= stop && cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                break;
            }
        }
        // add the remaining cigar elements
        while (cigarElementIterator.hasNext()) {
            cigarElement = cigarElementIterator.next();
            newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
        }
    } else // hard clip the end of the cigar string
    {
        final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
        CigarElement cigarElement = cigarElementIterator.next();
        // Keep marching on until we find the start
        while (index < start) {
            int shift = 0;
            if (cigarElement.getOperator().consumesReadBases()) {
                shift = cigarElement.getLength();
            }
            // we haven't gotten to the start yet, keep everything as is.
            if (index + shift < start) {
                newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
            } else // element goes beyond our clip starting position
            {
                final int elementLengthAfterChopping = start - index;
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
                // if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
                if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
                    totalHardClipCount += elementLengthAfterChopping;
                } else // otherwise, maintain what's left of this last operator
                {
                    newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
                }
            }
            index += shift;
            if (index < start && cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
            } else {
                break;
            }
        }
        // check if we are hard clipping indels
        while (cigarElementIterator.hasNext()) {
            cigarElement = cigarElementIterator.next();
            alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
            // if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
            if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
                totalHardClipCount += cigarElement.getLength();
            }
        }
        newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
    }
    return cleanHardClippedCigar(newCigar);
}
Also used : Cigar(htsjdk.samtools.Cigar) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CigarElement(htsjdk.samtools.CigarElement)

Aggregations

GATKException (org.broadinstitute.hellbender.exceptions.GATKException)96 IOException (java.io.IOException)19 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 CigarElement (htsjdk.samtools.CigarElement)12 ArrayList (java.util.ArrayList)10 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 Cigar (htsjdk.samtools.Cigar)7 File (java.io.File)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 OutputStream (java.io.OutputStream)5 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Utils (org.broadinstitute.hellbender.utils.Utils)4 Tuple2 (scala.Tuple2)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 BufferedOutputStream (java.io.BufferedOutputStream)3 InputStream (java.io.InputStream)3 BigInteger (java.math.BigInteger)3 java.util (java.util)3