use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class BAQ method calcBAQFromHMM.
public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd) {
if (queryStart < 0)
throw new GATKException("BUG: queryStart < 0: " + queryStart);
if (queryEnd < 0)
throw new GATKException("BUG: queryEnd < 0: " + queryEnd);
if (queryEnd < queryStart)
throw new GATKException("BUG: queryStart < queryEnd : " + queryStart + " end =" + queryEnd);
// note -- assumes ref is offset from the *CLIPPED* start
BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref);
int queryLen = queryEnd - queryStart;
hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq);
return baqResult;
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class BAQ method calculateQueryRange.
/**
* Determine the appropriate start and stop offsets in the reads for the bases given the cigar string
* @param read
* @return
*/
private final Pair<Integer, Integer> calculateQueryRange(final GATKRead read) {
int queryStart = -1, queryStop = -1;
int readI = 0;
// iterate over the cigar elements to determine the start and stop of the read bases for the BAQ calculation
for (CigarElement elt : read.getCigarElements()) {
switch(elt.getOperator()) {
// cannot handle these
case N:
return null;
// ignore pads, hard clips, and deletions
case H:
// ignore pads, hard clips, and deletions
case P:
// ignore pads, hard clips, and deletions
case D:
break;
case I:
case S:
case M:
case EQ:
case X:
int prev = readI;
readI += elt.getLength();
if (elt.getOperator() != CigarOperator.S) {
if (queryStart == -1) {
queryStart = prev;
}
queryStop = readI;
}
// queryStart or queryStop
break;
default:
throw new GATKException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getName());
}
}
if (queryStop == queryStart) {
//System.err.printf("WARNING -- read is completely clipped away: " + read.format());
return null;
}
return new MutablePair<>(queryStart, queryStop);
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class BAQ method hmm_glocal.
// ####################################################################################################
//
// NOTE -- THIS CODE IS SYNCHRONIZED WITH CODE IN THE SAMTOOLS REPOSITORY. CHANGES TO THIS CODE SHOULD BE
// NOTE -- PUSHED BACK TO HENG LI
//
// ####################################################################################################
public int hmm_glocal(final byte[] ref, final byte[] query, int qstart, int l_query, final byte[] _iqual, int[] state, byte[] q) {
if (ref == null)
throw new GATKException("BUG: ref sequence is null");
if (query == null)
throw new GATKException("BUG: query sequence is null");
if (_iqual == null)
throw new GATKException("BUG: query quality vector is null");
if (query.length != _iqual.length)
throw new GATKException("BUG: read sequence length != qual length");
if (l_query < 1)
throw new GATKException("BUG: length of query sequence < 0: " + l_query);
if (qstart < 0)
throw new GATKException("BUG: query sequence start < 0: " + qstart);
//if ( q != null && q.length != state.length ) throw new GATKException("BUG: BAQ quality length != read sequence length");
//if ( state != null && state.length != l_query ) throw new GATKException("BUG: state length != read sequence length");
int i, k;
/*** initialization ***/
// change coordinates
final int l_ref = ref.length;
// set band width
int bw2, bw = l_ref > l_query ? l_ref : l_query;
if (cb < Math.abs(l_ref - l_query)) {
bw = Math.abs(l_ref - l_query) + 3;
//System.out.printf("SC cb=%d, bw=%d%n", cb, bw);
}
if (bw > cb)
bw = cb;
if (bw < Math.abs(l_ref - l_query)) {
//int bwOld = bw;
bw = Math.abs(l_ref - l_query);
//System.out.printf("old bw is %d, new is %d%n", bwOld, bw);
}
//System.out.printf("c->bw = %d, bw = %d, l_ref = %d, l_query = %d\n", cb, bw, l_ref, l_query);
bw2 = bw * 2 + 1;
// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
double[][] f = new double[l_query + 1][bw2 * 3 + 6];
double[][] b = new double[l_query + 1][bw2 * 3 + 6];
double[] s = new double[l_query + 2];
// initialize transition probabilities
double sM, sI, bM, bI;
sM = sI = 1. / (2 * l_query + 2);
// (bM+bI)*l_ref==1
bM = (1 - cd) / l_ref;
// (bM+bI)*l_ref==1
bI = cd / l_ref;
double[] m = new double[9];
m[0 * 3 + 0] = (1 - cd - cd) * (1 - sM);
m[0 * 3 + 1] = m[0 * 3 + 2] = cd * (1 - sM);
m[1 * 3 + 0] = (1 - ce) * (1 - sI);
m[1 * 3 + 1] = ce * (1 - sI);
m[1 * 3 + 2] = 0.;
m[2 * 3 + 0] = 1 - ce;
m[2 * 3 + 1] = 0.;
m[2 * 3 + 2] = ce;
/*** forward ***/
// f[0]
f[0][set_u(bw, 0, 0)] = s[0] = 1.;
{
// f[1]
double[] fi = f[1];
double sum;
int beg = 1, end = l_ref < bw + 1 ? l_ref : bw + 1, _beg, _end;
for (k = beg, sum = 0.; k <= end; ++k) {
int u;
double e = calcEpsilon(ref[k - 1], query[qstart], _iqual[qstart]);
u = set_u(bw, 1, k);
fi[u + 0] = e * bM;
fi[u + 1] = EI * bI;
sum += fi[u] + fi[u + 1];
}
// rescale
s[1] = sum;
_beg = set_u(bw, 1, beg);
_end = set_u(bw, 1, end);
_end += 2;
for (k = _beg; k <= _end; ++k) fi[k] /= sum;
}
// f[2..l_query]
for (i = 2; i <= l_query; ++i) {
double[] fi = f[i], fi1 = f[i - 1];
double sum;
int beg = 1, end = l_ref, x, _beg, _end;
byte qyi = query[qstart + i - 1];
// band start
x = i - bw;
// band start
beg = beg > x ? beg : x;
// band end
x = i + bw;
// band end
end = end < x ? end : x;
for (k = beg, sum = 0.; k <= end; ++k) {
int u, v11, v01, v10;
double e = calcEpsilon(ref[k - 1], qyi, _iqual[qstart + i - 1]);
u = set_u(bw, i, k);
v11 = set_u(bw, i - 1, k - 1);
v10 = set_u(bw, i - 1, k);
v01 = set_u(bw, i, k - 1);
fi[u + 0] = e * (m[0] * fi1[v11 + 0] + m[3] * fi1[v11 + 1] + m[6] * fi1[v11 + 2]);
fi[u + 1] = EI * (m[1] * fi1[v10 + 0] + m[4] * fi1[v10 + 1]);
fi[u + 2] = m[2] * fi[v01 + 0] + m[8] * fi[v01 + 2];
sum += fi[u] + fi[u + 1] + fi[u + 2];
//System.out.println("("+i+","+k+";"+u+"): "+fi[u]+","+fi[u+1]+","+fi[u+2]);
}
// rescale
s[i] = sum;
_beg = set_u(bw, i, beg);
_end = set_u(bw, i, end);
_end += 2;
for (k = _beg, sum = 1. / sum; k <= _end; ++k) fi[k] *= sum;
}
{
// f[l_query+1]
double sum;
for (k = 1, sum = 0.; k <= l_ref; ++k) {
int u = set_u(bw, l_query, k);
if (u < 3 || u >= bw2 * 3 + 3)
continue;
sum += f[l_query][u + 0] * sM + f[l_query][u + 1] * sI;
}
// the last scaling factor
s[l_query + 1] = sum;
}
// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
for (k = 1; k <= l_ref; ++k) {
int u = set_u(bw, l_query, k);
double[] bi = b[l_query];
if (u < 3 || u >= bw2 * 3 + 3)
continue;
bi[u + 0] = sM / s[l_query] / s[l_query + 1];
bi[u + 1] = sI / s[l_query] / s[l_query + 1];
}
// b[l_query-1..1]
for (i = l_query - 1; i >= 1; --i) {
int beg = 1, end = l_ref, x, _beg, _end;
double[] bi = b[i], bi1 = b[i + 1];
double y = (i > 1) ? 1. : 0.;
byte qyi1 = query[qstart + i];
x = i - bw;
beg = beg > x ? beg : x;
x = i + bw;
end = end < x ? end : x;
for (k = end; k >= beg; --k) {
int u, v11, v01, v10;
u = set_u(bw, i, k);
v11 = set_u(bw, i + 1, k + 1);
v10 = set_u(bw, i + 1, k);
v01 = set_u(bw, i, k + 1);
final double e = (k >= l_ref ? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart + i])) * bi1[v11];
// bi1[v11] has been folded into e.
bi[u + 0] = e * m[0] + EI * m[1] * bi1[v10 + 1] + m[2] * bi[v01 + 2];
bi[u + 1] = e * m[3] + EI * m[4] * bi1[v10 + 1];
bi[u + 2] = (e * m[6] + m[8] * bi[v01 + 2]) * y;
}
// rescale
_beg = set_u(bw, i, beg);
_end = set_u(bw, i, end);
_end += 2;
for (k = _beg, y = 1. / s[i]; k <= _end; ++k) bi[k] *= y;
}
double pb;
{
// b[0]
int beg = 1, end = l_ref < bw + 1 ? l_ref : bw + 1;
double sum = 0.;
for (k = end; k >= beg; --k) {
int u = set_u(bw, 1, k);
double e = calcEpsilon(ref[k - 1], query[qstart], _iqual[qstart]);
if (u < 3 || u >= bw2 * 3 + 3)
continue;
sum += e * b[1][u + 0] * bM + EI * b[1][u + 1] * bI;
}
// if everything works as is expected, pb == 1.0
pb = b[0][set_u(bw, 0, 0)] = sum / s[0];
}
/*** MAP ***/
for (i = 1; i <= l_query; ++i) {
double sum = 0., max = 0.;
final double[] fi = f[i], bi = b[i];
int beg = 1, end = l_ref, x, max_k = -1;
x = i - bw;
beg = beg > x ? beg : x;
x = i + bw;
end = end < x ? end : x;
for (k = beg; k <= end; ++k) {
final int u = set_u(bw, i, k);
double z;
sum += (z = fi[u + 0] * bi[u + 0]);
if (z > max) {
max = z;
max_k = (k - 1) << 2 | 0;
}
sum += (z = fi[u + 1] * bi[u + 1]);
if (z > max) {
max = z;
max_k = (k - 1) << 2 | 1;
}
}
// if everything works as is expected, sum == 1.0
max /= sum;
// if everything works as is expected, sum == 1.0
sum *= s[i];
if (state != null)
state[qstart + i - 1] = max_k;
if (q != null) {
// = 10*log10(1-max)
k = (int) (-4.343 * Math.log(1. - max) + .499);
q[qstart + i - 1] = (byte) (k > 100 ? 99 : (k < minBaseQual ? minBaseQual : k));
}
//System.out.println("("+pb+","+sum+")"+" ("+(i-1)+","+(max_k>>2)+","+(max_k&3)+","+max+")");
}
return 0;
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class Utils method transformParallel.
/**
* Like Guava's {@link Iterators#transform(Iterator, com.google.common.base.Function)}, but runs a fixed number
* ({@code numThreads}) of transformations in parallel, while maintaining ordering of the output iterator.
* This is useful if the transformations are CPU intensive.
*/
public static <F, T> Iterator<T> transformParallel(final Iterator<F> fromIterator, final Function<F, T> function, final int numThreads) {
Utils.nonNull(fromIterator, "fromIterator");
Utils.nonNull(function, "function");
Utils.validateArg(numThreads >= 1, "numThreads must be at least 1");
if (numThreads == 1) {
// defer to Guava for single-threaded case
return Iterators.transform(fromIterator, new com.google.common.base.Function<F, T>() {
@Nullable
@Override
public T apply(@Nullable final F input) {
return function.apply(input);
}
});
}
// use an executor service for the multi-threaded case
final ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
final Queue<Future<T>> futures = new LinkedList<>();
return new AbstractIterator<T>() {
@Override
protected T computeNext() {
try {
while (fromIterator.hasNext()) {
if (futures.size() == numThreads) {
return futures.remove().get();
}
final F next = fromIterator.next();
final Future<T> future = executorService.submit(() -> function.apply(next));
futures.add(future);
}
if (!futures.isEmpty()) {
return futures.remove().get();
}
executorService.shutdown();
return endOfData();
} catch (InterruptedException | ExecutionException e) {
throw new GATKException("Problem running task", e);
}
}
};
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class ClippingOp method hardClipCigar.
private CigarShift hardClipCigar(final Cigar cigar, final int start, final int stop) {
final Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
// caused by hard clipping deletions
int alignmentShift = 0;
// hard clip the beginning of the cigar string
if (start == 0) {
final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
CigarElement cigarElement = cigarElementIterator.next();
// Skip all leading hard clips
while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += cigarElement.getLength();
if (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
throw new GATKException("Read is entirely hardclipped, shouldn't be trying to clip it's cigar string");
}
}
// keep clipping until we hit stop
while (index <= stop) {
int shift = 0;
if (cigarElement.getOperator().consumesReadBases()) {
shift = cigarElement.getLength();
}
// we're still clipping or just finished perfectly
if (index + shift == stop + 1) {
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
} else // element goes beyond what we need to clip
if (index + shift > stop + 1) {
final int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
index += shift;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
if (index <= stop && cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
break;
}
}
// add the remaining cigar elements
while (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
}
} else // hard clip the end of the cigar string
{
final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
CigarElement cigarElement = cigarElementIterator.next();
// Keep marching on until we find the start
while (index < start) {
int shift = 0;
if (cigarElement.getOperator().consumesReadBases()) {
shift = cigarElement.getLength();
}
// we haven't gotten to the start yet, keep everything as is.
if (index + shift < start) {
newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
} else // element goes beyond our clip starting position
{
final int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += elementLengthAfterChopping;
} else // otherwise, maintain what's left of this last operator
{
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
}
index += shift;
if (index < start && cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
} else {
break;
}
}
// check if we are hard clipping indels
while (cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
// if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
totalHardClipCount += cigarElement.getLength();
}
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
return cleanHardClippedCigar(newCigar);
}
Aggregations