use of org.broadinstitute.hellbender.exceptions.UserException.MalformedRead in project gatk by broadinstitute.
the class BQSRReadTransformer method apply.
/**
* Recalibrates the base qualities of a read
* <p>
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
* <p>
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
* <p>
* Given the full recalibration table, we perform the following preprocessing steps:
* <p>
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
* <p>
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* @param originalRead the read to recalibrate
*/
@Override
public GATKRead apply(final GATKRead originalRead) {
final GATKRead read = useOriginalBaseQualities ? ReadUtils.resetOriginalBaseQualities(originalRead) : originalRead;
if (emitOriginalQuals && !read.hasAttribute(SAMTag.OQ.name())) {
// Save the old qualities if the tag isn't already taken in the read
try {
read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities()));
} catch (final IllegalArgumentException e) {
throw new MalformedRead(read, "illegal base quality encountered; " + e.getMessage());
}
}
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, header, covariates, false, keyCache);
//clear indel qualities
read.clearAttribute(ReadUtils.BQSR_BASE_INSERTION_QUALITIES);
read.clearAttribute(ReadUtils.BQSR_BASE_DELETION_QUALITIES);
// get the keyset for this base using the error model
final int[][] fullReadKeySet = readCovariates.getKeySet(EventType.BASE_SUBSTITUTION);
// the rg key is constant over the whole read, the global deltaQ is too
final int rgKey = fullReadKeySet[0][0];
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get2Keys(rgKey, BASE_SUBSTITUTION_INDEX);
if (empiricalQualRG == null) {
return read;
}
final byte[] quals = read.getBaseQualities();
final int readLength = quals.length;
final double epsilon = globalQScorePrior > 0.0 ? globalQScorePrior : empiricalQualRG.getEstimatedQReported();
final NestedIntegerArray<RecalDatum> qualityScoreTable = recalibrationTables.getQualityScoreTable();
final List<Byte> quantizedQuals = quantizationInfo.getQuantizedQuals();
//Note: this loop is under very heavy use in applyBQSR. Keep it slim.
for (int offset = 0; offset < readLength; offset++) {
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
if (quals[offset] < preserveQLessThan) {
continue;
}
//clear the array
Arrays.fill(empiricalQualCovsArgs, null);
final int[] keySet = fullReadKeySet[offset];
final RecalDatum empiricalQualQS = qualityScoreTable.get3Keys(keySet[0], keySet[1], BASE_SUBSTITUTION_INDEX);
for (int i = specialCovariateCount; i < totalCovariateCount; i++) {
if (keySet[i] >= 0) {
empiricalQualCovsArgs[i - specialCovariateCount] = recalibrationTables.getTable(i).get4Keys(keySet[0], keySet[1], keySet[i], BASE_SUBSTITUTION_INDEX);
}
}
final double recalibratedQualDouble = hierarchicalBayesianQualityEstimate(epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovsArgs);
final byte recalibratedQualityScore = quantizedQuals.get(getRecalibratedQual(recalibratedQualDouble));
// Bin to static quals
quals[offset] = staticQuantizedMapping == null ? recalibratedQualityScore : staticQuantizedMapping[recalibratedQualityScore];
}
read.setBaseQualities(quals);
return read;
}
Aggregations