use of org.broadinstitute.hellbender.utils.recalibration.covariates.ReadCovariates in project gatk by broadinstitute.
the class RecalUtils method computeCovariates.
/**
* Computes all requested covariates for every offset in the given read
* by calling covariate.getValues(..).
*
* It populates an array of covariate values where result[i][j] is the covariate
* value for the ith position in the read and the jth covariate in
* reqeustedCovariates list.
*
* @param read The read for which to compute covariate values.
* @param header SAM header for the read
* @param covariates The list of requested covariates.
* @param recordIndelValues should we compute covariates for indel BQSR?
* @return a matrix with all the covariates calculated for every base in the read
*/
public static ReadCovariates computeCovariates(final GATKRead read, final SAMFileHeader header, final StandardCovariateList covariates, final boolean recordIndelValues, final CovariateKeyCache keyCache) {
final ReadCovariates readCovariates = new ReadCovariates(read.getLength(), covariates.size(), keyCache);
computeCovariates(read, header, covariates, readCovariates, recordIndelValues);
return readCovariates;
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.ReadCovariates in project gatk by broadinstitute.
the class BaseRecalibrationEngine method processRead.
/**
* For each read at this locus get the various covariate values and increment that location in the map based on
* whether or not the base matches the reference at this particular location
*/
public void processRead(final GATKRead originalRead, final ReferenceDataSource refDS, final Iterable<? extends Locatable> knownSites) {
final ReadTransformer transform = makeReadTransform();
final GATKRead read = transform.apply(originalRead);
if (read.isEmpty()) {
// the whole read was inside the adaptor so skip it
return;
}
RecalUtils.parsePlatformForRead(read, readsHeader, recalArgs);
int[] isSNP = new int[read.getLength()];
int[] isInsertion = new int[isSNP.length];
int[] isDeletion = new int[isSNP.length];
//Note: this function modifies the isSNP, isInsertion and isDeletion arguments so it can't be skipped, BAQ or no BAQ
final int nErrors = calculateIsSNPOrIndel(read, refDS, isSNP, isInsertion, isDeletion);
// note for efficiency reasons we don't compute the BAQ array unless we actually have
// some error to marginalize over. For ILMN data ~85% of reads have no error
final byte[] baqArray = (nErrors == 0 || !recalArgs.enableBAQ) ? flatBAQArray(read) : calculateBAQArray(read, refDS);
if (baqArray != null) {
// some reads just can't be BAQ'ed
final ReadCovariates covariates = RecalUtils.computeCovariates(read, readsHeader, this.covariates, true, keyCache);
// skip known sites of variation as well as low quality and non-regular bases
final boolean[] skip = calculateSkipArray(read, knownSites);
final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray);
final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray);
final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray);
// aggregate all of the info into our info object, and update the data
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skip, snpErrors, insertionErrors, deletionErrors);
updateRecalTablesForRead(info);
}
numReadsProcessed++;
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.ReadCovariates in project gatk by broadinstitute.
the class BQSRReadTransformer method apply.
/**
* Recalibrates the base qualities of a read
* <p>
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
* <p>
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
* <p>
* Given the full recalibration table, we perform the following preprocessing steps:
* <p>
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
* <p>
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* @param originalRead the read to recalibrate
*/
@Override
public GATKRead apply(final GATKRead originalRead) {
final GATKRead read = useOriginalBaseQualities ? ReadUtils.resetOriginalBaseQualities(originalRead) : originalRead;
if (emitOriginalQuals && !read.hasAttribute(SAMTag.OQ.name())) {
// Save the old qualities if the tag isn't already taken in the read
try {
read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities()));
} catch (final IllegalArgumentException e) {
throw new MalformedRead(read, "illegal base quality encountered; " + e.getMessage());
}
}
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, header, covariates, false, keyCache);
//clear indel qualities
read.clearAttribute(ReadUtils.BQSR_BASE_INSERTION_QUALITIES);
read.clearAttribute(ReadUtils.BQSR_BASE_DELETION_QUALITIES);
// get the keyset for this base using the error model
final int[][] fullReadKeySet = readCovariates.getKeySet(EventType.BASE_SUBSTITUTION);
// the rg key is constant over the whole read, the global deltaQ is too
final int rgKey = fullReadKeySet[0][0];
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get2Keys(rgKey, BASE_SUBSTITUTION_INDEX);
if (empiricalQualRG == null) {
return read;
}
final byte[] quals = read.getBaseQualities();
final int readLength = quals.length;
final double epsilon = globalQScorePrior > 0.0 ? globalQScorePrior : empiricalQualRG.getEstimatedQReported();
final NestedIntegerArray<RecalDatum> qualityScoreTable = recalibrationTables.getQualityScoreTable();
final List<Byte> quantizedQuals = quantizationInfo.getQuantizedQuals();
//Note: this loop is under very heavy use in applyBQSR. Keep it slim.
for (int offset = 0; offset < readLength; offset++) {
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
if (quals[offset] < preserveQLessThan) {
continue;
}
//clear the array
Arrays.fill(empiricalQualCovsArgs, null);
final int[] keySet = fullReadKeySet[offset];
final RecalDatum empiricalQualQS = qualityScoreTable.get3Keys(keySet[0], keySet[1], BASE_SUBSTITUTION_INDEX);
for (int i = specialCovariateCount; i < totalCovariateCount; i++) {
if (keySet[i] >= 0) {
empiricalQualCovsArgs[i - specialCovariateCount] = recalibrationTables.getTable(i).get4Keys(keySet[0], keySet[1], keySet[i], BASE_SUBSTITUTION_INDEX);
}
}
final double recalibratedQualDouble = hierarchicalBayesianQualityEstimate(epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovsArgs);
final byte recalibratedQualityScore = quantizedQuals.get(getRecalibratedQual(recalibratedQualDouble));
// Bin to static quals
quals[offset] = staticQuantizedMapping == null ? recalibratedQualityScore : staticQuantizedMapping[recalibratedQualityScore];
}
read.setBaseQualities(quals);
return read;
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.ReadCovariates in project gatk by broadinstitute.
the class ReadRecalibrationInfoUnitTest method testReadInfo.
@Test(dataProvider = "InfoProvider")
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
final ReadCovariates covariates = new ReadCovariates(readLength, 2, new CovariateKeyCache());
final byte[] bases = new byte[readLength];
final byte[] baseQuals = new byte[readLength];
final byte[] insertionQuals = new byte[readLength];
final byte[] deletionQuals = new byte[readLength];
final boolean[] skips = new boolean[readLength];
final double[] snpErrors = new double[readLength];
final double[] insertionErrors = new double[readLength];
final double[] deletionsErrors = new double[readLength];
for (int i = 0; i < readLength; i++) {
bases[i] = 'A';
baseQuals[i] = (byte) (i % SAMUtils.MAX_PHRED_SCORE);
insertionQuals[i] = (byte) ((i + 1) % SAMUtils.MAX_PHRED_SCORE);
deletionQuals[i] = (byte) ((i + 2) % SAMUtils.MAX_PHRED_SCORE);
skips[i] = i % 2 == 0;
snpErrors[i] = 1.0 / (i + 1);
insertionErrors[i] = 0.5 / (i + 1);
deletionsErrors[i] = 0.3 / (i + 1);
}
final EnumMap<EventType, double[]> errors = new EnumMap<>(EventType.class);
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
errors.put(EventType.BASE_INSERTION, insertionErrors);
errors.put(EventType.BASE_DELETION, deletionsErrors);
final EnumMap<EventType, byte[]> quals = new EnumMap<>(EventType.class);
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
quals.put(EventType.BASE_INSERTION, insertionQuals);
quals.put(EventType.BASE_DELETION, deletionQuals);
final GATKRead read = ArtificialReadUtils.createArtificialRead(bases, baseQuals, readLength + "M");
if (includeIndelErrors) {
ReadUtils.setInsertionBaseQualities(read, insertionQuals);
ReadUtils.setDeletionBaseQualities(read, deletionQuals);
}
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
Assert.assertEquals(info.getCovariatesValues(), covariates);
Assert.assertEquals(info.getRead(), read);
for (int i = 0; i < readLength; i++) {
Assert.assertEquals(info.skip(i), skips[i]);
for (final EventType et : EventType.values()) {
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i] : ReadUtils.DEFAULT_INSERTION_DELETION_QUAL;
Assert.assertEquals(info.getQual(et, i), expectedQual);
}
}
}
use of org.broadinstitute.hellbender.utils.recalibration.covariates.ReadCovariates in project gatk by broadinstitute.
the class BaseRecalibrationEngine method updateRecalTablesForRead.
/**
* Update the recalibration statistics using the information in recalInfo
* @param recalInfo data structure holding information about the recalibration values for a single read
*/
private void updateRecalTablesForRead(final ReadRecalibrationInfo recalInfo) {
Utils.validate(!finalized, "FinalizeData() has already been called");
final GATKRead read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final NestedIntegerArray<RecalDatum> qualityScoreTable = recalTables.getQualityScoreTable();
final int nCovariates = covariates.size();
final int nSpecialCovariates = covariates.numberOfSpecialCovariates();
final int readLength = read.getLength();
for (int offset = 0; offset < readLength; offset++) {
if (!recalInfo.skip(offset)) {
for (int idx = 0; idx < cachedEventTypes.length; idx++) {
//Note: we loop explicitly over cached values for speed
final EventType eventType = cachedEventTypes[idx];
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.ordinal();
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
final int key0 = keys[0];
final int key1 = keys[1];
RecalUtils.incrementDatumOrPutIfNecessary3keys(qualityScoreTable, qual, isError, key0, key1, eventIndex);
for (int i = nSpecialCovariates; i < nCovariates; i++) {
final int keyi = keys[i];
if (keyi >= 0) {
RecalUtils.incrementDatumOrPutIfNecessary4keys(recalTables.getTable(i), qual, isError, key0, key1, keyi, eventIndex);
}
}
}
}
}
}
Aggregations