Use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute: class BAQ, method encodeBQTag.
/**
 * Encodes per-base BAQ values into the SAM "BQ" tag string for {@code read}.
 *
 * <p>By the convention used here, BQ_i = Q_i - BAQ_i + 64, where Q_i is the i-th base
 * quality and BAQ_i is the i-th base alignment quality, so BAQ_i can later be recovered
 * as Q_i - (BQ_i - 64). The encoded tag has the same length as the read sequence.</p>
 *
 * @param read the read whose base qualities are combined with the BAQ values; assumed to
 *             have at least {@code baq.length} base qualities — TODO confirm callers guarantee this
 * @param baq  per-base BAQ values, one per read base
 * @return the encoded BQ tag value
 * @throws GATKException if a BAQ value exceeds the corresponding base quality
 *         (an internal calculation error, not a user problem)
 * @throws UserException.MisencodedQualityScoresRead if an encoded value exceeds
 *         {@link Byte#MAX_VALUE}, almost certainly because the BAM used the wrong
 *         quality-score encoding
 */
public static String encodeBQTag(GATKRead read, byte[] baq) {
    final byte[] bqTag = new byte[baq.length];
    final byte[] baseQualities = read.getBaseQualities();
    for (int i = 0; i < bqTag.length; i++) {
        // BQi = Qi - BAQi + 64
        final int bq = baseQualities[i] + 64;
        final int baq_i = baq[i];
        final int tag = bq - baq_i;
        // problem with the calculation of the correction factor; this is our problem
        if (tag < 0) {
            throw new GATKException("BAQ tag calculation error. BAQ value above base quality at " + read);
        }
        // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
        if (tag > Byte.MAX_VALUE) {
            throw new UserException.MisencodedQualityScoresRead(read, "we encountered an extremely high quality score (" + (int) baseQualities[i] + ") with BAQ correction factor of " + baq_i);
        }
        bqTag[i] = (byte) tag;
    }
    // Use a fixed single-byte charset instead of the platform default so the byte->char
    // mapping is deterministic across JVMs and locales (ISO-8859-1 maps bytes 1:1).
    return new String(bqTag, java.nio.charset.StandardCharsets.ISO_8859_1);
}
Use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute: class ClippingOp, method applySOFTCLIP_BASES.
/**
 * Soft-clips the bases in the interval [start, stop] of {@code readCopied} by rewriting
 * its cigar, and shifts the alignment start when leading bases are clipped.
 *
 * <p>NOTE(review): {@code start} and {@code stop} are fields of the enclosing ClippingOp;
 * from this block alone they appear to be 0-based, inclusive read coordinates with the
 * clip anchored at one end of the read — confirm against the class's constructor/usage.</p>
 *
 * @param readCopied a mutable copy of the read to clip; modified in place and returned
 * @param runAsserts whether the {@code softClip} helper should run its internal assertions
 * @return the same {@code readCopied} instance with updated cigar and position
 * @throws UserException if the read is unmapped (cannot be soft clipped)
 * @throws GATKException if the requested clip falls in the middle of the read
 */
private GATKRead applySOFTCLIP_BASES(final GATKRead readCopied, final boolean runAsserts) {
    if (readCopied.isUnmapped()) {
        // we can't process unmapped reads
        throw new UserException("Read Clipper cannot soft clip unmapped reads");
    }
    int myStop = stop;
    if ((stop + 1 - start) == readCopied.getLength()) {
        // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
        // just decrement stop
        myStop--;
    }
    // A soft clip must be anchored at one end: either it starts at base 0 or it runs to the last base.
    if (start > 0 && myStop != readCopied.getLength() - 1) {
        throw new GATKException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", readCopied.getName(), start, myStop));
    }
    final Cigar oldCigar = readCopied.getCigar();
    // scLeft/scRight delimit the region kept as aligned bases; one side collapses to the clip boundary.
    int scLeft = 0;
    int scRight = readCopied.getLength();
    if (start == 0) {
        scLeft = myStop + 1;
    } else {
        scRight = start;
    }
    final Cigar newCigar = softClip(oldCigar, scLeft, scRight, runAsserts);
    readCopied.setCigar(newCigar);
    // Clipping leading bases moves the alignment start; recompute the offset from the cigar change.
    final int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
    final int newStart = readCopied.getStart() + newClippedStart;
    readCopied.setPosition(readCopied.getContig(), newStart);
    return readCopied;
}
Use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute: class ReferenceAPISourceUnitTest, method setupPipeline.
/**
 * Builds a Dataflow test pipeline configured for the GCP test project, attaching
 * serialized credentials (when an API key is available) so the reference can be read later.
 *
 * @return a {@code TestPipeline} created from {@code GATKGCSOptions}
 * @throws GATKException if creating or serializing the GCS credentials fails
 */
private Pipeline setupPipeline() {
    // We create GATKGCSOptions instead of DataflowPipelineOptions to keep track of the secrets
    // so we can read the reference.
    final GATKGCSOptions options = PipelineOptionsFactory.as(GATKGCSOptions.class);
    options.setProject(getGCPTestProject());
    // Credentials are optional: without an API key the pipeline is still created, just unauthenticated.
    if (getGCPTestApiKey() != null) {
        options.setApiKey(getGCPTestApiKey());
        // put a serialized version of the credentials in the pipelineOptions, so we can get to it later.
        try {
            GenomicsFactory.OfflineAuth auth = GCSOptions.Methods.createGCSAuth(options);
            GATKGCSOptions.Methods.setOfflineAuth(options, auth);
        } catch (Exception x) {
            // Wrap with the cause preserved so the original auth failure is not lost.
            throw new GATKException("Error with credentials", x);
        }
    }
    // We don't use GATKTestPipeline because we need specific options.
    return TestPipeline.create(options);
}
Use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk-protected by broadinstitute: class RCBSSegmenter, method writeSegmentFile.
/**
 * Create a segmentation file using CBS in R.
 *
 * <p>https://www.bioconductor.org/packages/release/bioc/manuals/DNAcopy/man/DNAcopy.pdf</p>
 *
 * <p>Please see the above documentation for a more detailed description of the parameters.</p>
 *
 * <p>IMPORTANT: There is no check that the weights have the same number of entries as the tnFile.</p>
 *
 * Wraps the call:
 *  segment(x, weights = NULL, alpha = 0.01, nperm = 10000, p.method =
 *      c("hybrid", "perm"), min.width=2, kmax=25, nmin=200,
 *      eta=0.05, sbdry=NULL, trim = 0.025, undo.splits =
 *      c("none", "prune", "sdundo"), undo.prune=0.05,
 *      undo.SD=3, verbose=1)
 *
 * <p> Note that sbdry and verbose are unavailable through this interface. </p>
 *
 * @param sampleName Name of the sample being run through the segmenter. Never {@code null}
 * @param tnFile Tangent-normalized targets file. Never {@code null}
 * @param outputFile Full path to the outputted segment file. Never {@code null}
 * @param log whether the tnFile input has already been put into log2CR. Never {@code null}
 * @param weightFile File containing weights for each target (doubles; one per line). Must be the same length as what is in the tnFile (note that this is not
 *                   enforced here). Typically, 1/var(target) is what is used. All values must be greater than 0.
 *                   Use {@code null} if weighting is not desired. These values should not be log space.
 * @param alpha significance level for the test to accept change-points (DNAcopy {@code alpha})
 * @param nperm number of permutations used for p-value computation (DNAcopy {@code nperm})
 * @param pmethod p-value computation method (DNAcopy {@code p.method})
 * @param minWidth minimum length for a segment (DNAcopy {@code min.width})
 * @param kmax maximum width of smaller segment for permutation (DNAcopy {@code kmax})
 * @param nmin minimum length of data for maximum statistic approximation (DNAcopy {@code nmin})
 * @param eta type I error rate of the sequential boundary (DNAcopy {@code eta})
 * @param trim proportion of data to be trimmed for variance calculation (DNAcopy {@code trim})
 * @param undoSplits whether/how change-points are undone (DNAcopy {@code undo.splits})
 * @param undoPrune significance level used when {@code undoSplits} is prune (DNAcopy {@code undo.prune})
 * @param undoSD number of SDs between means to keep a split when {@code undoSplits} is sdundo (DNAcopy {@code undo.SD})
 * @throws GATKException if any weight in {@code weightFile} is zero, negative, or not finite
 */
public static void writeSegmentFile(final String sampleName, final String tnFile, final String outputFile, final Boolean log, final File weightFile, final double alpha, final int nperm, final PMethod pmethod, final int minWidth, final int kmax, final int nmin, final double eta, final double trim, final UndoSplits undoSplits, final double undoPrune, final int undoSD) {
    // NOTE: log is auto-unboxed here, so a null value throws NPE (javadoc promises non-null).
    final String logArg = log ? "TRUE" : "FALSE";
    final RScriptExecutor executor = new RScriptExecutor();
    executor.addScript(new Resource(R_SCRIPT, RCBSSegmenter.class));
    /*--args is needed for Rscript to recognize other arguments properly*/
    executor.addArgs("--args", "--sample_name=" + sampleName, "--targets_file=" + tnFile, "--output_file=" + outputFile, "--log2_input=" + logArg, "--min_width=" + String.valueOf(minWidth), "--alpha=" + String.valueOf(alpha), "--nperm=" + String.valueOf(nperm), "--pmethod=" + pmethod.toString(), "--kmax=" + String.valueOf(kmax), "--nmin=" + String.valueOf(nmin), "--eta=" + String.valueOf(eta), "--trim=" + String.valueOf(trim), "--undosplits=" + undoSplits.toString(), "--undoprune=" + String.valueOf(undoPrune), "--undoSD=" + String.valueOf(undoSD));
    if (weightFile != null) {
        final double[] weights = ParamUtils.readValuesFromFile(weightFile);
        // Weights must be strictly positive and finite; Double.isFinite already excludes NaN,
        // so no separate isNaN test is needed.
        if (!DoubleStream.of(weights).allMatch(d -> d > 0 && Double.isFinite(d))) {
            throw new GATKException("A weight for a target was zero, negative, or not finite, which is not allowed. If you truly want zero, you must remove the target from consideration.");
        }
        // Add the argument to specify weights
        executor.addArgs("--weights_file=" + weightFile.getAbsolutePath());
    }
    executor.exec();
}
Use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute: class GeneralPloidyExactAFCalculator, method computeLofK.
/**
 * Compute likelihood of a particular AC conformation and update AFresult object
 * @param set Set of AC counts to compute
 * @param firstGLs Original pool likelihoods before combining
 * @param secondGL New GL vector with additional pool
 * @param log10AlleleFrequencyPriors Allele frequency priors
 * @param numAlleles Number of alleles (including ref)
 * @param ploidy1 Ploidy of original pool (combined)
 * @param ploidy2 Ploidy of new pool
 * @param stateTracker tracker updated in place with the AF=0 likelihood/posterior and MLE/MAP candidates
 * @return log-likelihood of requested conformation
 */
private double computeLofK(final ExactACset set, final CombinedPoolLikelihoods firstGLs, final double[] secondGL, final double[] log10AlleleFrequencyPriors, final int numAlleles, final int ploidy1, final int ploidy2, final StateTracker stateTracker) {
    final int newPloidy = ploidy1 + ploidy2;
    // sanity check: the conformation's total allele count must equal the combined ploidy
    int totalAltK = set.getACsum();
    if (newPloidy != totalAltK) {
        throw new GATKException("BUG: inconsistent sizes of set.getACsum and passed ploidy values");
    }
    // subtract the ref-allele count (index 0) to get the total ALT allele count
    totalAltK -= set.getACcounts().getCounts()[0];
    // special case for k = 0 over all k
    if (totalAltK == 0) {
        // all-ref case
        final double log10Lof0 = firstGLs.getGLOfACZero() + secondGL[HOM_REF_INDEX];
        set.getLog10Likelihoods()[0] = log10Lof0;
        stateTracker.setLog10LikelihoodOfAFzero(log10Lof0);
        stateTracker.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]);
        return log10Lof0;
    } else {
        // initialize result with denominator
        // ExactACset holds by convention the conformation of all alleles, and the sum of all allele count is just the ploidy.
        // To compute n!/k1!k2!k3!... we need to compute first n!/(k2!k3!...) and then further divide by k1! where k1=ploidy-sum_k_i
        final int[] currentCount = set.getACcounts().getCounts();
        final double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy2, numAlleles);
        // Marginalize over every possible genotype of the new pool; each genotype splits the
        // target conformation into (new pool counts, remaining counts for the original pool).
        for (int PLIndex = 0; PLIndex < glCalc.genotypeCount(); PLIndex++) {
            final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(PLIndex);
            final int[] acCount2 = alleleCounts.alleleCountsByIndex(numAlleles - 1);
            final int[] acCount1 = MathUtils.vectorDiff(currentCount, acCount2);
            // for conformation to be valid, all elements of g2 have to be <= elements of current AC set
            if (isValidConformation(acCount1, ploidy1) && firstGLs.hasConformation(acCount1)) {
                final double gl2 = secondGL[PLIndex];
                if (!Double.isInfinite(gl2)) {
                    // accumulate log10( L1 * L2 * C(ploidy1; acCount1) * C(ploidy2; acCount2) ) in log space
                    final double firstGL = firstGLs.getLikelihoodOfConformation(acCount1);
                    final double num1 = MathUtils.log10MultinomialCoefficient(ploidy1, acCount1);
                    final double num2 = MathUtils.log10MultinomialCoefficient(ploidy2, acCount2);
                    final double sum = firstGL + gl2 + num1 + num2;
                    set.getLog10Likelihoods()[0] = MathUtils.approximateLog10SumLog10(set.getLog10Likelihoods()[0], sum);
                }
            }
        }
        // apply the multinomial denominator once, after the sum over genotypes
        set.getLog10Likelihoods()[0] += denom;
    }
    double log10LofK = set.getLog10Likelihoods()[0];
    // update the MLE if necessary
    final int[] altCounts = Arrays.copyOfRange(set.getACcounts().getCounts(), 1, set.getACcounts().getCounts().length);
    // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
    stateTracker.updateMLEifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
    // apply the priors over each alternate allele
    for (final int ACcount : altCounts) {
        if (ACcount > 0) {
            log10LofK += log10AlleleFrequencyPriors[ACcount];
        }
    }
    // TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
    stateTracker.updateMAPifNeeded(Math.max(log10LofK, -Double.MAX_VALUE), altCounts);
    return log10LofK;
}
Aggregations