Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
From class ApplyVQSR, method doAlleleSpecificFiltering.
/**
 * Calculates the allele-specific filter status of vc.
 * @param vc the variant context to be filtered
 * @param recals the recalibration records that overlap vc
 * @param builder the builder for the output record; modified by adding attributes
 * @return a String with the filter status for this site
 */
private String doAlleleSpecificFiltering(final VariantContext vc, final List<VariantContext> recals, final VariantContextBuilder builder) {
    double bestLod = VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE;
    final List<String> culpritStrings = new ArrayList<>();
    final List<String> lodStrings = new ArrayList<>();
    final List<String> AS_filterStrings = new ArrayList<>();
    String[] prevCulpritList = null;
    String[] prevLodList = null;
    String[] prevASfiltersList = null;
    //get VQSR annotations from a previous run of ApplyRecalibration, if applicable
    if (foundINDELTranches || foundSNPTranches) {
        final String prevCulprits = vc.getAttributeAsString(GATKVCFConstants.AS_CULPRIT_KEY, "");
        prevCulpritList = prevCulprits.isEmpty() ? new String[0] : prevCulprits.split(listPrintSeparator);
        final String prevLodString = vc.getAttributeAsString(GATKVCFConstants.AS_VQS_LOD_KEY, "");
        prevLodList = prevLodString.isEmpty() ? new String[0] : prevLodString.split(listPrintSeparator);
        final String prevASfilters = vc.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "");
        prevASfiltersList = prevASfilters.isEmpty() ? new String[0] : prevASfilters.split(listPrintSeparator);
    }
    //for each alternate allele in the current VariantContext
    for (int altIndex = 0; altIndex < vc.getNAlleles() - 1; altIndex++) {
        final Allele allele = vc.getAlternateAllele(altIndex);
        //if the current allele is not part of this recalibration mode, add its annotations to the list and go on to the next allele
        if (!VariantDataManager.checkVariationClass(vc, allele, MODE)) {
            updateAnnotationsWithoutRecalibrating(altIndex, prevCulpritList, prevLodList, prevASfiltersList, culpritStrings, lodStrings, AS_filterStrings);
            continue;
        }
        //the current allele does need recalibration applied:
        //initialize allele-specific VQSR annotation data with values for a spanning deletion
        String alleleLodString = emptyFloatValue;
        String alleleFilterString = emptyStringValue;
        String alleleCulpritString = emptyStringValue;
        //if it's not a spanning deletion, replace those allele strings with the real values
        if (!GATKVCFConstants.isSpanningDeletion(allele)) {
            final VariantContext recalDatum = getMatchingRecalVC(vc, recals, allele);
            if (recalDatum == null) {
                throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
            }
            //compare VQSLODs for all alleles in the current mode for filtering later
            final double lod = recalDatum.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE);
            if (lod > bestLod) {
                bestLod = lod;
            }
            alleleLodString = String.format("%.4f", lod);
            alleleFilterString = generateFilterString(lod);
            alleleCulpritString = recalDatum.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");
            //recalDatum is guaranteed non-null here, so the training labels can be copied directly
            if (recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY)) {
                builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
            }
            if (recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY)) {
                builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
            }
        }
        //append the per-allele VQSR annotations
        lodStrings.add(alleleLodString);
        AS_filterStrings.add(alleleFilterString);
        culpritStrings.add(alleleCulpritString);
    }
    // Annotate the new record with its VQSLOD, AS_FilterStatus, and the worst performing annotation
    if (!AS_filterStrings.isEmpty()) {
        builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(AS_filterStrings));
    }
    if (!lodStrings.isEmpty()) {
        builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
    }
    if (!culpritStrings.isEmpty()) {
        builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));
    }
    return generateFilterStringFromAlleles(vc, bestLod);
}
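Each per-allele value computed above travels as a single delimited INFO string and is re-encoded with AnnotationUtils.encodeStringList on the way out. A minimal standalone sketch of that round trip, assuming a pipe delimiter (the actual listPrintSeparator is defined elsewhere in ApplyVQSR, and the helper names here are hypothetical):

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

public class AsAnnotationRoundTrip {
    // hypothetical stand-in for ApplyVQSR's listPrintSeparator
    private static final String SEPARATOR = "|";

    // encode one value per alternate allele into a single INFO string
    static String encode(final List<String> perAlleleValues) {
        return String.join(SEPARATOR, perAlleleValues);
    }

    // split a previously written INFO string back into per-allele values,
    // quoting the separator in case it is a regex metacharacter like '|'
    static String[] decode(final String infoValue) {
        return infoValue.isEmpty() ? new String[0] : infoValue.split(Pattern.quote(SEPARATOR));
    }

    public static void main(final String[] args) {
        final String encoded = encode(Arrays.asList("12.3456", "-4.0000"));
        System.out.println(encoded);                          // 12.3456|-4.0000
        System.out.println(Arrays.toString(decode(encoded))); // [12.3456, -4.0000]
    }
}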
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
From class ASEReadCounter, method filterPileup.
private ReadPileup filterPileup(final ReadPileup originalPileup, final CountPileupType countType) {
    final SAMFileHeader header = getHeaderForReads();
    final ReadPileup pileupWithDeletions;
    switch (countType) {
        case COUNT_FRAGMENTS_REQUIRE_SAME_BASE:
            pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(true, ReadPileup.baseQualTieBreaker, header);
            break;
        case COUNT_READS:
            pileupWithDeletions = originalPileup;
            break;
        case COUNT_FRAGMENTS:
            pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(false, ReadPileup.baseQualTieBreaker, header);
            break;
        default:
            throw new UserException("Must use valid CountPileupType");
    }
    //deletion elements are excluded regardless of the count type
    return pileupWithDeletions.makeFilteredPileup(p -> !p.isDeletion());
}
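The three cases differ only in how overlapping mates of the same fragment are reconciled before deletion elements are dropped. A standalone sketch of that reconciliation under stated assumptions: the Element type is hypothetical, at most two elements (one read pair) share a fragment name, and ties go to the higher base quality, mirroring ReadPileup.baseQualTieBreaker:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class FragmentFilterSketch {
    // hypothetical pileup element at a single locus
    record Element(String fragmentName, char base, int baseQuality, boolean isDeletion) {}

    static List<Element> filterFragments(final List<Element> pileup, final boolean requireSameBase) {
        final Map<String, Element> byFragment = new LinkedHashMap<>();
        for (final Element e : pileup) {
            final Element prev = byFragment.get(e.fragmentName());
            if (prev == null) {
                byFragment.put(e.fragmentName(), e);
            } else if (requireSameBase && prev.base() != e.base()) {
                byFragment.remove(e.fragmentName()); // discordant mates: drop the whole fragment
            } else if (e.baseQuality() > prev.baseQuality()) {
                byFragment.put(e.fragmentName(), e); // keep the higher-quality observation
            }
        }
        // deletions are excluded afterwards, as in makeFilteredPileup(p -> !p.isDeletion())
        return byFragment.values().stream().filter(e -> !e.isDeletion()).toList();
    }
}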
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.
From class CoverageModelEMWorkspace, method getBiasCovariatesRegularizedLinearOperators.
/**
 * Returns the pair of (linear operator, preconditioner) invoked in the M-step update of bias
 * covariates with regularization.
 *
 * @return a pair of (linear operator, preconditioner)
 */
@EvaluatesRDD
private ImmutablePair<GeneralLinearOperator<INDArray>, GeneralLinearOperator<INDArray>> getBiasCovariatesRegularizedLinearOperators() {
    final INDArray Q_ll, Q_tll, Z_ll;
    final GeneralLinearOperator<INDArray> linop, precond;
    final FourierLinearOperatorNDArray regularizerFourierLinearOperator = createRegularizerFourierLinearOperator();
    switch (config.getWSolverType()) {
        case LOCAL:
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Q_tll = fetchFromWorkers(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Q_tll, 0);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the local implementation of the linear operators */
            linop = new CoverageModelWLinearOperatorLocal(Q_tll, Z_ll, regularizerFourierLinearOperator);
            precond = new CoverageModelWPreconditionerLocal(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets);
            return ImmutablePair.of(linop, precond);
        case SPARK:
            if (!sparkContextIsAvailable) {
                throw new UserException("The Spark W solver is only available in Spark mode");
            }
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the Spark implementation of the linear operators */
            linop = new CoverageModelWLinearOperatorSpark(Z_ll, regularizerFourierLinearOperator, numTargets, ctx, computeRDD, targetBlocks);
            precond = new CoverageModelWPreconditionerSpark(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets, ctx, numTargetBlocks);
            return ImmutablePair.of(linop, precond);
        default:
            throw new IllegalArgumentException("The solver type for the M-step update of bias covariates is not properly set");
    }
}
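The (linop, precond) pair is the standard input to a preconditioned Krylov-subspace solver. A minimal preconditioned conjugate gradient sketch showing how such a pair is typically consumed, with double[] standing in for INDArray; this is illustrative rather than GATK's actual solver, and assumes the operator A is symmetric positive definite with M approximating its inverse:

import java.util.function.UnaryOperator;

public class PcgSketch {
    // solve A x = b with preconditioner M, starting from x = 0
    static double[] solve(final UnaryOperator<double[]> A, final UnaryOperator<double[]> M,
                          final double[] b, final int maxIter, final double tol) {
        final int n = b.length;
        final double[] x = new double[n];
        double[] r = b.clone();          // residual r = b - A x = b for x = 0
        double[] z = M.apply(r);         // preconditioned residual
        double[] p = z.clone();          // search direction
        double rz = dot(r, z);
        for (int k = 0; k < maxIter && Math.sqrt(dot(r, r)) > tol; k++) {
            final double[] Ap = A.apply(p);
            final double alpha = rz / dot(p, Ap);
            for (int i = 0; i < n; i++) { x[i] += alpha * p[i]; r[i] -= alpha * Ap[i]; }
            z = M.apply(r);
            final double rzNew = dot(r, z);
            final double beta = rzNew / rz;
            for (int i = 0; i < n; i++) { p[i] = z[i] + beta * p[i]; }
            rz = rzNew;
        }
        return x;
    }

    private static double dot(final double[] a, final double[] b) {
        double s = 0;
        for (int i = 0; i < a.length; i++) { s += a[i] * b[i]; }
        return s;
    }
}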
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.
From class AllelicCountCollector, method collect.
/**
* Returns an {@link AllelicCountCollection} based on the pileup at sites (specified by an interval list)
* in a sorted BAM file. Reads and bases below the specified mapping quality and base quality, respectively,
* are filtered out of the pileup. The alt count is defined as the total count minus the ref count, and the
* alt nucleotide is defined as the non-ref base with the highest count, with ties broken by the order of the
* bases in {@link AllelicCountCollector#BASES}.
* @param bamFile sorted BAM file
* @param siteIntervals interval list of sites
* @param minMappingQuality minimum mapping quality required for reads to be included in pileup
* @param minBaseQuality minimum base quality required for bases to be included in pileup
* @return AllelicCountCollection of ref/alt counts at sites in BAM file
*/
public AllelicCountCollection collect(final File bamFile, final IntervalList siteIntervals, final int minMappingQuality, final int minBaseQuality) {
    //validate the arguments before touching the file
    ParamUtils.isPositiveOrZero(minMappingQuality, "Minimum mapping quality must be nonnegative.");
    ParamUtils.isPositiveOrZero(minBaseQuality, "Minimum base quality must be nonnegative.");
    try (final SamReader reader = readerFactory.open(bamFile)) {
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException.BadInput("BAM file " + bamFile + " must be coordinate sorted.");
        }
        final int numberOfSites = siteIntervals.size();
        final boolean useIndex = numberOfSites < MAX_INTERVALS_FOR_INDEX;
        final SamLocusIterator locusIterator = new SamLocusIterator(reader, siteIntervals, useIndex);
        //set read and locus filters [note: read counts match IGV, but are off by a few from pysam.mpileup]
        final List<SamRecordFilter> samFilters = Arrays.asList(new NotPrimaryAlignmentFilter(), new DuplicateReadFilter());
        locusIterator.setSamFilters(samFilters);
        locusIterator.setEmitUncoveredLoci(true);
        locusIterator.setIncludeNonPfReads(false);
        locusIterator.setMappingQualityScoreCutoff(minMappingQuality);
        locusIterator.setQualityScoreCutoff(minBaseQuality);
        logger.info("Examining " + numberOfSites + " sites in total...");
        int locusCount = 0;
        final AllelicCountCollection counts = new AllelicCountCollection();
        for (final SamLocusIterator.LocusInfo locus : locusIterator) {
            if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
                logger.info("Examined " + locusCount + " sites.");
            }
            locusCount++;
            final Nucleotide refBase = Nucleotide.valueOf(referenceWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
            if (!BASES.contains(refBase)) {
                logger.warn(String.format("The reference position at %d has an unknown base call (value: %s). Skipping...", locus.getPosition(), refBase.toString()));
                continue;
            }
            final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
            //only include total ACGT counts in the binomial test (exclude N, etc.)
            final int totalBaseCount = BASES.stream().mapToInt(b -> (int) baseCounts.get(b)).sum();
            final int refReadCount = (int) baseCounts.get(refBase);
            //we take alt = total - ref instead of the actual alt count
            final int altReadCount = totalBaseCount - refReadCount;
            final Nucleotide altBase = inferAltFromPileupBaseCounts(baseCounts, refBase);
            counts.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), refReadCount, altReadCount, refBase, altBase));
        }
        logger.info(locusCount + " sites out of " + numberOfSites + " total sites were examined.");
        return counts;
    } catch (final IOException | SAMFormatException e) {
        throw new UserException("Unable to collect allelic counts from " + bamFile, e);
    }
}
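A hedged usage sketch of collect (statements only; htsjdk and GATK imports are assumed to be in scope). The paths and quality cutoffs are illustrative, and the AllelicCountCollector constructor shown here is an assumption: the collector needs a reference FASTA for the refBase lookups performed via referenceWalker, so it is presumed to take one at construction:

// All paths and constructor arguments below are hypothetical.
final File bam = new File("tumor.bam");
final IntervalList sites = IntervalList.fromFile(new File("het_sites.interval_list"));
final AllelicCountCollector collector = new AllelicCountCollector(new File("reference.fasta"), ValidationStringency.LENIENT);
// minimum mapping quality 30, minimum base quality 20
final AllelicCountCollection counts = collector.collect(bam, sites, 30, 20);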
Use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.
From class CoverageModelEMWorkspace, method getBiasCovariatesRegularizedLinearOperators.
/**
 * Returns the pair of (linear operator, preconditioner) invoked in the M-step update of bias
 * covariates with regularization.
 *
 * @return a pair of (linear operator, preconditioner)
 */
@EvaluatesRDD
private ImmutablePair<GeneralLinearOperator<INDArray>, GeneralLinearOperator<INDArray>> getBiasCovariatesRegularizedLinearOperators() {
    final INDArray Q_ll, Q_tll, Z_ll;
    final GeneralLinearOperator<INDArray> linop, precond;
    final FourierLinearOperatorNDArray regularizerFourierLinearOperator = createRegularizerFourierLinearOperator();
    switch (config.getWSolverType()) {
        case LOCAL:
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Q_tll = fetchFromWorkers(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Q_tll, 0);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the local implementation of the linear operators */
            linop = new CoverageModelWLinearOperatorLocal(Q_tll, Z_ll, regularizerFourierLinearOperator);
            precond = new CoverageModelWPreconditionerLocal(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets);
            return ImmutablePair.of(linop, precond);
        case SPARK:
            if (!sparkContextIsAvailable) {
                throw new UserException("The Spark W solver is only available in Spark mode");
            }
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the Spark implementation of the linear operators */
            linop = new CoverageModelWLinearOperatorSpark(Z_ll, regularizerFourierLinearOperator, numTargets, ctx, computeRDD, targetBlocks);
            precond = new CoverageModelWPreconditionerSpark(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets, ctx, numTargetBlocks);
            return ImmutablePair.of(linop, precond);
        default:
            throw new IllegalArgumentException("The solver type for the M-step update of bias covariates is not properly set");
    }
}