Search in sources :

Example 71 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ApplyVQSR method doAlleleSpecificFiltering.

/**
     * Calculate the allele-specific filter status of vc
     * @param vc
     * @param recals
     * @param builder   is modified by adding attributes
     * @return a String with the filter status for this site
     */
private String doAlleleSpecificFiltering(final VariantContext vc, final List<VariantContext> recals, final VariantContextBuilder builder) {
    double bestLod = VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE;
    final List<String> culpritStrings = new ArrayList<>();
    final List<String> lodStrings = new ArrayList<>();
    final List<String> AS_filterStrings = new ArrayList<>();
    String[] prevCulpritList = null;
    String[] prevLodList = null;
    String[] prevASfiltersList = null;
    //get VQSR annotations from previous run of ApplyRecalibration, if applicable
    if (foundINDELTranches || foundSNPTranches) {
        final String prevCulprits = vc.getAttributeAsString(GATKVCFConstants.AS_CULPRIT_KEY, "");
        prevCulpritList = prevCulprits.isEmpty() ? new String[0] : prevCulprits.split(listPrintSeparator);
        final String prevLodString = vc.getAttributeAsString(GATKVCFConstants.AS_VQS_LOD_KEY, "");
        prevLodList = prevLodString.isEmpty() ? new String[0] : prevLodString.split(listPrintSeparator);
        final String prevASfilters = vc.getAttributeAsString(GATKVCFConstants.AS_FILTER_STATUS_KEY, "");
        prevASfiltersList = prevASfilters.isEmpty() ? new String[0] : prevASfilters.split(listPrintSeparator);
    }
    //for each allele in the current VariantContext
    for (int altIndex = 0; altIndex < vc.getNAlleles() - 1; altIndex++) {
        final Allele allele = vc.getAlternateAllele(altIndex);
        //if the current allele is not part of this recalibration mode, add its annotations to the list and go to the next allele
        if (!VariantDataManager.checkVariationClass(vc, allele, MODE)) {
            updateAnnotationsWithoutRecalibrating(altIndex, prevCulpritList, prevLodList, prevASfiltersList, culpritStrings, lodStrings, AS_filterStrings);
            continue;
        }
        //if the current allele does need to have recalibration applied...
        //initialize allele-specific VQSR annotation data with values for spanning deletion
        String alleleLodString = emptyFloatValue;
        String alleleFilterString = emptyStringValue;
        String alleleCulpritString = emptyStringValue;
        //if it's not a spanning deletion, replace those allele strings with the real values
        if (!GATKVCFConstants.isSpanningDeletion(allele)) {
            VariantContext recalDatum = getMatchingRecalVC(vc, recals, allele);
            if (recalDatum == null) {
                throw new UserException("Encountered input allele which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants with flag -AS. First seen at: " + vc);
            }
            //compare VQSLODs for all alleles in the current mode for filtering later
            final double lod = recalDatum.getAttributeAsDouble(GATKVCFConstants.VQS_LOD_KEY, VariantRecalibratorEngine.MIN_ACCEPTABLE_LOD_SCORE);
            if (lod > bestLod)
                bestLod = lod;
            alleleLodString = String.format("%.4f", lod);
            alleleFilterString = generateFilterString(lod);
            alleleCulpritString = recalDatum.getAttributeAsString(GATKVCFConstants.CULPRIT_KEY, ".");
            if (recalDatum != null) {
                if (recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
                if (recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY))
                    builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
            }
        }
        //append per-allele VQSR annotations
        lodStrings.add(alleleLodString);
        AS_filterStrings.add(alleleFilterString);
        culpritStrings.add(alleleCulpritString);
    }
    // Annotate the new record with its VQSLOD, AS_FilterStatus, and the worst performing annotation
    if (!AS_filterStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, AnnotationUtils.encodeStringList(AS_filterStrings));
    if (!lodStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_VQS_LOD_KEY, AnnotationUtils.encodeStringList(lodStrings));
    if (!culpritStrings.isEmpty())
        builder.attribute(GATKVCFConstants.AS_CULPRIT_KEY, AnnotationUtils.encodeStringList(culpritStrings));
    return generateFilterStringFromAlleles(vc, bestLod);
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) VariantContext(htsjdk.variant.variantcontext.VariantContext) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 72 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class ASEReadCounter method filterPileup.

private ReadPileup filterPileup(final ReadPileup originalPileup, final CountPileupType countType) {
    SAMFileHeader header = getHeaderForReads();
    final ReadPileup pileupWithDeletions;
    switch(countType) {
        case COUNT_FRAGMENTS_REQUIRE_SAME_BASE:
            pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(true, ReadPileup.baseQualTieBreaker, header);
            break;
        case COUNT_READS:
            pileupWithDeletions = originalPileup;
            break;
        case COUNT_FRAGMENTS:
            pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(false, ReadPileup.baseQualTieBreaker, header);
            break;
        default:
            throw new UserException("Must use valid CountPileupType");
    }
    return pileupWithDeletions.makeFilteredPileup(p -> !p.isDeletion());
}
Also used : ReadPileup(org.broadinstitute.hellbender.utils.pileup.ReadPileup) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 73 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.

the class CoverageModelEMWorkspace method getBiasCovariatesRegularizedLinearOperators.

/**
     * Returns the pair of (linear operator, preconditioner) invoked in the M-step update of bias
     * covariates in w/ regularization
     *
     * @return a pair of linear operators
     */
@EvaluatesRDD
private ImmutablePair<GeneralLinearOperator<INDArray>, GeneralLinearOperator<INDArray>> getBiasCovariatesRegularizedLinearOperators() {
    final INDArray Q_ll, Q_tll, Z_ll;
    final GeneralLinearOperator<INDArray> linop, precond;
    final FourierLinearOperatorNDArray regularizerFourierLinearOperator = createRegularizerFourierLinearOperator();
    switch(config.getWSolverType()) {
        case LOCAL:
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Q_tll = fetchFromWorkers(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Q_tll, 0);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the local implementation of linear operators */
            linop = new CoverageModelWLinearOperatorLocal(Q_tll, Z_ll, regularizerFourierLinearOperator);
            precond = new CoverageModelWPreconditionerLocal(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets);
            return ImmutablePair.of(linop, precond);
        case SPARK:
            if (!sparkContextIsAvailable) {
                throw new UserException("The Spark W solver is only available in the Spark mode");
            }
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the spark implementation of linear operators */
            linop = new CoverageModelWLinearOperatorSpark(Z_ll, regularizerFourierLinearOperator, numTargets, ctx, computeRDD, targetBlocks);
            precond = new CoverageModelWPreconditionerSpark(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets, ctx, numTargetBlocks);
            return ImmutablePair.of(linop, precond);
        default:
            throw new IllegalArgumentException("The solver type for M-step update of bias covariates" + " is not properly set");
    }
}
Also used : FourierLinearOperatorNDArray(org.broadinstitute.hellbender.tools.coveragemodel.linalg.FourierLinearOperatorNDArray) INDArray(org.nd4j.linalg.api.ndarray.INDArray) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 74 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk-protected by broadinstitute.

the class AllelicCountCollector method collect.

/**
     * Returns an {@link AllelicCountCollection} based on the pileup at sites (specified by an interval list)
     * in a sorted BAM file.  Reads and bases below the specified mapping quality and base quality, respectively,
     * are filtered out of the pileup.  The alt count is defined as the total count minus the ref count, and the
     * alt nucleotide is defined as the non-ref base with the highest count, with ties broken by the order of the
     * bases in {@link AllelicCountCollector#BASES}.
     * @param bamFile           sorted BAM file
     * @param siteIntervals     interval list of sites
     * @param minMappingQuality minimum mapping quality required for reads to be included in pileup
     * @param minBaseQuality    minimum base quality required for bases to be included in pileup
     * @return                  AllelicCountCollection of ref/alt counts at sites in BAM file
     */
public AllelicCountCollection collect(final File bamFile, final IntervalList siteIntervals, final int minMappingQuality, final int minBaseQuality) {
    try (final SamReader reader = readerFactory.open(bamFile)) {
        ParamUtils.isPositiveOrZero(minMappingQuality, "Minimum mapping quality must be nonnegative.");
        ParamUtils.isPositiveOrZero(minBaseQuality, "Minimum base quality must be nonnegative.");
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException.BadInput("BAM file " + bamFile.toString() + " must be coordinate sorted.");
        }
        final int numberOfSites = siteIntervals.size();
        final boolean useIndex = numberOfSites < MAX_INTERVALS_FOR_INDEX;
        final SamLocusIterator locusIterator = new SamLocusIterator(reader, siteIntervals, useIndex);
        //set read and locus filters [note: read counts match IGV, but off by a few from pysam.mpileup]
        final List<SamRecordFilter> samFilters = Arrays.asList(new NotPrimaryAlignmentFilter(), new DuplicateReadFilter());
        locusIterator.setSamFilters(samFilters);
        locusIterator.setEmitUncoveredLoci(true);
        locusIterator.setIncludeNonPfReads(false);
        locusIterator.setMappingQualityScoreCutoff(minMappingQuality);
        locusIterator.setQualityScoreCutoff(minBaseQuality);
        logger.info("Examining " + numberOfSites + " sites in total...");
        int locusCount = 0;
        final AllelicCountCollection counts = new AllelicCountCollection();
        for (final SamLocusIterator.LocusInfo locus : locusIterator) {
            if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
                logger.info("Examined " + locusCount + " sites.");
            }
            locusCount++;
            final Nucleotide refBase = Nucleotide.valueOf(referenceWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
            if (!BASES.contains(refBase)) {
                logger.warn(String.format("The reference position at %d has an unknown base call (value: %s). Skipping...", locus.getPosition(), refBase.toString()));
                continue;
            }
            final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
            //only include total ACGT counts in binomial test (exclude N, etc.)
            final int totalBaseCount = BASES.stream().mapToInt(b -> (int) baseCounts.get(b)).sum();
            final int refReadCount = (int) baseCounts.get(refBase);
            //we take alt = total - ref instead of the actual alt count
            final int altReadCount = totalBaseCount - refReadCount;
            final Nucleotide altBase = inferAltFromPileupBaseCounts(baseCounts, refBase);
            counts.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), refReadCount, altReadCount, refBase, altBase));
        }
        logger.info(locusCount + " sites out of " + numberOfSites + " total sites were examined.");
        return counts;
    } catch (final IOException | SAMFormatException e) {
        throw new UserException("Unable to collect allelic counts from " + bamFile);
    }
}
Also used : Arrays(java.util.Arrays) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) IntervalList(htsjdk.samtools.util.IntervalList) IOException(java.io.IOException) Nucleotide(org.broadinstitute.hellbender.utils.Nucleotide) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter) List(java.util.List) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) Utils(org.broadinstitute.hellbender.utils.Utils) htsjdk.samtools(htsjdk.samtools) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) IOException(java.io.IOException) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter) Nucleotide(org.broadinstitute.hellbender.utils.Nucleotide) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 75 with UserException

use of org.broadinstitute.hellbender.exceptions.UserException in project gatk by broadinstitute.

the class CoverageModelEMWorkspace method getBiasCovariatesRegularizedLinearOperators.

/**
     * Returns the pair of (linear operator, preconditioner) invoked in the M-step update of bias
     * covariates in w/ regularization
     *
     * @return a pair of linear operators
     */
@EvaluatesRDD
private ImmutablePair<GeneralLinearOperator<INDArray>, GeneralLinearOperator<INDArray>> getBiasCovariatesRegularizedLinearOperators() {
    final INDArray Q_ll, Q_tll, Z_ll;
    final GeneralLinearOperator<INDArray> linop, precond;
    final FourierLinearOperatorNDArray regularizerFourierLinearOperator = createRegularizerFourierLinearOperator();
    switch(config.getWSolverType()) {
        case LOCAL:
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Q_tll = fetchFromWorkers(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Q_tll, 0);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the local implementation of linear operators */
            linop = new CoverageModelWLinearOperatorLocal(Q_tll, Z_ll, regularizerFourierLinearOperator);
            precond = new CoverageModelWPreconditionerLocal(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets);
            return ImmutablePair.of(linop, precond);
        case SPARK:
            if (!sparkContextIsAvailable) {
                throw new UserException("The Spark W solver is only available in the Spark mode");
            }
            /* fetch the required INDArrays */
            Q_ll = mapWorkersAndReduce(cb -> cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.sum_Q_ll), INDArray::add).div(numTargets);
            Z_ll = sampleBiasLatentPosteriorSecondMoments.sum(0);
            /* instantiate the spark implementation of linear operators */
            linop = new CoverageModelWLinearOperatorSpark(Z_ll, regularizerFourierLinearOperator, numTargets, ctx, computeRDD, targetBlocks);
            precond = new CoverageModelWPreconditionerSpark(Q_ll, Z_ll, regularizerFourierLinearOperator, numTargets, ctx, numTargetBlocks);
            return ImmutablePair.of(linop, precond);
        default:
            throw new IllegalArgumentException("The solver type for M-step update of bias covariates" + " is not properly set");
    }
}
Also used : FourierLinearOperatorNDArray(org.broadinstitute.hellbender.tools.coveragemodel.linalg.FourierLinearOperatorNDArray) INDArray(org.nd4j.linalg.api.ndarray.INDArray) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException)100 File (java.io.File)30 IOException (java.io.IOException)30 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)11 SamReader (htsjdk.samtools.SamReader)10 VariantContext (htsjdk.variant.variantcontext.VariantContext)10 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)10 SAMRecord (htsjdk.samtools.SAMRecord)9 IntervalList (htsjdk.samtools.util.IntervalList)9 List (java.util.List)9 SAMFileHeader (htsjdk.samtools.SAMFileHeader)8 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)8 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)8 LogManager (org.apache.logging.log4j.LogManager)8 Logger (org.apache.logging.log4j.Logger)8 GATKException (org.broadinstitute.hellbender.exceptions.GATKException)7 MetricsFile (htsjdk.samtools.metrics.MetricsFile)6 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)5 FileNotFoundException (java.io.FileNotFoundException)5