Examples with ReferenceSequenceFileWalker - htsjdk.samtools.reference.ReferenceSequenceFileWalker

Example 6 with ReferenceSequenceFileWalker

use of htsjdk.samtools.reference.ReferenceSequenceFileWalker in project gatk by broadinstitute.

the class CountingPairedFilter method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(logger, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    // Load up the reference sequence and double check sequence dictionaries
    if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
        SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), refWalker.getSequenceDictionary());
    }
    final SamLocusIterator iterator = new SamLocusIterator(in);
    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    filters.add(pairFilter);
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    // Handled separately because we want to count bases
    iterator.setMappingQualityScoreCutoff(0);
    // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0);
    iterator.setIncludeNonPfReads(false);
    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;
    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;
    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();
        // Check that the reference is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N')
            continue;
        // Figure out the coverage while not counting overlapping reads twice, and excluding various things
        final Set<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }
        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size())
            basesExcludedByCapping += readNames.size() - max;
        HistogramArray[depth]++;
        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter)
            break;
    }
    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }
    // Construct and write the outputs
    final Histogram<Integer> baseQHisto = new Histogram<>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }
    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();
    final long basesExcludedByDupes = dupeFilter.getFilteredBases();
    final long basesExcludedByMapq = mapqFilter.getFilteredBases();
    final long basesExcludedByPairing = pairFilter.getFilteredBases();
    final double total = histo.getSum();
    final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
    metrics.PCT_5X = MathUtils.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtils.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtils.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtils.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtils.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtils.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtils.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtils.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtils.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtils.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtils.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtils.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtils.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);
    return null;
}

Also used : Histogram(htsjdk.samtools.util.Histogram) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) SecondaryAlignmentFilter(htsjdk.samtools.filter.SecondaryAlignmentFilter) HashSet(java.util.HashSet) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator)

Example 7 with ReferenceSequenceFileWalker

use of htsjdk.samtools.reference.ReferenceSequenceFileWalker in project gatk by broadinstitute.

the class LiftOverVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(REJECT);
    ////////////////////////////////////////////////////////////////////////
    // Setup the inputs
    ////////////////////////////////////////////////////////////////////////
    final LiftOver liftOver = new LiftOver(CHAIN);
    final VCFFileReader in = new VCFFileReader(INPUT, false);
    logger.info("Loading up the target reference genome.");
    final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final Map<String, byte[]> refSeqs = new HashMap<>();
    for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
        refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases());
    }
    CloserUtil.close(walker);
    ////////////////////////////////////////////////////////////////////////
    // Setup the outputs
    ////////////////////////////////////////////////////////////////////////
    final VCFHeader inHeader = in.getFileHeader();
    final VCFHeader outHeader = new VCFHeader(inHeader);
    outHeader.setSequenceDictionary(walker.getSequenceDictionary());
    final VariantContextWriter out = new VariantContextWriterBuilder().setOption(Options.INDEX_ON_THE_FLY).setOutputFile(OUTPUT).setReferenceDictionary(walker.getSequenceDictionary()).build();
    out.writeHeader(outHeader);
    final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY).build();
    final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
    for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
    rejects.writeHeader(rejectHeader);
    ////////////////////////////////////////////////////////////////////////
    // Read the input VCF, lift the records over and write to the sorting
    // collection.
    ////////////////////////////////////////////////////////////////////////
    long failedLiftover = 0, failedAlleleCheck = 0, total = 0;
    logger.info("Lifting variants over and sorting.");
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outHeader), outHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    ProgressLogger progress = new ProgressLogger(logger, 1000000, "read");
    for (final VariantContext ctx : in) {
        ++total;
        final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
        final Interval target = liftOver.liftOver(source, 1.0);
        if (target == null) {
            rejects.add(new VariantContextBuilder(ctx).filter(FILTER_CANNOT_LIFTOVER).make());
            failedLiftover++;
        } else {
            // Fix the alleles if we went from positive to negative strand
            final List<Allele> alleles = new ArrayList<>();
            for (final Allele oldAllele : ctx.getAlleles()) {
                if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
                    alleles.add(oldAllele);
                } else {
                    alleles.add(Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference()));
                }
            }
            // Build the new variant context
            final VariantContextBuilder builder = new VariantContextBuilder(ctx.getSource(), target.getContig(), target.getStart(), target.getEnd(), alleles);
            builder.id(ctx.getID());
            builder.attributes(ctx.getAttributes());
            builder.genotypes(ctx.getGenotypes());
            builder.filters(ctx.getFilters());
            builder.log10PError(ctx.getLog10PError());
            // Check that the reference allele still agrees with the reference sequence
            boolean mismatchesReference = false;
            for (final Allele allele : builder.getAlleles()) {
                if (allele.isReference()) {
                    final byte[] ref = refSeqs.get(target.getContig());
                    final String refString = StringUtil.bytesToString(ref, target.getStart() - 1, target.length());
                    if (!refString.equalsIgnoreCase(allele.getBaseString())) {
                        mismatchesReference = true;
                    }
                    break;
                }
            }
            if (mismatchesReference) {
                rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make());
                failedAlleleCheck++;
            } else {
                sorter.add(builder.make());
            }
        }
        progress.record(ctx.getContig(), ctx.getStart());
    }
    final NumberFormat pfmt = new DecimalFormat("0.0000%");
    final String pct = pfmt.format((failedLiftover + failedAlleleCheck) / (double) total);
    logger.info("Processed ", total, " variants.");
    logger.info(Long.toString(failedLiftover), " variants failed to liftover.");
    logger.info(Long.toString(failedAlleleCheck), " variants lifted over but had mismatching reference alleles after lift over.");
    logger.info(pct, " of variants were not successfully lifted over and written to the output.");
    rejects.close();
    in.close();
    ////////////////////////////////////////////////////////////////////////
    // Write the sorted outputs to the final output file
    ////////////////////////////////////////////////////////////////////////
    sorter.doneAdding();
    progress = new ProgressLogger(logger, 1000000, "written");
    logger.info("Writing out sorted records to final VCF.");
    for (final VariantContext ctx : sorter) {
        out.add(ctx);
        progress.record(ctx.getContig(), ctx.getStart());
    }
    out.close();
    sorter.cleanup();
    return null;
}

Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) HashMap(java.util.HashMap) DecimalFormat(java.text.DecimalFormat) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) VCFHeader(htsjdk.variant.vcf.VCFHeader) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) NumberFormat(java.text.NumberFormat)

Example 8 with ReferenceSequenceFileWalker

use of htsjdk.samtools.reference.ReferenceSequenceFileWalker in project gatk by broadinstitute.

the class BayesianHetPulldownCalculator method getHetPulldown.

/**
     * For a given normal or tumor BAM file, walks through the list of common SNPs,
     * {@link BayesianHetPulldownCalculator#snpIntervals}), detects heterozygous sites, and returns
     * a {@link Pulldown} containing detailed information on the called heterozygous SNP sites.
     *
     * The {@code hetCallingStrigency} parameters sets the threshold posterior for calling a Het SNP site:
     *
     *      hetPosteriorThreshold = 1 - 10^{-hetCallingStringency}
     *      hetThresholdLogOdds = log(hetPosteriorThreshold/(1-hetPosteriorThreshold))
     *                          = log(10^{hetCallingStringency} - 1)
     *
     * (see CNV-methods.pdf for details)
     *
     * @param bamFile sorted BAM file for sample
     * @param hetCallingStringency strigency for calling a Het site
     * @return Pulldown of heterozygous SNP sites in 1-based format
     */
public Pulldown getHetPulldown(final File bamFile, final double hetCallingStringency) {
    /* log odds from stringency */
    final double hetThresholdLogOdds = FastMath.log(FastMath.pow(10, hetCallingStringency) - 1);
    try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(bamFile);
        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile)) {
        if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException.BadInput("BAM file " + bamFile.toString() + " must be coordinate sorted.");
        }
        final Pulldown hetPulldown = new Pulldown(bamReader.getFileHeader());
        final SamLocusIterator locusIterator = getSamLocusIteratorWithDefaultFilters(bamReader);
        final int totalNumberOfSNPs = snpIntervals.size();
        logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
        int locusCount = 0;
        for (final SamLocusIterator.LocusInfo locus : locusIterator) {
            if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
                logger.info("Examined " + locusCount + " covered sites.");
            }
            locusCount++;
            final int totalReadCount = locus.getRecordAndOffsets().size();
            if (totalReadCount <= readDepthThreshold) {
                continue;
            }
            final Nucleotide refBase = Nucleotide.valueOf(refWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
            if (!isProperBase(refBase)) {
                logger.warn(String.format("The reference position at %d has an unknown base call (value: %s). Even though" + " this position is indicated to be a possible heterozygous SNP in the provided SNP interval list," + " no inference can be made. Continuing ...", locus.getPosition(), refBase.toString()));
                continue;
            }
            final Map<Nucleotide, List<BaseQuality>> baseQualities = getPileupBaseQualities(locus);
            final Nucleotide altBase = inferAltFromPileup(baseQualities, refBase);
            /* calculate Het log odds */
            final double hetLogLikelihood = getHetLogLikelihood(baseQualities, refBase, altBase);
            final double homLogLikelihood = getHomLogLikelihood(baseQualities, refBase, altBase, DEFAULT_PRIOR_REF_HOM);
            final double hetLogOdds = (hetLogLikelihood + FastMath.log(DEFAULT_PRIOR_HET)) - (homLogLikelihood + FastMath.log(1 - DEFAULT_PRIOR_HET));
            if (hetLogOdds > hetThresholdLogOdds) {
                hetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), baseQualities.get(refBase).size(), baseQualities.get(altBase).size(), refBase, altBase, totalReadCount, hetLogOdds));
            }
        }
        logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
        return hetPulldown;
    } catch (final IOException | SAMFormatException e) {
        throw new UserException(e.getMessage());
    }
}

Also used : IOException(java.io.IOException) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) IntervalList(htsjdk.samtools.util.IntervalList) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Example 9 with ReferenceSequenceFileWalker

use of htsjdk.samtools.reference.ReferenceSequenceFileWalker in project gatk by broadinstitute.

the class HetPulldownCalculator method getHetPulldown.

/**
     * For a normal or tumor sample, returns a data structure giving (intervals, reference counts, alternate counts),
     * where intervals give positions of likely heterozygous SNP sites.
     *
     * <p>
     *     For a normal sample:
     *     <ul>
     *         The IntervalList snpIntervals gives common SNP sites in 1-based format.
     *     </ul>
     *     <ul>
     *         The p-value threshold must be specified for a two-sided binomial test,
     *         which is used to determine SNP sites from snpIntervals that are
     *         compatible with a heterozygous SNP, given the sample.  Only these sites are output.
     *     </ul>
     * </p>
     * <p>
     *     For a tumor sample:
     *     <ul>
     *         The IntervalList snpIntervals gives heterozygous SNP sites likely to be present in the normal sample.
     *         This should be from {@link HetPulldownCalculator#getNormal} in 1-based format.
     *         Only these sites are output.
     *     </ul>
     * </p>
     * @param bamFile           sorted BAM file for sample
     * @param snpIntervals      IntervalList of SNP sites
     * @param sampleType        flag indicating type of sample (SampleType.NORMAL or SampleType.TUMOR)
     *                          (determines whether to perform binomial test)
     * @param pvalThreshold     p-value threshold for two-sided binomial test, used for normal sample
     * @param minimumRawReads   minimum number of total reads that must be present at a het site
     * @return                  Pulldown of heterozygous SNP sites in 1-based format
     */
private Pulldown getHetPulldown(final File bamFile, final IntervalList snpIntervals, final SampleType sampleType, final double pvalThreshold, final int minimumRawReads) {
    try (final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(validationStringency).referenceSequence(refFile).open(bamFile);
        final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile)) {
        if (bamReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException.BadInput("BAM file " + bamFile.toString() + " must be coordinate sorted.");
        }
        final Pulldown hetPulldown = new Pulldown(bamReader.getFileHeader());
        final int totalNumberOfSNPs = snpIntervals.size();
        final SamLocusIterator locusIterator = new SamLocusIterator(bamReader, snpIntervals, totalNumberOfSNPs < MAX_INTERVALS_FOR_INDEX);
        //set read and locus filters [note: read counts match IGV, but off by a few from pysam.mpileup]
        final List<SamRecordFilter> samFilters = Arrays.asList(new NotPrimaryAlignmentFilter(), new DuplicateReadFilter());
        locusIterator.setSamFilters(samFilters);
        locusIterator.setEmitUncoveredLoci(false);
        locusIterator.setIncludeNonPfReads(false);
        locusIterator.setMappingQualityScoreCutoff(minMappingQuality);
        locusIterator.setQualityScoreCutoff(minBaseQuality);
        logger.info("Examining " + totalNumberOfSNPs + " sites in total...");
        int locusCount = 0;
        for (final SamLocusIterator.LocusInfo locus : locusIterator) {
            if (locusCount % NUMBER_OF_SITES_PER_LOGGED_STATUS_UPDATE == 0) {
                logger.info("Examined " + locusCount + " covered sites.");
            }
            locusCount++;
            //include N, etc. reads here
            final int totalReadCount = locus.getRecordAndOffsets().size();
            if (totalReadCount < minimumRawReads) {
                continue;
            }
            final Nucleotide.Counter baseCounts = getPileupBaseCounts(locus);
            //only include total ACGT counts in binomial test (exclude N, etc.)
            final int totalBaseCount = Arrays.stream(BASES).mapToInt(b -> (int) baseCounts.get(b)).sum();
            if (sampleType == SampleType.NORMAL && !isPileupHetCompatible(baseCounts, totalBaseCount, pvalThreshold)) {
                continue;
            }
            final Nucleotide refBase = Nucleotide.valueOf(refWalker.get(locus.getSequenceIndex()).getBases()[locus.getPosition() - 1]);
            final int refReadCount = (int) baseCounts.get(refBase);
            final int altReadCount = totalBaseCount - refReadCount;
            hetPulldown.add(new AllelicCount(new SimpleInterval(locus.getSequenceName(), locus.getPosition(), locus.getPosition()), refReadCount, altReadCount));
        }
        logger.info(locusCount + " covered sites out of " + totalNumberOfSNPs + " total sites were examined.");
        return hetPulldown;
    } catch (final IOException | SAMFormatException e) {
        throw new UserException(e.getMessage());
    }
}

Also used : Arrays(java.util.Arrays) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) IntervalList(htsjdk.samtools.util.IntervalList) AlternativeHypothesis(org.apache.commons.math3.stat.inference.AlternativeHypothesis) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount) IOException(java.io.IOException) Nucleotide(org.broadinstitute.hellbender.utils.Nucleotide) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) File(java.io.File) BinomialTest(org.apache.commons.math3.stat.inference.BinomialTest) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter) List(java.util.List) Logger(org.apache.logging.log4j.Logger) UserException(org.broadinstitute.hellbender.exceptions.UserException) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) htsjdk.samtools(htsjdk.samtools) LogManager(org.apache.logging.log4j.LogManager) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) IOException(java.io.IOException) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter) Nucleotide(org.broadinstitute.hellbender.utils.Nucleotide) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) AllelicCount(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)

Aggregations

ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)9 UserException (org.broadinstitute.hellbender.exceptions.UserException)6 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)5 SamReader (htsjdk.samtools.SamReader)4 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)4 IntervalList (htsjdk.samtools.util.IntervalList)4 IOException (java.io.IOException)4 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)4 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)4 DuplicateReadFilter (htsjdk.samtools.filter.DuplicateReadFilter)3 NotPrimaryAlignmentFilter (htsjdk.samtools.filter.NotPrimaryAlignmentFilter)3 ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)3 File (java.io.File)3 ArrayList (java.util.ArrayList)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 htsjdk.samtools (htsjdk.samtools)2 SAMRecord (htsjdk.samtools.SAMRecord)2 Arrays (java.util.Arrays)2 HashSet (java.util.HashSet)2 List (java.util.List)2