Examples with Histogram - htsjdk.samtools.util.Histogram

Example 1 with Histogram

use of htsjdk.samtools.util.Histogram in project gatk by broadinstitute.

the class EstimateLibraryComplexity method doWork.

/**
     * Method that does most of the work.  Reads through the input BAM file and extracts the
     * read sequences of each read pair and sorts them via a SortingCollection.  Then traverses
     * the sorted reads and looks at small groups at a time to find duplicates.
     */
@Override
protected Object doWork() {
    for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
    logger.info("Will store " + MAX_RECORDS_IN_RAM + " read pairs in memory before sorting.");
    final List<SAMReadGroupRecord> readGroups = new ArrayList<>();
    final int recordsRead = 0;
    final SortingCollection<PairedReadSequence> sorter = SortingCollection.newInstance(PairedReadSequence.class, new PairedReadCodec(), new PairedReadComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    // Loop through the input files and pick out the read sequences etc.
    final ProgressLogger progress = new ProgressLogger(logger, (int) 1e6, "Read");
    for (final File f : INPUT) {
        final Map<String, PairedReadSequence> pendingByName = new HashMap<>();
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readGroups.addAll(in.getFileHeader().getReadGroups());
        for (final SAMRecord rec : in) {
            if (!rec.getReadPairedFlag())
                continue;
            if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) {
                continue;
            }
            PairedReadSequence prs = pendingByName.remove(rec.getReadName());
            if (prs == null) {
                // Make a new paired read object and add RG and physical location information to it
                prs = new PairedReadSequence();
                if (opticalDuplicateFinder.addLocationInformation(rec.getReadName(), prs)) {
                    final SAMReadGroupRecord rg = rec.getReadGroup();
                    if (rg != null)
                        prs.setReadGroup((short) readGroups.indexOf(rg));
                }
                pendingByName.put(rec.getReadName(), prs);
            }
            // Read passes quality check if both ends meet the mean quality criteria
            final boolean passesQualityCheck = passesQualityCheck(rec.getReadBases(), rec.getBaseQualities(), MIN_IDENTICAL_BASES, MIN_MEAN_QUALITY);
            prs.qualityOk = prs.qualityOk && passesQualityCheck;
            // Get the bases and restore them to their original orientation if necessary
            final byte[] bases = rec.getReadBases();
            if (rec.getReadNegativeStrandFlag())
                SequenceUtil.reverseComplement(bases);
            if (rec.getFirstOfPairFlag()) {
                prs.read1 = bases;
            } else {
                prs.read2 = bases;
            }
            if (prs.read1 != null && prs.read2 != null && prs.qualityOk) {
                sorter.add(prs);
            }
            progress.record(rec);
        }
        CloserUtil.close(in);
    }
    logger.info("Finished reading - moving on to scanning for duplicates.");
    // Now go through the sorted reads and attempt to find duplicates
    try (final PeekableIterator<PairedReadSequence> iterator = new PeekableIterator<>(sorter.iterator())) {
        final Map<String, Histogram<Integer>> duplicationHistosByLibrary = new HashMap<>();
        final Map<String, Histogram<Integer>> opticalHistosByLibrary = new HashMap<>();
        int groupsProcessed = 0;
        long lastLogTime = System.currentTimeMillis();
        final int meanGroupSize = Math.max(1, (recordsRead / 2) / (int) pow(4.0, (double) MIN_IDENTICAL_BASES * 2));
        while (iterator.hasNext()) {
            // Get the next group and split it apart by library
            final List<PairedReadSequence> group = getNextGroup(iterator);
            if (group.size() > meanGroupSize * MAX_GROUP_RATIO) {
                final PairedReadSequence prs = group.get(0);
                logger.warn("Omitting group with over " + MAX_GROUP_RATIO + " times the expected mean number of read pairs. " + "Mean=" + meanGroupSize + ", Actual=" + group.size() + ". Prefixes: " + StringUtil.bytesToString(prs.read1, 0, MIN_IDENTICAL_BASES) + " / " + StringUtil.bytesToString(prs.read1, 0, MIN_IDENTICAL_BASES));
            } else {
                final Map<String, List<PairedReadSequence>> sequencesByLibrary = splitByLibrary(group, readGroups);
                // Now process the reads by library
                for (final Map.Entry<String, List<PairedReadSequence>> entry : sequencesByLibrary.entrySet()) {
                    final String library = entry.getKey();
                    final List<PairedReadSequence> seqs = entry.getValue();
                    Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
                    Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
                    if (duplicationHisto == null) {
                        duplicationHisto = new Histogram<>("duplication_group_count", library);
                        opticalHisto = new Histogram<>("duplication_group_count", "optical_duplicates");
                        duplicationHistosByLibrary.put(library, duplicationHisto);
                        opticalHistosByLibrary.put(library, opticalHisto);
                    }
                    // Figure out if any reads within this group are duplicates of one another
                    for (int i = 0; i < seqs.size(); ++i) {
                        final PairedReadSequence lhs = seqs.get(i);
                        if (lhs == null)
                            continue;
                        final List<PairedReadSequence> dupes = new ArrayList<>();
                        for (int j = i + 1; j < seqs.size(); ++j) {
                            final PairedReadSequence rhs = seqs.get(j);
                            if (rhs == null)
                                continue;
                            if (matches(lhs, rhs, MAX_DIFF_RATE)) {
                                dupes.add(rhs);
                                seqs.set(j, null);
                            }
                        }
                        if (!dupes.isEmpty()) {
                            dupes.add(lhs);
                            final int duplicateCount = dupes.size();
                            duplicationHisto.increment(duplicateCount);
                            final boolean[] flags = opticalDuplicateFinder.findOpticalDuplicates(dupes);
                            for (final boolean b : flags) {
                                if (b)
                                    opticalHisto.increment(duplicateCount);
                            }
                        } else {
                            duplicationHisto.increment(1);
                        }
                    }
                }
                ++groupsProcessed;
                if (lastLogTime < System.currentTimeMillis() - 60000) {
                    logger.info("Processed " + groupsProcessed + " groups.");
                    lastLogTime = System.currentTimeMillis();
                }
            }
        }
        sorter.cleanup();
        final MetricsFile<DuplicationMetrics, Integer> file = getMetricsFile();
        for (final String library : duplicationHistosByLibrary.keySet()) {
            final Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
            final Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
            final DuplicationMetrics metrics = new DuplicationMetrics();
            metrics.LIBRARY = library;
            // Filter out any bins that have only a single entry in them and calcu
            for (final Integer bin : duplicationHisto.keySet()) {
                final double duplicateGroups = duplicationHisto.get(bin).getValue();
                final double opticalDuplicates = opticalHisto.get(bin) == null ? 0 : opticalHisto.get(bin).getValue();
                if (duplicateGroups > 1) {
                    metrics.READ_PAIRS_EXAMINED += (bin * duplicateGroups);
                    metrics.READ_PAIR_DUPLICATES += ((bin - 1) * duplicateGroups);
                    metrics.READ_PAIR_OPTICAL_DUPLICATES += opticalDuplicates;
                }
            }
            metrics.calculateDerivedMetrics();
            file.addMetric(metrics);
            file.addHistogram(duplicationHisto);
        }
        file.write(OUTPUT);
    }
    return null;
}

Also used : Histogram(htsjdk.samtools.util.Histogram) DuplicationMetrics(org.broadinstitute.hellbender.utils.read.markduplicates.DuplicationMetrics) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) ArrayList(java.util.ArrayList) List(java.util.List) SAMRecord(htsjdk.samtools.SAMRecord) PeekableIterator(htsjdk.samtools.util.PeekableIterator) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with Histogram

use of htsjdk.samtools.util.Histogram in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method doWork.

/**
     * Calculates the detailed statistics about the jumping library and then generates the results.
     */
@Override
protected Object doWork() {
    for (File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
    }
    IOUtil.assertFileIsWritable(OUTPUT);
    Histogram<Integer> innieHistogram = new Histogram<>();
    Histogram<Integer> outieHistogram = new Histogram<>();
    int fragments = 0;
    int innies = 0;
    int outies = 0;
    int innieDupes = 0;
    int outieDupes = 0;
    int crossChromPairs = 0;
    int superSized = 0;
    int tandemPairs = 0;
    double chimeraSizeMinimum = Math.max(getOutieMode(), (double) CHIMERA_KB_MIN);
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException("SAM file must " + f.getName() + " must be sorted in coordinate order");
        }
        for (SAMRecord sam : reader) {
            // We're getting all our info from the first of each pair.
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // Ignore unmapped read pairs
            if (sam.getReadUnmappedFlag()) {
                if (!sam.getMateUnmappedFlag()) {
                    fragments++;
                    continue;
                }
                // If both ends are unmapped and we've hit unaligned reads we're done
                if (sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
                continue;
            }
            if (sam.getMateUnmappedFlag()) {
                fragments++;
                continue;
            }
            // Ignore low-quality reads.  If we don't have the mate mapping quality, assume it's OK
            if ((sam.getAttribute(SAMTag.MQ.name()) != null && sam.getIntegerAttribute(SAMTag.MQ.name()) < MINIMUM_MAPPING_QUALITY) || sam.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
                continue;
            }
            final int absInsertSize = Math.abs(sam.getInferredInsertSize());
            if (absInsertSize > chimeraSizeMinimum) {
                superSized++;
            } else if (sam.getMateNegativeStrandFlag() == sam.getReadNegativeStrandFlag()) {
                tandemPairs++;
            } else if (!sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                crossChromPairs++;
            } else {
                final PairOrientation pairOrientation = SamPairUtil.getPairOrientation(sam);
                if (pairOrientation == PairOrientation.RF) {
                    outieHistogram.increment(absInsertSize);
                    outies++;
                    if (sam.getDuplicateReadFlag()) {
                        outieDupes++;
                    }
                } else if (pairOrientation == PairOrientation.FR) {
                    innieHistogram.increment(absInsertSize);
                    innies++;
                    if (sam.getDuplicateReadFlag()) {
                        innieDupes++;
                    }
                } else {
                    throw new IllegalStateException("This should never happen");
                }
            }
        }
        CloserUtil.close(reader);
    }
    MetricsFile<JumpingLibraryMetrics, Integer> metricsFile = getMetricsFile();
    JumpingLibraryMetrics metrics = new JumpingLibraryMetrics();
    metrics.JUMP_PAIRS = outies;
    metrics.JUMP_DUPLICATE_PAIRS = outieDupes;
    metrics.JUMP_DUPLICATE_PCT = outies != 0 ? outieDupes / (double) outies : 0;
    metrics.JUMP_LIBRARY_SIZE = (outies > 0 && outieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(outies, outies - outieDupes) : 0;
    outieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.JUMP_MEAN_INSERT_SIZE = outieHistogram.getMean();
    metrics.JUMP_STDEV_INSERT_SIZE = outieHistogram.getStandardDeviation();
    metrics.NONJUMP_PAIRS = innies;
    metrics.NONJUMP_DUPLICATE_PAIRS = innieDupes;
    metrics.NONJUMP_DUPLICATE_PCT = innies != 0 ? innieDupes / (double) innies : 0;
    metrics.NONJUMP_LIBRARY_SIZE = (innies > 0 && innieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(innies, innies - innieDupes) : 0;
    innieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.NONJUMP_MEAN_INSERT_SIZE = innieHistogram.getMean();
    metrics.NONJUMP_STDEV_INSERT_SIZE = innieHistogram.getStandardDeviation();
    metrics.CHIMERIC_PAIRS = crossChromPairs + superSized + tandemPairs;
    metrics.FRAGMENTS = fragments;
    double totalPairs = outies + innies + metrics.CHIMERIC_PAIRS;
    metrics.PCT_JUMPS = totalPairs != 0 ? outies / totalPairs : 0;
    metrics.PCT_NONJUMPS = totalPairs != 0 ? innies / totalPairs : 0;
    metrics.PCT_CHIMERAS = totalPairs != 0 ? metrics.CHIMERIC_PAIRS / totalPairs : 0;
    metricsFile.addMetric(metrics);
    metricsFile.write(OUTPUT);
    return null;
}

Also used : Histogram(htsjdk.samtools.util.Histogram) PairOrientation(htsjdk.samtools.SamPairUtil.PairOrientation) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 3 with Histogram

use of htsjdk.samtools.util.Histogram in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method getOutieMode.

/**
     * Calculates the mode for outward-facing pairs, using the first SAMPLE_FOR_MODE
     * outward-facing pairs found in INPUT
     */
private double getOutieMode() {
    int samplePerFile = SAMPLE_FOR_MODE / INPUT.size();
    Histogram<Integer> histo = new Histogram<>();
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        int sampled = 0;
        for (Iterator<SAMRecord> it = reader.iterator(); it.hasNext() && sampled < samplePerFile; ) {
            SAMRecord sam = it.next();
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // If we get here we've hit the end of the aligned reads
            if (sam.getReadUnmappedFlag() && sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                break;
            } else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
                continue;
            } else {
                if ((sam.getAttribute(SAMTag.MQ.name()) == null || sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) && sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY && sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() && sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                    if (SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
                        histo.increment(Math.abs(sam.getInferredInsertSize()));
                        sampled++;
                    }
                }
            }
        }
        CloserUtil.close(reader);
    }
    return histo.size() > 0 ? histo.getMode() : 0;
}

Also used : SamReader(htsjdk.samtools.SamReader) Histogram(htsjdk.samtools.util.Histogram) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 4 with Histogram

use of htsjdk.samtools.util.Histogram in project gatk by broadinstitute.

the class CountingPairedFilter method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(logger, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    // Load up the reference sequence and double check sequence dictionaries
    if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
        SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), refWalker.getSequenceDictionary());
    }
    final SamLocusIterator iterator = new SamLocusIterator(in);
    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    filters.add(pairFilter);
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    // Handled separately because we want to count bases
    iterator.setMappingQualityScoreCutoff(0);
    // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0);
    iterator.setIncludeNonPfReads(false);
    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;
    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;
    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();
        // Check that the reference is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N')
            continue;
        // Figure out the coverage while not counting overlapping reads twice, and excluding various things
        final Set<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }
        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size())
            basesExcludedByCapping += readNames.size() - max;
        HistogramArray[depth]++;
        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter)
            break;
    }
    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }
    // Construct and write the outputs
    final Histogram<Integer> baseQHisto = new Histogram<>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }
    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();
    final long basesExcludedByDupes = dupeFilter.getFilteredBases();
    final long basesExcludedByMapq = mapqFilter.getFilteredBases();
    final long basesExcludedByPairing = pairFilter.getFilteredBases();
    final double total = histo.getSum();
    final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
    metrics.PCT_5X = MathUtils.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtils.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtils.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtils.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtils.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtils.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtils.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtils.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtils.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtils.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtils.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtils.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtils.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);
    return null;
}

Also used : Histogram(htsjdk.samtools.util.Histogram) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) SecondaryAlignmentFilter(htsjdk.samtools.filter.SecondaryAlignmentFilter) HashSet(java.util.HashSet) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator)

Example 5 with Histogram

use of htsjdk.samtools.util.Histogram in project gatk by broadinstitute.

the class QualityScoreDistribution method finish.

@Override
protected void finish() {
    // Built the Histograms out of the long[]s
    final Histogram<Byte> qHisto = new Histogram<>("QUALITY", "COUNT_OF_Q");
    final Histogram<Byte> oqHisto = new Histogram<>("QUALITY", "COUNT_OF_OQ");
    for (int i = 0; i < qCounts.length; ++i) {
        if (qCounts[i] > 0)
            qHisto.increment((byte) i, (double) qCounts[i]);
        if (oqCounts[i] > 0)
            oqHisto.increment((byte) i, (double) oqCounts[i]);
    }
    final MetricsFile<?, Byte> metrics = getMetricsFile();
    metrics.addHistogram(qHisto);
    if (!oqHisto.isEmpty())
        metrics.addHistogram(oqHisto);
    metrics.write(OUTPUT);
    if (qHisto.isEmpty() && oqHisto.isEmpty()) {
        log.warn("No valid bases found in input file. No plot will be produced.");
    } else if (PRODUCE_PLOT) {
        // Now run R to generate a chart
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(new Resource(R_SCRIPT, QualityScoreDistribution.class));
        executor.addArgs(OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
        executor.exec();
    }
}

Also used : Histogram(htsjdk.samtools.util.Histogram) Resource(org.broadinstitute.hellbender.utils.io.Resource) RScriptExecutor(org.broadinstitute.hellbender.utils.R.RScriptExecutor)

Aggregations

Histogram (htsjdk.samtools.util.Histogram)5 SamReader (htsjdk.samtools.SamReader)4 SAMRecord (htsjdk.samtools.SAMRecord)3 MetricsFile (htsjdk.samtools.metrics.MetricsFile)3 File (java.io.File)3 ArrayList (java.util.ArrayList)2 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)2 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 PairOrientation (htsjdk.samtools.SamPairUtil.PairOrientation)1 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)1 SecondaryAlignmentFilter (htsjdk.samtools.filter.SecondaryAlignmentFilter)1 ReferenceSequence (htsjdk.samtools.reference.ReferenceSequence)1 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)1 PeekableIterator (htsjdk.samtools.util.PeekableIterator)1 SamLocusIterator (htsjdk.samtools.util.SamLocusIterator)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 UserException (org.broadinstitute.hellbender.exceptions.UserException)1