Search in sources :

Example 16 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class CountingPairedFilter method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(logger, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    // Load up the reference sequence and double check sequence dictionaries
    if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
        SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), refWalker.getSequenceDictionary());
    }
    final SamLocusIterator iterator = new SamLocusIterator(in);
    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    filters.add(pairFilter);
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    // Handled separately because we want to count bases
    iterator.setMappingQualityScoreCutoff(0);
    // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0);
    iterator.setIncludeNonPfReads(false);
    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;
    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;
    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();
        // Check that the reference is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N')
            continue;
        // Figure out the coverage while not counting overlapping reads twice, and excluding various things
        final Set<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }
        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size())
            basesExcludedByCapping += readNames.size() - max;
        HistogramArray[depth]++;
        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter)
            break;
    }
    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }
    // Construct and write the outputs
    final Histogram<Integer> baseQHisto = new Histogram<>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }
    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();
    final long basesExcludedByDupes = dupeFilter.getFilteredBases();
    final long basesExcludedByMapq = mapqFilter.getFilteredBases();
    final long basesExcludedByPairing = pairFilter.getFilteredBases();
    final double total = histo.getSum();
    final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
    metrics.PCT_5X = MathUtils.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtils.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtils.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtils.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtils.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtils.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtils.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtils.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtils.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtils.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtils.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtils.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtils.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);
    return null;
}
Also used : Histogram(htsjdk.samtools.util.Histogram) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReader(htsjdk.samtools.SamReader) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) SecondaryAlignmentFilter(htsjdk.samtools.filter.SecondaryAlignmentFilter) HashSet(java.util.HashSet) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator)

Example 17 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SamFormatConverter method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true)) {
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
            progress.record(rec);
        }
    }
    CloserUtil.close(reader);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 18 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SamToFastq method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
    final FastqWriterFactory factory = new FastqWriterFactory();
    factory.setCreateMd5(CREATE_MD5_FILE);
    final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord currentRecord : reader) {
        if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS)
            continue;
        // Skip non-PF reads as necessary
        if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS)
            continue;
        final FastqWriters fq = writers.get(currentRecord.getReadGroup());
        if (currentRecord.getReadPairedFlag()) {
            final String currentReadName = currentRecord.getReadName();
            final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
            if (firstRecord == null) {
                firstSeenMates.put(currentReadName, currentRecord);
            } else {
                assertPairedMates(firstRecord, currentRecord);
                final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
                final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
                writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
                final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
                if (secondOfPairWriter == null) {
                    throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
                }
                writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
            }
        } else {
            writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
        }
        progress.record(currentRecord);
    }
    CloserUtil.close(reader);
    // Close all the fastq writers being careful to close each one only once!
    for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
        writerMapping.closeAll();
    }
    if (!firstSeenMates.isEmpty()) {
        SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
    }
    return null;
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) SAMRecord(htsjdk.samtools.SAMRecord) FastqWriterFactory(htsjdk.samtools.fastq.FastqWriterFactory) FastqWriter(htsjdk.samtools.fastq.FastqWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) HashSet(java.util.HashSet)

Example 19 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SortSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY).referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    SAMFileHeader writeHeader = reader.getFileHeader().clone();
    writeHeader.setSortOrder(SORT_ORDER);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, writeHeader, false)) {
        writer.setProgressLogger(new ProgressLogger(logger, (int) 1e7, "Wrote", "records from a sorting collection"));
        final ProgressLogger progress = new ProgressLogger(logger, (int) 1e7, "Read");
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
            progress.record(rec);
        }
        logger.info("Finished reading inputs, merging and writing to output now.");
    }
    CloserUtil.close(reader);
    return null;
}
Also used : ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 20 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class CleanSam method doWork.

/**
     * Do the work after command line has been parsed.
     * RuntimeException may be thrown by this method, and are reported appropriately.
     */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true);
        final CloseableIterator<SAMRecord> it = reader.iterator()) {
        final ProgressLogger progress = new ProgressLogger(logger);
        // If the read (or its mate) maps off the end of the alignment, clip it
        while (it.hasNext()) {
            final SAMRecord rec = it.next();
            // If the read (or its mate) maps off the end of the alignment, clip it
            AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);
            // check the read's mapping quality
            if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
                rec.setMappingQuality(0);
            }
            writer.addAlignment(rec);
            progress.record(rec);
        }
    } finally {
        CloserUtil.close(reader);
    }
    return null;
}
Also used : ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Aggregations

ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)31 SamReader (htsjdk.samtools.SamReader)13 SAMRecord (htsjdk.samtools.SAMRecord)12 UserException (org.broadinstitute.hellbender.exceptions.UserException)11 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 File (java.io.File)7 ArrayList (java.util.ArrayList)7 SAMFileWriter (htsjdk.samtools.SAMFileWriter)6 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)6 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)5 VCFHeader (htsjdk.variant.vcf.VCFHeader)5 HashMap (java.util.HashMap)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 MetricsFile (htsjdk.samtools.metrics.MetricsFile)4 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3