Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute: class CollectWgsMetrics, method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    // Set up all the inputs
    final ProgressLogger progress = new ProgressLogger(logger, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    // Load up the reference sequence and double-check the sequence dictionaries
    if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
        SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), refWalker.getSequenceDictionary());
    }
    final SamLocusIterator iterator = new SamLocusIterator(in);
    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    filters.add(pairFilter);
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    // Handled separately because we want to count bases
    iterator.setMappingQualityScoreCutoff(0);
    // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0);
    iterator.setIncludeNonPfReads(false);
    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;
    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;
    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();
        // Check that the reference base is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N') {
            continue;
        }
        // Figure out the coverage while not counting overlapping reads twice, and excluding various things
        final Set<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }
        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size()) {
            basesExcludedByCapping += readNames.size() - max;
        }
        HistogramArray[depth]++;
        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter) {
            break;
        }
    }
    // Construct the coverage histogram
    final Histogram<Integer> histo = new Histogram<>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }
    // Construct the base quality histogram
    final Histogram<Integer> baseQHisto = new Histogram<>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }
    // Populate the metrics and write the outputs
    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();
    final long basesExcludedByDupes = dupeFilter.getFilteredBases();
    final long basesExcludedByMapq = mapqFilter.getFilteredBases();
    final long basesExcludedByPairing = pairFilter.getFilteredBases();
    final double total = histo.getSum();
    final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
    metrics.PCT_5X = MathUtils.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtils.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtils.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtils.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtils.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtils.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtils.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtils.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtils.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtils.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtils.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtils.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtils.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);
    return null;
}
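Distilled from the snippet above, the core ProgressLogger pattern is small: construct it with a logger plus what appear to be a record interval, a verb, and a noun shaping the periodic log line, then call record once per item. A minimal self-contained sketch of that pattern follows; it assumes the log4j Logger that GATK tools use, and the chr1 loop is an illustrative placeholder, not real data.

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.runtime.ProgressLogger;

public final class ProgressLoggerSketch {
    private static final Logger logger = LogManager.getLogger(ProgressLoggerSketch.class);

    public static void main(final String[] args) {
        // Same constructor shape as in CollectWgsMetrics above: emit a log line
        // every 10,000,000 records, phrased as "Processed ... loci".
        final ProgressLogger progress = new ProgressLogger(logger, 10000000, "Processed", "loci");
        // record(sequenceName, position) is the locus-keyed overload used above;
        // "chr1" and these positions are placeholders for illustration only.
        for (int pos = 1; pos <= 100; pos++) {
            progress.record("chr1", pos);
        }
    }
}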
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute: class SamFormatConverter, method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true)) {
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
            progress.record(rec);
        }
    }
    CloserUtil.close(reader);
    return null;
}
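A side note on resource handling: the writer is closed by try-with-resources while the reader goes through CloserUtil.close. Since htsjdk's SamReader implements Closeable, both could be scoped in a single try block; a sketch of that variant, under that assumption, reusing the tool's INPUT, OUTPUT, and logger fields:

// Sketch only: reader and writer both managed by one try-with-resources,
// so no separate CloserUtil.close call is needed.
try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
     final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true)) {
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
}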
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute: class SamToFastq, method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
    final FastqWriterFactory factory = new FastqWriterFactory();
    factory.setCreateMd5(CREATE_MD5_FILE);
    final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord currentRecord : reader) {
        // Skip secondary and supplementary alignments unless they were requested
        if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS) {
            continue;
        }
        // Skip non-PF reads as necessary
        if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS) {
            continue;
        }
        final FastqWriters fq = writers.get(currentRecord.getReadGroup());
        if (currentRecord.getReadPairedFlag()) {
            final String currentReadName = currentRecord.getReadName();
            final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
            if (firstRecord == null) {
                firstSeenMates.put(currentReadName, currentRecord);
            } else {
                assertPairedMates(firstRecord, currentRecord);
                final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
                final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
                writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
                final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
                if (secondOfPairWriter == null) {
                    throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
                }
                writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
            }
        } else {
            writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
        }
        progress.record(currentRecord);
    }
    CloserUtil.close(reader);
    // Close all the fastq writers, taking care to close each one only once
    for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
        writerMapping.closeAll();
    }
    if (!firstSeenMates.isEmpty()) {
        SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
    }
    return null;
}
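The firstSeenMates map above is a single-pass pairing idiom worth isolating: buffer the first mate of each pair by read name, and emit both mates once the second arrives. A stripped-down sketch of just that idiom follows; handlePair is a hypothetical callback standing in for the writeRecord calls, not part of SamToFastq:

// Sketch of the single-pass mate-pairing idiom used above.
final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
for (final SAMRecord rec : reader) {
    if (!rec.getReadPairedFlag()) {
        continue; // unpaired reads are handled separately
    }
    final SAMRecord mate = firstSeenMates.remove(rec.getReadName());
    if (mate == null) {
        // First mate seen for this name; buffer it until its partner arrives.
        firstSeenMates.put(rec.getReadName(), rec);
    } else {
        // Order the pair by the first-of-pair flag before emitting.
        final SAMRecord read1 = rec.getFirstOfPairFlag() ? rec : mate;
        final SAMRecord read2 = rec.getFirstOfPairFlag() ? mate : rec;
        handlePair(read1, read2); // hypothetical callback
    }
}
// Whatever remains in the map never met its mate, which is exactly
// what the MATE_NOT_FOUND validation error above reports.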
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute: class SortSam, method doWork.
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY).referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader writeHeader = reader.getFileHeader().clone();
    writeHeader.setSortOrder(SORT_ORDER);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, writeHeader, false)) {
        writer.setProgressLogger(new ProgressLogger(logger, (int) 1e7, "Wrote", "records from a sorting collection"));
        final ProgressLogger progress = new ProgressLogger(logger, (int) 1e7, "Read");
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
            progress.record(rec);
        }
        logger.info("Finished reading inputs, merging and writing to output now.");
    }
    CloserUtil.close(reader);
    return null;
}
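Note the false passed to createSAMWriter here, where the other examples pass true: presumably mirroring htsjdk's presorted flag, it declares the input unsorted relative to the new header, so the writer sorts records through an internal sorting collection, and the writer-side ProgressLogger reports as that collection flushes. A condensed sketch of the idiom, with SAMFileHeader.SortOrder.coordinate standing in for the tool's SORT_ORDER argument:

// Sketch of the sort-by-rewrite idiom: declare the target order on a cloned
// header and pass presorted=false so the writer's sorting collection does the work.
final SAMFileHeader outHeader = reader.getFileHeader().clone();
outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); // fixed order for illustration
try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
    // Fires once the collection starts writing sorted records to OUTPUT.
    writer.setProgressLogger(new ProgressLogger(logger, (int) 1e7, "Wrote", "records from a sorting collection"));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
    }
}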
Use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute: class CleanSam, method doWork.
/**
 * Do the work after the command line has been parsed.
 * RuntimeExceptions may be thrown by this method, and they are reported appropriately.
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        // Read leniently so records can be cleaned before they are validated downstream
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true);
         final CloseableIterator<SAMRecord> it = reader.iterator()) {
        final ProgressLogger progress = new ProgressLogger(logger);
        while (it.hasNext()) {
            final SAMRecord rec = it.next();
            // If the read (or its mate) maps off the end of the reference, clip it
            AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);
            // Set the mapping quality of unmapped reads to zero
            if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
                rec.setMappingQuality(0);
            }
            writer.addAlignment(rec);
            progress.record(rec);
        }
    } finally {
        CloserUtil.close(reader);
    }
    return null;
}
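Taken together, the five snippets use three ProgressLogger constructor shapes and both record overloads. A summary sketch; the defaults noted in the comments are assumptions based on the picard ProgressLogger this class mirrors, not verified against the hellbender source:

// The three constructor shapes seen above. Assumed defaults for the shorter
// forms (per the picard logger of the same name): interval 1,000,000,
// verb "Processed", noun "records".
final ProgressLogger terse = new ProgressLogger(logger);                                // SamFormatConverter, SamToFastq, CleanSam
final ProgressLogger reads = new ProgressLogger(logger, (int) 1e7, "Read");             // SortSam, read side
final ProgressLogger loci = new ProgressLogger(logger, 10000000, "Processed", "loci");  // CollectWgsMetrics

// Both record overloads also appear (arguments here are placeholders):
// progress.record(rec);            // SAMRecord overload
// progress.record("chr1", 12345);  // sequence name + position overload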