Search in sources :

Example 11 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method doWork.

/**
     * Calculates the detailed statistics about the jumping library and then generates the results.
     */
@Override
protected Object doWork() {
    for (File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
    }
    IOUtil.assertFileIsWritable(OUTPUT);
    Histogram<Integer> innieHistogram = new Histogram<>();
    Histogram<Integer> outieHistogram = new Histogram<>();
    int fragments = 0;
    int innies = 0;
    int outies = 0;
    int innieDupes = 0;
    int outieDupes = 0;
    int crossChromPairs = 0;
    int superSized = 0;
    int tandemPairs = 0;
    double chimeraSizeMinimum = Math.max(getOutieMode(), (double) CHIMERA_KB_MIN);
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new UserException("SAM file must " + f.getName() + " must be sorted in coordinate order");
        }
        for (SAMRecord sam : reader) {
            // We're getting all our info from the first of each pair.
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // Ignore unmapped read pairs
            if (sam.getReadUnmappedFlag()) {
                if (!sam.getMateUnmappedFlag()) {
                    fragments++;
                    continue;
                }
                // If both ends are unmapped and we've hit unaligned reads we're done
                if (sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
                continue;
            }
            if (sam.getMateUnmappedFlag()) {
                fragments++;
                continue;
            }
            // Ignore low-quality reads.  If we don't have the mate mapping quality, assume it's OK
            if ((sam.getAttribute(SAMTag.MQ.name()) != null && sam.getIntegerAttribute(SAMTag.MQ.name()) < MINIMUM_MAPPING_QUALITY) || sam.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
                continue;
            }
            final int absInsertSize = Math.abs(sam.getInferredInsertSize());
            if (absInsertSize > chimeraSizeMinimum) {
                superSized++;
            } else if (sam.getMateNegativeStrandFlag() == sam.getReadNegativeStrandFlag()) {
                tandemPairs++;
            } else if (!sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                crossChromPairs++;
            } else {
                final PairOrientation pairOrientation = SamPairUtil.getPairOrientation(sam);
                if (pairOrientation == PairOrientation.RF) {
                    outieHistogram.increment(absInsertSize);
                    outies++;
                    if (sam.getDuplicateReadFlag()) {
                        outieDupes++;
                    }
                } else if (pairOrientation == PairOrientation.FR) {
                    innieHistogram.increment(absInsertSize);
                    innies++;
                    if (sam.getDuplicateReadFlag()) {
                        innieDupes++;
                    }
                } else {
                    throw new IllegalStateException("This should never happen");
                }
            }
        }
        CloserUtil.close(reader);
    }
    MetricsFile<JumpingLibraryMetrics, Integer> metricsFile = getMetricsFile();
    JumpingLibraryMetrics metrics = new JumpingLibraryMetrics();
    metrics.JUMP_PAIRS = outies;
    metrics.JUMP_DUPLICATE_PAIRS = outieDupes;
    metrics.JUMP_DUPLICATE_PCT = outies != 0 ? outieDupes / (double) outies : 0;
    metrics.JUMP_LIBRARY_SIZE = (outies > 0 && outieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(outies, outies - outieDupes) : 0;
    outieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.JUMP_MEAN_INSERT_SIZE = outieHistogram.getMean();
    metrics.JUMP_STDEV_INSERT_SIZE = outieHistogram.getStandardDeviation();
    metrics.NONJUMP_PAIRS = innies;
    metrics.NONJUMP_DUPLICATE_PAIRS = innieDupes;
    metrics.NONJUMP_DUPLICATE_PCT = innies != 0 ? innieDupes / (double) innies : 0;
    metrics.NONJUMP_LIBRARY_SIZE = (innies > 0 && innieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(innies, innies - innieDupes) : 0;
    innieHistogram.trimByTailLimit(TAIL_LIMIT);
    metrics.NONJUMP_MEAN_INSERT_SIZE = innieHistogram.getMean();
    metrics.NONJUMP_STDEV_INSERT_SIZE = innieHistogram.getStandardDeviation();
    metrics.CHIMERIC_PAIRS = crossChromPairs + superSized + tandemPairs;
    metrics.FRAGMENTS = fragments;
    double totalPairs = outies + innies + metrics.CHIMERIC_PAIRS;
    metrics.PCT_JUMPS = totalPairs != 0 ? outies / totalPairs : 0;
    metrics.PCT_NONJUMPS = totalPairs != 0 ? innies / totalPairs : 0;
    metrics.PCT_CHIMERAS = totalPairs != 0 ? metrics.CHIMERIC_PAIRS / totalPairs : 0;
    metricsFile.addMetric(metrics);
    metricsFile.write(OUTPUT);
    return null;
}
Also used : Histogram(htsjdk.samtools.util.Histogram) PairOrientation(htsjdk.samtools.SamPairUtil.PairOrientation) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 12 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class CollectOxoGMetrics method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (INTERVALS != null)
        IOUtil.assertFileIsReadable(INTERVALS);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
    final Set<String> samples = new HashSet<>();
    final Set<String> libraries = new HashSet<>();
    for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
        samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
        libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
    }
    // Setup the calculators
    final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
    final ListMap<String, Calculator> calculators = new ListMap<>();
    for (final String context : contexts) {
        for (final String library : libraries) {
            calculators.add(context, new Calculator(library, context));
        }
    }
    // Load up dbSNP if available
    logger.info("Loading dbSNP File: " + DB_SNP);
    final DbSnpBitSetUtil dbSnp;
    if (DB_SNP != null)
        dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
    else
        dbSnp = null;
    // Make an iterator that will filter out funny looking things
    final SamLocusIterator iterator;
    if (INTERVALS != null) {
        final IntervalList intervals = IntervalList.fromFile(INTERVALS);
        iterator = new SamLocusIterator(in, intervals.uniqued(), false);
    } else {
        iterator = new SamLocusIterator(in);
    }
    iterator.setEmitUncoveredLoci(false);
    iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
    final List<SamRecordFilter> filters = new ArrayList<>();
    filters.add(new NotPrimaryAlignmentFilter());
    filters.add(new DuplicateReadFilter());
    if (MINIMUM_INSERT_SIZE > 0 || MAXIMUM_INSERT_SIZE > 0) {
        filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE));
    }
    iterator.setSamFilters(filters);
    logger.info("Starting iteration.");
    long nextLogTime = 0;
    int sites = 0;
    for (final SamLocusIterator.LocusInfo info : iterator) {
        // Skip dbSNP sites
        final String chrom = info.getSequenceName();
        final int pos = info.getPosition();
        final int index = pos - 1;
        if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos))
            continue;
        // Skip sites at the end of chromosomes 
        final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
        if (pos < 3 || pos > bases.length - 3)
            continue;
        // Skip non C-G bases
        final byte base = StringUtil.toUpperCase(bases[index]);
        if (base != 'C' && base != 'G')
            continue;
        // Get the context string
        final String context;
        {
            final String tmp = StringUtil.bytesToString(bases, index - CONTEXT_SIZE, 1 + (2 * CONTEXT_SIZE)).toUpperCase();
            if (base == 'C')
                context = tmp;
            else
                /* if G */
                context = SequenceUtil.reverseComplement(tmp);
        }
        final List<Calculator> calculatorsForContext = calculators.get(context);
        // happens if we get ambiguous bases in the reference
        if (calculatorsForContext == null)
            continue;
        for (final Calculator calc : calculatorsForContext) calc.accept(info, base);
        // See if we need to stop
        if (++sites % 100 == 0) {
            final long now = System.currentTimeMillis();
            if (now > nextLogTime) {
                logger.info("Visited " + sites + " sites of interest. Last site: " + chrom + ":" + pos);
                nextLogTime = now + 60000;
            }
        }
        if (sites >= STOP_AFTER)
            break;
    }
    final MetricsFile<CpcgMetrics, Integer> file = getMetricsFile();
    for (final List<Calculator> calcs : calculators.values()) {
        for (final Calculator calc : calcs) {
            final CpcgMetrics m = calc.finish();
            m.SAMPLE_ALIAS = StringUtil.join(",", new ArrayList<>(samples));
            file.addMetric(m);
        }
    }
    file.write(OUTPUT);
    CloserUtil.close(in);
    return null;
}
Also used : SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SamReader(htsjdk.samtools.SamReader) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) HashSet(java.util.HashSet) DbSnpBitSetUtil(org.broadinstitute.hellbender.utils.variant.DbSnpBitSetUtil) InsertSizeFilter(htsjdk.samtools.filter.InsertSizeFilter) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter)

Example 13 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class BuildBamIndex method doWork.

/**
     * Main method for the program.  Checks that all input files are present and
     * readable and that the output file can be written to.  Then iterates through
     * all the records generating a BAM Index, then writes the bai file.
     */
@Override
protected Object doWork() {
    try {
        inputUrl = new URL(INPUT);
    } catch (MalformedURLException e) {
        inputFile = new File(INPUT);
    }
    // set default output file - input-file.bai
    if (OUTPUT == null) {
        final String baseFileName;
        if (inputUrl != null) {
            final String path = inputUrl.getPath();
            final int lastSlash = path.lastIndexOf('/');
            baseFileName = path.substring(lastSlash + 1, path.length());
        } else {
            baseFileName = inputFile.getAbsolutePath();
        }
        if (baseFileName.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION)) {
            final int index = baseFileName.lastIndexOf('.');
            OUTPUT = new File(baseFileName.substring(0, index) + BAMIndex.BAMIndexSuffix);
        } else {
            OUTPUT = new File(baseFileName + BAMIndex.BAMIndexSuffix);
        }
    }
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader bam;
    if (inputUrl != null) {
        // remote input
        bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).disable(SamReaderFactory.Option.EAGERLY_DECODE).enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(SamInputResource.of(inputUrl));
    } else {
        // input from a normal file
        IOUtil.assertFileIsReadable(inputFile);
        bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(inputFile);
    }
    if (bam.type() != SamReader.Type.BAM_TYPE) {
        throw new SAMException("Input file must be bam file, not sam file.");
    }
    if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
        throw new SAMException("Input bam file must be sorted by coordinates");
    }
    BAMIndexer.createIndex(bam, OUTPUT);
    logger.info("Successfully wrote bam index file " + OUTPUT);
    CloserUtil.close(bam);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) MalformedURLException(java.net.MalformedURLException) SAMException(htsjdk.samtools.SAMException) File(java.io.File) URL(java.net.URL)

Example 14 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class CollectJumpingLibraryMetrics method getOutieMode.

/**
     * Calculates the mode for outward-facing pairs, using the first SAMPLE_FOR_MODE
     * outward-facing pairs found in INPUT
     */
private double getOutieMode() {
    int samplePerFile = SAMPLE_FOR_MODE / INPUT.size();
    Histogram<Integer> histo = new Histogram<>();
    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);
        int sampled = 0;
        for (Iterator<SAMRecord> it = reader.iterator(); it.hasNext() && sampled < samplePerFile; ) {
            SAMRecord sam = it.next();
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // If we get here we've hit the end of the aligned reads
            if (sam.getReadUnmappedFlag() && sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                break;
            } else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
                continue;
            } else {
                if ((sam.getAttribute(SAMTag.MQ.name()) == null || sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) && sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY && sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() && sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                    if (SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
                        histo.increment(Math.abs(sam.getInferredInsertSize()));
                        sampled++;
                    }
                }
            }
        }
        CloserUtil.close(reader);
    }
    return histo.size() > 0 ? histo.getMode() : 0;
}
Also used : SamReader(htsjdk.samtools.SamReader) Histogram(htsjdk.samtools.util.Histogram) SAMRecord(htsjdk.samtools.SAMRecord) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 15 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class GATKToolUnitTest method testBestSequenceDictionary_fromReads.

@Test
public void testBestSequenceDictionary_fromReads() throws Exception {
    final GATKTool tool = new TestGATKToolWithReads();
    final CommandLineParser clp = new CommandLineArgumentParser(tool);
    final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
    final String[] args = { "-I", bamFile.getCanonicalPath() };
    clp.parseArguments(System.out, args);
    tool.onStartup();
    //read the dict back in and compare to bam dict
    final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
    try (final SamReader open = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMSequenceDictionary bamDict = open.getFileHeader().getSequenceDictionary();
        toolDict.assertSameDictionary(bamDict);
        bamDict.assertSameDictionary(toolDict);
        Assert.assertEquals(toolDict, bamDict);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) CommandLineArgumentParser(org.broadinstitute.barclay.argparser.CommandLineArgumentParser) CommandLineParser(org.broadinstitute.barclay.argparser.CommandLineParser) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26