Search in sources :

Example 1 with InsertSizeFilter

use of htsjdk.samtools.filter.InsertSizeFilter in project gatk by broadinstitute.

the class CollectOxoGMetrics method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (INTERVALS != null)
        IOUtil.assertFileIsReadable(INTERVALS);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
    final Set<String> samples = new HashSet<>();
    final Set<String> libraries = new HashSet<>();
    for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
        samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
        libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
    }
    // Setup the calculators
    final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
    final ListMap<String, Calculator> calculators = new ListMap<>();
    for (final String context : contexts) {
        for (final String library : libraries) {
            calculators.add(context, new Calculator(library, context));
        }
    }
    // Load up dbSNP if available
    logger.info("Loading dbSNP File: " + DB_SNP);
    final DbSnpBitSetUtil dbSnp;
    if (DB_SNP != null)
        dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
    else
        dbSnp = null;
    // Make an iterator that will filter out funny looking things
    final SamLocusIterator iterator;
    if (INTERVALS != null) {
        final IntervalList intervals = IntervalList.fromFile(INTERVALS);
        iterator = new SamLocusIterator(in, intervals.uniqued(), false);
    } else {
        iterator = new SamLocusIterator(in);
    }
    iterator.setEmitUncoveredLoci(false);
    iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
    final List<SamRecordFilter> filters = new ArrayList<>();
    filters.add(new NotPrimaryAlignmentFilter());
    filters.add(new DuplicateReadFilter());
    if (MINIMUM_INSERT_SIZE > 0 || MAXIMUM_INSERT_SIZE > 0) {
        filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE));
    }
    iterator.setSamFilters(filters);
    logger.info("Starting iteration.");
    long nextLogTime = 0;
    int sites = 0;
    for (final SamLocusIterator.LocusInfo info : iterator) {
        // Skip dbSNP sites
        final String chrom = info.getSequenceName();
        final int pos = info.getPosition();
        final int index = pos - 1;
        if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos))
            continue;
        // Skip sites at the end of chromosomes 
        final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
        if (pos < 3 || pos > bases.length - 3)
            continue;
        // Skip non C-G bases
        final byte base = StringUtil.toUpperCase(bases[index]);
        if (base != 'C' && base != 'G')
            continue;
        // Get the context string
        final String context;
        {
            final String tmp = StringUtil.bytesToString(bases, index - CONTEXT_SIZE, 1 + (2 * CONTEXT_SIZE)).toUpperCase();
            if (base == 'C')
                context = tmp;
            else
                /* if G */
                context = SequenceUtil.reverseComplement(tmp);
        }
        final List<Calculator> calculatorsForContext = calculators.get(context);
        // happens if we get ambiguous bases in the reference
        if (calculatorsForContext == null)
            continue;
        for (final Calculator calc : calculatorsForContext) calc.accept(info, base);
        // See if we need to stop
        if (++sites % 100 == 0) {
            final long now = System.currentTimeMillis();
            if (now > nextLogTime) {
                logger.info("Visited " + sites + " sites of interest. Last site: " + chrom + ":" + pos);
                nextLogTime = now + 60000;
            }
        }
        if (sites >= STOP_AFTER)
            break;
    }
    final MetricsFile<CpcgMetrics, Integer> file = getMetricsFile();
    for (final List<Calculator> calcs : calculators.values()) {
        for (final Calculator calc : calcs) {
            final CpcgMetrics m = calc.finish();
            m.SAMPLE_ALIAS = StringUtil.join(",", new ArrayList<>(samples));
            file.addMetric(m);
        }
    }
    file.write(OUTPUT);
    CloserUtil.close(in);
    return null;
}
Also used : SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SamReader(htsjdk.samtools.SamReader) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) HashSet(java.util.HashSet) DbSnpBitSetUtil(org.broadinstitute.hellbender.utils.variant.DbSnpBitSetUtil) InsertSizeFilter(htsjdk.samtools.filter.InsertSizeFilter) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SamReader (htsjdk.samtools.SamReader)1 DuplicateReadFilter (htsjdk.samtools.filter.DuplicateReadFilter)1 InsertSizeFilter (htsjdk.samtools.filter.InsertSizeFilter)1 NotPrimaryAlignmentFilter (htsjdk.samtools.filter.NotPrimaryAlignmentFilter)1 SamRecordFilter (htsjdk.samtools.filter.SamRecordFilter)1 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 DbSnpBitSetUtil (org.broadinstitute.hellbender.utils.variant.DbSnpBitSetUtil)1