Search in sources :

Example 6 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class CollectSequencingArtifactMetrics method setup.

@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    preAdapterSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT);
    preAdapterDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT);
    baitBiasSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT);
    baitBiasDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT);
    IOUtil.assertFileIsWritable(preAdapterSummaryOut);
    IOUtil.assertFileIsWritable(preAdapterDetailsOut);
    IOUtil.assertFileIsWritable(baitBiasSummaryOut);
    IOUtil.assertFileIsWritable(baitBiasDetailsOut);
    for (final SAMReadGroupRecord rec : header.getReadGroups()) {
        samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
        libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
    }
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        intervalMask = new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued());
    }
    if (DB_SNP != null) {
        IOUtil.assertFileIsReadable(DB_SNP);
        dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary());
    }
    // set record-level filters
    final List<SamRecordFilter> filters = new ArrayList<>();
    filters.add(new FailsVendorReadQualityFilter());
    filters.add(new NotPrimaryAlignmentFilter());
    filters.add(new DuplicateReadFilter());
    // discard unmapped reads
    filters.add(new AlignedFilter(true));
    filters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY));
    if (!INCLUDE_UNPAIRED) {
        final int effectiveMaxInsertSize = (MAXIMUM_INSERT_SIZE == 0) ? Integer.MAX_VALUE : MAXIMUM_INSERT_SIZE;
        filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, effectiveMaxInsertSize));
    }
    recordFilter = new AggregateFilter(filters);
    // set up the artifact counters
    final String sampleAlias = StringUtil.join(",", new ArrayList<>(samples));
    for (final String library : libraries) {
        artifactCounters.put(library, new ArtifactCounter(sampleAlias, library, CONTEXT_SIZE, TANDEM_READS));
    }
}
Also used : SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) DbSnpBitSetUtil(org.broadinstitute.hellbender.utils.variant.DbSnpBitSetUtil) IntervalListReferenceSequenceMask(htsjdk.samtools.util.IntervalListReferenceSequenceMask) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File)

Example 7 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class CollectOxoGMetrics method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (INTERVALS != null)
        IOUtil.assertFileIsReadable(INTERVALS);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
    final Set<String> samples = new HashSet<>();
    final Set<String> libraries = new HashSet<>();
    for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
        samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
        libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
    }
    // Setup the calculators
    final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
    final ListMap<String, Calculator> calculators = new ListMap<>();
    for (final String context : contexts) {
        for (final String library : libraries) {
            calculators.add(context, new Calculator(library, context));
        }
    }
    // Load up dbSNP if available
    logger.info("Loading dbSNP File: " + DB_SNP);
    final DbSnpBitSetUtil dbSnp;
    if (DB_SNP != null)
        dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
    else
        dbSnp = null;
    // Make an iterator that will filter out funny looking things
    final SamLocusIterator iterator;
    if (INTERVALS != null) {
        final IntervalList intervals = IntervalList.fromFile(INTERVALS);
        iterator = new SamLocusIterator(in, intervals.uniqued(), false);
    } else {
        iterator = new SamLocusIterator(in);
    }
    iterator.setEmitUncoveredLoci(false);
    iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
    final List<SamRecordFilter> filters = new ArrayList<>();
    filters.add(new NotPrimaryAlignmentFilter());
    filters.add(new DuplicateReadFilter());
    if (MINIMUM_INSERT_SIZE > 0 || MAXIMUM_INSERT_SIZE > 0) {
        filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE));
    }
    iterator.setSamFilters(filters);
    logger.info("Starting iteration.");
    long nextLogTime = 0;
    int sites = 0;
    for (final SamLocusIterator.LocusInfo info : iterator) {
        // Skip dbSNP sites
        final String chrom = info.getSequenceName();
        final int pos = info.getPosition();
        final int index = pos - 1;
        if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos))
            continue;
        // Skip sites at the end of chromosomes 
        final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
        if (pos < 3 || pos > bases.length - 3)
            continue;
        // Skip non C-G bases
        final byte base = StringUtil.toUpperCase(bases[index]);
        if (base != 'C' && base != 'G')
            continue;
        // Get the context string
        final String context;
        {
            final String tmp = StringUtil.bytesToString(bases, index - CONTEXT_SIZE, 1 + (2 * CONTEXT_SIZE)).toUpperCase();
            if (base == 'C')
                context = tmp;
            else
                /* if G */
                context = SequenceUtil.reverseComplement(tmp);
        }
        final List<Calculator> calculatorsForContext = calculators.get(context);
        // happens if we get ambiguous bases in the reference
        if (calculatorsForContext == null)
            continue;
        for (final Calculator calc : calculatorsForContext) calc.accept(info, base);
        // See if we need to stop
        if (++sites % 100 == 0) {
            final long now = System.currentTimeMillis();
            if (now > nextLogTime) {
                logger.info("Visited " + sites + " sites of interest. Last site: " + chrom + ":" + pos);
                nextLogTime = now + 60000;
            }
        }
        if (sites >= STOP_AFTER)
            break;
    }
    final MetricsFile<CpcgMetrics, Integer> file = getMetricsFile();
    for (final List<Calculator> calcs : calculators.values()) {
        for (final Calculator calc : calcs) {
            final CpcgMetrics m = calc.finish();
            m.SAMPLE_ALIAS = StringUtil.join(",", new ArrayList<>(samples));
            file.addMetric(m);
        }
    }
    file.write(OUTPUT);
    CloserUtil.close(in);
    return null;
}
Also used : SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SamReader(htsjdk.samtools.SamReader) DuplicateReadFilter(htsjdk.samtools.filter.DuplicateReadFilter) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) HashSet(java.util.HashSet) DbSnpBitSetUtil(org.broadinstitute.hellbender.utils.variant.DbSnpBitSetUtil) InsertSizeFilter(htsjdk.samtools.filter.InsertSizeFilter) NotPrimaryAlignmentFilter(htsjdk.samtools.filter.NotPrimaryAlignmentFilter)

Example 8 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class NGSPlatformUnitTest method testPLFromReadWithRGButNoPL.

@Test()
public void testPLFromReadWithRGButNoPL() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final String rgID = "ID";
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(rgID);
    header.addReadGroup(rg);
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 1, 10);
    read.setAttribute("RG", rgID);
    Assert.assertEquals(NGSPlatform.fromRead(read, header), NGSPlatform.UNKNOWN);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 9 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class ReadGroupCovariateUnitTest method testMissingPlatformUnit.

@Test
public void testMissingPlatformUnit() {
    final String expected = "MY.7";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    SAMReadGroupRecord rg = new SAMReadGroupRecord(expected);
    runTest(rg, expected, covariate);
}
Also used : RecalibrationArgumentCollection(org.broadinstitute.hellbender.utils.recalibration.RecalibrationArgumentCollection) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Test(org.testng.annotations.Test)

Example 10 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.

the class ReadGroupCovariateUnitTest method testMaxValue.

@Test
public void testMaxValue() {
    final String id = "MY.ID";
    final String expected = "SAMPLE.1";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    SAMReadGroupRecord rg = new SAMReadGroupRecord(id);
    rg.setPlatformUnit(expected);
    //there's just 1 read group, so 0 is the max value
    Assert.assertEquals(covariate.maximumKeyValue(), 0);
}
Also used : RecalibrationArgumentCollection(org.broadinstitute.hellbender.utils.recalibration.RecalibrationArgumentCollection) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Test(org.testng.annotations.Test)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12