Search in sources :

Example 56 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk-protected by broadinstitute.

the class ReadThreadingGraphUnitTest method testNsInReadsAreNotUsedForGraph.

@Test(enabled = !DEBUG)
public void testNsInReadsAreNotUsedForGraph() {
    final int length = 100;
    final byte[] ref = Utils.dupBytes((byte) 'A', length);
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(25);
    rtgraph.addSequence("ref", ref, true);
    // add reads with Ns at any position
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    for (int i = 0; i < length; i++) {
        final byte[] bases = ref.clone();
        bases[i] = 'N';
        final GATKRead read = ArtificialReadUtils.createArtificialRead(bases, Utils.dupBytes((byte) 30, length), length + "M");
        rtgraph.addRead(read, header);
    }
    rtgraph.buildGraphIfNecessary();
    final SeqGraph graph = rtgraph.toSequenceGraph();
    Assert.assertEquals(new KBestHaplotypeFinder(graph, graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex()).size(), 1);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SeqGraph(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph) KBestHaplotypeFinder(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.KBestHaplotypeFinder) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 57 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk-protected by broadinstitute.

the class ReadThreadingGraphUnitTest method testCyclesInGraph.

@Test(enabled = !DEBUG)
public void testCyclesInGraph() {
    // b37 20:12655200-12655850
    final String ref = "CAATTGTCATAGAGAGTGACAAATGTTTCAAAAGCTTATTGACCCCAAGGTGCAGCGGTGCACATTAGAGGGCACCTAAGACAGCCTACAGGGGTCAGAAAAGATGTCTCAGAGGGACTCACACCTGAGCTGAGTTGTGAAGGAAGAGCAGGATAGAATGAGCCAAAGATAAAGACTCCAGGCAAAAGCAAATGAGCCTGAGGGAAACTGGAGCCAAGGCAAGAGCAGCAGAAAAGAGCAAAGCCAGCCGGTGGTCAAGGTGGGCTACTGTGTATGCAGAATGAGGAAGCTGGCCAAGTAGACATGTTTCAGATGATGAACATCCTGTATACTAGATGCATTGGAACTTTTTTCATCCCCTCAACTCCACCAAGCCTCTGTCCACTCTTGGTACCTCTCTCCAAGTAGACATATTTCAGATCATGAACATCCTGTGTACTAGATGCATTGGAAATTTTTTCATCCCCTCAACTCCACCCAGCCTCTGTCCACACTTGGTACCTCTCTCTATTCATATCTCTGGCCTCAAGGAGGGTATTTGGCATTAGTAAATAAATTCCAGAGATACTAAAGTCAGATTTTCTAAGACTGGGTGAATGACTCCATGGAAGAAGTGAAAAAGAGGAAGTTGTAATAGGGAGACCTCTTCGG";
    // SNP at 20:12655528 creates a cycle for small kmers
    final String alt = "CAATTGTCATAGAGAGTGACAAATGTTTCAAAAGCTTATTGACCCCAAGGTGCAGCGGTGCACATTAGAGGGCACCTAAGACAGCCTACAGGGGTCAGAAAAGATGTCTCAGAGGGACTCACACCTGAGCTGAGTTGTGAAGGAAGAGCAGGATAGAATGAGCCAAAGATAAAGACTCCAGGCAAAAGCAAATGAGCCTGAGGGAAACTGGAGCCAAGGCAAGAGCAGCAGAAAAGAGCAAAGCCAGCCGGTGGTCAAGGTGGGCTACTGTGTATGCAGAATGAGGAAGCTGGCCAAGTAGACATGTTTCAGATGATGAACATCCTGTGTACTAGATGCATTGGAACTTTTTTCATCCCCTCAACTCCACCAAGCCTCTGTCCACTCTTGGTACCTCTCTCCAAGTAGACATATTTCAGATCATGAACATCCTGTGTACTAGATGCATTGGAAATTTTTTCATCCCCTCAACTCCACCCAGCCTCTGTCCACACTTGGTACCTCTCTCTATTCATATCTCTGGCCTCAAGGAGGGTATTTGGCATTAGTAAATAAATTCCAGAGATACTAAAGTCAGATTTTCTAAGACTGGGTGAATGACTCCATGGAAGAAGTGAAAAAGAGGAAGTTGTAATAGGGAGACCTCTTCGG";
    final List<GATKRead> reads = new ArrayList<>();
    for (int index = 0; index < alt.length() - 100; index += 20) {
        reads.add(ArtificialReadUtils.createArtificialRead(Arrays.copyOfRange(alt.getBytes(), index, index + 100), Utils.dupBytes((byte) 30, 100), 100 + "M"));
    }
    // test that there are cycles detected for small kmer
    final ReadThreadingGraph rtgraph25 = new ReadThreadingGraph(25);
    rtgraph25.addSequence("ref", ref.getBytes(), true);
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    for (final GATKRead read : reads) {
        rtgraph25.addRead(read, header);
    }
    rtgraph25.buildGraphIfNecessary();
    Assert.assertTrue(rtgraph25.hasCycles());
    // test that there are no cycles detected for large kmer
    final ReadThreadingGraph rtgraph75 = new ReadThreadingGraph(75);
    rtgraph75.addSequence("ref", ref.getBytes(), true);
    for (final GATKRead read : reads) {
        rtgraph75.addRead(read, header);
    }
    rtgraph75.buildGraphIfNecessary();
    Assert.assertFalse(rtgraph75.hasCycles());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 58 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class LocusWalker method traverse.

/**
     * Implementation of locus-based traversal.
     * Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
     *
     * The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
     * for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
     * and including deletions only if {@link #includeDeletions()} returns {@code true}.
     */
@Override
public void traverse() {
    final SAMFileHeader header = getHeaderForReads();
    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
    final CountingReadFilter countedFilter = makeReadFilter();
    // get the filter and transformed iterator
    final Iterator<GATKRead> readIterator = getTransformedReadStream(countedFilter).iterator();
    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, getDownsamplingInfo(), keepUniqueReadListInLibs(), samples, header, includeDeletions(), includeNs());
    final Iterator<AlignmentContext> iterator = createAlignmentContextIterator(header, libs);
    // iterate over each alignment, and apply the function
    final Spliterator<AlignmentContext> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
    StreamSupport.stream(spliterator, false).forEach(alignmentContext -> {
        final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
        apply(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(features, alignmentInterval));
        progressMeter.update(alignmentInterval);
    });
    logger.info(countedFilter.getSummaryLine());
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) LocusIteratorByState(org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState) CountingReadFilter(org.broadinstitute.hellbender.engine.filters.CountingReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 59 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReplaceSamHeader method doWork.

/**
     * Do the work after command line has been parsed.
     * RuntimeException may be thrown by this method, and are reported appropriately.
     *
     * @return program exit status.
     */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(HEADER);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SAMFileHeader replacementHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(HEADER);
    if (BamFileIoUtils.isBamFile(INPUT)) {
        blockCopyReheader(replacementHeader);
    } else {
        standardReheader(replacementHeader);
    }
    return null;
}
Also used : SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 60 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class RevertOriginalBaseQualitiesAndAddMateCigar method doWork.

@Override
public Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    boolean foundPairedMappedReads = false;
    // Check if we can skip this file since it does not have OQ tags and the mate cigar tag is already there.
    final CanSkipSamFile skipSamFile = RevertOriginalBaseQualitiesAndAddMateCigar.canSkipSAMFile(INPUT, MAX_RECORDS_TO_EXAMINE, RESTORE_ORIGINAL_QUALITIES, REFERENCE_SEQUENCE);
    logger.info(skipSamFile.getMessage(MAX_RECORDS_TO_EXAMINE));
    if (skipSamFile.canSkip())
        return null;
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).enable(SamReaderFactory.Option.EAGERLY_DECODE).open(INPUT);
    final SAMFileHeader inHeader = in.getFileHeader();
    // Build the output writer based on the correct sort order
    final SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(inHeader);
    // same as the input
    if (null == SORT_ORDER)
        this.SORT_ORDER = inHeader.getSortOrder();
    outHeader.setSortOrder(SORT_ORDER);
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
        // Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname
        final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
        final ProgressLogger revertingProgress = new ProgressLogger(logger, 1000000, " reverted OQs");
        int numOriginalQualitiesRestored = 0;
        for (final SAMRecord record : in) {
            // Clean up reads that map off the end of the reference
            AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record);
            if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) {
                // revert the original base qualities
                record.setBaseQualities(record.getOriginalBaseQualities());
                record.setOriginalBaseQualities(null);
                numOriginalQualitiesRestored++;
            }
            if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag())
                foundPairedMappedReads = true;
            revertingProgress.record(record);
            sorter.add(record);
        }
        CloserUtil.close(in);
        logger.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records");
        /**
             * Iterator through sorting collection output
             * 1. Set mate cigar string and mate information
             * 2. push record into SAMFileWriter to the output
             */
        try (final SamPairUtil.SetMateInfoIterator sorterIterator = new SamPairUtil.SetMateInfoIterator(sorter.iterator(), true)) {
            final ProgressLogger sorterProgress = new ProgressLogger(logger, 1000000, " mate cigars added");
            while (sorterIterator.hasNext()) {
                final SAMRecord record = sorterIterator.next();
                out.addAlignment(record);
                sorterProgress.record(record);
            }
            CloserUtil.close(out);
            logger.info("Updated " + sorterIterator.getNumMateCigarsAdded() + " records with mate cigar");
            if (!foundPairedMappedReads)
                logger.info("Did not find any paired mapped reads.");
        }
    }
    return null;
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamPairUtil(htsjdk.samtools.SamPairUtil) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecord(htsjdk.samtools.SAMRecord) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader)148 Test (org.testng.annotations.Test)89 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)85 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)71 File (java.io.File)23 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)17 DataProvider (org.testng.annotations.DataProvider)17 java.util (java.util)15 UserException (org.broadinstitute.hellbender.exceptions.UserException)15 ArrayList (java.util.ArrayList)14 List (java.util.List)12 Collectors (java.util.stream.Collectors)12 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)12 SAMRecord (htsjdk.samtools.SAMRecord)11 Locatable (htsjdk.samtools.util.Locatable)11 BeforeClass (org.testng.annotations.BeforeClass)11 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)10