Search in sources :

Example 21 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class FixMateInformation method doWork.

@Override
protected Object doWork() {
    // Open up the input
    boolean allQueryNameSorted = true;
    final List<SamReader> readers = new ArrayList<>();
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readers.add(reader);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.queryname)
            allQueryNameSorted = false;
    }
    // or into a temporary file that will overwrite the INPUT file eventually
    if (OUTPUT != null)
        OUTPUT = OUTPUT.getAbsoluteFile();
    final boolean differentOutputSpecified = OUTPUT != null;
    if (differentOutputSpecified) {
        IOUtil.assertFileIsWritable(OUTPUT);
    } else if (INPUT.size() != 1) {
        throw new UserException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
    } else {
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File dir = soleInput.getParentFile().getAbsoluteFile();
        try {
            IOUtil.assertFileIsWritable(soleInput);
            IOUtil.assertDirectoryIsWritable(dir);
            OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
        } catch (final IOException ioe) {
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
        }
    }
    // Get the input records merged and sorted by query name as needed
    final PeekableIterator<SAMRecord> iterator;
    final SAMFileHeader header;
    {
        // Deal with merging if necessary
        final Iterator<SAMRecord> tmp;
        if (INPUT.size() > 1) {
            final List<SAMFileHeader> headers = new ArrayList<>(readers.size());
            for (final SamReader reader : readers) {
                headers.add(reader.getFileHeader());
            }
            final SAMFileHeader.SortOrder sortOrder = (allQueryNameSorted ? SAMFileHeader.SortOrder.queryname : SAMFileHeader.SortOrder.unsorted);
            final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
            tmp = new MergingSamRecordIterator(merger, readers, false);
            header = merger.getMergedHeader();
        } else {
            tmp = readers.get(0).iterator();
            header = readers.get(0).getFileHeader();
        }
        // And now deal with re-sorting if necessary
        if (ASSUME_SORTED || allQueryNameSorted) {
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<>(tmp), ADD_MATE_CIGAR);
        } else {
            logger.info("Sorting input into queryname order.");
            final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
            while (tmp.hasNext()) {
                sorter.add(tmp.next());
            }
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {

                @Override
                public void close() {
                    super.close();
                    sorter.cleanup();
                }
            }, ADD_MATE_CIGAR);
            logger.info("Sorting by queryname complete.");
        }
        // Deal with the various sorting complications
        final SAMFileHeader.SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
        logger.info("Output will be sorted by " + outputSortOrder);
        header.setSortOrder(outputSortOrder);
    }
    if (CREATE_INDEX && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Can't CREATE_INDEX unless sort order is coordinate");
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, header.getSortOrder() == SAMFileHeader.SortOrder.queryname)) {
        logger.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(logger);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();
        if (header.getSortOrder() == SAMFileHeader.SortOrder.queryname) {
            logger.info("Closing output file.");
        } else {
            logger.info("Finished processing reads; re-sorting output file.");
        }
    }
    // TODO throw appropriate exceptions instead of writing to log.error and returning
    if (!differentOutputSpecified) {
        logger.info("Replacing input file with fixed file.");
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
        if (!old.exists() && soleInput.renameTo(old)) {
            if (OUTPUT.renameTo(soleInput)) {
                if (!old.delete()) {
                    logger.warn("Could not delete old file: " + old.getAbsolutePath());
                    return null;
                }
                if (CREATE_INDEX) {
                    final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length() - 4) + ".bai");
                    final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length() - 4) + ".bai");
                    if (!newIndex.renameTo(oldIndex)) {
                        logger.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                    }
                }
            } else {
                logger.error("Could not move new file to " + soleInput.getAbsolutePath());
                logger.error("Input file preserved as: " + old.getAbsolutePath());
                logger.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                return null;
            }
        } else {
            logger.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());
            if (!OUTPUT.delete()) {
                logger.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
            }
            return null;
        }
    }
    CloserUtil.close(readers);
    return null;
}
Also used : MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) PeekableIterator(htsjdk.samtools.util.PeekableIterator) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecord(htsjdk.samtools.SAMRecord) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 22 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class ReorderSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SAMSequenceDictionary refDict = reference.getSequenceDictionary();
    if (refDict == null) {
        CloserUtil.close(in);
        throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
    }
    printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
    printDictionary("Reference", refDict);
    Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
    // has to be after we create the newOrder
    SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
    outHeader.setSequenceDictionary(refDict);
    logger.info("Writing reads...");
    if (in.hasIndex()) {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
            // write the reads in contig order
            for (final SAMSequenceRecord contig : refDict.getSequences()) {
                final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
                writeReads(out, it, newOrder, contig.getSequenceName());
            }
            // don't forget the unmapped reads
            writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
        }
    } else {
        try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
            writeReads(out, in.iterator(), newOrder, "All reads");
        }
    }
    // cleanup
    CloserUtil.close(in);
    return null;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 23 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class AbstractMarkDuplicatesCommandLineProgramTest method testStackOverFlowPairSetSwap.

@Test
public void testStackOverFlowPairSetSwap() {
    final AbstractMarkDuplicatesTester tester = getTester();
    File input = new File(TEST_DATA_DIR, "markDuplicatesWithMateCigar.pairSet.swap.sam");
    SamReader reader = SamReaderFactory.makeDefault().open(input);
    tester.setHeader(reader.getFileHeader());
    for (final SAMRecord record : reader) {
        tester.addRecord(record);
    }
    CloserUtil.close(reader);
    tester.setExpectedOpticalDuplicate(1);
    tester.runTest();
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 24 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class AbstractMarkDuplicatesTester method test.

@Override
public void test() {
    try {
        updateExpectedDuplicationMetrics();
        // Read the output and check the duplicate flag
        int outputRecords = 0;
        final SamReader reader = SamReaderFactory.makeDefault().open(getOutput());
        for (final SAMRecord record : reader) {
            outputRecords++;
            final String key = samRecordToDuplicatesFlagsKey(record);
            if (!this.duplicateFlags.containsKey(key)) {
                System.err.println("DOES NOT CONTAIN KEY: " + key);
            }
            Assert.assertTrue(this.duplicateFlags.containsKey(key));
            final boolean value = this.duplicateFlags.get(key);
            this.duplicateFlags.remove(key);
            if (value != record.getDuplicateReadFlag()) {
                System.err.println("Mismatching read:");
                System.err.print(record.getSAMString());
            }
            Assert.assertEquals(record.getDuplicateReadFlag(), value);
        }
        CloserUtil.close(reader);
        // Ensure the program output the same number of records as were read in
        Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records"));
        // Check the values written to metrics.txt against our input expectations
        final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<>();
        try {
            metricsOutput.read(new FileReader(metricsFile));
        } catch (final FileNotFoundException ex) {
            System.err.println("Metrics file not found: " + ex);
        }
        // NB: Test writes an initial metrics line with a null entry for LIBRARY and 0 values for all metrics. So we find the first non-null one
        final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().stream().filter(metric -> metric.LIBRARY != null).findFirst().get();
        Assert.assertEquals(observedMetrics.UNPAIRED_READS_EXAMINED, expectedMetrics.UNPAIRED_READS_EXAMINED, "UNPAIRED_READS_EXAMINED does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIRS_EXAMINED, expectedMetrics.READ_PAIRS_EXAMINED, "READ_PAIRS_EXAMINED does not match expected");
        Assert.assertEquals(observedMetrics.UNMAPPED_READS, expectedMetrics.UNMAPPED_READS, "UNMAPPED_READS does not match expected");
        Assert.assertEquals(observedMetrics.UNPAIRED_READ_DUPLICATES, expectedMetrics.UNPAIRED_READ_DUPLICATES, "UNPAIRED_READ_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIR_DUPLICATES, expectedMetrics.READ_PAIR_DUPLICATES, "READ_PAIR_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.PERCENT_DUPLICATION, expectedMetrics.PERCENT_DUPLICATION, "PERCENT_DUPLICATION does not match expected");
        // coded and needs to have real values for each field.
        if (observedMetrics.ESTIMATED_LIBRARY_SIZE == null) {
            observedMetrics.ESTIMATED_LIBRARY_SIZE = 0L;
        }
        if (expectedMetrics.ESTIMATED_LIBRARY_SIZE == null) {
            expectedMetrics.ESTIMATED_LIBRARY_SIZE = 0L;
        }
        Assert.assertEquals(observedMetrics.ESTIMATED_LIBRARY_SIZE, expectedMetrics.ESTIMATED_LIBRARY_SIZE, "ESTIMATED_LIBRARY_SIZE does not match expected");
    } finally {
        TestUtil.recursiveDelete(getOutputDir());
    }
}
Also used : MetricsFile(htsjdk.samtools.metrics.MetricsFile) SamReader(htsjdk.samtools.SamReader) DuplicationMetrics(org.broadinstitute.hellbender.utils.read.markduplicates.DuplicationMetrics) SAMRecord(htsjdk.samtools.SAMRecord) FileNotFoundException(java.io.FileNotFoundException) FileReader(java.io.FileReader)

Example 25 with SamReader

use of htsjdk.samtools.SamReader in project gatk by broadinstitute.

the class ArtificialBAMBuilderUnitTest method testBamProvider.

@Test(dataProvider = "ArtificialBAMBuilderUnitTestProvider")
public void testBamProvider(final ArtificialBAMBuilder bamBuilder, int readLength, int skips, int start, int nSamples, int nReadsPerLocus, int nLoci) throws IOException {
    Assert.assertEquals(bamBuilder.getReadLength(), readLength);
    Assert.assertEquals(bamBuilder.getSkipNLoci(), skips);
    Assert.assertEquals(bamBuilder.getAlignmentStart(), start);
    Assert.assertEquals(bamBuilder.getAlignmentEnd(), start + nLoci * (skips + 1) + readLength);
    Assert.assertEquals(bamBuilder.getNSamples(), nSamples);
    Assert.assertEquals(bamBuilder.getnReadsPerLocus(), nReadsPerLocus);
    Assert.assertEquals(bamBuilder.getnLoci(), nLoci);
    Assert.assertEquals(bamBuilder.getSamples().size(), bamBuilder.getNSamples());
    Assert.assertNull(bamBuilder.getReference());
    final List<GATKRead> reads = bamBuilder.makeReads();
    Assert.assertEquals(reads.size(), bamBuilder.expectedNumberOfReads());
    for (final GATKRead read : reads) {
        assertGoodRead(read.convertToSAMRecord(bamBuilder.getHeader()), bamBuilder);
    }
    final File bam = bamBuilder.makeTemporaryBAMFile();
    final SamReader reader = SamReaderFactory.makeDefault().open(bam);
    Assert.assertTrue(reader.hasIndex());
    final Iterator<SAMRecord> bamIt = reader.iterator();
    int nReadsFromBam = 0;
    int lastStart = -1;
    while (bamIt.hasNext()) {
        final SAMRecord read = bamIt.next();
        assertGoodRead(read, bamBuilder);
        nReadsFromBam++;
        Assert.assertTrue(read.getAlignmentStart() >= lastStart);
        lastStart = read.getAlignmentStart();
    }
    Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26