Search in sources :

Example 36 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 37 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class ReorderSam method writeReads.

/**
     * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
     * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
     */
private void writeReads(final SAMFileWriter out, final SAMRecordIterator it, final Map<Integer, Integer> newOrder, final String name) {
    long counter = 0;
    logger.info("  Processing " + name);
    while (it.hasNext()) {
        counter++;
        final SAMRecord read = it.next();
        final int oldRefIndex = read.getReferenceIndex();
        final int oldMateIndex = read.getMateReferenceIndex();
        final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);
        read.setHeaderStrict(out.getFileHeader());
        read.setReferenceIndex(newRefIndex);
        final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
        if (oldMateIndex != -1 && newMateIndex == -1) {
            // becoming unmapped
            read.setMateAlignmentStart(0);
            read.setMateUnmappedFlag(true);
            // Set the Mate Cigar String to null
            read.setAttribute(SAMTag.MC.name(), null);
        }
        read.setMateReferenceIndex(newMateIndex);
        out.addAlignment(read);
    }
    it.close();
    logger.info("Wrote " + counter + " reads");
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord)

Example 38 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class RevertSam method fetchByReadName.

/**
     * Generates a list by consuming from the iterator in order starting with the first available
     * read and continuing while subsequent reads share the same read name. If there are no reads
     * remaining returns an empty list.
     */
private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
    final List<SAMRecord> out = new LinkedList<>();
    if (iterator.hasNext()) {
        final SAMRecord first = iterator.next();
        out.add(first);
        while (iterator.hasNext() && iterator.peek().getReadName().equals(first.getReadName())) {
            out.add(iterator.next());
        }
    }
    return out;
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord) LinkedList(java.util.LinkedList)

Example 39 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class HaplotypeBAMWriter method writeHaplotype.

/**
     * Write out a representation of this haplotype as a read
     *
     * @param haplotype a haplotype to write out, must not be null
     * @param paddedRefLoc the reference location, must not be null
     * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible haplotype
     */
private void writeHaplotype(final Haplotype haplotype, final Locatable paddedRefLoc, final boolean isAmongBestHaplotypes) {
    Utils.nonNull(haplotype, "haplotype cannot be null");
    Utils.nonNull(paddedRefLoc, "paddedRefLoc cannot be null");
    final SAMRecord record = new SAMRecord(output.getBAMOutputHeader());
    record.setReadBases(haplotype.getBases());
    record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
    // Use a base quality value "!" for it's display value (quality value is not meaningful)
    record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
    record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
    record.setMappingQuality(isAmongBestHaplotypes ? bestHaplotypeMQ : otherMQ);
    record.setReadName(output.getHaplotypeSampleTag() + uniqueNameCounter++);
    record.setAttribute(output.getHaplotypeSampleTag(), haplotype.hashCode());
    record.setReadUnmappedFlag(false);
    record.setReferenceIndex(output.getBAMOutputHeader().getSequenceIndex(paddedRefLoc.getContig()));
    record.setAttribute(SAMTag.RG.toString(), output.getHaplotypeReadGroupID());
    record.setFlags(SAMFlag.READ_REVERSE_STRAND.intValue());
    output.add(new SAMRecordToGATKReadAdapter(record));
}
Also used : SAMRecord(htsjdk.samtools.SAMRecord)

Example 40 with SAMRecord

use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.

the class FilterReadsIntegrationTest method getReadCounts.

private int getReadCounts(final String resultFileName, final String referenceFileName) {
    final File path = new File(resultFileName);
    IOUtil.assertFileIsReadable(path);
    final File refFile = null == referenceFileName ? null : new File(TEST_DATA_DIR, referenceFileName);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(refFile).open(path);
    int count = 0;
    for (@SuppressWarnings("unused") final SAMRecord rec : in) {
        count++;
    }
    CloserUtil.close(in);
    return count;
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Aggregations

SAMRecord (htsjdk.samtools.SAMRecord)53 SamReader (htsjdk.samtools.SamReader)31 File (java.io.File)20 Test (org.testng.annotations.Test)15 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)12 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)11 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMFileHeader (htsjdk.samtools.SAMFileHeader)9 SAMFileWriter (htsjdk.samtools.SAMFileWriter)9 MetricsFile (htsjdk.samtools.metrics.MetricsFile)9 ArrayList (java.util.ArrayList)8 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)6 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SamReaderFactory (htsjdk.samtools.SamReaderFactory)4 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)4 SAMRecordToGATKReadAdapter (org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3 Histogram (htsjdk.samtools.util.Histogram)3