Search in sources :

Example 41 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 42 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gatk by broadinstitute.

the class CollectRnaSeqMetricsTest method basic.

@Test
public void basic() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";
    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
    builder.addPair("pair1", sequenceIndex, 45, 475);
    builder.addPair("pair2", sequenceIndex, 90, 225);
    builder.addPair("pair3", sequenceIndex, 120, 600);
    builder.addFrag("frag1", sequenceIndex, 150, true);
    builder.addFrag("frag2", sequenceIndex, 450, true);
    builder.addFrag("frag3", sequenceIndex, 225, false);
    builder.addPair("rrnaPair", sequenceIndex, 400, 500);
    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
    final File samFile = BaseTest.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    try (final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
        for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec);
    }
    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = BaseTest.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalList.write(rRnaIntervalsFile);
    // Generate the metrics.
    final File metricsFile = BaseTest.createTempFile("tmp.", ".rna_metrics");
    final String[] args = new String[] { "--input", samFile.getAbsolutePath(), "--output", metricsFile.getAbsolutePath(), "--REF_FLAT", getRefFlatFile(sequence).getAbsolutePath(), "--RIBOSOMAL_INTERVALS", rRnaIntervalsFile.getAbsolutePath(), "--STRAND_SPECIFICITY", "SECOND_READ_TRANSCRIPTION_STRAND", "--IGNORE_SEQUENCE", ignoredSequence };
    runCommandLine(args);
    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
    output.read(new FileReader(metricsFile));
    final RnaSeqMetrics metrics = output.getMetrics().get(0);
    Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396);
    Assert.assertEquals(metrics.PF_BASES, 432);
    Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L);
    Assert.assertEquals(metrics.CODING_BASES, 136);
    Assert.assertEquals(metrics.UTR_BASES, 51);
    Assert.assertEquals(metrics.INTRONIC_BASES, 50);
    Assert.assertEquals(metrics.INTERGENIC_BASES, 51);
    Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3);
    Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4);
    Assert.assertEquals(metrics.IGNORED_READS, 1);
}
Also used : MetricsFile(htsjdk.samtools.metrics.MetricsFile) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMRecordSetBuilder(htsjdk.samtools.SAMRecordSetBuilder) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) FileReader(java.io.FileReader) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) Interval(htsjdk.samtools.util.Interval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 43 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project gatk by broadinstitute.

the class AssemblyBasedCallerUtils method assembleReads.

/**
     * High-level function that runs the assembler on the given region's reads,
     * returning a data structure with the resulting information needed
     * for further HC steps
     */
public static AssemblyResultSet assembleReads(final AssemblyRegion region, final List<VariantContext> givenAlleles, final AssemblyBasedCallerArgumentCollection argumentCollection, final SAMFileHeader header, final SampleList sampleList, final Logger logger, final ReferenceSequenceFile referenceReader, final ReadThreadingAssembler assemblyEngine) {
    finalizeRegion(region, argumentCollection.errorCorrectReads, argumentCollection.dontUseSoftClippedBases, (byte) (argumentCollection.minBaseQualityScore - 1), header, sampleList);
    if (argumentCollection.debug) {
        logger.info("Assembling " + region.getSpan() + " with " + region.size() + " reads:    (with overlap region = " + region.getExtendedSpan() + ")");
    }
    final byte[] fullReferenceWithPadding = region.getAssemblyRegionReference(referenceReader, REFERENCE_PADDING_FOR_ASSEMBLY);
    final SimpleInterval paddedReferenceLoc = getPaddedReferenceLoc(region, REFERENCE_PADDING_FOR_ASSEMBLY, referenceReader);
    final Haplotype referenceHaplotype = createReferenceHaplotype(region, paddedReferenceLoc, referenceReader);
    final ReadErrorCorrector readErrorCorrector = argumentCollection.errorCorrectReads ? new ReadErrorCorrector(argumentCollection.assemblerArgs.kmerLengthForReadErrorCorrection, HaplotypeCallerEngine.MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, argumentCollection.assemblerArgs.minObservationsForKmerToBeSolid, argumentCollection.debug, fullReferenceWithPadding) : null;
    try {
        final AssemblyResultSet assemblyResultSet = assemblyEngine.runLocalAssembly(region, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, givenAlleles, readErrorCorrector, header);
        assemblyResultSet.debugDump(logger);
        return assemblyResultSet;
    } catch (final Exception e) {
        // Capture any exception that might be thrown, and write out the assembly failure BAM if requested
        if (argumentCollection.captureAssemblyFailureBAM) {
            try (final SAMFileWriter writer = ReadUtils.createCommonSAMWriter(new File("assemblyFailure.bam"), null, header, false, false, false)) {
                for (final GATKRead read : region.getReads()) {
                    writer.addAlignment(read.convertToSAMRecord(header));
                }
            }
        }
        throw e;
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Haplotype(org.broadinstitute.hellbender.utils.haplotype.Haplotype) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) CachingIndexedFastaSequenceFile(org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 44 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project hmftools by hartwigmedical.

the class BamSlicerApplication method sliceFromVCF.

private static void sliceFromVCF(@NotNull final CommandLine cmd) throws IOException {
    final String inputPath = cmd.getOptionValue(INPUT);
    final String vcfPath = cmd.getOptionValue(VCF);
    final int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));
    final SamReader reader = SamReaderFactory.makeDefault().open(new File(inputPath));
    final QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    final CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
    writeToSlice(writer, iterator);
    writer.close();
    reader.close();
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) File(java.io.File)

Example 45 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project polyGembler by c-zhou.

the class SAMtools method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(mySamFile));
    samHeader = inputSam.getFileHeader();
    samHeader.setSortOrder(SortOrder.unsorted);
    SAMRecordIterator iter = inputSam.iterator();
    Set<Entry<String, String>> attr = samHeader.getAttributes();
    List<SAMReadGroupRecord> rgs = samHeader.getReadGroups();
    SAMReadGroupRecord rg = new SAMReadGroupRecord("cz1");
    rg.setSample("cz1");
    samHeader.addReadGroup(rg);
    // samHeader.setAttribute("RG", "cz1");
    final SAMFileWriter outSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, new File(myOutput));
    for (int i = 0; i < 100; i++) {
        SAMRecord record = iter.next();
        List<SAMTagAndValue> tags = record.getAttributes();
        record.setAttribute("RG", "cz1");
        List<SAMTagAndValue> tags2 = record.getAttributes();
        outSam.addAlignment(record);
    }
    myLogger.info("exit...");
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    outSam.close();
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Entry(java.util.Map.Entry) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8