Search in sources :

Example 21 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 22 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project gatk by broadinstitute.

the class CollectRnaSeqMetricsTest method basic.

@Test
public void basic() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";
    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
    builder.addPair("pair1", sequenceIndex, 45, 475);
    builder.addPair("pair2", sequenceIndex, 90, 225);
    builder.addPair("pair3", sequenceIndex, 120, 600);
    builder.addFrag("frag1", sequenceIndex, 150, true);
    builder.addFrag("frag2", sequenceIndex, 450, true);
    builder.addFrag("frag3", sequenceIndex, 225, false);
    builder.addPair("rrnaPair", sequenceIndex, 400, 500);
    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
    final File samFile = BaseTest.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    try (final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
        for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec);
    }
    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = BaseTest.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalList.write(rRnaIntervalsFile);
    // Generate the metrics.
    final File metricsFile = BaseTest.createTempFile("tmp.", ".rna_metrics");
    final String[] args = new String[] { "--input", samFile.getAbsolutePath(), "--output", metricsFile.getAbsolutePath(), "--REF_FLAT", getRefFlatFile(sequence).getAbsolutePath(), "--RIBOSOMAL_INTERVALS", rRnaIntervalsFile.getAbsolutePath(), "--STRAND_SPECIFICITY", "SECOND_READ_TRANSCRIPTION_STRAND", "--IGNORE_SEQUENCE", ignoredSequence };
    runCommandLine(args);
    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
    output.read(new FileReader(metricsFile));
    final RnaSeqMetrics metrics = output.getMetrics().get(0);
    Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396);
    Assert.assertEquals(metrics.PF_BASES, 432);
    Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L);
    Assert.assertEquals(metrics.CODING_BASES, 136);
    Assert.assertEquals(metrics.UTR_BASES, 51);
    Assert.assertEquals(metrics.INTRONIC_BASES, 50);
    Assert.assertEquals(metrics.INTERGENIC_BASES, 51);
    Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3);
    Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4);
    Assert.assertEquals(metrics.IGNORED_READS, 1);
}
Also used : MetricsFile(htsjdk.samtools.metrics.MetricsFile) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMRecordSetBuilder(htsjdk.samtools.SAMRecordSetBuilder) IntervalList(htsjdk.samtools.util.IntervalList) SAMRecord(htsjdk.samtools.SAMRecord) FileReader(java.io.FileReader) MetricsFile(htsjdk.samtools.metrics.MetricsFile) File(java.io.File) Interval(htsjdk.samtools.util.Interval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Example 23 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project hmftools by hartwigmedical.

the class BamSlicerApplication method sliceFromVCF.

private static void sliceFromVCF(@NotNull final CommandLine cmd) throws IOException {
    final String inputPath = cmd.getOptionValue(INPUT);
    final String vcfPath = cmd.getOptionValue(VCF);
    final int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));
    final SamReader reader = SamReaderFactory.makeDefault().open(new File(inputPath));
    final QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    final CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));
    writeToSlice(writer, iterator);
    writer.close();
    reader.close();
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) QueryInterval(htsjdk.samtools.QueryInterval) File(java.io.File)

Example 24 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou.

the class SAMtools method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(mySamFile));
    samHeader = inputSam.getFileHeader();
    samHeader.setSortOrder(SortOrder.unsorted);
    SAMRecordIterator iter = inputSam.iterator();
    Set<Entry<String, String>> attr = samHeader.getAttributes();
    List<SAMReadGroupRecord> rgs = samHeader.getReadGroups();
    SAMReadGroupRecord rg = new SAMReadGroupRecord("cz1");
    rg.setSample("cz1");
    samHeader.addReadGroup(rg);
    // samHeader.setAttribute("RG", "cz1");
    final SAMFileWriter outSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, new File(myOutput));
    for (int i = 0; i < 100; i++) {
        SAMRecord record = iter.next();
        List<SAMTagAndValue> tags = record.getAttributes();
        record.setAttribute("RG", "cz1");
        List<SAMTagAndValue> tags2 = record.getAttributes();
        outSam.addAlignment(record);
    }
    myLogger.info("exit...");
    try {
        inputSam.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    outSam.close();
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Entry(java.util.Map.Entry) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue)

Example 25 with SAMFileWriterFactory

use of htsjdk.samtools.SAMFileWriterFactory in project polyGembler by c-zhou.

the class SamFileSplit method run.

@Override
public void run() {
    // TODO Auto-generated method stub
    Utils.makeOutputDir(bam_out);
    final File[] beds = new File(bed_in).listFiles();
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll(".bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    this.initial_thread_pool();
    for (File bam : bams) {
        executor.submit(new Runnable() {

            private File bam;

            @Override
            public void run() {
                // TODO Auto-generated method stub
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();
                    for (int i = 0; i < beds.length; i++) {
                        Set<String> bed_seq = new HashSet<String>();
                        String tmp;
                        BufferedReader br = new BufferedReader(new FileReader(beds[i]));
                        String line;
                        while ((line = br.readLine()) != null) {
                            tmp = line.split("\\s+")[0];
                            bed_seq.add(tmp);
                            outMap.put(tmp, i);
                        }
                        br.close();
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        List<SAMSequenceRecord> seqs = seqdic.getSequences();
                        for (SAMSequenceRecord seq : seqs) if (bed_seq.contains(seq.getSequenceName()))
                            seqdic_i.addSequence(seq);
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) header_i.addReadGroup(rg);
                        for (SAMProgramRecord pg : header.getProgramRecords()) header_i.addProgramRecord(pg);
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }
                    Set<String> refs = outMap.keySet();
                    String ref;
                    int f;
                    while (iter.hasNext()) {
                        SAMRecord rec = iter.next();
                        if (refs.contains(ref = rec.getReferenceName())) {
                            f = outMap.get(ref);
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) outputSam[i].close();
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }

            public Runnable init(File bam) {
                this.bam = bam;
                return (this);
            }
        }.init(bam));
    }
    this.waitFor();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) FilenameFilter(java.io.FilenameFilter) SamReader(htsjdk.samtools.SamReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) IOException(java.io.IOException) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)33 SAMFileWriter (htsjdk.samtools.SAMFileWriter)26 File (java.io.File)26 SAMRecord (htsjdk.samtools.SAMRecord)22 SAMFileHeader (htsjdk.samtools.SAMFileHeader)20 SamReader (htsjdk.samtools.SamReader)17 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)14 IOException (java.io.IOException)14 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)8 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)7 ArrayList (java.util.ArrayList)7 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 Test (org.testng.annotations.Test)5 Interval (htsjdk.samtools.util.Interval)4 BufferedReader (java.io.BufferedReader)4 List (java.util.List)4 BAMIndex (htsjdk.samtools.BAMIndex)3 DefaultSAMRecordFactory (htsjdk.samtools.DefaultSAMRecordFactory)3 SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)3