Search in sources :

Example 56 with SamReader

use of htsjdk.samtools.SamReader in project gridss by PapenfussLab.

the class BamToBed method extract.

public static void extract(File input, File splitReads, File spanningReads, final double minLengthPortion, final int minMapq, final int minIndelSize, final int maxMappedBases, final int minIndelComponentSize) throws IOException {
    SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    SamReader reader = factory.open(input);
    BufferedWriter srwriter = null;
    if (splitReads != null) {
        srwriter = new BufferedWriter(new FileWriter(splitReads));
    }
    BufferedWriter spwriter = null;
    if (spanningReads != null) {
        spwriter = new BufferedWriter(new FileWriter(spanningReads));
    }
    SAMRecordIterator it = reader.iterator();
    while (it.hasNext()) {
        final SAMRecord r = it.next();
        if (r.isSecondaryOrSupplementary())
            continue;
        if (splitReads != null) {
            for (SimpleBEDFeature f : ChimericAlignment.getChimericAlignments(r).stream().map(ca -> getSplitReadDeletion(r, ca, minLengthPortion, minMapq, minIndelSize)).filter(f -> f != null).collect(Collectors.toList())) {
                writeBed(srwriter, f);
            }
        }
        if (spanningReads != null) {
            for (SimpleBEDFeature f : getSpanningDeletion(r, minMapq, minIndelSize, maxMappedBases, minIndelComponentSize)) {
                writeBed(spwriter, f);
            }
        }
    }
    CloserUtil.close(srwriter);
    CloserUtil.close(spwriter);
}
Also used : CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) BufferedWriter(java.io.BufferedWriter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Argument(org.broadinstitute.barclay.argparser.Argument) CigarElement(htsjdk.samtools.CigarElement) FileWriter(java.io.FileWriter) CigarOperator(htsjdk.samtools.CigarOperator) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) ValidationStringency(htsjdk.samtools.ValidationStringency) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) StandardOptionDefinitions(picard.cmdline.StandardOptionDefinitions) ArrayList(java.util.ArrayList) SimpleBEDFeature(htsjdk.tribble.bed.SimpleBEDFeature) CommandLineProgram(picard.cmdline.CommandLineProgram) List(java.util.List) Writer(java.io.Writer) CigarUtil(au.edu.wehi.idsv.sam.CigarUtil) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ChimericAlignment(au.edu.wehi.idsv.sam.ChimericAlignment) CloserUtil(htsjdk.samtools.util.CloserUtil) SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SimpleBEDFeature(htsjdk.tribble.bed.SimpleBEDFeature) FileWriter(java.io.FileWriter) SAMRecord(htsjdk.samtools.SAMRecord) BufferedWriter(java.io.BufferedWriter)

Example 57 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamToSVG method doWork.

@Override
public int doWork(List<String> args) {
    /* parse interval */
    if (this.intervalStr == null) {
        LOG.error("bed.interval0.undefined");
        return -1;
    }
    int colon = this.intervalStr.indexOf(':');
    int hyphen = this.intervalStr.indexOf('-', colon + 1);
    if (colon < 1 || hyphen <= colon || hyphen + 1 == intervalStr.length()) {
        LOG.error("Bad interval " + this.intervalStr);
        return -1;
    }
    this.interval = new Interval();
    this.interval.chrom = this.intervalStr.substring(0, colon);
    this.interval.start = Integer.parseInt(this.intervalStr.substring(colon + 1, hyphen)) + 1;
    this.interval.end = Integer.parseInt(this.intervalStr.substring(hyphen + 1));
    this.drawinAreaWidth = Math.max(100, this.drawinAreaWidth);
    SamReader in = null;
    SAMRecordIterator iter = null;
    SamReaderFactory sfrf = SamReaderFactory.makeDefault();
    sfrf.validationStringency(ValidationStringency.SILENT);
    XMLStreamWriter w = null;
    FileOutputStream fout = null;
    try {
        /* get genomic sequence */
        if (this.referenceFile != null) {
            LOG.info("opening " + this.referenceFile);
            this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFile);
            this.genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, this.interval.chrom);
        }
        for (String vcf : this.vcfFileSet) {
            readVariantFile(vcf);
        }
        /* read SAM data */
        if (args.isEmpty()) {
            LOG.info("Reading from stdin");
            in = sfrf.open(SamInputResource.of(stdin()));
            iter = in.iterator();
            readBamStream(iter);
            iter.close();
            in.close();
        } else {
            for (String arg : args) {
                File filename = new File(arg);
                LOG.info("Reading from " + filename);
                in = sfrf.open(SamInputResource.of(filename));
                if (in.hasIndex()) {
                    iter = in.query(this.interval.getChrom(), this.interval.getStart(), this.interval.getEnd(), false);
                } else {
                    LOG.info("Bam file not indexed !! " + filename);
                    iter = in.iterator();
                }
                readBamStream(iter);
                iter.close();
                in.close();
            }
        }
        this.featureWidth = this.drawinAreaWidth / (double) ((this.interval.end - this.interval.start) + 1);
        this.featureHeight = Math.min(Math.max(5.0, this.featureWidth), 30);
        this.HEIGHT_RULER = (int) (this.niceIntFormat.format(this.interval.end).length() * this.featureHeight + 5);
        LOG.info("Feature height:" + this.featureHeight);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile == null) {
            w = xof.createXMLStreamWriter(stdout(), "UTF-8");
        } else {
            fout = new FileOutputStream(this.outputFile);
            w = xof.createXMLStreamWriter(fout, "UTF-8");
        }
        w.writeStartDocument("UTF-8", "1.0");
        printDocument(w, intervalStr);
        w.writeEndDocument();
        w.flush();
        w.close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(fout);
        CloserUtil.close(in);
        CloserUtil.close(this.indexedFastaSequenceFile);
        this.indexedFastaSequenceFile = null;
        this.interval = null;
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) FileOutputStream(java.io.FileOutputStream) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 58 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class Bam2Wig method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.win_shift <= 0) {
        LOG.error("window shift<=0");
        return -1;
    }
    if (this.window_span <= 0) {
        LOG.error("window size<=0");
        return -1;
    }
    final Interval interval;
    PrintWriter pw = null;
    CloseableIterator<SAMRecord> samRecordIterator = null;
    final List<SamReader> samReaders = new ArrayList<>();
    final List<CloseableIterator<SAMRecord>> merginIterators = new ArrayList<>();
    try {
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(htsjdk.samtools.ValidationStringency.LENIENT);
        if (args.isEmpty()) {
            if (!StringUtil.isBlank(region_str)) {
                LOG.error("cannot specify region for stdin");
                return -1;
            }
            interval = null;
            samReaders.add(srf.open(SamInputResource.of(stdin())));
            samRecordIterator = samReaders.get(0).iterator();
        } else if (args.size() == 1 && !args.get(0).endsWith(".list")) {
            samReaders.add(srf.open(SamInputResource.of(new File(args.get(0)))));
            if (StringUtil.isBlank(this.region_str)) {
                samRecordIterator = samReaders.get(0).iterator();
                interval = null;
            } else {
                interval = new IntervalParser(samReaders.get(0).getFileHeader().getSequenceDictionary()).setContigNameIsWholeContig(true).parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.debug("interval " + interval);
                samRecordIterator = samReaders.get(0).query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            }
        } else {
            final List<File> samFiles;
            if (args.size() == 1 && args.get(0).endsWith(".list")) {
                samFiles = IOUtils.unrollFile(new File(args.get(0)));
            } else {
                samFiles = args.stream().map(S -> new File(S)).collect(Collectors.toList());
            }
            if (samFiles.isEmpty()) {
                LOG.error("No Input SAM file");
                return -1;
            }
            final SAMSequenceDictionary dict0 = SAMSequenceDictionaryExtractor.extractDictionary(samFiles.get(0));
            if (dict0 == null)
                throw new JvarkitException.DictionaryMissing(samFiles.get(0).getPath());
            samFiles.stream().forEach(F -> {
                final SAMSequenceDictionary dicti = SAMSequenceDictionaryExtractor.extractDictionary(F);
                if (dicti == null)
                    throw new JvarkitException.DictionaryMissing(F.getPath());
                if (!SequenceUtil.areSequenceDictionariesEqual(dicti, dict0)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(dict0, dicti);
                }
            });
            for (final File bamFile : samFiles) {
                LOG.info("opening " + bamFile);
                samReaders.add(srf.open(bamFile));
            }
            final SamFileHeaderMerger mergedheader = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()), false);
            final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new HashMap<>();
            if (StringUtil.isBlank(this.region_str)) {
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.iterator());
                }
                interval = null;
            } else {
                interval = new IntervalParser(dict0).setContigNameIsWholeContig(true).parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.info("interval :" + interval);
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false));
                }
            }
            merginIterators.addAll(reader2iter.values());
            samRecordIterator = new MergingSamRecordIterator(mergedheader, reader2iter, true);
        }
        for (final SamReader sr : samReaders) {
            if (sr.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                LOG.error("one of your bam input is not sorted on coordinate");
                return -1;
            }
        }
        pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        run(pw, samRecordIterator, samReaders.get(0).getFileHeader().getSequenceDictionary(), interval);
        samRecordIterator.close();
        samRecordIterator = null;
        CloserUtil.close(samReaders);
        samReaders.clear();
        pw.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(merginIterators);
        CloserUtil.close(samRecordIterator);
        CloserUtil.close(samReaders);
        CloserUtil.close(pw);
        pw = null;
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) SAMSequenceDictionaryExtractor(htsjdk.variant.utils.SAMSequenceDictionaryExtractor) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) Predicate(java.util.function.Predicate) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) Objects(java.util.Objects) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) CloseableIterator(htsjdk.samtools.util.CloseableIterator) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval) PrintWriter(java.io.PrintWriter)

Example 59 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamIndexReadNames method indexBamFile.

private void indexBamFile(File bamFile) throws IOException {
    NameIndexDef indexDef = new NameIndexDef();
    SortingCollection<NameAndPos> sorting = null;
    LOG.info("Opening " + bamFile);
    SamReader sfr = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
    sorting = SortingCollection.newInstance(NameAndPos.class, new NameAndPosCodec(), new NameAndPosComparator(), maxRecordsInRAM, bamFile.getParentFile().toPath());
    sorting.setDestructiveIteration(true);
    if (sfr.getFileHeader().getSortOrder() != SortOrder.coordinate) {
        throw new IOException("not SortOrder.coordinate " + sfr.getFileHeader().getSortOrder());
    }
    SAMRecordIterator iter = sfr.iterator();
    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
    while (iter.hasNext()) {
        SAMRecord rec = iter.next();
        progress.watch(rec);
        NameAndPos nap = new NameAndPos();
        nap.name = rec.getReadName();
        indexDef.maxNameLengt = Math.max(nap.name.length() + 1, indexDef.maxNameLengt);
        nap.tid = rec.getReferenceIndex();
        nap.pos = rec.getAlignmentStart();
        indexDef.countReads++;
        sorting.add(nap);
    }
    progress.finish();
    iter.close();
    sfr.close();
    sorting.doneAdding();
    LOG.info("Done Adding. N=" + indexDef.countReads);
    File indexFile = new File(bamFile.getParentFile(), bamFile.getName() + NAME_IDX_EXTENSION);
    LOG.info("Writing index " + indexFile);
    FileOutputStream raf = new FileOutputStream(indexFile);
    ByteBuffer byteBuff = ByteBuffer.allocate(8 + 4);
    byteBuff.putLong(indexDef.countReads);
    byteBuff.putInt(indexDef.maxNameLengt);
    raf.write(byteBuff.array());
    byteBuff = ByteBuffer.allocate(indexDef.maxNameLengt + 4 + 4);
    CloseableIterator<NameAndPos> iter2 = sorting.iterator();
    while (iter2.hasNext()) {
        byteBuff.rewind();
        NameAndPos nap = iter2.next();
        for (int i = 0; i < nap.name.length(); ++i) {
            byteBuff.put((byte) nap.name.charAt(i));
        }
        for (int i = nap.name.length(); i < indexDef.maxNameLengt; ++i) {
            byteBuff.put((byte) '\0');
        }
        byteBuff.putInt(nap.tid);
        byteBuff.putInt(nap.pos);
        raf.write(byteBuff.array());
    }
    raf.flush();
    raf.close();
    sorting.cleanup();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 60 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class BamStats01 method doWork.

@Override
public int doWork(final List<String> inputs) {
    final List<String> args = new ArrayList<>(IOUtils.unrollFiles(inputs));
    try {
        this.out = super.openFileOrStdoutAsPrintStream(this.outputFile);
        out.print("#Filename\tSample");
        for (Category2 cat2 : Category2.values()) {
            for (Category cat1 : Category.values()) {
                // je je suis libertineuuh, je suis une cat1
                out.print("\t" + cat2 + "_" + cat1);
            }
            if (bedFile == null)
                break;
        }
        out.println();
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (args.isEmpty()) {
            final SamReader r = srf.open(SamInputResource.of(stdin()));
            run("stdin", r);
            r.close();
        } else {
            for (final String filename : args) {
                LOG.info("Reading from " + filename);
                final SamReader sfr = srf.open(new File(filename));
                run(filename, sfr);
                sfr.close();
            }
        }
        out.flush();
        this.out.flush();
        this.out.close();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ArrayList(java.util.ArrayList) File(java.io.File) IOException(java.io.IOException)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26