Search in sources :

Example 96 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class NgsFilesScanner method readBam.

@Override
protected void readBam(final File f) {
    if (!f.canRead())
        return;
    SamReader r = null;
    try {
        StringWriter sw = new StringWriter();
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        XMLStreamWriter out = xof.createXMLStreamWriter(new StreamResult(sw));
        out.writeStartElement("bam");
        writeFile(out, f);
        r = super.openSamReader(f.getPath());
        final SAMFileHeader h = r.getFileHeader();
        out.writeStartElement("samples");
        if (h != null && h.getReadGroups() != null) {
            Set<String> seen = new HashSet<String>();
            for (SAMReadGroupRecord rg : h.getReadGroups()) {
                String sample = rg.getSample();
                if (sample == null || sample.isEmpty() || seen.contains(sample))
                    continue;
                seen.add(sample);
                out.writeStartElement("sample");
                out.writeCharacters(sample);
                out.writeEndElement();
            }
        }
        out.writeEndElement();
        out.writeEndElement();
        out.flush();
        out.close();
        sw.flush();
        put(f, sw.toString());
    } catch (Exception e) {
        LOG.warning(e);
    } finally {
        CloserUtil.close(r);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) StringWriter(java.io.StringWriter) StreamResult(javax.xml.transform.stream.StreamResult) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) HashSet(java.util.HashSet)

Example 97 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class NgsFilesSummary method readBam.

@Override
protected void readBam(final File f) {
    if (!f.canRead())
        return;
    SamReader r = null;
    try {
        r = super.openSamReader(f.getPath());
        SAMFileHeader h = r.getFileHeader();
        if (h != null && h.getReadGroups() != null && h.getReadGroups().isEmpty()) {
            for (final SAMReadGroupRecord rg : h.getReadGroups()) {
                String sample = rg.getSample();
                if (StringUtil.isBlank(sample)) {
                    sample = "_NO_SAMPLE_RG_";
                }
                print(sample, InfoType.BAM, f);
            }
        } else {
            print("_NO_READ_GROUP_", InfoType.BAM, f);
        }
    } catch (final Exception e) {
        LOG.warning(e);
    } finally {
        CloserUtil.close(r);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IOException(java.io.IOException)

Example 98 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class PcrSliceReads method doWork.

@Override
public int doWork(List<String> args) {
    if (bedFile == null) {
        LOG.error("undefined bed file");
        return -1;
    }
    BufferedReader r = null;
    SamReader samReader = null;
    try {
        samReader = super.openSamReader(oneFileOrNull(args));
        final BedLineCodec codec = new BedLineCodec();
        r = IOUtils.openFileForBufferedReading(bedFile);
        String line;
        while ((line = r.readLine()) != null) {
            final BedLine bed = codec.decode(line);
            if (bed == null)
                continue;
            final String chrom = bed.getContig();
            int chromStart1 = bed.getStart();
            int chromEnd1 = bed.getEnd();
            if (chromStart1 < 1 || chromStart1 > chromEnd1) {
                LOG.error("Bad bed line " + line);
                return -1;
            }
            final String name = bed.get(3).trim();
            if (name == null || name.isEmpty()) {
                LOG.error("Bad bed line (name missing) in  " + line);
                return -1;
            }
            final Interval i = new Interval(chrom, chromStart1, chromEnd1, false, name);
            this.bedIntervals.put(i, i);
        }
        return run(samReader);
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(samReader);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Interval(htsjdk.samtools.util.Interval)

Example 99 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class SamSlop method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("Reference was not specified.");
        return -1;
    }
    if (this.defaultQual.length() != 1) {
        LOG.error("default quality should have length==1 " + this.defaultQual);
    }
    GenomicSequence genomicSequence = null;
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    final char defaultQUAL = this.defaultQual.charAt(0);
    try {
        final String inputName = oneFileOrNull(args);
        LOG.info("Loading reference");
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        sfr = openSamReader(inputName);
        final SAMFileHeader header = sfr.getFileHeader();
        header.setSortOrder(SortOrder.unsorted);
        sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Cigar cigar = rec.getCigar();
            if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
                sfw.addAlignment(rec);
                continue;
            }
            final StringBuilder sbs = new StringBuilder(rec.getReadString());
            final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
            if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
                genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
            }
            final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
            final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
            final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
            if (!this.removeClip) {
                // replace clip S/H by match M
                for (int i = 0; i < cl.size(); ++i) {
                    final CigarElement ce = cl.get(i);
                    if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
                        cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
                    }
                }
            }
            if (this.extend5 > 0 && refPos1 > 1) {
                if (this.removeClip) {
                    // /remove hard + soft clip 5'
                    while (!cl.isEmpty()) {
                        // first
                        final CigarElement ce = cl.get(0);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.replace(0, ce.getLength(), "");
                            sbq.replace(0, ce.getLength(), "");
                        }
                        // remove first
                        cl.remove(0);
                    }
                }
                final StringBuilder prefix = new StringBuilder(this.extend5);
                // /append + soft clip 5'
                for (int i = 0; i < this.extend5; ++i) {
                    int x1 = (refPos1 - 1) - i;
                    // break if out of genome
                    if (x1 < 1)
                        break;
                    prefix.insert(0, genomicSequence.charAt(x1 - 1));
                }
                sbs.insert(0, prefix.toString());
                // preprend quality
                for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
                // prepend cigar
                cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
                // update start pos
                rec.setAlignmentStart(refPos1 - prefix.length());
            }
            if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
                if (this.removeClip) {
                    // /remove hard + soft clip 3'
                    while (!cl.isEmpty()) {
                        // last
                        final CigarElement ce = cl.get(cl.size() - 1);
                        // not a clip
                        if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
                            break;
                        }
                        if (ce.getOperator() == CigarOperator.S) {
                            sbs.setLength(sbs.length() - ce.getLength());
                            sbq.setLength(sbq.length() - ce.getLength());
                        }
                        // remove last
                        cl.remove(cl.size() - 1);
                    }
                }
                int extend = 0;
                for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
                    sbs.append(genomicSequence.charAt(pos1 - 1));
                    sbq.append(defaultQUAL);
                    ++extend;
                }
                // append cigar
                cl.add(new CigarElement(extend, CigarOperator.M));
            }
            // simplify cigar
            int idx = 0;
            while (idx + 1 < cl.size()) {
                final CigarElement ce1 = cl.get(idx);
                final CigarElement ce2 = cl.get(idx + 1);
                if (ce1.getOperator() == ce2.getOperator()) {
                    cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
                    cl.remove(idx + 1);
                } else {
                    idx++;
                }
            }
            rec.setCigar(new Cigar(cl));
            rec.setReadString(sbs.toString());
            rec.setBaseQualityString(sbq.toString());
            final List<SAMValidationError> errors = rec.isValid();
            if (errors != null && !errors.isEmpty()) {
                for (SAMValidationError err : errors) {
                    LOG.error(err.getMessage());
                }
            }
            // info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
            sfw.addAlignment(rec);
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 100 with SamReader

use of htsjdk.samtools.SamReader in project jvarkit by lindenb.

the class SamToJson method call.

private int call(final String inputName) throws Exception {
    PrintWriter out = null;
    SamReader sfr = null;
    final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
    SAMFileWriter swf = null;
    try {
        sfr = super.openSamReader(inputName);
        out = super.openFileOrStdoutAsPrintWriter(this.output);
        swf = factory.open(sfr.getFileHeader(), out);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext() && !out.checkError()) {
            swf.addAlignment(iter.next());
        }
        iter.close();
        out.flush();
        swf.close();
        swf = null;
        LOG.info("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(swf);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamJsonWriterFactory(com.github.lindenb.jvarkit.util.samtools.SamJsonWriterFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) PrintWriter(java.io.PrintWriter)

Aggregations

SamReader (htsjdk.samtools.SamReader)211 SAMRecord (htsjdk.samtools.SAMRecord)137 File (java.io.File)111 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)89 SAMFileHeader (htsjdk.samtools.SAMFileHeader)83 IOException (java.io.IOException)71 SamReaderFactory (htsjdk.samtools.SamReaderFactory)65 ArrayList (java.util.ArrayList)63 SAMFileWriter (htsjdk.samtools.SAMFileWriter)58 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)44 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)42 List (java.util.List)39 CigarElement (htsjdk.samtools.CigarElement)32 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)32 HashMap (java.util.HashMap)31 Cigar (htsjdk.samtools.Cigar)30 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)30 PrintWriter (java.io.PrintWriter)27 Interval (htsjdk.samtools.util.Interval)26 HashSet (java.util.HashSet)26