Search in sources :

Example 21 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamStats01 method run.

private void run(final String filename, SamReader samFileReader) throws IOException {
    Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
    SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
    if (samSequenceDictionary == null) {
        samSequenceDictionary = currDict;
    }
    if (this.bedFile != null) {
        if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
            samFileReader.close();
            throw new IOException("incompatible sequence dictionaries." + filename);
        }
        if (intervalTreeMap == null) {
            final BedLineCodec bedCodec = new BedLineCodec();
            intervalTreeMap = new IntervalTreeMap<>();
            LOG.info("opening " + this.bedFile);
            String line;
            final BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile);
            while ((line = bedIn.readLine()) != null) {
                final BedLine bedLine = bedCodec.decode(line);
                if (bedLine == null)
                    continue;
                int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
                if (seqIndex == -1) {
                    throw new IOException("unknown chromosome from dict in  in " + line + " " + this.bedFile);
                }
                intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
            }
            bedIn.close();
            LOG.info("done reading " + this.bedFile);
        }
    }
    this.chrX_index = -1;
    this.chrY_index = -1;
    for (final SAMSequenceRecord rec : currDict.getSequences()) {
        final String chromName = rec.getSequenceName().toLowerCase();
        if (chromName.equals("x") || chromName.equals("chrx")) {
            this.chrX_index = rec.getSequenceIndex();
        } else if (chromName.equals("y") || chromName.equals("chry")) {
            this.chrY_index = rec.getSequenceIndex();
        }
    }
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
    final SAMRecordIterator iter = samFileReader.iterator();
    while (iter.hasNext()) {
        final SAMRecord rec = progess.watch(iter.next());
        String sampleName = groupBy.getPartion(rec);
        if (sampleName == null || sampleName.isEmpty())
            sampleName = "undefined";
        Histogram hist = sample2hist.get(sampleName);
        if (hist == null) {
            hist = new Histogram();
            sample2hist.put(sampleName, hist);
        }
        hist.histograms[Category2.ALL.ordinal()].watch(rec);
        if (intervalTreeMap == null)
            continue;
        if (rec.getReadUnmappedFlag()) {
            continue;
        }
        if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
            hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
        } else {
            hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
        }
    }
    progess.finish();
    samFileReader.close();
    samFileReader = null;
    for (final String sampleName : sample2hist.keySet()) {
        final Histogram hist = sample2hist.get(sampleName);
        out.print(filename + "\t" + sampleName);
        for (final Category2 cat2 : Category2.values()) {
            for (// je je suis libertineuuh, je suis une cat1
            final Category cat1 : // je je suis libertineuuh, je suis une cat1
            Category.values()) {
                out.print("\t");
                out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
            }
            if (intervalTreeMap == null)
                break;
        }
        out.println();
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) Interval(htsjdk.samtools.util.Interval)

Example 22 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamStats02 method run.

private void run(String filename, SamReader r, PrintWriter out) {
    final Counter<Category> counter = new Counter<>();
    SAMRecordIterator iter = null;
    try {
        iter = r.iterator();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord record = progress.watch(iter.next());
            final Category cat = new Category();
            cat.set(STRING_PROPS.filename, filename);
            cat.flag = record.getFlags();
            cat.set(INT_PROPS.inTarget, -1);
            final SAMReadGroupRecord g = record.getReadGroup();
            if (g != null) {
                cat.set(STRING_PROPS.samplename, g.getSample());
                cat.set(STRING_PROPS.platform, g.getPlatform());
                cat.set(STRING_PROPS.platformUnit, g.getPlatformUnit());
                cat.set(STRING_PROPS.library, g.getLibrary());
            }
            final ShortReadName readName = ShortReadName.parse(record);
            if (readName.isValid()) {
                cat.set(STRING_PROPS.instrument, readName.getInstrumentName());
                cat.set(STRING_PROPS.flowcell, readName.getFlowCellId());
                cat.set(INT_PROPS.lane, readName.getFlowCellLane());
                cat.set(INT_PROPS.run, readName.getRunId());
            }
            if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
                cat.set(STRING_PROPS.mate_chromosome, record.getMateReferenceName());
            }
            if (!record.getReadUnmappedFlag()) {
                cat.set(INT_PROPS.mapq, (int) (Math.ceil(record.getMappingQuality() / 10.0) * 10));
                cat.set(STRING_PROPS.chromosome, record.getReferenceName());
                if (this.intervals != null) {
                    if (this.intervals.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
                        cat.set(INT_PROPS.inTarget, 1);
                    } else {
                        cat.set(INT_PROPS.inTarget, 0);
                    }
                }
            }
            counter.incr(cat);
        }
        progress.finish();
        for (final Category cat : counter.keySetDecreasing()) {
            cat.print(out);
            out.print("\t");
            out.print(counter.count(cat));
            out.println();
        }
        out.flush();
    } finally {
        CloserUtil.close(iter);
    }
}
Also used : Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ShortReadName(com.github.lindenb.jvarkit.util.illumina.ShortReadName) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Interval(htsjdk.samtools.util.Interval)

Example 23 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar154220 method doWork.

@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        List<SAMRecord> buffer = new ArrayList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // no more record or
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else // dump buffer
            {
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            throw new IOException("Cigar missing in " + rec.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consumme this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Counter(com.github.lindenb.jvarkit.util.Counter) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 24 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BamStats05 method doWork.

protected int doWork(final PrintWriter pw, final Map<String, List<Interval>> gene2interval, final String filename, final SamReader IN) throws Exception {
    try {
        LOG.info("Scanning " + filename);
        final SAMFileHeader header = IN.getFileHeader();
        final List<SAMReadGroupRecord> rgs = header.getReadGroups();
        if (rgs == null || rgs.isEmpty())
            throw new IOException("No read groups in " + filename);
        final Set<String> groupNames = this.groupBy.getPartitions(rgs);
        for (final String partition : groupNames) {
            if (partition.isEmpty())
                throw new IOException("Empty " + groupBy.name());
            for (final String gene : gene2interval.keySet()) {
                int geneStart = Integer.MAX_VALUE;
                int geneEnd = 0;
                final List<Integer> counts = new ArrayList<>();
                for (final Interval interval : gene2interval.get(gene)) {
                    geneStart = Math.min(geneStart, interval.getStart() - 1);
                    geneEnd = Math.max(geneEnd, interval.getEnd());
                    /* picard javadoc:  - Sequence name - Start position (1-based) - End position (1-based, end inclusive)  */
                    int[] interval_counts = new int[interval.getEnd() - interval.getStart() + 1];
                    if (interval_counts.length == 0)
                        continue;
                    Arrays.fill(interval_counts, 0);
                    if (IN.getFileHeader().getSequenceIndex(interval.getContig()) == -1) {
                        throw new IllegalArgumentException("NO DICT FOR \"" + interval.getContig() + "\"");
                    }
                    /**
                     *     start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
                     *	   end - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
                     */
                    SAMRecordIterator r = IN.query(new QueryInterval[] { new QueryInterval(header.getSequenceIndex(interval.getContig()), interval.getStart(), interval.getEnd()) }, false);
                    while (r.hasNext()) {
                        final SAMRecord rec = r.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (filter.filterOut(rec))
                            continue;
                        if (!rec.getReferenceName().equals(interval.getContig()))
                            continue;
                        final SAMReadGroupRecord rg = rec.getReadGroup();
                        if (rg == null || !partition.equals(this.groupBy.apply(rg)))
                            continue;
                        final Cigar cigar = rec.getCigar();
                        if (cigar == null)
                            continue;
                        int refpos1 = rec.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int i = 0; i < ce.getLength(); ++i) {
                                    if (refpos1 + i >= interval.getStart() && refpos1 + i <= interval.getEnd()) {
                                        interval_counts[refpos1 + i - interval.getStart()]++;
                                    }
                                }
                            }
                            refpos1 += ce.getLength();
                        }
                    }
                    /* end while r */
                    r.close();
                    for (int d : interval_counts) {
                        counts.add(d);
                    }
                }
                /* end interval */
                Collections.sort(counts);
                int count_no_coverage = 0;
                double mean = 0;
                for (int cov : counts) {
                    if (cov <= MIN_COVERAGE)
                        ++count_no_coverage;
                    mean += cov;
                }
                mean /= counts.size();
                pw.println(gene2interval.get(gene).get(0).getContig() + "\t" + geneStart + "\t" + geneEnd + "\t" + gene + "\t" + partition + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1) + "\t" + mean + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
            }
        // end gene
        }
        // end sample
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(IN);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) QueryInterval(htsjdk.samtools.QueryInterval) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 25 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class BioAlcidae method execute_bam.

private int execute_bam(String source) throws IOException {
    SamReader in = null;
    SAMRecordIterator iter = null;
    try {
        SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
        if (source == null) {
            in = srf.open(SamInputResource.of(stdin()));
        } else {
            in = srf.open(SamInputResource.of(source));
        }
        iter = in.iterator();
        bindings.put("header", in.getFileHeader());
        bindings.put("iter", iter);
        bindings.put("format", "sam");
        this.script.eval(bindings);
        this.writer.flush();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(iter);
        bindings.remove("header");
        bindings.remove("iter");
        bindings.remove("format");
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) JAXBException(javax.xml.bind.JAXBException)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14