Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class Bam2Xml, method run.
/**
 * Writes an XML representation of a SAM/BAM input.
 *
 * The header and every alignment of {@code samReader} are serialized through
 * {@link SAMXMLWriter} to {@code this.outputFile}, or to stdout when no
 * output file was configured.
 *
 * @param samReader source of the header and records; not closed by this method
 * @return 0 on success, -1 on any error (already logged)
 */
private int run(SamReader samReader) {
    OutputStream fout = null;
    SAMRecordIterator iter = null;
    XMLStreamWriter w = null;
    try {
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        if (this.outputFile != null) {
            fout = IOUtils.openFileForWriting(this.outputFile);
            w = xmlfactory.createXMLStreamWriter(fout, "UTF-8");
        } else {
            w = xmlfactory.createXMLStreamWriter(stdout(), "UTF-8");
        }
        w.writeStartDocument("UTF-8", "1.0");
        final SAMFileHeader header = samReader.getFileHeader();
        final SAMXMLWriter xw = new SAMXMLWriter(w, header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = samReader.iterator();
        while (iter.hasNext()) {
            xw.addAlignment(progress.watch(iter.next()));
        }
        xw.close();
        w.writeEndDocument();
        // make sure the underlying file stream is flushed before the finally-close
        if (fout != null) {
            fout.flush();
        }
    } catch (final Exception e) {
        // report through the project logger only; the former printStackTrace()
        // duplicated the same stack trace on stderr
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(iter);
        CloserUtil.close(fout);
    }
    return 0;
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamIndexReadNames, method indexBamFile.
/**
 * Builds a read-name index next to a coordinate-sorted BAM file.
 *
 * Reads every record, sorts (name, tid, pos) tuples with a disk-backed
 * {@link SortingCollection}, then writes a binary index file
 * ({@code bamFile + NAME_IDX_EXTENSION}) made of a fixed header
 * (read count as long, max name width as int) followed by fixed-width
 * entries: '\0'-padded read name, reference index (int), alignment start (int).
 *
 * @param bamFile coordinate-sorted BAM to index; the index is created in the same directory
 * @throws IOException if the BAM is not coordinate-sorted or on I/O failure
 */
private void indexBamFile(File bamFile) throws IOException {
    final NameIndexDef indexDef = new NameIndexDef();
    LOG.info("Opening " + bamFile);
    SortingCollection<NameAndPos> sorting = null;
    try (SamReader sfr = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile)) {
        // validate before allocating the sorter, so a bad input leaks nothing
        if (sfr.getFileHeader().getSortOrder() != SortOrder.coordinate) {
            throw new IOException("not SortOrder.coordinate " + sfr.getFileHeader().getSortOrder());
        }
        sorting = SortingCollection.newInstance(NameAndPos.class, new NameAndPosCodec(), new NameAndPosComparator(), maxRecordsInRAM, bamFile.getParentFile().toPath());
        sorting.setDestructiveIteration(true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());
        try (SAMRecordIterator iter = sfr.iterator()) {
            while (iter.hasNext()) {
                final SAMRecord rec = iter.next();
                progress.watch(rec);
                final NameAndPos nap = new NameAndPos();
                nap.name = rec.getReadName();
                // track the widest name seen (+1 for a trailing '\0')
                indexDef.maxNameLengt = Math.max(nap.name.length() + 1, indexDef.maxNameLengt);
                nap.tid = rec.getReferenceIndex();
                nap.pos = rec.getAlignmentStart();
                indexDef.countReads++;
                sorting.add(nap);
            }
        }
        progress.finish();
        sorting.doneAdding();
        LOG.info("Done Adding. N=" + indexDef.countReads);
        final File indexFile = new File(bamFile.getParentFile(), bamFile.getName() + NAME_IDX_EXTENSION);
        LOG.info("Writing index " + indexFile);
        try (FileOutputStream raf = new FileOutputStream(indexFile)) {
            // fixed-size file header: record count (8 bytes) + max name width (4 bytes)
            ByteBuffer byteBuff = ByteBuffer.allocate(8 + 4);
            byteBuff.putLong(indexDef.countReads);
            byteBuff.putInt(indexDef.maxNameLengt);
            raf.write(byteBuff.array());
            // one reusable buffer per fixed-width entry: name + tid + pos
            byteBuff = ByteBuffer.allocate(indexDef.maxNameLengt + 4 + 4);
            try (CloseableIterator<NameAndPos> iter2 = sorting.iterator()) {
                while (iter2.hasNext()) {
                    byteBuff.rewind();
                    final NameAndPos nap = iter2.next();
                    for (int i = 0; i < nap.name.length(); ++i) {
                        byteBuff.put((byte) nap.name.charAt(i));
                    }
                    // pad the name column with '\0' up to the fixed width
                    for (int i = nap.name.length(); i < indexDef.maxNameLengt; ++i) {
                        byteBuff.put((byte) '\0');
                    }
                    byteBuff.putInt(nap.tid);
                    byteBuff.putInt(nap.pos);
                    raf.write(byteBuff.array());
                }
            }
            raf.flush();
        }
    } finally {
        // always remove the sorter's temporary files, even on failure
        if (sorting != null) {
            sorting.cleanup();
        }
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamStats01, method run.
/**
 * Collects per-sample read statistics for one BAM input and prints one
 * report line per sample to {@code this.out}.
 *
 * When a BED file was configured, the BED intervals are loaded once (lazily,
 * shared across calls) and each mapped read is additionally counted as
 * IN_TARGET or OFF_TARGET.
 *
 * @param filename label printed in the first column of each report line
 * @param samFileReader reader over the BAM; closed by this method
 * @throws IOException on incompatible sequence dictionaries or on a BED line
 *         whose contig is absent from the dictionary
 */
private void run(final String filename, SamReader samFileReader) throws IOException {
    final Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
    final SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
    // the first input's dictionary becomes the reference for all later inputs
    if (samSequenceDictionary == null) {
        samSequenceDictionary = currDict;
    }
    if (this.bedFile != null) {
        if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
            samFileReader.close();
            throw new IOException("incompatible sequence dictionaries." + filename);
        }
        // lazily load the BED intervals once; reused for every subsequent input
        if (intervalTreeMap == null) {
            final BedLineCodec bedCodec = new BedLineCodec();
            intervalTreeMap = new IntervalTreeMap<>();
            LOG.info("opening " + this.bedFile);
            String line;
            // try-with-resources: the previous version leaked the reader when
            // a BED line referenced an unknown chromosome (throw before close)
            try (BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile)) {
                while ((line = bedIn.readLine()) != null) {
                    final BedLine bedLine = bedCodec.decode(line);
                    if (bedLine == null)
                        continue;
                    final int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
                    if (seqIndex == -1) {
                        throw new IOException("unknown chromosome from dict in in " + line + " " + this.bedFile);
                    }
                    intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
                }
            }
            LOG.info("done reading " + this.bedFile);
        }
    }
    // locate chrX/chrY by name (case-insensitive, with or without "chr" prefix)
    this.chrX_index = -1;
    this.chrY_index = -1;
    for (final SAMSequenceRecord rec : currDict.getSequences()) {
        final String chromName = rec.getSequenceName().toLowerCase();
        if (chromName.equals("x") || chromName.equals("chrx")) {
            this.chrX_index = rec.getSequenceIndex();
        } else if (chromName.equals("y") || chromName.equals("chry")) {
            this.chrY_index = rec.getSequenceIndex();
        }
    }
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
    final SAMRecordIterator iter = samFileReader.iterator();
    while (iter.hasNext()) {
        final SAMRecord rec = progess.watch(iter.next());
        String sampleName = groupBy.getPartion(rec);
        if (sampleName == null || sampleName.isEmpty())
            sampleName = "undefined";
        Histogram hist = sample2hist.get(sampleName);
        if (hist == null) {
            hist = new Histogram();
            sample2hist.put(sampleName, hist);
        }
        // every read is counted in the ALL category
        hist.histograms[Category2.ALL.ordinal()].watch(rec);
        if (intervalTreeMap == null)
            continue;
        // unmapped reads cannot be classified against the target intervals
        if (rec.getReadUnmappedFlag()) {
            continue;
        }
        if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
            hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
        } else {
            hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
        }
    }
    progess.finish();
    samFileReader.close();
    // report: filename, sample, then one count column per (Category2, Category) pair
    for (final String sampleName : sample2hist.keySet()) {
        final Histogram hist = sample2hist.get(sampleName);
        out.print(filename + "\t" + sampleName);
        for (final Category2 cat2 : Category2.values()) {
            for (final Category cat1 : Category.values()) {
                out.print("\t");
                out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
            }
            // without a BED file only the ALL category is meaningful
            if (intervalTreeMap == null)
                break;
        }
        out.println();
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class BamStats02, method run.
/**
 * Counts the reads of one input per {@code Category} and prints the
 * categories with their counts, most frequent first.
 */
private void run(String filename, SamReader r, PrintWriter out) {
    final Counter<Category> counter = new Counter<>();
    SAMRecordIterator iter = null;
    try {
        iter = r.iterator();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Category row = new Category();
            row.set(STRING_PROPS.filename, filename);
            row.flag = rec.getFlags();
            // -1 means "no target information"; overwritten below when intervals exist
            row.set(INT_PROPS.inTarget, -1);
            final SAMReadGroupRecord readGroup = rec.getReadGroup();
            if (readGroup != null) {
                row.set(STRING_PROPS.samplename, readGroup.getSample());
                row.set(STRING_PROPS.platform, readGroup.getPlatform());
                row.set(STRING_PROPS.platformUnit, readGroup.getPlatformUnit());
                row.set(STRING_PROPS.library, readGroup.getLibrary());
            }
            // machine/flowcell/lane/run parsed from the read name, when it follows
            // the short-read naming convention
            final ShortReadName shortName = ShortReadName.parse(rec);
            if (shortName.isValid()) {
                row.set(STRING_PROPS.instrument, shortName.getInstrumentName());
                row.set(STRING_PROPS.flowcell, shortName.getFlowCellId());
                row.set(INT_PROPS.lane, shortName.getFlowCellLane());
                row.set(INT_PROPS.run, shortName.getRunId());
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                row.set(STRING_PROPS.mate_chromosome, rec.getMateReferenceName());
            }
            if (!rec.getReadUnmappedFlag()) {
                // bin the mapping quality up to the next multiple of 10
                final int binnedMapq = (int) (Math.ceil(rec.getMappingQuality() / 10.0) * 10);
                row.set(INT_PROPS.mapq, binnedMapq);
                row.set(STRING_PROPS.chromosome, rec.getReferenceName());
                if (this.intervals != null) {
                    final Interval span = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd());
                    row.set(INT_PROPS.inTarget, this.intervals.containsOverlapping(span) ? 1 : 0);
                }
            }
            counter.incr(row);
        }
        progress.finish();
        // one output line per category, sorted by decreasing count
        for (final Category row : counter.keySetDecreasing()) {
            row.print(out);
            out.print("\t");
            out.print(counter.count(row));
            out.println();
        }
        out.flush();
    } finally {
        CloserUtil.close(iter);
    }
}
Use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in the jvarkit project (by lindenb): class Biostar154220, method doWork.
/**
 * Caps the depth of coverage of a BAM whose reads are grouped by
 * reference/query-name (header sort order "unsorted", see SortSamRefName).
 *
 * Records sharing the same read name and reference are buffered and written
 * out as a unit, but only if doing so does not push any covered reference
 * position above {@code this.capDepth}; otherwise the whole group is dropped.
 * Unmapped reads are always copied through.
 *
 * @param in reader over the input BAM; not closed by this method
 * @return 0 on success, -1 on error (already logged)
 * @throws IOException when a mapped record has no cigar
 */
@SuppressWarnings("resource")
private int doWork(final SamReader in) throws IOException {
    SAMFileHeader header = in.getFileHeader();
    if (header.getSortOrder() != SAMFileHeader.SortOrder.unsorted) {
        LOG.error("input should be unsorted, reads sorted on REF/query-name e.g: see https://github.com/lindenb/jvarkit/wiki/SortSamRefName");
        return -1;
    }
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict == null) {
        LOG.error("no dict !");
        return -1;
    }
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    int prev_tid = -1;
    // accepted coverage per 1-based position of the current contig
    int[] depth_array = null;
    try {
        SAMFileHeader header2 = header.clone();
        header2.addComment("Biostar154220" + " " + getVersion() + " " + getProgramCommandLine());
        out = this.writingBams.openSAMFileWriter(outputFile, header2, true);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        iter = in.iterator();
        // group of consecutive records sharing read-name + reference
        List<SAMRecord> buffer = new ArrayList<>();
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
            }
            // unmapped reads cannot affect depth: copy through immediately
            if (rec != null && rec.getReadUnmappedFlag()) {
                out.addAlignment(rec);
                continue;
            }
            // no more record or
            if (!buffer.isEmpty() && rec != null && buffer.get(0).getReadName().equals(rec.getReadName()) && buffer.get(0).getReferenceIndex().equals(rec.getReferenceIndex())) {
                buffer.add(progress.watch(rec));
            } else if (buffer.isEmpty() && rec != null) {
                buffer.add(progress.watch(rec));
            } else // dump buffer
            {
                if (!buffer.isEmpty()) {
                    final int tid = buffer.get(0).getReferenceIndex();
                    // (re)allocate the depth array when the contig changes
                    if (prev_tid == -1 || prev_tid != tid) {
                        SAMSequenceRecord ssr = dict.getSequence(tid);
                        prev_tid = tid;
                        depth_array = null;
                        System.gc();
                        LOG.info("Alloc memory for contig " + ssr.getSequenceName() + " N=" + ssr.getSequenceLength() + "*sizeof(int)");
                        // use a +1 pos
                        depth_array = new int[ssr.getSequenceLength() + 1];
                        Arrays.fill(depth_array, 0);
                    }
                    // position->coverage for this set of reads
                    Counter<Integer> readposition2coverage = new Counter<Integer>();
                    boolean dump_this_buffer = true;
                    for (final SAMRecord sr : buffer) {
                        if (!dump_this_buffer)
                            break;
                        if (this.filter.filterOut(sr))
                            continue;
                        final Cigar cigar = sr.getCigar();
                        if (cigar == null) {
                            // BUGFIX: report the offending record 'sr'; the old code used
                            // 'rec', which is null here at end-of-input (NPE) or is the
                            // NEXT record, not the one missing its cigar
                            throw new IOException("Cigar missing in " + sr.getSAMString());
                        }
                        int refPos1 = sr.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                // would this group push any base above the cap?
                                for (int x = 0; x < ce.getLength() && refPos1 + x < depth_array.length; ++x) {
                                    int cov = (int) readposition2coverage.incr(refPos1 + x);
                                    if (depth_array[refPos1 + x] + cov > this.capDepth) {
                                        dump_this_buffer = false;
                                        break;
                                    }
                                }
                            }
                            if (!dump_this_buffer)
                                break;
                            refPos1 += ce.getLength();
                        }
                    }
                    if (dump_this_buffer) {
                        // consumme this coverage
                        for (Integer pos : readposition2coverage.keySet()) {
                            depth_array[pos] += (int) readposition2coverage.count(pos);
                        }
                        for (SAMRecord sr : buffer) {
                            out.addAlignment(sr);
                        }
                    }
                    buffer.clear();
                }
                if (rec == null)
                    break;
                // start a new group with the record that ended the previous one
                buffer.add(rec);
            }
        }
        depth_array = null;
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
    }
}
Aggregations