Search in sources :

Example 91 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class VcfStrechToSvg method run.

private void run(final ArchiveFactory archive, final BedLine bed, final VCFHeader header, final VCFReader in, final PrintWriter manifest) {
    LOG.info("processing " + bed);
    final Set<String> limitSamples;
    if (StringUtils.isBlank(this.sampleStr)) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
    } else if (this.sampleStr.startsWith("^")) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
        limitSamples.removeAll(Arrays.stream(this.sampleStr.substring(1).split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet()));
    } else {
        limitSamples = Arrays.stream(this.sampleStr.split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    try (CloseableIterator<VariantContext> iter = in.query(bed)) {
        final List<VariantSet> L = iter.stream().filter(V -> acceptVariant(V)).map(V -> new VariantSet(V)).collect(Collectors.toCollection(ArrayList::new));
        if (L.isEmpty()) {
            LOG.warn("No valid variant found for \"" + bed + "\"");
            return;
        }
        int i = 0;
        while (i + 1 < L.size()) {
            if (L.get(i).withinDistanceOf(L.get(i + 1), this.withinDistance)) {
                L.get(i).variants.addAll(L.get(i + 1).variants);
                L.remove(i + 1);
            } else {
                i++;
            }
        }
        final int margin_left = 50;
        final int margin_right = 10;
        final double drawingAreaWidth = image_width_pixel - (margin_left + margin_right);
        final int intervalLength = L.stream().mapToInt(V -> V.getLengthOnReference()).sum();
        double x = 0;
        for (i = 0; i < L.size(); i++) {
            L.get(i).x = x;
            x += (L.get(i).getLengthOnReference() / (double) intervalLength) * drawingAreaWidth;
        }
        for (i = 0; i < L.size(); i++) {
            L.get(i).width = (i + 1 < L.size() ? L.get(i + 1).x : drawingAreaWidth) - L.get(i).x;
        }
        try {
            final DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
            final DocumentBuilder dom = db.newDocumentBuilder();
            this.document = dom.newDocument();
            final Element svgRoot = element("svg");
            this.document.appendChild(svgRoot);
            final String mainTitleStr = SequenceDictionaryUtils.getBuildName(dict).orElse("") + " " + new SimpleInterval(bed).toNiceString() + " length:" + StringUtils.niceInt(bed.getLengthOnReference());
            /* SVG title */
            {
                final Element title = element("title");
                svgRoot.appendChild(title);
                title.appendChild(text(mainTitleStr));
            }
            /* SVG style */
            {
                final String gtopacity = this.dynamicParams.getOrDefault("gt.opacity", "0.7");
                final Element style = element("style");
                svgRoot.appendChild(style);
                style.appendChild(text(".maintitle {text-anchor:middle;fill:blue} " + ".vc {stroke-width:0.5px;} " + ".transcript {stroke:black;stroke-width:1px;opacity:1;}" + ".exon {stroke:black;stroke-width:0.5px;fill:blue;opacity:1;}" + ".sample {fill:blue;font-size:7px;} " + ".samplelabel {stroke:gray;stroke-width:0.5px;font-size:" + this.dynamicParams.getOrDefault("sample.fontsize", "7") + "px;}\n" + ".coverage { fill:gray; stroke:yellow;opacity:0.2;} " + ".frame { fill:none; stroke: darkgray;} " + ".area0 {fill:white;}\n" + ".area1 {fill:floralwhite;}\n" + "circle.HOM_REF {fill:green;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HET {fill:blue;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HOM_VAR {fill:red;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "a {cursor: pointer;}\n"));
            }
            /* desc */
            {
                final Element descr = element("desc");
                svgRoot.appendChild(descr);
                descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
            }
            // main title
            {
                final Element gtitle = element("text", mainTitleStr);
                gtitle.setAttribute("class", "maintitle");
                gtitle.setAttribute("x", format(this.image_width_pixel / 2.0));
                gtitle.setAttribute("y", "15");
                svgRoot.appendChild(wrapLoc(gtitle, bed));
            }
            int margin_top = 50;
            double y = margin_top;
            final double min_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r1", "1"));
            final double max_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r2", "7"));
            final Element main_g = element("g");
            svgRoot.appendChild(main_g);
            /**
             * plot genes
             */
            if (!this.all_genes.isEmpty()) {
                final double transcript_height = 5;
                final double exon_height = (transcript_height - 1);
                final double save_y = y;
                final Element g_genes = element("g");
                g_genes.setAttribute("transform", "translate(" + margin_left + ",0)");
                main_g.appendChild(g_genes);
                /* loop over each vset */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    // find transcript in this vset
                    final List<Transcript> transcripts = this.all_genes.getOverlapping(vset).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(vset)).collect(Collectors.toList());
                    if (transcripts.isEmpty())
                        continue;
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_genes.appendChild(g_vset);
                    // y in this vset
                    double y2 = save_y;
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    /* loop over transcripts */
                    for (final Transcript tr : transcripts) {
                        final Element g_tr = element("g");
                        g_tr.setAttribute("transform", "translate(0," + format(y2) + ")");
                        g_vset.appendChild(g_tr);
                        final Element line = element("line");
                        line.setAttribute("class", "transcript");
                        line.setAttribute("x1", format(Math.max(0, base2pixel.applyAsDouble(tr.getStart()))));
                        line.setAttribute("y1", format(transcript_height / 2.0));
                        line.setAttribute("x2", format(Math.min(vset.width, base2pixel.applyAsDouble(tr.getEnd()))));
                        line.setAttribute("y2", format(transcript_height / 2.0));
                        line.appendChild(element("title", tr.getId()));
                        g_tr.appendChild(wrapLoc(line, tr));
                        /* loop over exons */
                        for (final Exon exon : tr.getExons()) {
                            if (!exon.overlaps(vset))
                                continue;
                            final Element exRect = element("rect");
                            exRect.setAttribute("class", "exon");
                            final double x_start = Math.max(0, base2pixel.applyAsDouble(exon.getStart()));
                            exRect.setAttribute("x", format(x_start));
                            exRect.setAttribute("y", format(transcript_height / 2.0 - exon_height / 2.0));
                            final double x_end = Math.min(vset.width, base2pixel.applyAsDouble(exon.getEnd()));
                            exRect.setAttribute("width", format(x_end - x_start));
                            exRect.setAttribute("height", format(exon_height));
                            exRect.appendChild(element("title", exon.getName()));
                            g_tr.appendChild(wrapLoc(exRect, exon));
                        }
                        y2 += transcript_height + 0.5;
                    }
                    y = Math.max(y, y2);
                }
                y++;
            }
            final double sample_height = Double.parseDouble(this.dynamicParams.getOrDefault("sample.height", "25"));
            final double sample_height2 = sample_height - (max_circle_radius * 2.0);
            int space_between_samples = 2;
            int got_n_samples = 0;
            for (final String sn : header.getSampleNamesInOrder()) {
                if (!limitSamples.contains(sn))
                    continue;
                boolean got_this_sample = false;
                final Element g_sample = element("g");
                g_sample.setAttribute("transform", "translate(" + margin_left + "," + format(y) + ")");
                /* get coverage */
                final int maxCoverage;
                if (this.sample2bam.containsKey(sn)) {
                    final CoverageFactory coverageFactory = new CoverageFactory();
                    try (SamReader sr = this.openSamReader(this.sample2bam.get(sn))) {
                        /* loop over each variant set */
                        for (final VariantSet vset : L) {
                            vset.coverage = coverageFactory.getSimpleCoverage(sr, vset, sn);
                        }
                    }
                    maxCoverage = L.stream().flatMapToInt(V -> V.coverage.stream()).max().orElse(0);
                } else {
                    maxCoverage = 0;
                    for (final VariantSet vset : L) {
                        vset.coverage = null;
                    }
                }
                /* loop over each variant set */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_sample.appendChild(g_vset);
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    // plot set length
                    final Element rect = element("rect");
                    rect.setAttribute("class", "area" + (i % 2));
                    rect.setAttribute("x", "0");
                    rect.setAttribute("y", "0");
                    rect.setAttribute("width", format(vset.width));
                    rect.setAttribute("height", format(sample_height));
                    if (!remove_tooltip)
                        rect.appendChild(element("title", vset.toString()));
                    g_vset.appendChild(rect);
                    // plot coverage
                    if (maxCoverage > 0 && this.sample2bam.containsKey(sn)) {
                        final double[] scaled = vset.coverage.scaleAverage((int) vset.width);
                        final StringBuilder sb = new StringBuilder();
                        sb.append("0," + sample_height);
                        for (int t = 0; t < scaled.length; t++) {
                            if (t > 1 && t + 1 < scaled.length && format(scaled[t - 1]).equals(format(scaled[t + 1])) && format(scaled[t - 1]).equals(format(scaled[t])))
                                continue;
                            sb.append(" ").append(t).append(",");
                            sb.append(format(sample_height * (1.0 - scaled[t] / maxCoverage)));
                        }
                        sb.append(" " + format(vset.width) + "," + sample_height);
                        final Element polyline = element("polyline");
                        polyline.setAttribute("class", "coverage");
                        polyline.setAttribute("points", sb.toString());
                        g_vset.appendChild(polyline);
                        vset.coverage = null;
                    }
                    // plot vertical line if colorTag defined
                    if (!StringUtils.isBlank(this.colorTag)) {
                        for (final VariantContext vc : vset.variants) {
                            if (!vc.hasAttribute(this.colorTag))
                                continue;
                            final String cssColor = vc.getAttributeAsString(this.colorTag, "");
                            if (StringUtils.isBlank(cssColor))
                                continue;
                            final double x0 = base2pixel.applyAsDouble(vc.getStart());
                            final Element line = element("line");
                            line.setAttribute("class", "vc");
                            line.setAttribute("style", "stroke:" + cssColor);
                            line.setAttribute("x1", format(x0));
                            line.setAttribute("y1", "0");
                            line.setAttribute("x2", format(x0));
                            line.setAttribute("y2", format(sample_height));
                            g_vset.appendChild(line);
                        }
                    }
                    // print all variants in this vcfset for this sample
                    for (final VariantContext vc : vset.variants) {
                        final Genotype gt = vc.getGenotype(sn);
                        if (gt.isNoCall())
                            continue;
                        if (hide_hom_ref && gt.isHomRef())
                            continue;
                        if (gt.hasGQ() && gt.getGQ() < this.minGQ)
                            continue;
                        final OptionalDouble alt_ratio = getAltRatio(gt);
                        if (!alt_ratio.isPresent())
                            continue;
                        final OptionalDouble af = getAF(vc);
                        final double circle_radius = min_circle_radius + (max_circle_radius - min_circle_radius) * (1.0 - af.orElse(1.0));
                        // HOMREF=0;  HET =0.5; HOMVAR = 1;
                        final double gtx = base2pixel.applyAsDouble(vc.getStart());
                        final double gty = sample_height - (sample_height2 * alt_ratio.getAsDouble() + (sample_height - sample_height2) / 2.0);
                        final Element circle = element("circle");
                        circle.setAttribute("class", gt.getType().name());
                        circle.setAttribute("cx", format(gtx));
                        circle.setAttribute("cy", format(gty));
                        circle.setAttribute("r", format(circle_radius));
                        if (!remove_tooltip)
                            circle.appendChild(element("title", vc.getStart() + " " + (vc.hasID() ? vc.getID() : "") + " " + vc.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining("/")) + " " + gt.getType().name() + " AF=" + format(af.orElse(-1))));
                        g_vset.appendChild(wrapLoc(circle, vc));
                        got_this_sample = true;
                    }
                }
                final Element frame_sample = element("rect");
                frame_sample.setAttribute("class", "frame");
                frame_sample.setAttribute("x", "0");
                frame_sample.setAttribute("y", "0");
                frame_sample.setAttribute("width", format(drawingAreaWidth));
                frame_sample.setAttribute("height", format(sample_height));
                g_sample.appendChild(frame_sample);
                final Element label = element("text", sn + (maxCoverage == 0 ? "" : " Max Cov. " + maxCoverage));
                label.setAttribute("class", "samplelabel");
                label.setAttribute("x", "0");
                label.setAttribute("y", "0");
                // label.setAttribute("transform", "translate("+format(-10)+","+0+") rotate(90) ");
                label.setAttribute("transform", "translate(12,12)");
                if (!remove_tooltip)
                    label.appendChild(element("title", sn));
                g_sample.appendChild(label);
                if (got_this_sample) {
                    got_n_samples++;
                    main_g.appendChild(g_sample);
                    y += sample_height + space_between_samples;
                } else {
                    LOG.warn("no valid data for sample " + sn + " in " + bed);
                }
            }
            // remove extra sample space
            y -= space_between_samples;
            svgRoot.setAttribute("width", format(this.image_width_pixel + 1));
            svgRoot.setAttribute("height", format(y + 1));
            if (got_n_samples == 0) {
                LOG.info("no sample/genotype found for " + bed);
                return;
            }
            // save
            final Transformer tr = TransformerFactory.newInstance().newTransformer();
            final String filename = bed.getContig() + "_" + bed.getStart() + "_" + bed.getEnd() + ".svg" + (this.compressed_svg ? ".gz" : "");
            LOG.info("writing " + filename);
            if (this.compressed_svg) {
                try (final OutputStream pw = archive.openOuputStream(filename)) {
                    try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                        tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                        gzout.finish();
                        gzout.flush();
                    }
                    pw.flush();
                }
            } else {
                try (final PrintWriter pw = archive.openWriter(filename)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(pw));
                    pw.flush();
                }
            }
            manifest.print(bed.getContig());
            manifest.print("\t");
            manifest.print(bed.getStart() - 1);
            manifest.print("\t");
            manifest.print(bed.getEnd());
            manifest.print("\t");
            manifest.print(filename);
            manifest.println();
        } catch (final Throwable err) {
            throw new RuntimeException(err);
        } finally {
            this.document = null;
        }
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) GZIPOutputStream(java.util.zip.GZIPOutputStream) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StreamResult(javax.xml.transform.stream.StreamResult) Genotype(htsjdk.variant.variantcontext.Genotype) OptionalDouble(java.util.OptionalDouble) DocumentBuilder(javax.xml.parsers.DocumentBuilder)

Example 92 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class KnownDeletion method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.extendDistance <= 0) {
        LOG.error("bad extend factor " + this.extendDistance);
        return -1;
    }
    final Function<SAMRecord, Integer> mateEnd = REC -> SAMUtils.getMateCigar(REC) != null ? SAMUtils.getMateAlignmentEnd(REC) : REC.getMateAlignmentStart();
    PrintWriter pw = null;
    SAMFileWriter sfw = null;
    try {
        pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        final SamReaderFactory srf = SamReaderFactory.makeDefault().referenceSequence(this.faidx).validationStringency(ValidationStringency.LENIENT);
        final List<String> filenames = IOUtils.unrollStrings2018(args);
        if (this.bamOut != null && !filenames.isEmpty()) {
            SAMSequenceDictionary theDict = null;
            final Set<String> samples = new TreeSet<>();
            for (final String filename : filenames) {
                final SamInputResource sri = SamInputResource.of(filename);
                try (final SamReader samReader = srf.open(sri)) {
                    final SAMFileHeader header = samReader.getFileHeader();
                    final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                    if (theDict == null) {
                        theDict = dict;
                    } else if (!SequenceUtil.areSequenceDictionariesEqual(theDict, dict)) {
                        throw new JvarkitException.DictionariesAreNotTheSame(theDict, dict);
                    }
                    final String sampleName = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(filename);
                    samples.add(sampleName);
                }
            }
            final SAMFileHeader header = new SAMFileHeader(theDict);
            header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
            JVarkitVersion.getInstance().addMetaData(this, header);
            for (final String sn : samples) {
                final SAMReadGroupRecord srg = new SAMReadGroupRecord(sn);
                srg.setSample(sn);
                header.addReadGroup(srg);
            }
            final SAMFileWriterFactory swf = new SAMFileWriterFactory();
            sfw = swf.makeSAMOrBAMWriter(header, true, this.bamOut);
        }
        for (final String filename : IOUtils.unrollStrings2018(args)) {
            final SamInputResource sri = SamInputResource.of(filename);
            try (final SamReader samReader = srf.open(sri)) {
                final SAMFileHeader header = samReader.getFileHeader();
                final SAMSequenceDictionary dict = header.getSequenceDictionary();
                final String sampleName = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(filename);
                final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
                final SimpleInterval interval = parser.apply(this.regionStr).orElseThrow(IntervalParserFactory.exception(this.regionStr));
                final SAMSequenceRecord ssr = Objects.requireNonNull(dict.getSequence(interval.getContig()));
                final int tid = ssr.getSequenceIndex();
                final QueryInterval qi1 = new QueryInterval(tid, Math.max(0, interval.getStart() - this.extendDistance), Math.min(interval.getStart() + this.extendDistance, ssr.getSequenceLength()));
                final QueryInterval qi2 = new QueryInterval(tid, Math.max(0, interval.getEnd() - this.extendDistance), Math.min(interval.getEnd() + this.extendDistance, ssr.getSequenceLength()));
                if (CoordMath.overlaps(qi1.start, qi1.end, qi2.start, qi2.end)) {
                    LOG.error("after extending the boundaries with " + this.extendDistance + ". They both overlap...");
                    return -1;
                }
                long count_disc = 0L;
                long count_split = 0L;
                long count_del = 0L;
                final QueryInterval[] intervals = QueryInterval.optimizeIntervals(new QueryInterval[] { qi1, qi2 });
                try (final CloseableIterator<SAMRecord> iter = samReader.query(intervals, false)) {
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (this.samRecordFilter.filterOut(rec))
                            continue;
                        if (rec.getStart() <= qi1.end && rec.getEnd() >= qi2.start) {
                            count_del++;
                            if (sfw != null) {
                                rec.setAttribute("RG", sampleName);
                                sfw.addAlignment(rec);
                            }
                            continue;
                        }
                        if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getMateReferenceIndex().equals(tid) && ((CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi1.start, qi1.end) && CoordMath.overlaps(rec.getMateAlignmentStart(), mateEnd.apply(rec), qi2.start, qi2.end)) || (CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi2.start, qi2.end) && CoordMath.overlaps(rec.getMateAlignmentStart(), mateEnd.apply(rec), qi1.start, qi1.end)))) {
                            count_disc++;
                            if (sfw != null) {
                                rec.setAttribute("RG", sampleName);
                                sfw.addAlignment(rec);
                            }
                            continue;
                        }
                        for (final SAMRecord rec2 : SAMUtils.getOtherCanonicalAlignments(rec)) {
                            if (rec2.getReferenceIndex().equals(tid) && ((CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi1.start, qi1.end) && CoordMath.overlaps(rec2.getStart(), rec2.getEnd(), qi2.start, qi2.end)) || (CoordMath.overlaps(rec.getStart(), rec.getEnd(), qi2.start, qi2.end) && CoordMath.overlaps(rec2.getStart(), rec2.getEnd(), qi1.start, qi1.end)))) {
                                count_split++;
                                if (sfw != null) {
                                    rec.setAttribute("RG", sampleName);
                                    sfw.addAlignment(rec);
                                }
                                break;
                            }
                        }
                    }
                }
                pw.println(filename + "\t" + sampleName + "\t" + this.regionStr + "\t" + count_disc + "\t" + count_split + "\t" + count_del + "\t" + (count_del + count_disc + count_split));
            }
        }
        pw.flush();
        if (sfw != null)
            sfw.close();
        sfw = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) Function(java.util.function.Function) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) Objects(java.util.Objects) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) QueryInterval(htsjdk.samtools.QueryInterval) CoordMath(htsjdk.samtools.util.CoordMath) SamRecordFilterFactory(com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory) Optional(java.util.Optional) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamInputResource(htsjdk.samtools.SamInputResource) SamReader(htsjdk.samtools.SamReader) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Optional(java.util.Optional) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 93 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class SamInversions method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.max_distance < 100) {
        LOG.error("max size inversion must be >=100");
        return -1;
    }
    VariantContextWriter vcw = null;
    try {
        final Allele REF = Allele.create("N", true);
        final Allele INV = Allele.create("<INV>", false);
        final String input = super.oneFileOrNull(args);
        final Path inputPath = input == null ? null : Paths.get(input);
        try (SamReader samReader = super.createSamReaderFactory().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.referenceFaidx).open(input == null ? SamInputResource.of(stdin()) : SamInputResource.of(inputPath))) {
            final SAMFileHeader samHeader = samReader.getFileHeader();
            final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(samHeader);
            final String sampleName = samHeader.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(inputPath == null ? "SAMPLE" : IOUtils.getFilenameWithoutCommonSuffixes(inputPath));
            final ToIntFunction<SAMRecord> mateEnd = REC -> SAMUtils.getMateCigar(REC) != null ? SAMUtils.getMateAlignmentEnd(REC) : REC.getMateAlignmentStart();
            final ToIntFunction<SAMRecord> recordLen = REC -> {
                final int start = Math.min(REC.getStart(), REC.getMateAlignmentStart());
                final int end = Math.max(REC.getEnd(), mateEnd.applyAsInt(REC));
                return CoordMath.getLength(start, end);
            };
            final QueryInterval[] queryIntervals = this.intervallistProvider == null ? null : this.intervallistProvider.dictionary(dict).optimizedQueryIntervals();
            final Set<VCFHeaderLine> meta = new HashSet<>();
            VCFStandardHeaderLines.addStandardFormatLines(meta, true, VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY);
            VCFStandardHeaderLines.addStandardInfoLines(meta, true, VCFConstants.DEPTH_KEY, VCFConstants.END_KEY);
            meta.add(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
            meta.add(new VCFInfoHeaderLine("SVTYPE", 1, VCFHeaderLineType.String, "Structural variant type"));
            final VCFHeader header = new VCFHeader(meta, Collections.singleton(sampleName));
            JVarkitVersion.getInstance().addMetaData(this, header);
            header.setSequenceDictionary(dict);
            vcw = this.writingVariants.dictionary(dict).open(this.outputFile);
            vcw.writeHeader(header);
            final Predicate<SAMRecord> acceptRecord = rec -> {
                if (!SAMRecordDefaultFilter.accept(rec, this.mapq))
                    return false;
                if (!rec.getReadPairedFlag())
                    return false;
                if (rec.getMateUnmappedFlag())
                    return false;
                if (!rec.getReferenceIndex().equals(rec.getMateReferenceIndex()))
                    return false;
                if (rec.getReadNegativeStrandFlag() != rec.getMateNegativeStrandFlag())
                    return false;
                final int len = recordLen.applyAsInt(rec);
                if (len > max_distance)
                    return false;
                return true;
            };
            final List<Allele> alleles = Arrays.asList(REF, INV);
            final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
            try (final CloseableIterator<SAMRecord> iter0 = (queryIntervals == null ? samReader.iterator() : samReader.query(queryIntervals, false))) {
                final PeekIterator<SAMRecord> iter = new PeekIterator<>(iter0);
                while (iter.hasNext()) {
                    final SAMRecord rec = progress.apply(iter.next());
                    if (!acceptRecord.test(rec))
                        continue;
                    final int invStart = rec.getAlignmentStart();
                    int invEnd = Math.max(rec.getEnd(), mateEnd.applyAsInt(rec));
                    if (CoordMath.getLength(invStart, invEnd) > this.max_distance)
                        continue;
                    int dp = 1;
                    while (iter.hasNext()) {
                        final SAMRecord rec2 = iter.peek();
                        if (!acceptRecord.test(rec2)) {
                            iter.next();
                            continue;
                        }
                        if (rec2.getAlignmentStart() > invEnd || !rec2.contigsMatch(rec) || CoordMath.getLength(invStart, rec2.getAlignmentStart()) > this.max_distance) {
                            break;
                        }
                        // consumme
                        iter.next();
                        invEnd = Math.max(invEnd, Math.max(rec2.getEnd(), mateEnd.applyAsInt(rec2)));
                        dp++;
                    }
                    if (dp >= this.min_supporting_reads) {
                        final VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(rec.getContig());
                        vcb.start(invStart);
                        vcb.stop(invEnd);
                        vcb.alleles(alleles);
                        vcb.attribute(VCFConstants.END_KEY, invEnd);
                        final GenotypeBuilder gb = new GenotypeBuilder(sampleName, alleles);
                        gb.DP(dp);
                        vcb.genotypes(Collections.singletonList(gb.make()));
                        vcb.alleles(alleles);
                        vcb.attribute(VCFConstants.DEPTH_KEY, dp);
                        vcb.attribute(VCFConstants.SVTYPE, "INV");
                        vcb.attribute("SVLEN", CoordMath.getLength(invStart, invEnd));
                        vcw.add(vcb.make());
                    }
                }
            }
            progress.close();
            vcw.close();
        }
        return 0;
    } catch (final Throwable e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(vcw);
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ToIntFunction(java.util.function.ToIntFunction) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) Paths(java.nio.file.Paths) QueryInterval(htsjdk.samtools.QueryInterval) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) PeekIterator(htsjdk.samtools.util.PeekIterator) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Path(java.nio.file.Path) PeekIterator(htsjdk.samtools.util.PeekIterator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 94 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class SamTranslocations method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.fuzzy_distance <= 0) {
        LOG.error("fuzzy_distance <=0 (" + fuzzy_distance + ")");
        return -1;
    }
    final List<SamReader> samReaders = new ArrayList<>();
    final List<CloseableIterator<SAMRecord>> samRecordIterators = new ArrayList<>();
    VariantContextWriter out = null;
    try {
        final List<Path> inputBams = IOUtils.unrollPaths(args);
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (inputBams.isEmpty()) {
            LOG.error("No bam was defined");
            return -1;
        }
        samReaders.addAll(inputBams.stream().map(P -> srf.open(P)).collect(Collectors.toList()));
        final SAMFileHeader header = new SamFileHeaderMerger(SortOrder.coordinate, samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()), false).getMergedHeader();
        samRecordIterators.addAll(samReaders.stream().map(S -> makeIterator(S)).collect(Collectors.toList()));
        final CloseableIterator<SAMRecord> iter = new MergingIterator<>(this.coordinateComparator, samRecordIterators);
        final Set<String> sampleNames = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
        final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(header);
        final boolean[] allowedContigs = new boolean[refDict.size()];
        Arrays.fill(allowedContigs, true);
        if (!StringUtils.isBlank(this.chromRegex)) {
            final Pattern pat = Pattern.compile(this.chromRegex);
            refDict.getSequences().stream().filter(SR -> !pat.matcher(SR.getSequenceName()).matches()).forEach(SR -> allowedContigs[SR.getSequenceIndex()] = false);
        }
        final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(refDict).logger(LOG).build();
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_QUALITY_KEY, true));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY, true));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
        final VCFFormatHeaderLine supportingReadsFormat = new VCFFormatHeaderLine("SR", 4, VCFHeaderLineType.Integer, "Supporting reads: contig1-forward,contig1-reverse,contig2-forward,contig2-reverse");
        metaData.add(supportingReadsFormat);
        final VCFInfoHeaderLine stdDevContig1Info = new VCFInfoHeaderLine("STDDEV_POS1", 1, VCFHeaderLineType.Integer, "std deviation to position 1");
        metaData.add(stdDevContig1Info);
        final VCFInfoHeaderLine stdDevContig2Info = new VCFInfoHeaderLine("STDDEV_POS2", 1, VCFHeaderLineType.Integer, "std deviation to position 2");
        metaData.add(stdDevContig2Info);
        final VCFInfoHeaderLine chrom2Info = new VCFInfoHeaderLine("CHROM2", 1, VCFHeaderLineType.String, "other chromosome");
        metaData.add(chrom2Info);
        final VCFInfoHeaderLine pos2Info = new VCFInfoHeaderLine("POS2", 1, VCFHeaderLineType.Integer, "other position");
        metaData.add(pos2Info);
        final VCFInfoHeaderLine highQualSampleInfo = new VCFInfoHeaderLine("highQualSamples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "High Quality Samples");
        metaData.add(highQualSampleInfo);
        final VCFInfoHeaderLine lowQualSampleInfo = new VCFInfoHeaderLine("lowQualSamples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Low Quality Samples");
        metaData.add(lowQualSampleInfo);
        final VCFInfoHeaderLine maxEventInfo = new VCFInfoHeaderLine("MAX_DP", 1, VCFHeaderLineType.Integer, "Max number of event per sample");
        metaData.add(maxEventInfo);
        final VCFHeader vcfHeader = new VCFHeader(metaData, sampleNames);
        vcfHeader.setSequenceDictionary(refDict);
        JVarkitVersion.getInstance().addMetaData(this, vcfHeader);
        out = VCFUtils.createVariantContextWriterToPath(this.outputFile);
        out.writeHeader(vcfHeader);
        final Function<SAMRecord, Integer> mateEnd = REC -> SAMUtils.getMateCigar(REC) != null ? SAMUtils.getMateAlignmentEnd(REC) : REC.getMateAlignmentStart();
        final Allele REF = Allele.create("N", true);
        final Allele ALT = Allele.create("<TRANSLOC>", false);
        /**
         * buffer or reads
         */
        final LinkedList<SAMRecord> buffer = new LinkedList<>();
        int prev_tid = -1;
        for (; ; ) {
            final SAMRecord rec;
            if (!buffer.isEmpty()) {
                rec = buffer.pollFirst();
            } else if (iter.hasNext()) {
                rec = progress.apply(iter.next());
            } else {
                rec = null;
            }
            if (rec == null || (rec.getReferenceIndex() != prev_tid)) {
                final int final_prev_tid = prev_tid;
                buffer.removeIf(SR -> SR.getReferenceIndex() <= final_prev_tid);
                if (rec == null)
                    break;
                prev_tid = rec.getReferenceIndex();
            }
            buffer.removeIf(SR -> SR.getUnclippedEnd() < rec.getUnclippedStart());
            if (rec.getReadNegativeStrandFlag())
                continue;
            final List<SAMRecord> candidates = new ArrayList<>();
            candidates.add(rec);
            if (rec.getReadPairedFlag() && !rec.getReadNegativeStrandFlag() && !rec.getMateUnmappedFlag() && !rec.getReferenceIndex().equals(rec.getMateReferenceIndex())) {
                long end = rec.getUnclippedEnd() + this.fuzzy_distance;
                int buffer_index = 0;
                while (buffer_index < buffer.size()) {
                    final SAMRecord rec2 = buffer.get(buffer_index);
                    if (!rec2.getReferenceIndex().equals(rec.getReferenceIndex())) {
                        break;
                    }
                    if (rec2.getAlignmentStart() > end) {
                        // not unclipped to avoid side effect
                        break;
                    }
                    if (rec2.getMateUnmappedFlag()) {
                        buffer_index++;
                        continue;
                    }
                    if (!rec2.getMateReferenceIndex().equals(rec.getMateReferenceIndex())) {
                        buffer_index++;
                        continue;
                    }
                    final int mate1 = rec.getMateNegativeStrandFlag() ? rec.getMateAlignmentStart() : mateEnd.apply(rec);
                    final int mate2 = rec2.getMateNegativeStrandFlag() ? rec2.getMateAlignmentStart() : mateEnd.apply(rec2);
                    if (Math.abs(mate1 - mate2) > this.fuzzy_distance) {
                        buffer_index++;
                        continue;
                    }
                    buffer.remove(buffer_index);
                    candidates.add(rec2);
                }
                while (iter.hasNext()) {
                    final SAMRecord rec2 = iter.next();
                    if (rec2 == null)
                        break;
                    if (!rec2.getReferenceIndex().equals(rec.getReferenceIndex())) {
                        buffer.add(rec2);
                        break;
                    }
                    if (rec2.getAlignmentStart() > end) {
                        // not unclipped to avoid side effect
                        buffer.add(rec2);
                        break;
                    }
                    if (rec2.getMateUnmappedFlag()) {
                        buffer.add(rec2);
                        continue;
                    }
                    if (!rec2.getMateReferenceIndex().equals(rec.getMateReferenceIndex())) {
                        buffer.add(rec2);
                        continue;
                    }
                    final int mate1 = rec.getMateNegativeStrandFlag() ? rec.getMateAlignmentStart() : mateEnd.apply(rec);
                    final int mate2 = rec2.getMateNegativeStrandFlag() ? rec2.getMateAlignmentStart() : mateEnd.apply(rec2);
                    if (Math.abs(mate1 - mate2) > this.fuzzy_distance) {
                        buffer.add(rec2);
                        continue;
                    }
                    candidates.add(rec2);
                }
            } else {
                continue;
            }
            if ((long) buffer.size() > this.max_complexity_buffer * samReaders.size()) {
                // final long end = rec.getUnclippedEnd();
                // buffer.remove(0);
                // buffer.removeIf(R->R.getReferenceIndex().equals(rec.getReferenceIndex()) && R.getStart()<=end);
                buffer.clear();
                LOG.warn("zone of low complexity: clearing buffer near " + rec.getContig() + ":" + rec.getStart());
                continue;
            }
            if (candidates.isEmpty() || candidates.size() < this.min_number_of_events)
                continue;
            // check in both sides
            final int count_plus = (int) candidates.stream().filter(SR -> !SR.getReadNegativeStrandFlag()).count();
            final int count_minus = (int) candidates.stream().filter(SR -> SR.getReadNegativeStrandFlag()).count();
            if (count_plus < this.min_number_of_events || count_minus < this.min_number_of_events) {
                putbackInBuffer(candidates, buffer);
                continue;
            }
            final int pos_ctg1 = (int) candidates.stream().mapToInt(SR -> SR.getReadNegativeStrandFlag() ? SR.getAlignmentStart() : SR.getAlignmentEnd()).average().orElse(-1);
            if (pos_ctg1 < 1) {
                putbackInBuffer(candidates, buffer);
                continue;
            }
            final int stddev_ctg1 = (int) candidates.stream().mapToInt(SR -> SR.getReadNegativeStrandFlag() ? SR.getAlignmentStart() : SR.getAlignmentEnd()).map(X -> Math.abs(X - pos_ctg1)).average().orElse(0.0);
            final int pos_ctg2 = (int) candidates.stream().mapToInt(SR -> SR.getMateNegativeStrandFlag() ? SR.getMateAlignmentStart() : mateEnd.apply(SR)).average().orElse(-1);
            if (pos_ctg2 < 1) {
                putbackInBuffer(candidates, buffer);
                continue;
            }
            final int stddev_ctg2 = (int) candidates.stream().mapToInt(SR -> SR.getMateNegativeStrandFlag() ? SR.getMateAlignmentStart() : mateEnd.apply(SR)).map(X -> Math.abs(X - pos_ctg2)).average().orElse(0.0);
            final List<String> lowQualSamples = new ArrayList<>();
            final List<String> hiQualSamples = new ArrayList<>();
            final VariantContextBuilder vcb = new VariantContextBuilder(null, rec.getContig(), pos_ctg1, pos_ctg1, Arrays.asList(REF, ALT));
            final List<Genotype> genotypes = new ArrayList<>(sampleNames.size());
            int max_dp = 0;
            for (final String sample : sampleNames) {
                final List<SAMRecord> readSample = candidates.stream().filter(SR -> sample.equals(SR.getReadGroup().getSample())).collect(Collectors.toList());
                if (readSample.isEmpty()) {
                    genotypes.add(GenotypeBuilder.createMissing(sample, 2));
                } else {
                    final GenotypeBuilder gb = new GenotypeBuilder(sample, Arrays.asList(REF, ALT));
                    final int sn_contig1_count_plus = (int) readSample.stream().filter(SR -> !SR.getReadNegativeStrandFlag()).count();
                    final int sn_contig1_count_minus = (int) readSample.stream().filter(SR -> SR.getReadNegativeStrandFlag()).count();
                    final int sn_contig2_count_plus = (int) readSample.stream().filter(SR -> !SR.getMateNegativeStrandFlag()).count();
                    final int sn_contig2_count_minus = (int) readSample.stream().filter(SR -> SR.getMateNegativeStrandFlag()).count();
                    max_dp = Math.max(max_dp, readSample.size());
                    gb.DP(readSample.size());
                    gb.attribute(supportingReadsFormat.getID(), new int[] { sn_contig1_count_plus, sn_contig1_count_minus, sn_contig2_count_plus, sn_contig2_count_minus });
                    genotypes.add(gb.make());
                    if (sn_contig1_count_plus > this.min_number_of_events && sn_contig1_count_minus > this.min_number_of_events && sn_contig2_count_plus > this.min_number_of_events && sn_contig2_count_minus > this.min_number_of_events) {
                        hiQualSamples.add(sample);
                    } else {
                        lowQualSamples.add(sample);
                    }
                }
            }
            if (hiQualSamples.isEmpty()) {
                putbackInBuffer(candidates, buffer);
                continue;
            }
            vcb.id(rec.getReferenceName() + ":" + pos_ctg1 + ":" + rec.getMateReferenceName() + ":" + pos_ctg2);
            vcb.attribute(stdDevContig1Info.getID(), stddev_ctg1);
            vcb.attribute(stdDevContig2Info.getID(), stddev_ctg2);
            vcb.attribute(chrom2Info.getID(), rec.getMateReferenceName());
            vcb.attribute(pos2Info.getID(), pos_ctg2);
            vcb.attribute(maxEventInfo.getID(), max_dp);
            vcb.attribute(VCFConstants.SVTYPE, StructuralVariantType.BND.name());
            vcb.attribute(VCFConstants.DEPTH_KEY, candidates.size());
            vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, (hiQualSamples.size() + lowQualSamples.size()));
            vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, sampleNames.size() * 2);
            vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, (hiQualSamples.size() + lowQualSamples.size()) / (sampleNames.size() * 2.0));
            if (!lowQualSamples.isEmpty())
                vcb.attribute(lowQualSampleInfo.getID(), lowQualSamples);
            if (!hiQualSamples.isEmpty())
                vcb.attribute(highQualSampleInfo.getID(), hiQualSamples);
            vcb.genotypes(genotypes);
            vcb.log10PError(candidates.size() / -10.0);
            vcb.alleles(Arrays.asList(REF, ALT));
            out.add(vcb.make());
        }
        progress.close();
        iter.close();
        samRecordIterators.forEach(S -> S.close());
        samRecordIterators.clear();
        samReaders.forEach(CloserUtil::close);
        samReaders.clear();
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        samRecordIterators.forEach(S -> S.close());
        samReaders.forEach(S -> CloserUtil.close(S));
        CloserUtil.close(out);
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Pattern(java.util.regex.Pattern) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) MergingIterator(htsjdk.samtools.util.MergingIterator) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SAMTag(htsjdk.samtools.SAMTag) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedList(java.util.LinkedList) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) FilterIterator(com.github.lindenb.jvarkit.util.iterator.FilterIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) QueryInterval(htsjdk.samtools.QueryInterval) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) Collections(java.util.Collections) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) CloserUtil(htsjdk.samtools.util.CloserUtil) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Path(java.nio.file.Path) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) Pattern(java.util.regex.Pattern) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) LinkedList(java.util.LinkedList) MergingIterator(htsjdk.samtools.util.MergingIterator) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 95 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class ScanStructuralVariants method doWork.

@Override
public int doWork(final List<String> args) {
    final List<VCFReader> casesFiles = new ArrayList<>();
    if (this.svComparator.getBndDistance() < 0) {
        LOG.error("bad max_distance :" + this.svComparator.getBndDistance());
        return -1;
    }
    VariantContextWriter out = null;
    try {
        final List<Path> casesPaths = (IOUtils.unrollPaths(args));
        if (casesPaths.isEmpty()) {
            LOG.error("cases list is empty");
            return -1;
        }
        if (!print_all_ctx && casesPaths.size() == 1) {
            LOG.warning("One case: switching to --all");
            print_all_ctx = true;
        }
        if (this.controlsPath.size() == 1 && this.controlsPath.get(0).toString().endsWith(".list")) {
            this.controlsPath = Files.lines(this.controlsPath.get(0)).filter(L -> !(L.startsWith("#") || StringUtils.isBlank(L))).map(L -> Paths.get(L)).collect(Collectors.toList());
        }
        SAMSequenceDictionary dict = null;
        final Set<VCFHeaderLine> metadata = new HashSet<>();
        for (int side = 0; side < 2; side++) {
            for (final Path input : (side == 0 ? casesPaths : this.controlsPath)) {
                final VCFReader vcfInput = VCFReaderFactory.makeDefault().open(input);
                final VCFHeader header = vcfInput.getHeader();
                if (side == 0) {
                    casesFiles.add(vcfInput);
                } else {
                    vcfInput.close();
                }
                final SAMSequenceDictionary dict2 = SequenceDictionaryUtils.extractRequired(header);
                if (dict == null) {
                    dict = dict2;
                } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
                    LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(dict2, dict));
                    return -1;
                }
            }
        }
        final IntervalTreeMap<Boolean> intervalTreeMap;
        if (intervalListProvider != null) {
            intervalTreeMap = new IntervalTreeMap<>();
            intervalListProvider.dictionary(dict).stream().forEach(R -> intervalTreeMap.put(new Interval(R), true));
        } else {
            intervalTreeMap = null;
        }
        casesFiles.stream().flatMap(F -> F.getHeader().getMetaDataInInputOrder().stream()).forEach(H -> metadata.add(H));
        VCFStandardHeaderLines.addStandardFormatLines(metadata, true, VCFConstants.GENOTYPE_KEY);
        VCFStandardHeaderLines.addStandardInfoLines(metadata, true, VCFConstants.END_KEY);
        metadata.add(new VCFInfoHeaderLine("SAMPLES", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples carrying the SV"));
        metadata.add(new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of Samples carrying the SV"));
        metadata.add(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
        metadata.add(new VCFInfoHeaderLine("CIPOS", 2, VCFHeaderLineType.Integer, "Confidence interval around POS for imprecise variants"));
        metadata.add(new VCFInfoHeaderLine("CIEND", 2, VCFHeaderLineType.Integer, "Confidence interval around END for imprecise variants"));
        metadata.add(new VCFInfoHeaderLine("IMPRECISE", 0, VCFHeaderLineType.Flag, "Imprecise structural variation"));
        metadata.add(new VCFInfoHeaderLine(ATT_FILENAME, 1, VCFHeaderLineType.String, "Source of variant"));
        metadata.add(new VCFInfoHeaderLine(ATT_CLUSTER, 1, VCFHeaderLineType.String, "Variant cluster"));
        /*metadata.add(new VCFFormatHeaderLine(
					"OV",1,
					VCFHeaderLineType.Integer,
					"Number calls (with different sample) overlapping this genotype"
					));*/
        metadata.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "SV type"));
        metadata.add(new VCFFilterHeaderLine(ATT_CONTROL, "Variant is found in controls (max MAF=" + this.max_maf + ")"));
        final VCFHeader header = new VCFHeader(metadata);
        header.setSequenceDictionary(dict);
        JVarkitVersion.getInstance().addMetaData(this, header);
        final List<ShadowedVcfReader> controlShadowReaders = new ArrayList<>(this.controlsPath.size());
        for (int i = 0; i < this.controlsPath.size(); i++) {
            boolean large_flag = this.max_control_large_flag < 0 || i >= this.max_control_large_flag;
            controlShadowReaders.add(new ShadowedVcfReader(this.controlsPath.get(i), large_flag));
        }
        out = super.openVariantContextWriter(this.outputFile);
        out.writeHeader(header);
        final CloseableIterator<VariantContext> iter = casesFiles.get(0).iterator();
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
        final Decoy decoy = Decoy.getDefaultInstance();
        VariantContext prevCtx = null;
        while (iter.hasNext()) {
            final VariantContext ctx = progress.apply(iter.next());
            if (decoy.isDecoy(ctx.getContig()))
                continue;
            if (Breakend.parse(ctx).stream().anyMatch(B -> decoy.isDecoy(B.getContig())))
                continue;
            if (intervalTreeMap != null && !intervalTreeMap.containsOverlapping(ctx))
                continue;
            // in manta, I see the same variant multiple times in the same vcf
            if (prevCtx != null && ctx.getContig().equals(prevCtx.getContig()) && ctx.getStart() == prevCtx.getStart() && ctx.getEnd() == prevCtx.getEnd())
                continue;
            prevCtx = ctx;
            final List<VariantContext> candidate = new ArrayList<>(casesFiles.size());
            candidate.add(ctx);
            recursive(ctx, candidate, casesFiles, controlShadowReaders, out);
        }
        iter.close();
        progress.close();
        out.close();
        out = null;
        casesFiles.stream().forEach(F -> {
            try {
                F.close();
            } catch (Exception err) {
            }
        });
        controlShadowReaders.stream().forEach(F -> F.realClose());
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) htsjdk.samtools.util(htsjdk.samtools.util) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Decoy(com.github.lindenb.jvarkit.samtools.Decoy) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Breakend(com.github.lindenb.jvarkit.variant.variantcontext.Breakend) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) StructuralVariantComparator(com.github.lindenb.jvarkit.variant.sv.StructuralVariantComparator) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) Paths(java.nio.file.Paths) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Path(java.nio.file.Path) Decoy(com.github.lindenb.jvarkit.samtools.Decoy) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Aggregations

CloseableIterator (htsjdk.samtools.util.CloseableIterator)103 List (java.util.List)86 Logger (com.github.lindenb.jvarkit.util.log.Logger)85 Parameter (com.beust.jcommander.Parameter)82 Program (com.github.lindenb.jvarkit.util.jcommander.Program)78 ArrayList (java.util.ArrayList)73 Collectors (java.util.stream.Collectors)71 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)69 Path (java.nio.file.Path)69 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)66 CloserUtil (htsjdk.samtools.util.CloserUtil)64 Set (java.util.Set)64 VCFHeader (htsjdk.variant.vcf.VCFHeader)59 VariantContext (htsjdk.variant.variantcontext.VariantContext)54 IOException (java.io.IOException)53 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)51 SAMRecord (htsjdk.samtools.SAMRecord)51 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)51 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)50 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)49