Search in sources :

Example 1 with CoverageFactory

use of com.github.lindenb.jvarkit.samtools.CoverageFactory in project jvarkit by lindenb.

the class BamStats05 method doWork.

protected int doWork(final PrintWriter pw, final Map<String, List<SimpleInterval>> gene2interval, final String filename, final SamReader IN) throws Exception {
    try {
        LOG.info("Scanning " + filename);
        final SAMFileHeader header = IN.getFileHeader();
        final List<SAMReadGroupRecord> rgs = header.getReadGroups();
        if (rgs == null || rgs.isEmpty())
            throw new IOException("No read groups in " + filename);
        final Set<String> groupNames = this.groupBy.getPartitions(rgs);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
        final CoverageFactory coverageFactory = new CoverageFactory().setMappingQuality(this.mapq).setPartition(this.groupBy).setRecordFilter(R -> !filter.filterOut(R));
        for (final String partition : groupNames) {
            if (StringUtils.isBlank(partition))
                throw new IOException("Empty read group: " + groupBy.name() + " for " + filename);
            for (final String gene : gene2interval.keySet()) {
                final List<Integer> counts = new ArrayList<>();
                final List<SimpleInterval> intervals = gene2interval.get(gene);
                final String newContig = contigNameConverter.apply(intervals.get(0).getContig());
                if (StringUtil.isBlank(newContig)) {
                    throw new JvarkitException.ContigNotFoundInDictionary(intervals.get(0).getContig(), dict);
                }
                final CoverageFactory.SimpleCoverage coverage = coverageFactory.getSimpleCoverage(IN, intervals.stream().map(R -> R.renameContig(newContig)).collect(Collectors.toList()), partition);
                for (final SimpleInterval interval : intervals) {
                    for (int i = interval.getStart(); i <= interval.getEnd() && i <= coverage.getEnd(); i++) {
                        final int d = coverage.get(i - coverage.getStart());
                        counts.add(d);
                    }
                }
                Collections.sort(counts);
                pw.print(intervals.get(0).getContig() + "\t" + (coverage.getStart() - 1) + "\t" + coverage.getEnd() + "\t" + gene + "\t" + partition + "\t" + intervals.size() + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1));
                for (final int mc : this.min_coverages) {
                    final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
                    int count_no_coverage = 0;
                    for (int cov : counts) {
                        if (cov <= mc)
                            ++count_no_coverage;
                        discreteMedian.add(cov);
                    }
                    final OptionalDouble average = discreteMedian.getAverage();
                    final OptionalDouble median = discreteMedian.getMedian();
                    pw.print("\t" + (average.isPresent() ? String.format("%.2f", average.orElse(0.0)) : ".") + "\t" + (median.isPresent() ? String.format("%.2f", median.orElse(-1.0)) : ".") + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
                }
                pw.println();
            }
        // end gene
        }
        // end sample
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(IN);
    }
}
Also used : CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) OptionalDouble(java.util.OptionalDouble) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)

Example 2 with CoverageFactory

use of com.github.lindenb.jvarkit.samtools.CoverageFactory in project jvarkit by lindenb.

the class VcfStrechToSvg method run.

private void run(final ArchiveFactory archive, final BedLine bed, final VCFHeader header, final VCFReader in, final PrintWriter manifest) {
    LOG.info("processing " + bed);
    final Set<String> limitSamples;
    if (StringUtils.isBlank(this.sampleStr)) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
    } else if (this.sampleStr.startsWith("^")) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
        limitSamples.removeAll(Arrays.stream(this.sampleStr.substring(1).split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet()));
    } else {
        limitSamples = Arrays.stream(this.sampleStr.split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    try (CloseableIterator<VariantContext> iter = in.query(bed)) {
        final List<VariantSet> L = iter.stream().filter(V -> acceptVariant(V)).map(V -> new VariantSet(V)).collect(Collectors.toCollection(ArrayList::new));
        if (L.isEmpty()) {
            LOG.warn("No valid variant found for \"" + bed + "\"");
            return;
        }
        int i = 0;
        while (i + 1 < L.size()) {
            if (L.get(i).withinDistanceOf(L.get(i + 1), this.withinDistance)) {
                L.get(i).variants.addAll(L.get(i + 1).variants);
                L.remove(i + 1);
            } else {
                i++;
            }
        }
        final int margin_left = 50;
        final int margin_right = 10;
        final double drawingAreaWidth = image_width_pixel - (margin_left + margin_right);
        final int intervalLength = L.stream().mapToInt(V -> V.getLengthOnReference()).sum();
        double x = 0;
        for (i = 0; i < L.size(); i++) {
            L.get(i).x = x;
            x += (L.get(i).getLengthOnReference() / (double) intervalLength) * drawingAreaWidth;
        }
        for (i = 0; i < L.size(); i++) {
            L.get(i).width = (i + 1 < L.size() ? L.get(i + 1).x : drawingAreaWidth) - L.get(i).x;
        }
        try {
            final DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
            final DocumentBuilder dom = db.newDocumentBuilder();
            this.document = dom.newDocument();
            final Element svgRoot = element("svg");
            this.document.appendChild(svgRoot);
            final String mainTitleStr = SequenceDictionaryUtils.getBuildName(dict).orElse("") + " " + new SimpleInterval(bed).toNiceString() + " length:" + StringUtils.niceInt(bed.getLengthOnReference());
            /* SVG title */
            {
                final Element title = element("title");
                svgRoot.appendChild(title);
                title.appendChild(text(mainTitleStr));
            }
            /* SVG style */
            {
                final String gtopacity = this.dynamicParams.getOrDefault("gt.opacity", "0.7");
                final Element style = element("style");
                svgRoot.appendChild(style);
                style.appendChild(text(".maintitle {text-anchor:middle;fill:blue} " + ".vc {stroke-width:0.5px;} " + ".transcript {stroke:black;stroke-width:1px;opacity:1;}" + ".exon {stroke:black;stroke-width:0.5px;fill:blue;opacity:1;}" + ".sample {fill:blue;font-size:7px;} " + ".samplelabel {stroke:gray;stroke-width:0.5px;font-size:" + this.dynamicParams.getOrDefault("sample.fontsize", "7") + "px;}\n" + ".coverage { fill:gray; stroke:yellow;opacity:0.2;} " + ".frame { fill:none; stroke: darkgray;} " + ".area0 {fill:white;}\n" + ".area1 {fill:floralwhite;}\n" + "circle.HOM_REF {fill:green;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HET {fill:blue;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HOM_VAR {fill:red;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "a {cursor: pointer;}\n"));
            }
            /* desc */
            {
                final Element descr = element("desc");
                svgRoot.appendChild(descr);
                descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
            }
            // main title
            {
                final Element gtitle = element("text", mainTitleStr);
                gtitle.setAttribute("class", "maintitle");
                gtitle.setAttribute("x", format(this.image_width_pixel / 2.0));
                gtitle.setAttribute("y", "15");
                svgRoot.appendChild(wrapLoc(gtitle, bed));
            }
            int margin_top = 50;
            double y = margin_top;
            final double min_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r1", "1"));
            final double max_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r2", "7"));
            final Element main_g = element("g");
            svgRoot.appendChild(main_g);
            /**
             * plot genes
             */
            if (!this.all_genes.isEmpty()) {
                final double transcript_height = 5;
                final double exon_height = (transcript_height - 1);
                final double save_y = y;
                final Element g_genes = element("g");
                g_genes.setAttribute("transform", "translate(" + margin_left + ",0)");
                main_g.appendChild(g_genes);
                /* loop over each vset */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    // find transcript in this vset
                    final List<Transcript> transcripts = this.all_genes.getOverlapping(vset).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(vset)).collect(Collectors.toList());
                    if (transcripts.isEmpty())
                        continue;
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_genes.appendChild(g_vset);
                    // y in this vset
                    double y2 = save_y;
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    /* loop over transcripts */
                    for (final Transcript tr : transcripts) {
                        final Element g_tr = element("g");
                        g_tr.setAttribute("transform", "translate(0," + format(y2) + ")");
                        g_vset.appendChild(g_tr);
                        final Element line = element("line");
                        line.setAttribute("class", "transcript");
                        line.setAttribute("x1", format(Math.max(0, base2pixel.applyAsDouble(tr.getStart()))));
                        line.setAttribute("y1", format(transcript_height / 2.0));
                        line.setAttribute("x2", format(Math.min(vset.width, base2pixel.applyAsDouble(tr.getEnd()))));
                        line.setAttribute("y2", format(transcript_height / 2.0));
                        line.appendChild(element("title", tr.getId()));
                        g_tr.appendChild(wrapLoc(line, tr));
                        /* loop over exons */
                        for (final Exon exon : tr.getExons()) {
                            if (!exon.overlaps(vset))
                                continue;
                            final Element exRect = element("rect");
                            exRect.setAttribute("class", "exon");
                            final double x_start = Math.max(0, base2pixel.applyAsDouble(exon.getStart()));
                            exRect.setAttribute("x", format(x_start));
                            exRect.setAttribute("y", format(transcript_height / 2.0 - exon_height / 2.0));
                            final double x_end = Math.min(vset.width, base2pixel.applyAsDouble(exon.getEnd()));
                            exRect.setAttribute("width", format(x_end - x_start));
                            exRect.setAttribute("height", format(exon_height));
                            exRect.appendChild(element("title", exon.getName()));
                            g_tr.appendChild(wrapLoc(exRect, exon));
                        }
                        y2 += transcript_height + 0.5;
                    }
                    y = Math.max(y, y2);
                }
                y++;
            }
            final double sample_height = Double.parseDouble(this.dynamicParams.getOrDefault("sample.height", "25"));
            final double sample_height2 = sample_height - (max_circle_radius * 2.0);
            int space_between_samples = 2;
            int got_n_samples = 0;
            for (final String sn : header.getSampleNamesInOrder()) {
                if (!limitSamples.contains(sn))
                    continue;
                boolean got_this_sample = false;
                final Element g_sample = element("g");
                g_sample.setAttribute("transform", "translate(" + margin_left + "," + format(y) + ")");
                /* get coverage */
                final int maxCoverage;
                if (this.sample2bam.containsKey(sn)) {
                    final CoverageFactory coverageFactory = new CoverageFactory();
                    try (SamReader sr = this.openSamReader(this.sample2bam.get(sn))) {
                        /* loop over each variant set */
                        for (final VariantSet vset : L) {
                            vset.coverage = coverageFactory.getSimpleCoverage(sr, vset, sn);
                        }
                    }
                    maxCoverage = L.stream().flatMapToInt(V -> V.coverage.stream()).max().orElse(0);
                } else {
                    maxCoverage = 0;
                    for (final VariantSet vset : L) {
                        vset.coverage = null;
                    }
                }
                /* loop over each variant set */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_sample.appendChild(g_vset);
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    // plot set length
                    final Element rect = element("rect");
                    rect.setAttribute("class", "area" + (i % 2));
                    rect.setAttribute("x", "0");
                    rect.setAttribute("y", "0");
                    rect.setAttribute("width", format(vset.width));
                    rect.setAttribute("height", format(sample_height));
                    if (!remove_tooltip)
                        rect.appendChild(element("title", vset.toString()));
                    g_vset.appendChild(rect);
                    // plot coverage
                    if (maxCoverage > 0 && this.sample2bam.containsKey(sn)) {
                        final double[] scaled = vset.coverage.scaleAverage((int) vset.width);
                        final StringBuilder sb = new StringBuilder();
                        sb.append("0," + sample_height);
                        for (int t = 0; t < scaled.length; t++) {
                            if (t > 1 && t + 1 < scaled.length && format(scaled[t - 1]).equals(format(scaled[t + 1])) && format(scaled[t - 1]).equals(format(scaled[t])))
                                continue;
                            sb.append(" ").append(t).append(",");
                            sb.append(format(sample_height * (1.0 - scaled[t] / maxCoverage)));
                        }
                        sb.append(" " + format(vset.width) + "," + sample_height);
                        final Element polyline = element("polyline");
                        polyline.setAttribute("class", "coverage");
                        polyline.setAttribute("points", sb.toString());
                        g_vset.appendChild(polyline);
                        vset.coverage = null;
                    }
                    // plot vertical line if colorTag defined
                    if (!StringUtils.isBlank(this.colorTag)) {
                        for (final VariantContext vc : vset.variants) {
                            if (!vc.hasAttribute(this.colorTag))
                                continue;
                            final String cssColor = vc.getAttributeAsString(this.colorTag, "");
                            if (StringUtils.isBlank(cssColor))
                                continue;
                            final double x0 = base2pixel.applyAsDouble(vc.getStart());
                            final Element line = element("line");
                            line.setAttribute("class", "vc");
                            line.setAttribute("style", "stroke:" + cssColor);
                            line.setAttribute("x1", format(x0));
                            line.setAttribute("y1", "0");
                            line.setAttribute("x2", format(x0));
                            line.setAttribute("y2", format(sample_height));
                            g_vset.appendChild(line);
                        }
                    }
                    // print all variants in this vcfset for this sample
                    for (final VariantContext vc : vset.variants) {
                        final Genotype gt = vc.getGenotype(sn);
                        if (gt.isNoCall())
                            continue;
                        if (hide_hom_ref && gt.isHomRef())
                            continue;
                        if (gt.hasGQ() && gt.getGQ() < this.minGQ)
                            continue;
                        final OptionalDouble alt_ratio = getAltRatio(gt);
                        if (!alt_ratio.isPresent())
                            continue;
                        final OptionalDouble af = getAF(vc);
                        final double circle_radius = min_circle_radius + (max_circle_radius - min_circle_radius) * (1.0 - af.orElse(1.0));
                        // HOMREF=0;  HET =0.5; HOMVAR = 1;
                        final double gtx = base2pixel.applyAsDouble(vc.getStart());
                        final double gty = sample_height - (sample_height2 * alt_ratio.getAsDouble() + (sample_height - sample_height2) / 2.0);
                        final Element circle = element("circle");
                        circle.setAttribute("class", gt.getType().name());
                        circle.setAttribute("cx", format(gtx));
                        circle.setAttribute("cy", format(gty));
                        circle.setAttribute("r", format(circle_radius));
                        if (!remove_tooltip)
                            circle.appendChild(element("title", vc.getStart() + " " + (vc.hasID() ? vc.getID() : "") + " " + vc.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining("/")) + " " + gt.getType().name() + " AF=" + format(af.orElse(-1))));
                        g_vset.appendChild(wrapLoc(circle, vc));
                        got_this_sample = true;
                    }
                }
                final Element frame_sample = element("rect");
                frame_sample.setAttribute("class", "frame");
                frame_sample.setAttribute("x", "0");
                frame_sample.setAttribute("y", "0");
                frame_sample.setAttribute("width", format(drawingAreaWidth));
                frame_sample.setAttribute("height", format(sample_height));
                g_sample.appendChild(frame_sample);
                final Element label = element("text", sn + (maxCoverage == 0 ? "" : " Max Cov. " + maxCoverage));
                label.setAttribute("class", "samplelabel");
                label.setAttribute("x", "0");
                label.setAttribute("y", "0");
                // label.setAttribute("transform", "translate("+format(-10)+","+0+") rotate(90) ");
                label.setAttribute("transform", "translate(12,12)");
                if (!remove_tooltip)
                    label.appendChild(element("title", sn));
                g_sample.appendChild(label);
                if (got_this_sample) {
                    got_n_samples++;
                    main_g.appendChild(g_sample);
                    y += sample_height + space_between_samples;
                } else {
                    LOG.warn("no valid data for sample " + sn + " in " + bed);
                }
            }
            // remove extra sample space
            y -= space_between_samples;
            svgRoot.setAttribute("width", format(this.image_width_pixel + 1));
            svgRoot.setAttribute("height", format(y + 1));
            if (got_n_samples == 0) {
                LOG.info("no sample/genotype found for " + bed);
                return;
            }
            // save
            final Transformer tr = TransformerFactory.newInstance().newTransformer();
            final String filename = bed.getContig() + "_" + bed.getStart() + "_" + bed.getEnd() + ".svg" + (this.compressed_svg ? ".gz" : "");
            LOG.info("writing " + filename);
            if (this.compressed_svg) {
                try (final OutputStream pw = archive.openOuputStream(filename)) {
                    try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                        tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                        gzout.finish();
                        gzout.flush();
                    }
                    pw.flush();
                }
            } else {
                try (final PrintWriter pw = archive.openWriter(filename)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(pw));
                    pw.flush();
                }
            }
            manifest.print(bed.getContig());
            manifest.print("\t");
            manifest.print(bed.getStart() - 1);
            manifest.print("\t");
            manifest.print(bed.getEnd());
            manifest.print("\t");
            manifest.print(filename);
            manifest.println();
        } catch (final Throwable err) {
            throw new RuntimeException(err);
        } finally {
            this.document = null;
        }
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) GZIPOutputStream(java.util.zip.GZIPOutputStream) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StreamResult(javax.xml.transform.stream.StreamResult) Genotype(htsjdk.variant.variantcontext.Genotype) OptionalDouble(java.util.OptionalDouble) DocumentBuilder(javax.xml.parsers.DocumentBuilder)

Example 3 with CoverageFactory

use of com.github.lindenb.jvarkit.samtools.CoverageFactory in project jvarkit by lindenb.

the class CnvTView method runInterval.

private SampleInfo runInterval(final AbstractViewWriter w, final Path baminput, final Locatable interval0) throws IOException {
    final Locatable interval = extendInterval(interval0);
    try (SamReader samReader = samReaderFactory.open(baminput)) {
        final SAMFileHeader hdr = samReader.getFileHeader();
        SequenceUtil.assertSequenceDictionariesEqual(this.dictionary, hdr.getSequenceDictionary());
        JvarkitException.BamHasIndex.verify(samReader);
        final String sample = samReader.getFileHeader().getReadGroups().stream().map(V -> V.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(baminput));
        final SampleInfo si = new SampleInfo(sample);
        si.pixel_coverage = new double[this.terminalWidth - LEFT_MARGIN];
        final CoverageFactory coverageFactory = new CoverageFactory().setMappingQuality(this.mappingQuality);
        final CoverageFactory.SimpleCoverage cov = coverageFactory.getSimpleCoverage(samReader, interval, null);
        si.pixel_coverage = cov.scale(this.percentile, si.pixel_coverage.length);
        w.sampleInfos.add(si);
        return si;
    }
}
Also used : IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) AnsiColor(com.github.lindenb.jvarkit.ansi.AnsiUtils.AnsiColor) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) Function(java.util.function.Function) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) AnsiUtils(com.github.lindenb.jvarkit.ansi.AnsiUtils) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) Closeable(java.io.Closeable) CoordMath(htsjdk.samtools.util.CoordMath) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) InputStream(java.io.InputStream) SamReader(htsjdk.samtools.SamReader) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Locatable(htsjdk.samtools.util.Locatable)

Example 4 with CoverageFactory

use of com.github.lindenb.jvarkit.samtools.CoverageFactory in project jvarkit by lindenb.

the class CopyNumber01 method doWork.

@Override
public int doWork(final List<String> args) {
    ReferenceSequenceFile indexedFastaSequenceFile = null;
    try {
        this.sexContigSet.addAll(Arrays.stream(this.sexContigStr.split("[, \t]")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet()));
        /* loading REF Reference */
        indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(indexedFastaSequenceFile);
        final Comparator<Locatable> locComparator = new ContigDictComparator(dict).createLocatableComparator();
        final List<Locatable> intervals = new ArrayList<>();
        if (this.bedFile == null) {
            for (final Locatable loc : dict.getSequences()) {
                intervals.add(loc);
            }
        } else {
            final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
            try (BedLineReader br = new BedLineReader(this.bedFile)) {
                br.stream().filter(L -> !StringUtil.isBlank(converter.apply(L.getContig()))).forEach(B -> {
                    final String ctg = converter.apply(B.getContig());
                    intervals.add(new SimpleInterval(ctg, B.getStart(), B.getEnd()));
                });
            }
        }
        if (intervals.isEmpty()) {
            LOG.error("no interval defined.");
            return -1;
        }
        LOG.info("intervals N=" + intervals.size() + " mean-size:" + intervals.stream().mapToInt(R -> R.getLengthOnReference()).average().orElse(0.0));
        final List<GCAndDepth> user_items = new ArrayList<>();
        // split intervals
        for (final Locatable loc : intervals) {
            int pos = loc.getStart();
            while (pos < loc.getEnd()) {
                final int pos_end = Math.min(pos + this.windowSize, loc.getEnd());
                final GCAndDepth dataRow = new GCAndDepth(loc.getContig(), pos, pos_end);
                if (dataRow.getLengthOnReference() < this.windowMin) {
                    break;
                }
                user_items.add(dataRow);
                pos += this.windowShift;
            }
        }
        // free memory
        intervals.clear();
        LOG.info("sorting N=" + user_items.size());
        Collections.sort(user_items, locComparator);
        // fill gc percent
        LOG.info("fill gc% N=" + user_items.size());
        for (final String ctg : user_items.stream().map(T -> T.getContig()).collect(Collectors.toSet())) {
            final GenomicSequence gseq = new GenomicSequence(indexedFastaSequenceFile, ctg);
            for (final GCAndDepth dataRow : user_items) {
                if (!dataRow.getContig().equals(ctg))
                    continue;
                final GCPercent gc = gseq.getGCPercent(dataRow.getStart(), dataRow.getEnd());
                if (gc.isEmpty())
                    continue;
                dataRow.gc = gc.getGCPercent();
            }
        }
        // remove strange gc
        user_items.removeIf(B -> B.gc < this.minGC);
        user_items.removeIf(B -> B.gc > this.maxGC);
        LOG.info("remove high/low gc% N=" + user_items.size());
        if (user_items.stream().allMatch(P -> isSex(P.getContig()))) {
            LOG.error("All chromosomes are defined as sexual. Cannot normalize");
            return -1;
        }
        final CoverageFactory coverageFactory = new CoverageFactory().setMappingQuality(this.mappingQuality);
        try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
            /* header */
            pw.println("#CHROM\tSTART\tEND\tSample\tIDX\tGC\tRAW-DEPTH\tNORM-DEPTH");
            for (final Path bamPath : IOUtils.unrollPaths(args)) {
                // open this samReader
                try (SamReader samReader = super.createSamReaderFactory().referenceSequence(this.refFile).open(bamPath)) {
                    if (!samReader.hasIndex()) {
                        LOG.error("file is not indexed " + bamPath);
                        return -1;
                    }
                    final SAMFileHeader header = samReader.getFileHeader();
                    SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(header));
                    final String sampleName = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet()).stream().collect(HtsCollectors.toSingleton());
                    final List<GCAndDepth> bam_items = new ArrayList<>(user_items.size());
                    /* loop over contigs */
                    for (final SAMSequenceRecord ssr : dict.getSequences()) {
                        /* create a **COPY** of the intervals */
                        final List<GCAndDepth> ctgitems = user_items.stream().filter(T -> T.contigsMatch(ssr)).collect(Collectors.toList());
                        if (ctgitems.isEmpty())
                            continue;
                        LOG.info("Getting coverage for " + ssr.getSequenceName() + " N=" + ctgitems.size());
                        // get coverage
                        final CoverageFactory.SimpleCoverage coverage = coverageFactory.getSimpleCoverage(samReader, ctgitems, sampleName);
                        // fill coverage
                        for (final GCAndDepth gc : ctgitems) {
                            final OptionalDouble optCov;
                            switch(this.univariateDepth) {
                                case median:
                                    optCov = coverage.getMedian(gc);
                                    break;
                                case mean:
                                    optCov = coverage.getAverage(gc);
                                    break;
                                default:
                                    throw new IllegalStateException();
                            }
                            gc.raw_depth = optCov.orElse(-1.0);
                            gc.norm_depth = gc.raw_depth;
                        }
                        ctgitems.removeIf(V -> V.raw_depth < 0);
                        ctgitems.removeIf(V -> V.raw_depth > this.weirdMaxDepth);
                        ctgitems.removeIf(V -> V.raw_depth < this.weirdMinDepth);
                        if (ctgitems.isEmpty())
                            continue;
                        bam_items.addAll(ctgitems);
                    }
                    double[] y = bam_items.stream().filter(R -> !isSex(R.getContig())).mapToDouble(R -> R.raw_depth).toArray();
                    LOG.info("median raw depth " + new Median().evaluate(y, 0, y.length));
                    Collections.sort(bam_items, (a, b) -> {
                        final int i = Double.compare(a.getX(), b.getX());
                        if (i != 0)
                            return i;
                        return Double.compare(a.getY(), b.getY());
                    });
                    double[] x = bam_items.stream().filter(R -> !isSex(R.getContig())).mapToDouble(R -> R.getX()).toArray();
                    y = bam_items.stream().filter(R -> !isSex(R.getContig())).mapToDouble(R -> R.getY()).toArray();
                    // get min GC
                    final double min_x = x[0];
                    // get max GC
                    final double max_x = x[x.length - 1];
                    LOG.info("min/max gc " + min_x + " " + max_x);
                    /* merge adjacent x (GC%) having same values */
                    int i = 0;
                    int k = 0;
                    while (i < x.length) {
                        int j = i + 1;
                        while (j < x.length && Precision.equals(x[i], x[j])) {
                            ++j;
                        }
                        x[k] = x[i];
                        y[k] = this.univariateGCLoess.create().evaluate(y, i, j - i);
                        ++k;
                        i = j;
                    }
                    LOG.info("merged n=" + (x.length - k) + " items.");
                    /* reduce size of x et y */
                    final List<XY> xyL = new ArrayList<>(k);
                    for (int t = 0; t < k; ++t) {
                        xyL.add(new XYImpl(x[t], y[t]));
                    }
                    /* sort on Y */
                    Collections.sort(xyL, (a, b) -> {
                        final int d = Double.compare(a.getX(), b.getX());
                        if (d != 0)
                            return d;
                        return Double.compare(a.getY(), b.getY());
                    });
                    x = xyL.stream().mapToDouble(R -> R.getX()).toArray();
                    y = xyL.stream().mapToDouble(R -> R.getY()).toArray();
                    final UnivariateInterpolator interpolator = createInterpolator();
                    UnivariateFunction spline = null;
                    try {
                        spline = interpolator.interpolate(x, y);
                    } catch (final org.apache.commons.math3.exception.NumberIsTooSmallException err) {
                        spline = null;
                        LOG.error("Cannot use " + interpolator.getClass().getName() + ":" + err.getMessage());
                    }
                    // min depth cal
                    int points_removed = 0;
                    i = 0;
                    while (i < bam_items.size()) {
                        final GCAndDepth r = bam_items.get(i);
                        if (spline == null) {
                            ++i;
                        } else if (r.getX() < min_x || r.getX() > max_x) {
                            bam_items.remove(i);
                            ++points_removed;
                        } else {
                            final double norm;
                            if (this.gcDepthInterpolation.equals(UnivariateInerpolation.identity)) {
                                norm = r.getY();
                            } else {
                                norm = spline.value(r.getX());
                            }
                            if (Double.isNaN(norm) || Double.isInfinite(norm)) {
                                LOG.info("NAN " + r);
                                bam_items.remove(i);
                                ++points_removed;
                                continue;
                            }
                            r.norm_depth = norm;
                            ++i;
                        }
                    }
                    LOG.info("removed " + points_removed + ". now N=" + bam_items.size());
                    if (bam_items.isEmpty())
                        continue;
                    spline = null;
                    // DO NOT NORMALIZE ON MINIMUM DEPTH, STUPID.
                    // normalize on median
                    y = bam_items.stream().mapToDouble(G -> G.getY()).toArray();
                    final double median_depth = this.univariateMid.create().evaluate(y, 0, y.length);
                    LOG.info("median norm depth : " + median_depth);
                    for (i = 0; median_depth > 0 && i < bam_items.size(); ++i) {
                        final GCAndDepth gc = bam_items.get(i);
                        gc.norm_depth /= median_depth;
                    }
                    // restore genomic order
                    Collections.sort(bam_items, locComparator);
                    // smoothing values with neighbours
                    y = bam_items.stream().mapToDouble(V -> V.getY()).toArray();
                    for (i = 0; i < bam_items.size(); ++i) {
                        final GCAndDepth gc = bam_items.get(i);
                        int left = i;
                        for (int j = Math.max(0, i - this.smooth_window); j <= i; ++j) {
                            final GCAndDepth gc2 = bam_items.get(j);
                            if (!gc2.withinDistanceOf(gc, this.smoothDistance))
                                continue;
                            left = j;
                            break;
                        }
                        int right = i;
                        for (int j = i; j <= i + this.smooth_window && j < bam_items.size(); ++j) {
                            final GCAndDepth gc2 = bam_items.get(j);
                            if (!gc2.withinDistanceOf(gc, this.smoothDistance))
                                break;
                            right = j;
                        // no break;
                        }
                        gc.norm_depth = this.univariateSmooth.create().evaluate(y, left, (right - left) + 1);
                    }
                    /* print data */
                    for (final GCAndDepth r : bam_items) {
                        pw.print(r.getContig());
                        pw.print('\t');
                        pw.print(r.getStart() - 1);
                        pw.print('\t');
                        pw.print(r.getEnd());
                        pw.print('\t');
                        pw.print(sampleName);
                        pw.print('\t');
                        pw.print(getGenomicIndex(dict, r.getContig(), r.getStart()) - 1);
                        pw.print('\t');
                        pw.printf("%.3f", r.gc);
                        pw.print('\t');
                        pw.printf("%.3f", r.raw_depth);
                        pw.print('\t');
                        pw.printf("%.3f", r.norm_depth);
                        pw.println();
                    }
                    pw.flush();
                }
            // samReader
            }
            // end loop bamPath
            pw.flush();
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(indexedFastaSequenceFile);
    }
}
Also used : Arrays(java.util.Arrays) Precision(org.apache.commons.math3.util.Precision) NevilleInterpolator(org.apache.commons.math3.analysis.interpolation.NevilleInterpolator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) UnivariateInterpolator(org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) Median(org.apache.commons.math3.stat.descriptive.rank.Median) Path(java.nio.file.Path) MathIllegalArgumentException(org.apache.commons.math3.exception.MathIllegalArgumentException) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) LinearInterpolator(org.apache.commons.math3.analysis.interpolation.LinearInterpolator) LoessInterpolator(org.apache.commons.math3.analysis.interpolation.LoessInterpolator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) SplineInterpolator(org.apache.commons.math3.analysis.interpolation.SplineInterpolator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) OptionalDouble(java.util.OptionalDouble) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) AbstractUnivariateStatistic(org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Locatable(htsjdk.samtools.util.Locatable) GCPercent(com.github.lindenb.jvarkit.util.picard.GenomicSequence.GCPercent) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DividedDifferenceInterpolator(org.apache.commons.math3.analysis.interpolation.DividedDifferenceInterpolator) Identity(org.apache.commons.math3.analysis.function.Identity) DimensionMismatchException(org.apache.commons.math3.exception.DimensionMismatchException) SamReader(htsjdk.samtools.SamReader) DynamicParameter(com.beust.jcommander.DynamicParameter) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Comparator(java.util.Comparator) Collections(java.util.Collections) ArrayList(java.util.ArrayList) Median(org.apache.commons.math3.stat.descriptive.rank.Median) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) UnivariateInterpolator(org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) GCPercent(com.github.lindenb.jvarkit.util.picard.GenomicSequence.GCPercent) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) OptionalDouble(java.util.OptionalDouble) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Locatable(htsjdk.samtools.util.Locatable)

Example 5 with CoverageFactory

use of com.github.lindenb.jvarkit.samtools.CoverageFactory in project jvarkit by lindenb.

the class WesCnvSvg method doWork.

@Override
public int doWork(final List<String> args) {
    XMLStreamWriter w = null;
    BufferedReader r = null;
    OutputStream fout = null;
    VCFReader vcfReader = null;
    try {
        this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
        final IntervalExtender extender = IntervalExtender.of(refDict, this.extendWhat);
        if (extender.isShriking()) {
            LOG.error("extend factor <1.0");
            return -1;
        }
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(refDict);
        final ContigDictComparator contigDictCompare = new ContigDictComparator(refDict);
        final List<CaptureInterval> userIntervals = this.intervalListProvider.stream().map(loc -> contigNameConverter.convertToSimpleInterval(loc).<RuntimeException>orElseThrow(() -> new RuntimeException(new JvarkitException.ContigNotFoundInDictionary(loc.getContig(), refDict)))).map(extender).collect(HtsCollectors.mergeIntervals()).map(L -> new CaptureInterval(L)).sorted(contigDictCompare.createLocatableComparator()).collect(Collectors.toCollection(ArrayList::new));
        if (userIntervals.isEmpty()) {
            LOG.error("no interval or bed defined");
            return -1;
        }
        this.countBasesToBeDisplayed = userIntervals.stream().mapToInt(R -> R.getLengthOnReference()).sum();
        if (this.countBasesToBeDisplayed < 1) {
            LOG.error("Nothing to display. BED count==0");
            return -1;
        } else {
            double x1 = 0;
            for (int i = 0; i < userIntervals.size(); ++i) {
                final CaptureInterval ci = userIntervals.get(i);
                ci.pixelx = x1;
                x1 += ci.getPixelWidth();
            }
        }
        /* distinct ordered contigs */
        final List<String> distinctContigs = userIntervals.stream().map(R -> R.getContig()).collect(Collectors.toSet()).stream().sorted(contigDictCompare).collect(Collectors.toList());
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidx);
        for (final Path bamFile : IOUtils.unrollPaths(args)) {
            final BamInput bi = new BamInput();
            bi.bamPath = bamFile;
            try (SamReader samReader = srf.open(bamFile)) {
                final SAMSequenceDictionary samDict = SequenceDictionaryUtils.extractRequired(samReader.getFileHeader());
                if (!SequenceUtil.areSequenceDictionariesEqual(refDict, samDict)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(refDict, samDict);
                }
                bi.sample = samReader.getFileHeader().getReadGroups().stream().map(V -> V.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bamFile));
                final CoverageFactory covFactory = new CoverageFactory().setMappingQuality(this.minMappingQuality);
                for (final String contig : distinctContigs) {
                    final List<CaptureInterval> contig_intervals = userIntervals.stream().filter(R -> R.getContig().equals(contig)).collect(Collectors.toList());
                    final CoverageFactory.SimpleCoverage coverage = covFactory.getSimpleCoverage(samReader, contig_intervals, null);
                    for (CaptureInterval rgn : contig_intervals) {
                        final double[] array = coverage.getSubCoverage(rgn).scale(this.scaleType, (int) rgn.getPixelWidth());
                        bi.coverages.add(array);
                    }
                }
            }
            this.bamInputs.add(bi);
        }
        if (this.bamInputs.isEmpty()) {
            LOG.error("no bam input");
            return -1;
        }
        if (this.vcfFile != null) {
            vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
            final SAMSequenceDictionary vcfDict = vcfReader.getHeader().getSequenceDictionary();
            if (vcfDict != null)
                SequenceUtil.assertSequenceDictionariesEqual(refDict, vcfDict);
        }
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile == null) {
            w = xof.createXMLStreamWriter(stdout(), "UTF-8");
        } else {
            fout = Files.newOutputStream(this.outputFile);
            w = xof.createXMLStreamWriter(fout, "UTF-8");
        }
        final Function<List<Point2D.Double>, String> points2str = (L) -> L.stream().map(S -> format(S.getX()) + "," + format(S.getY())).collect(Collectors.joining(" "));
        final Consumer<List<Point2D.Double>> simplifyPoints = (L) -> {
            for (int z = 0; z + 1 < L.size(); ++z) {
                if (L.get(z).getY() == L.get(z + 1).getY()) {
                    L.get(z).x = L.get(z + 1).x;
                    L.remove(z + 1);
                }
            }
        };
        w.writeStartDocument("UTF-8", "1.0");
        final Dimension dim = new Dimension(this.drawinAreaWidth, 0);
        final int bed_header_height = 20;
        dim.height += bed_header_height;
        dim.height += (int) this.bamInputs.stream().mapToDouble(B -> B.getPixelHeight()).sum();
        LOG.debug("drawing area: " + dim);
        w.writeStartElement("svg");
        w.writeAttribute("width", String.valueOf(dim.width));
        w.writeAttribute("height", String.valueOf(dim.height));
        w.writeDefaultNamespace(SVG.NS);
        w.writeNamespace("xlink", XLINK.NS);
        // https://stackoverflow.com/questions/15717970
        w.writeStartElement("style");
        if (this.cssFile != null) {
            w.writeCharacters(IOUtil.slurp(this.cssFile));
        } else {
            w.writeCharacters("g.maing {stroke:black;stroke-width:0.5px;fill:whitesmoke;font-size:10pt;}\n" + "text.sampleLabel {stroke:none;stroke-width:0.5px;fill:blue;}" + "text.captureLabel {stroke:none;stroke-width:0.5px;fill:slategrey;text-anchor:middle;}" + "polygon.area {stroke:darkgray;stroke-width:0.5px;fill:url('#grad01');}" + "line.linedp {stroke:darkcyan;stroke-width:0.3px;opacity:0.4;}" + "text.linedp {fill-opacity:0.6;font-size:7px;stroke:none;stroke-width:0.5px;fill:darkcyan;}" + "rect.sampleFrame { fill:none;stroke:slategray;stroke-width:0.3px;}" + "rect.clickRgn {fill:none;stroke:none;pointer-events:all;}" + "polyline.gc {stroke:lightcoral;stroke-width:0.3px;fill:none;}" + "polyline.clipping {stroke:orange;stroke-width:0.8px;fill:none;}" + "circle.ar {fill:orange;stroke:none;}" + "circle.aa {fill:red;stroke:none;}" + "circle.rr {fill:green;stroke:none;}");
        }
        // style
        w.writeEndElement();
        w.writeStartElement("title");
        w.writeCharacters(this.domSvgTitle);
        w.writeEndElement();
        w.writeStartElement("defs");
        // alleles
        final double genotype_radius = Double.parseDouble(this.dynaParams.getOrDefault("gt.radius", "1.5"));
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "rr");
        w.writeAttribute("class", "rr");
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "ar");
        w.writeAttribute("class", "ar");
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "aa");
        w.writeAttribute("class", "aa");
        // gradient
        w.writeStartElement("linearGradient");
        w.writeAttribute("id", "grad01");
        w.writeAttribute("x1", "50%");
        w.writeAttribute("x2", "50%");
        w.writeAttribute("y1", "0%");
        w.writeAttribute("y2", "100%");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "0%");
        w.writeAttribute("style", "stop-color:lightgray;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "100%");
        w.writeAttribute("style", "stop-color:gray;stop-opacity:1;");
        w.writeEndElement();
        // gc percent
        for (final CaptureInterval ci : userIntervals) {
            final GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ci.getContig());
            final int gc_percent_width = (int) ci.getPixelWidth();
            final List<Point2D.Double> points = new ArrayList<>(gc_percent_width);
            for (int x = 0; x < gc_percent_width; ++x) {
                int pos1 = ci.getStart() + (int) (((x + 0) / ci.getPixelWidth()) * ci.getLengthOnReference());
                int pos2 = ci.getStart() + (int) (((x + 1) / ci.getPixelWidth()) * ci.getLengthOnReference());
                double gc_percent = getGcPercent(genomicSequence, pos1, pos2);
                double y = this.sampleTrackHeight - this.sampleTrackHeight * gc_percent;
                points.add(new Point2D.Double(x, y));
            }
            simplifyPoints.accept(points);
            w.writeStartElement("polyline");
            w.writeAttribute("class", "gc");
            w.writeAttribute("id", "z" + ci.getId());
            w.writeAttribute("points", points2str.apply(points));
            w.writeStartElement("title");
            w.writeCharacters("GC %");
            w.writeEndElement();
            w.writeEndElement();
        }
        // defs
        w.writeEndElement();
        w.writeStartElement("script");
        final StringBuilder openBrowserFunction = new StringBuilder("function openGenomeBrowser(contig,chromStart,chromEnd) {\n");
        if (!this.hyperlinkType.isEmpty()) {
            openBrowserFunction.append("var url=\"" + this.hyperlinkType.getPattern() + "\".replace(/__CHROM__/g,contig).replace(/__START__/g,chromStart).replace(/__END__/g,chromEnd);\n");
            openBrowserFunction.append("window.open(url,'_blank');\n");
        } else {
        // nothing
        }
        openBrowserFunction.append("}\n");
        w.writeCData(openBrowserFunction.toString() + "function clicked(evt,contig,chromStart,chromEnd){\n" + "    var e = evt.target;\n" + "    var dim = e.getBoundingClientRect();\n" + "    var x = 1.0 * evt.clientX - dim.left;\n" + "    var cLen = 1.0* (chromEnd - chromStart); if(cLen<1) cLen=1.0;\n" + "    var pos1 = chromStart + parseInt(((x+0)/dim.width)*cLen);\n" + "    var pos2 = chromStart + parseInt(((x+1)/dim.width)*cLen);\n" + "   openGenomeBrowser(contig,pos1,pos2);\n" + "}\n");
        // script
        w.writeEndElement();
        w.writeStartElement("g");
        w.writeAttribute("class", "maing");
        int y = 0;
        w.writeStartElement("g");
        w.writeComment("interval background");
        for (final CaptureInterval ci : userIntervals) {
            w.writeStartElement("text");
            w.writeAttribute("class", "captureLabel");
            w.writeAttribute("textLength", String.valueOf(ci.getPixelWidth() * 0.8));
            w.writeAttribute("lengthAdjust", "spacing");
            w.writeAttribute("x", String.valueOf(ci.getPixelX1() + ci.getPixelWidth() / 2.0));
            w.writeAttribute("y", String.valueOf(bed_header_height - 2));
            w.writeCharacters(ci.getName());
            w.writeStartElement("title");
            w.writeCharacters(ci.toNiceString());
            // title
            w.writeEndElement();
            // text
            w.writeEndElement();
            w.writeStartElement("rect");
            w.writeAttribute("style", "stroke:black;fill:none;");
            w.writeAttribute("x", String.valueOf(ci.getPixelX1()));
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
            w.writeAttribute("height", String.valueOf(dim.height));
            w.writeStartElement("title");
            w.writeCharacters(ci.getName());
            w.writeEndElement();
            w.writeEndElement();
        }
        // interval background
        w.writeEndElement();
        y += bed_header_height;
        for (final BamInput bi : this.bamInputs) {
            w.writeComment(bi.bamPath.toString());
            w.writeStartElement("g");
            w.writeAttribute("transform", "translate(0," + y + ")");
            if (normalize_on_median_flag) {
                final double medianDepth = Math.max(1.0, Percentile.median().evaluate(bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).toArray()).orElse(1.0));
                LOG.info("median" + medianDepth);
                for (final double[] coverage_array : bi.coverages) {
                    for (int px = 0; px < coverage_array.length; px++) {
                        coverage_array[px] /= medianDepth;
                    }
                }
            }
            final double maxDepth = bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).max().orElse(1.0);
            for (int ridx = 0; ridx < userIntervals.size(); ridx++) {
                final CaptureInterval ci = userIntervals.get(ridx);
                final String clickedAttribute = "clicked(evt,\"" + ci.getContig() + "\"," + ci.getStart() + "," + ci.getEnd() + ")";
                final double[] coverage_array = bi.coverages.get(ridx);
                final double leftx = ci.getPixelX1();
                w.writeStartElement("g");
                w.writeAttribute("transform", "translate(" + leftx + ",0)");
                final int segment_width = (int) ci.getPixelWidth();
                // coverage
                {
                    final List<Point2D.Double> points = new ArrayList<>(segment_width);
                    points.add(new Point2D.Double(0, bi.getPixelHeight()));
                    for (int px = 0; px < coverage_array.length; px++) {
                        final double y_avg_cov = coverage_array[px];
                        final double new_y = bi.getPixelHeight() - (y_avg_cov / maxDepth) * bi.getPixelHeight();
                        points.add(new Point2D.Double(px, new_y));
                    }
                    points.add(new Point2D.Double(ci.getPixelWidth(), bi.getPixelHeight()));
                    simplifyPoints.accept(points);
                    // close
                    points.add(new Point2D.Double(leftx, bi.getPixelHeight()));
                    w.writeEmptyElement("polygon");
                    w.writeAttribute("class", "area");
                    w.writeAttribute("title", ci.toNiceString());
                    // w.writeAttribute("onclick", clickedAttribute);
                    w.writeAttribute("points", points2str.apply(points));
                }
                // w.writeEndElement();//g
                int depthshift = 1;
                for (; ; ) {
                    final int numdiv = (int) (maxDepth / depthshift);
                    if (numdiv <= 10)
                        break;
                    depthshift *= 10;
                }
                int depth = depthshift;
                while (depth < maxDepth) {
                    double new_y = bi.getPixelHeight() - (depth / maxDepth) * bi.getPixelHeight();
                    w.writeStartElement("text");
                    w.writeAttribute("class", "linedp");
                    w.writeAttribute("x", "1");
                    w.writeAttribute("y", String.valueOf(new_y + 1));
                    w.writeCharacters(String.valueOf(depth));
                    // text
                    w.writeEndElement();
                    w.writeStartElement("line");
                    w.writeAttribute("class", "linedp");
                    w.writeAttribute("stroke-dasharray", "4");
                    w.writeAttribute("x1", "0");
                    w.writeAttribute("x2", String.valueOf(ci.getPixelWidth()));
                    w.writeAttribute("y1", String.valueOf(new_y));
                    w.writeAttribute("y2", String.valueOf(new_y));
                    w.writeStartElement("title");
                    w.writeCharacters(String.valueOf(depth));
                    w.writeEndElement();
                    // line
                    w.writeEndElement();
                    depth += depthshift;
                }
                // polyline
                w.writeEmptyElement("use");
                w.writeAttribute("xlink", XLINK.NS, "href", "#z" + ci.getId());
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                // click
                w.writeStartElement("rect");
                w.writeAttribute("class", "clickRgn");
                w.writeAttribute("onclick", clickedAttribute);
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
                w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
                w.writeEndElement();
                // genotype
                if (vcfReader != null) {
                    try (CloseableIterator<VariantContext> iter = vcfReader.query(ci)) {
                        while (iter.hasNext()) {
                            final VariantContext ctx = iter.next();
                            final Genotype gt = ctx.getGenotype(bi.sample);
                            if (gt == null)
                                break;
                            String allele_id = null;
                            switch(gt.getType()) {
                                case HET:
                                    allele_id = "ar";
                                    break;
                                case HOM_REF:
                                    allele_id = "rr";
                                    break;
                                case HOM_VAR:
                                    allele_id = "aa";
                                    break;
                                default:
                                    allele_id = null;
                                    break;
                            }
                            if (allele_id != null) {
                                w.writeEmptyElement("use");
                                w.writeAttribute("xlink", XLINK.NS, "href", "#" + allele_id);
                                w.writeAttribute("x", format(((ctx.getStart() - ci.getStart()) / (double) ci.getLengthOnReference()) * ci.getPixelWidth()));
                                w.writeAttribute("y", format(bi.getPixelHeight() - 2 * genotype_radius));
                            }
                        }
                    }
                }
                // g
                w.writeEndElement();
            }
            // frame for this sample
            w.writeStartElement("rect");
            w.writeAttribute("class", "sampleFrame");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(dim.width));
            w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
            // rect
            w.writeEndElement();
            w.writeStartElement("text");
            w.writeAttribute("class", "sampleLabel");
            w.writeAttribute("x", "5");
            w.writeAttribute("y", "12");
            w.writeStartElement("title");
            w.writeCharacters(bi.bamPath.toString());
            w.writeEndElement();
            w.writeCharacters(bi.sample);
            // text
            w.writeEndElement();
            // g
            w.writeEndElement();
            y += bi.getPixelHeight();
        }
        w.writeStartElement("g");
        w.writeComment("interval lines");
        for (int n = 0; n <= userIntervals.size(); n++) {
            w.writeEmptyElement("line");
            String x1 = n < userIntervals.size() ? String.valueOf(userIntervals.get(n).getPixelX1()) : String.valueOf(dim.width);
            w.writeAttribute("x1", x1);
            w.writeAttribute("y1", "0");
            w.writeAttribute("x2", x1);
            w.writeAttribute("y2", String.valueOf(dim.height));
        }
        // interval lines
        w.writeEndElement();
        // g
        w.writeEndElement();
        // svg
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        w.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcfReader);
        CloserUtil.close(w);
        CloserUtil.close(fout);
        CloserUtil.close(r);
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.bamInputs);
    }
}
Also used : Arrays(java.util.Arrays) XLINK(com.github.lindenb.jvarkit.util.ns.XLINK) Point2D(java.awt.geom.Point2D) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SVG(com.github.lindenb.jvarkit.util.svg.SVG) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) Dimension(java.awt.Dimension) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) HashMap(java.util.HashMap) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) SamReader(htsjdk.samtools.SamReader) File(java.io.File) Consumer(java.util.function.Consumer) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) SamReader(htsjdk.samtools.SamReader) Point2D(java.awt.geom.Point2D) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) VCFReader(htsjdk.variant.vcf.VCFReader) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) List(java.util.List) ArrayList(java.util.ArrayList) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Path(java.nio.file.Path) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) Dimension(java.awt.Dimension) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BufferedReader(java.io.BufferedReader)

Aggregations

CoverageFactory (com.github.lindenb.jvarkit.samtools.CoverageFactory)6 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)6 ArrayList (java.util.ArrayList)6 Parameter (com.beust.jcommander.Parameter)5 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)5 SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)5 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)5 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)5 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)5 Program (com.github.lindenb.jvarkit.util.jcommander.Program)5 Logger (com.github.lindenb.jvarkit.util.log.Logger)5 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 SamReader (htsjdk.samtools.SamReader)5 Locatable (htsjdk.samtools.util.Locatable)5 SequenceUtil (htsjdk.samtools.util.SequenceUtil)5 Path (java.nio.file.Path)5 List (java.util.List)5 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)4 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)4 IOException (java.io.IOException)4