Search in sources :

Example 6 with Exon

use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.

the class VcfBurdenGtf method runBurden.

@Override
protected void runBurden(PrintWriter pw, VCFReader vcfReader, VariantContextWriter vcw) throws IOException {
    final SAMSequenceDictionary vcfDict = SequenceDictionaryUtils.extractRequired(vcfReader.getHeader());
    final List<Gene> all_genes;
    try (GtfReader gtfReader = new GtfReader(this.gtfFile)) {
        gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(vcfDict));
        all_genes = gtfReader.getAllGenes().stream().filter(G -> StringUtil.isBlank(this.intergenic_contig) || this.intergenic_contig.equals("*") || this.intergenic_contig.equals(G.getContig())).sorted(new ContigDictComparator(vcfDict).createLocatableComparator()).collect(Collectors.toCollection(ArrayList::new));
    }
    pw.print("#chrom");
    pw.print("\t");
    pw.print("start0");
    pw.print("\t");
    pw.print("end");
    pw.print("\t");
    pw.print("name");
    pw.print("\t");
    pw.print("length");
    pw.print("\t");
    pw.print("gene");
    pw.print("\t");
    pw.print("type");
    pw.print("\t");
    pw.print("strand");
    pw.print("\t");
    pw.print("transcript");
    pw.print("\t");
    pw.print("gene-id");
    pw.print("\t");
    pw.print("intervals");
    pw.print("\t");
    pw.print("p-value");
    pw.print("\t");
    pw.print("affected_alt");
    pw.print("\t");
    pw.print("affected_hom");
    pw.print("\t");
    pw.print("unaffected_alt");
    pw.print("\t");
    pw.print("unaffected_hom");
    pw.print("\t");
    pw.print("variants.count");
    pw.println();
    final List<SimpleInterval> all_intergenic = new ArrayList<>();
    if (!StringUtil.isBlank(this.intergenic_contig)) {
        for (final SAMSequenceRecord ssr : vcfDict.getSequences()) {
            if (!(this.intergenic_contig.equals("*") || this.intergenic_contig.equals(ssr.getSequenceName())))
                continue;
            final BitSet filled = new BitSet(ssr.getSequenceLength() + 2);
            all_genes.stream().filter(G -> G.getContig().equals(ssr.getSequenceName())).forEach(G -> filled.set(G.getStart(), 1 + /* bit set is 0 based */
            Math.min(G.getEnd(), ssr.getSequenceLength())));
            int i = 1;
            while (i < ssr.getSequenceLength()) {
                if (filled.get(i)) {
                    i++;
                    continue;
                }
                int j = i;
                while (j < ssr.getSequenceLength() && !filled.get(j)) {
                    j++;
                }
                all_intergenic.add(new SimpleInterval(ssr.getSequenceName(), i, j));
                i = j + 1;
            }
            all_genes.removeIf(G -> G.getContig().equals(ssr.getSequenceName()));
        }
        all_genes.clear();
    }
    final ProgressFactory.Watcher<Gene> progress = ProgressFactory.newInstance().logger(LOG).dictionary(vcfDict).build();
    /* run genes */
    for (final Gene gene : all_genes) {
        progress.apply(gene);
        final IntervalTree<VariantContext> intervalTree = new IntervalTree<>();
        vcfReader.query(gene).stream().filter(V -> accept(V)).forEach(V -> intervalTree.put(V.getStart(), V.getEnd(), V));
        if (intervalTree.size() == 0)
            continue;
        for (final Transcript transcript : gene.getTranscripts()) {
            final List<SubPartOfTranscript> parts = new ArrayList<>();
            parts.addAll(transcript.getExons().stream().map(R -> new SubPartOfTranscript(R)).collect(Collectors.toList()));
            parts.addAll(transcript.getIntrons().stream().map(R -> new SubPartOfTranscript(R)).collect(Collectors.toList()));
            final int intron_window_size = 1000;
            final int intron_window_shift = 500;
            for (final Intron intron : transcript.getIntrons()) {
                if (intron.getLengthOnReference() <= intron_window_size)
                    continue;
                int start_pos = intron.getStart();
                while (start_pos + intron_window_size <= intron.getEnd()) {
                    int xend = Math.min(intron.getEnd(), start_pos + intron_window_size - 1);
                    int xstart = xend - intron_window_size - 1;
                    parts.add(new SubPartOfTranscript(transcript, intron.getName() + ".Sliding", Collections.singletonList(new SimpleInterval(intron.getContig(), xstart, xend))));
                    start_pos += intron_window_shift;
                }
            }
            for (final UTR utr : transcript.getUTRs()) {
                parts.add(new SubPartOfTranscript(transcript, utr.getName(), utr.getIntervals()));
            }
            if (transcript.getExonCount() > 1) {
                parts.add(new SubPartOfTranscript(transcript, "AllExons", transcript.getExons().stream().map(E -> E.toInterval()).collect(Collectors.toList())));
            }
            if (transcript.hasCodonStartDefined() && transcript.hasCodonStopDefined() && transcript.getAllCds().size() > 1) {
                parts.add(new SubPartOfTranscript(transcript, "AllCds", transcript.getAllCds().stream().map(E -> E.toInterval()).collect(Collectors.toList())));
            }
            final int L = transcript.getTranscriptLength();
            final int[] index2genomic = new int[L];
            int pos = 0;
            for (final Exon exon : transcript.getExons()) {
                for (int i = exon.getStart(); i <= exon.getEnd(); i++) {
                    index2genomic[pos] = i;
                    pos++;
                }
            }
            final int window_size = 200;
            final int window_shift = 100;
            int array_index = 0;
            while (array_index < index2genomic.length) {
                final List<Locatable> intervals = new ArrayList<>();
                int prev_pos = -1;
                int start_pos = index2genomic[array_index];
                int i = 0;
                while (i < window_size && array_index + i < index2genomic.length) {
                    final int curr_pos = index2genomic[array_index + i];
                    if (i > 0 && prev_pos + 1 != curr_pos) {
                        intervals.add(new SimpleInterval(transcript.getContig(), start_pos, prev_pos));
                        start_pos = curr_pos;
                    }
                    prev_pos = curr_pos;
                    i++;
                }
                intervals.add(new SimpleInterval(transcript.getContig(), start_pos, prev_pos));
                parts.add(new SubPartOfTranscript(transcript, "Sliding", intervals));
                array_index += window_shift;
            }
            for (final SubPartOfTranscript part : parts) {
                final List<VariantContext> variants = new ArrayList<>();
                for (final Locatable loc : part.intervals) {
                    Iterator<IntervalTree.Node<VariantContext>> iter = intervalTree.overlappers(loc.getStart(), loc.getEnd());
                    while (iter.hasNext()) variants.add(iter.next().getValue());
                }
                if (variants.isEmpty())
                    continue;
                final FisherResult fisher = runFisher(variants);
                if (fisher.p_value > this.fisherTreshold)
                    continue;
                if (vcw != null) {
                    for (final VariantContext ctx : variants) {
                        vcw.add(new VariantContextBuilder(ctx).attribute(BURDEN_KEY, VCFUtils.escapeInfoField(part.label)).make());
                    }
                }
                pw.print(part.getContig());
                pw.print("\t");
                pw.print(part.getStart() - 1);
                pw.print("\t");
                pw.print(part.getEnd());
                pw.print("\t");
                pw.print(part.label);
                pw.print("\t");
                pw.print(part.getLengthOnReference());
                pw.print("\t");
                pw.print(transcript.getProperties().getOrDefault("gene_name", "."));
                pw.print("\t");
                pw.print(transcript.getProperties().getOrDefault("transcript_type", "."));
                pw.print("\t");
                pw.print(gene.getStrand());
                pw.print("\t");
                pw.print(transcript.getId());
                pw.print("\t");
                pw.print(gene.getId());
                pw.print("\t");
                pw.print(part.intervals.stream().map(R -> String.valueOf(R.getStart()) + "-" + R.getEnd()).collect(Collectors.joining(";")));
                pw.print("\t");
                pw.print(fisher.p_value);
                pw.print("\t");
                pw.print(fisher.affected_alt);
                pw.print("\t");
                pw.print(fisher.affected_hom);
                pw.print("\t");
                pw.print(fisher.unaffected_alt);
                pw.print("\t");
                pw.print(fisher.unaffected_hom);
                pw.print("\t");
                pw.print(variants.size());
                pw.println();
            }
        }
    }
    progress.close();
    final ProgressFactory.Watcher<SimpleInterval> progress2 = ProgressFactory.newInstance().logger(LOG).dictionary(vcfDict).build();
    /**
     * scan intergenics ...
     */
    for (final SimpleInterval intergenic : all_intergenic) {
        progress2.apply(intergenic);
        final int intergenic_window_size = 2000;
        final int intergenic_window_shifr = 100;
        final List<SimpleInterval> parts = new ArrayList<>();
        if (intergenic.getLengthOnReference() <= intergenic_window_size)
            continue;
        int start_pos = intergenic.getStart();
        while (start_pos + intergenic_window_size <= intergenic.getEnd()) {
            int xend = Math.min(intergenic.getEnd(), start_pos + intergenic_window_size - 1);
            int xstart = xend - intergenic_window_size - 1;
            parts.add(new SimpleInterval(intergenic.getContig(), xstart, xend));
            start_pos += intergenic_window_shifr;
        }
        for (final SimpleInterval part : parts) {
            final List<VariantContext> variants = vcfReader.query(part).stream().filter(V -> accept(V)).collect(Collectors.toList());
            if (variants.isEmpty())
                continue;
            final FisherResult fisher = runFisher(variants);
            if (fisher.p_value > this.fisherTreshold)
                continue;
            final String label = "intergenic_" + part.getStart() + "_" + part.getEnd();
            if (vcw != null) {
                for (final VariantContext ctx : variants) {
                    vcw.add(new VariantContextBuilder(ctx).attribute(BURDEN_KEY, VCFUtils.escapeInfoField(label)).make());
                }
            }
            pw.print(part.getContig());
            pw.print("\t");
            pw.print(part.getStart() - 1);
            pw.print("\t");
            pw.print(part.getEnd());
            pw.print("\t");
            pw.print(label);
            pw.print("\t");
            pw.print(part.getLengthOnReference());
            pw.print("\t");
            pw.print(".");
            pw.print("\t");
            pw.print("intergenic");
            pw.print("\t");
            pw.print(".");
            pw.print("\t");
            pw.print(".");
            pw.print("\t");
            pw.print(".");
            pw.print("\t");
            pw.print("" + part.getStart() + "-" + part.getEnd());
            pw.print("\t");
            pw.print(fisher.p_value);
            pw.print("\t");
            pw.print(fisher.affected_alt);
            pw.print("\t");
            pw.print(fisher.affected_hom);
            pw.print("\t");
            pw.print(fisher.unaffected_alt);
            pw.print("\t");
            pw.print(fisher.unaffected_hom);
            pw.print("\t");
            pw.print(variants.size());
            pw.println();
        }
    }
    progress2.close();
}
Also used : VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) ArrayList(java.util.ArrayList) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) StringUtil(htsjdk.samtools.util.StringUtil) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Iterator(java.util.Iterator) Predicate(java.util.function.Predicate) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) IntervalTree(htsjdk.samtools.util.IntervalTree) Collectors(java.util.stream.Collectors) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) List(java.util.List) Intron(com.github.lindenb.jvarkit.util.bio.structure.Intron) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) BitSet(java.util.BitSet) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) TranscriptInterval(com.github.lindenb.jvarkit.util.bio.structure.TranscriptInterval) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) IntervalTree(htsjdk.samtools.util.IntervalTree) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) BitSet(java.util.BitSet) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Intron(com.github.lindenb.jvarkit.util.bio.structure.Intron) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Locatable(htsjdk.samtools.util.Locatable)

Example 7 with Exon

use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.

the class VcfStrechToSvg method run.

private void run(final ArchiveFactory archive, final BedLine bed, final VCFHeader header, final VCFReader in, final PrintWriter manifest) {
    LOG.info("processing " + bed);
    final Set<String> limitSamples;
    if (StringUtils.isBlank(this.sampleStr)) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
    } else if (this.sampleStr.startsWith("^")) {
        limitSamples = new TreeSet<>(header.getSampleNamesInOrder());
        limitSamples.removeAll(Arrays.stream(this.sampleStr.substring(1).split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet()));
    } else {
        limitSamples = Arrays.stream(this.sampleStr.split("[, ]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    try (CloseableIterator<VariantContext> iter = in.query(bed)) {
        final List<VariantSet> L = iter.stream().filter(V -> acceptVariant(V)).map(V -> new VariantSet(V)).collect(Collectors.toCollection(ArrayList::new));
        if (L.isEmpty()) {
            LOG.warn("No valid variant found for \"" + bed + "\"");
            return;
        }
        int i = 0;
        while (i + 1 < L.size()) {
            if (L.get(i).withinDistanceOf(L.get(i + 1), this.withinDistance)) {
                L.get(i).variants.addAll(L.get(i + 1).variants);
                L.remove(i + 1);
            } else {
                i++;
            }
        }
        final int margin_left = 50;
        final int margin_right = 10;
        final double drawingAreaWidth = image_width_pixel - (margin_left + margin_right);
        final int intervalLength = L.stream().mapToInt(V -> V.getLengthOnReference()).sum();
        double x = 0;
        for (i = 0; i < L.size(); i++) {
            L.get(i).x = x;
            x += (L.get(i).getLengthOnReference() / (double) intervalLength) * drawingAreaWidth;
        }
        for (i = 0; i < L.size(); i++) {
            L.get(i).width = (i + 1 < L.size() ? L.get(i + 1).x : drawingAreaWidth) - L.get(i).x;
        }
        try {
            final DocumentBuilderFactory db = DocumentBuilderFactory.newInstance();
            final DocumentBuilder dom = db.newDocumentBuilder();
            this.document = dom.newDocument();
            final Element svgRoot = element("svg");
            this.document.appendChild(svgRoot);
            final String mainTitleStr = SequenceDictionaryUtils.getBuildName(dict).orElse("") + " " + new SimpleInterval(bed).toNiceString() + " length:" + StringUtils.niceInt(bed.getLengthOnReference());
            /* SVG title */
            {
                final Element title = element("title");
                svgRoot.appendChild(title);
                title.appendChild(text(mainTitleStr));
            }
            /* SVG style */
            {
                final String gtopacity = this.dynamicParams.getOrDefault("gt.opacity", "0.7");
                final Element style = element("style");
                svgRoot.appendChild(style);
                style.appendChild(text(".maintitle {text-anchor:middle;fill:blue} " + ".vc {stroke-width:0.5px;} " + ".transcript {stroke:black;stroke-width:1px;opacity:1;}" + ".exon {stroke:black;stroke-width:0.5px;fill:blue;opacity:1;}" + ".sample {fill:blue;font-size:7px;} " + ".samplelabel {stroke:gray;stroke-width:0.5px;font-size:" + this.dynamicParams.getOrDefault("sample.fontsize", "7") + "px;}\n" + ".coverage { fill:gray; stroke:yellow;opacity:0.2;} " + ".frame { fill:none; stroke: darkgray;} " + ".area0 {fill:white;}\n" + ".area1 {fill:floralwhite;}\n" + "circle.HOM_REF {fill:green;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HET {fill:blue;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "circle.HOM_VAR {fill:red;opacity:" + gtopacity + ";stroke-width:0.5px;}\n" + "a {cursor: pointer;}\n"));
            }
            /* desc */
            {
                final Element descr = element("desc");
                svgRoot.appendChild(descr);
                descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
            }
            // main title
            {
                final Element gtitle = element("text", mainTitleStr);
                gtitle.setAttribute("class", "maintitle");
                gtitle.setAttribute("x", format(this.image_width_pixel / 2.0));
                gtitle.setAttribute("y", "15");
                svgRoot.appendChild(wrapLoc(gtitle, bed));
            }
            int margin_top = 50;
            double y = margin_top;
            final double min_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r1", "1"));
            final double max_circle_radius = Double.parseDouble(this.dynamicParams.getOrDefault("gt.r2", "7"));
            final Element main_g = element("g");
            svgRoot.appendChild(main_g);
            /**
             * plot genes
             */
            if (!this.all_genes.isEmpty()) {
                final double transcript_height = 5;
                final double exon_height = (transcript_height - 1);
                final double save_y = y;
                final Element g_genes = element("g");
                g_genes.setAttribute("transform", "translate(" + margin_left + ",0)");
                main_g.appendChild(g_genes);
                /* loop over each vset */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    // find transcript in this vset
                    final List<Transcript> transcripts = this.all_genes.getOverlapping(vset).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(vset)).collect(Collectors.toList());
                    if (transcripts.isEmpty())
                        continue;
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_genes.appendChild(g_vset);
                    // y in this vset
                    double y2 = save_y;
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    /* loop over transcripts */
                    for (final Transcript tr : transcripts) {
                        final Element g_tr = element("g");
                        g_tr.setAttribute("transform", "translate(0," + format(y2) + ")");
                        g_vset.appendChild(g_tr);
                        final Element line = element("line");
                        line.setAttribute("class", "transcript");
                        line.setAttribute("x1", format(Math.max(0, base2pixel.applyAsDouble(tr.getStart()))));
                        line.setAttribute("y1", format(transcript_height / 2.0));
                        line.setAttribute("x2", format(Math.min(vset.width, base2pixel.applyAsDouble(tr.getEnd()))));
                        line.setAttribute("y2", format(transcript_height / 2.0));
                        line.appendChild(element("title", tr.getId()));
                        g_tr.appendChild(wrapLoc(line, tr));
                        /* loop over exons */
                        for (final Exon exon : tr.getExons()) {
                            if (!exon.overlaps(vset))
                                continue;
                            final Element exRect = element("rect");
                            exRect.setAttribute("class", "exon");
                            final double x_start = Math.max(0, base2pixel.applyAsDouble(exon.getStart()));
                            exRect.setAttribute("x", format(x_start));
                            exRect.setAttribute("y", format(transcript_height / 2.0 - exon_height / 2.0));
                            final double x_end = Math.min(vset.width, base2pixel.applyAsDouble(exon.getEnd()));
                            exRect.setAttribute("width", format(x_end - x_start));
                            exRect.setAttribute("height", format(exon_height));
                            exRect.appendChild(element("title", exon.getName()));
                            g_tr.appendChild(wrapLoc(exRect, exon));
                        }
                        y2 += transcript_height + 0.5;
                    }
                    y = Math.max(y, y2);
                }
                y++;
            }
            final double sample_height = Double.parseDouble(this.dynamicParams.getOrDefault("sample.height", "25"));
            final double sample_height2 = sample_height - (max_circle_radius * 2.0);
            int space_between_samples = 2;
            int got_n_samples = 0;
            for (final String sn : header.getSampleNamesInOrder()) {
                if (!limitSamples.contains(sn))
                    continue;
                boolean got_this_sample = false;
                final Element g_sample = element("g");
                g_sample.setAttribute("transform", "translate(" + margin_left + "," + format(y) + ")");
                /* get coverage */
                final int maxCoverage;
                if (this.sample2bam.containsKey(sn)) {
                    final CoverageFactory coverageFactory = new CoverageFactory();
                    try (SamReader sr = this.openSamReader(this.sample2bam.get(sn))) {
                        /* loop over each variant set */
                        for (final VariantSet vset : L) {
                            vset.coverage = coverageFactory.getSimpleCoverage(sr, vset, sn);
                        }
                    }
                    maxCoverage = L.stream().flatMapToInt(V -> V.coverage.stream()).max().orElse(0);
                } else {
                    maxCoverage = 0;
                    for (final VariantSet vset : L) {
                        vset.coverage = null;
                    }
                }
                /* loop over each variant set */
                for (i = 0; i < L.size(); i++) {
                    final VariantSet vset = L.get(i);
                    final Element g_vset = element("g");
                    g_vset.setAttribute("transform", "translate(" + format(vset.x) + ",0)");
                    g_sample.appendChild(g_vset);
                    /* convert base to pixel */
                    final ToDoubleFunction<Integer> base2pixel = vset.createBaseToPixelFunction();
                    // plot set length
                    final Element rect = element("rect");
                    rect.setAttribute("class", "area" + (i % 2));
                    rect.setAttribute("x", "0");
                    rect.setAttribute("y", "0");
                    rect.setAttribute("width", format(vset.width));
                    rect.setAttribute("height", format(sample_height));
                    if (!remove_tooltip)
                        rect.appendChild(element("title", vset.toString()));
                    g_vset.appendChild(rect);
                    // plot coverage
                    if (maxCoverage > 0 && this.sample2bam.containsKey(sn)) {
                        final double[] scaled = vset.coverage.scaleAverage((int) vset.width);
                        final StringBuilder sb = new StringBuilder();
                        sb.append("0," + sample_height);
                        for (int t = 0; t < scaled.length; t++) {
                            if (t > 1 && t + 1 < scaled.length && format(scaled[t - 1]).equals(format(scaled[t + 1])) && format(scaled[t - 1]).equals(format(scaled[t])))
                                continue;
                            sb.append(" ").append(t).append(",");
                            sb.append(format(sample_height * (1.0 - scaled[t] / maxCoverage)));
                        }
                        sb.append(" " + format(vset.width) + "," + sample_height);
                        final Element polyline = element("polyline");
                        polyline.setAttribute("class", "coverage");
                        polyline.setAttribute("points", sb.toString());
                        g_vset.appendChild(polyline);
                        vset.coverage = null;
                    }
                    // plot vertical line if colorTag defined
                    if (!StringUtils.isBlank(this.colorTag)) {
                        for (final VariantContext vc : vset.variants) {
                            if (!vc.hasAttribute(this.colorTag))
                                continue;
                            final String cssColor = vc.getAttributeAsString(this.colorTag, "");
                            if (StringUtils.isBlank(cssColor))
                                continue;
                            final double x0 = base2pixel.applyAsDouble(vc.getStart());
                            final Element line = element("line");
                            line.setAttribute("class", "vc");
                            line.setAttribute("style", "stroke:" + cssColor);
                            line.setAttribute("x1", format(x0));
                            line.setAttribute("y1", "0");
                            line.setAttribute("x2", format(x0));
                            line.setAttribute("y2", format(sample_height));
                            g_vset.appendChild(line);
                        }
                    }
                    // print all variants in this vcfset for this sample
                    for (final VariantContext vc : vset.variants) {
                        final Genotype gt = vc.getGenotype(sn);
                        if (gt.isNoCall())
                            continue;
                        if (hide_hom_ref && gt.isHomRef())
                            continue;
                        if (gt.hasGQ() && gt.getGQ() < this.minGQ)
                            continue;
                        final OptionalDouble alt_ratio = getAltRatio(gt);
                        if (!alt_ratio.isPresent())
                            continue;
                        final OptionalDouble af = getAF(vc);
                        final double circle_radius = min_circle_radius + (max_circle_radius - min_circle_radius) * (1.0 - af.orElse(1.0));
                        // HOMREF=0;  HET =0.5; HOMVAR = 1;
                        final double gtx = base2pixel.applyAsDouble(vc.getStart());
                        final double gty = sample_height - (sample_height2 * alt_ratio.getAsDouble() + (sample_height - sample_height2) / 2.0);
                        final Element circle = element("circle");
                        circle.setAttribute("class", gt.getType().name());
                        circle.setAttribute("cx", format(gtx));
                        circle.setAttribute("cy", format(gty));
                        circle.setAttribute("r", format(circle_radius));
                        if (!remove_tooltip)
                            circle.appendChild(element("title", vc.getStart() + " " + (vc.hasID() ? vc.getID() : "") + " " + vc.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining("/")) + " " + gt.getType().name() + " AF=" + format(af.orElse(-1))));
                        g_vset.appendChild(wrapLoc(circle, vc));
                        got_this_sample = true;
                    }
                }
                final Element frame_sample = element("rect");
                frame_sample.setAttribute("class", "frame");
                frame_sample.setAttribute("x", "0");
                frame_sample.setAttribute("y", "0");
                frame_sample.setAttribute("width", format(drawingAreaWidth));
                frame_sample.setAttribute("height", format(sample_height));
                g_sample.appendChild(frame_sample);
                final Element label = element("text", sn + (maxCoverage == 0 ? "" : " Max Cov. " + maxCoverage));
                label.setAttribute("class", "samplelabel");
                label.setAttribute("x", "0");
                label.setAttribute("y", "0");
                // label.setAttribute("transform", "translate("+format(-10)+","+0+") rotate(90) ");
                label.setAttribute("transform", "translate(12,12)");
                if (!remove_tooltip)
                    label.appendChild(element("title", sn));
                g_sample.appendChild(label);
                if (got_this_sample) {
                    got_n_samples++;
                    main_g.appendChild(g_sample);
                    y += sample_height + space_between_samples;
                } else {
                    LOG.warn("no valid data for sample " + sn + " in " + bed);
                }
            }
            // remove extra sample space
            y -= space_between_samples;
            svgRoot.setAttribute("width", format(this.image_width_pixel + 1));
            svgRoot.setAttribute("height", format(y + 1));
            if (got_n_samples == 0) {
                LOG.info("no sample/genotype found for " + bed);
                return;
            }
            // save
            final Transformer tr = TransformerFactory.newInstance().newTransformer();
            final String filename = bed.getContig() + "_" + bed.getStart() + "_" + bed.getEnd() + ".svg" + (this.compressed_svg ? ".gz" : "");
            LOG.info("writing " + filename);
            if (this.compressed_svg) {
                try (final OutputStream pw = archive.openOuputStream(filename)) {
                    try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                        tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                        gzout.finish();
                        gzout.flush();
                    }
                    pw.flush();
                }
            } else {
                try (final PrintWriter pw = archive.openWriter(filename)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(pw));
                    pw.flush();
                }
            }
            manifest.print(bed.getContig());
            manifest.print("\t");
            manifest.print(bed.getStart() - 1);
            manifest.print("\t");
            manifest.print(bed.getEnd());
            manifest.print("\t");
            manifest.print(filename);
            manifest.println();
        } catch (final Throwable err) {
            throw new RuntimeException(err);
        } finally {
            this.document = null;
        }
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) GZIPOutputStream(java.util.zip.GZIPOutputStream) TreeSet(java.util.TreeSet) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StreamResult(javax.xml.transform.stream.StreamResult) Genotype(htsjdk.variant.variantcontext.Genotype) OptionalDouble(java.util.OptionalDouble) DocumentBuilder(javax.xml.parsers.DocumentBuilder)

Example 8 with Exon

use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.

the class PlotSashimi method plotSashimi.

/**
 * create the SVG itself
 */
private void plotSashimi(final ArchiveFactory archive, final SamReader samReader, final Locatable interval, final Path bamPath, final PrintWriter manifest) {
    final int drawing_width = Math.max(100, this.image_width_pixel);
    final int coverageHeight = Math.max(100, Integer.parseInt(this.dynamicParams.getOrDefault("coverage.height", "300")));
    final double pixelperbase = drawing_width / (double) interval.getLengthOnReference();
    final SAMFileHeader header = samReader.getFileHeader();
    final Collection<Gene> genes = this.geneMap.getOverlapping(interval);
    final Set<String> geneNames = genes.stream().map(G -> G.getGeneName()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    /**
     * extract the sample name or just use the filename
     */
    final String sampleName = StringUtils.ifBlank(header.getReadGroups().stream().map(G -> this.partition.apply(G)).filter(S -> !StringUtils.isBlank(S)).sorted().collect(Collectors.joining(";")), bamPath.getFileName().toString());
    final Function<Integer, Double> pos2pixel = POS -> (POS - interval.getStart()) / (double) interval.getLengthOnReference() * drawing_width;
    final Counter<SimpleInterval> gaps = new Counter<>();
    final int[] coverage = new int[interval.getLengthOnReference()];
    try (SAMRecordIterator iter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd())) {
        /**
         * no read here, skip
         */
        boolean got_one = false;
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getReadUnmappedFlag())
                continue;
            if (rec.getReadFailsVendorQualityCheckFlag())
                continue;
            if (rec.isSecondaryOrSupplementary())
                continue;
            if (rec.getDuplicateReadFlag())
                continue;
            if (rec.getMappingQuality() < this.min_mapq)
                continue;
            final Cigar cigar = rec.getCigar();
            if (cigar == null || cigar.isEmpty())
                continue;
            got_one = true;
            int ref = rec.getAlignmentStart();
            for (final CigarElement ce : cigar) {
                if (ref > interval.getEnd())
                    break;
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.N) || (use_D_operator && op.equals(CigarOperator.D))) {
                    gaps.incr(new SimpleInterval(rec.getContig(), ref, ref + ce.getLength() - 1));
                }
                if (op.consumesReferenceBases()) {
                    if (op.consumesReadBases()) {
                        for (int x = 0; x < ce.getLength(); ++x) {
                            final int pos1 = ref + x;
                            if (pos1 < interval.getStart())
                                continue;
                            if (pos1 > interval.getEnd())
                                break;
                            coverage[pos1 - interval.getStart()]++;
                        }
                    }
                    ref += ce.getLength();
                }
            }
        }
        if (!got_one && this.skip_region_without_read)
            return;
    }
    final int max_coverage;
    if (this.force_max_coverage > 0) {
        max_coverage = this.force_max_coverage;
    } else {
        max_coverage = Math.max(1, Arrays.stream(coverage).max().orElse(0));
    }
    while (this.document.hasChildNodes()) {
        this.document.removeChild(this.document.getFirstChild());
    }
    final Element svgRoot = element("svg");
    this.document.appendChild(svgRoot);
    /* SVG title */
    {
        final Element title = element("title");
        svgRoot.appendChild(title);
        title.appendChild(text(interval.toString() + (!geneNames.isEmpty() && geneNames.size() < 3 ? " " + String.join(" ", geneNames) : "")));
    }
    /* SVG style */
    {
        final Element style = element("style");
        svgRoot.appendChild(style);
        style.appendChild(text(this.cssPath == null ? ".coverage { fill:red;fill:url('#grad01')} " + ".maintitle {text-anchor:middle;fill:blue} " + ".sample {fill:blue;font-size:7px;} " + ".frame { fill:none; stroke: darkgray;} " + ".arcK { fill:none; stroke: blue; stroke-linecap:round;opacity:0.8;} " + ".arcU { fill:none; stroke: red; stroke-linecap:round;opacity:0.8;} " + ".transcript { fill:darkgray; stroke: darkgray;} " + ".exon { fill:green; stroke: darkgray;} " + ".frame { fill:none; stroke: darkgray;} " + ".rulerline {stroke:lightgray;stroke-width:0.5px;}\n" + ".exonline {stroke:green;stroke-width:0.5px;opacity:0.5;}\n" + ".rulerlabel {stroke:gray;stroke-width:0.5px;font-size:7px;}\n" + "a {cursor: pointer;}\n" : IOUtils.slurpPath(this.cssPath)));
    }
    // SVG def
    {
        final Element defs = element("defs");
        svgRoot.appendChild(defs);
        // linear gradient
        {
            Element grad = element("linearGradient");
            defs.appendChild(grad);
            grad.setAttribute("id", "grad01");
            grad.setAttribute("gradientTransform", "rotate(90)");
            Element stop = element("stop");
            grad.appendChild(stop);
            stop.setAttribute("offset", "0%");
            stop.setAttribute("stop-color", (max_coverage > 50 ? "red" : max_coverage > 20 ? "green" : "blue"));
            stop = element("stop");
            grad.appendChild(stop);
            stop.setAttribute("offset", "100%");
            stop.setAttribute("stop-color", "darkblue");
        }
    }
    final Element descr = element("desc");
    svgRoot.appendChild(descr);
    descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
    final Element maing = element("g");
    svgRoot.appendChild(maing);
    int y = 0;
    // main title
    Element gtitle = element("text", new SimpleInterval(interval).toNiceString() + (StringUtils.isBlank(sampleName) ? "" : " " + sampleName) + (geneNames.isEmpty() ? "" : " " + String.join(" ", geneNames)));
    gtitle.setAttribute("class", "maintitle");
    gtitle.setAttribute("x", format(drawing_width / 2));
    gtitle.setAttribute("y", "15");
    svgRoot.appendChild(gtitle);
    y += 20;
    // sample name
    if (!StringUtils.isBlank(sampleName)) {
        gtitle = element("text", sampleName);
        gtitle.setAttribute("class", "sample");
        gtitle.setAttribute("x", "5");
        gtitle.setAttribute("y", "20");
        svgRoot.appendChild(gtitle);
    }
    y += 50;
    final int prev_y = y;
    /**
     * horizontal ruler
     */
    {
        final Element ruler_gh = element("g");
        maing.appendChild(ruler_gh);
        final int sep = bestTicks(interval.getLengthOnReference());
        for (int pos = interval.getStart(); pos <= interval.getEnd(); ++pos) {
            if (pos % sep != 0)
                continue;
            double x = pos2pixel.apply(pos);
            final Element line = element("line");
            ruler_gh.appendChild(line);
            line.setAttribute("class", "rulerline");
            line.appendChild(element("title", StringUtils.niceInt(pos)));
            line.setAttribute("x1", format(x));
            line.setAttribute("x2", format(x));
            line.setAttribute("y1", format(y));
            line.setAttribute("y2", format(y + coverageHeight));
            final Element label = element("text", StringUtils.niceInt(pos));
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", "0");
            label.setAttribute("y", "0");
            label.setAttribute("transform", "translate(" + format(x) + "," + y + ") rotate(90) ");
            ruler_gh.appendChild(label);
        }
    }
    /**
     * vertical ruler
     */
    {
        final Element ruler_gv = element("g");
        maing.appendChild(ruler_gv);
        final int sep = bestTicks(max_coverage);
        for (int pos = 0; pos <= max_coverage; ++pos) {
            if (pos % sep != 0)
                continue;
            double ry = (int) (y + coverageHeight - (pos / (double) max_coverage) * coverageHeight);
            final Element line = element("line");
            ruler_gv.appendChild(line);
            line.setAttribute("class", "rulerline");
            line.appendChild(element("title", StringUtils.niceInt(pos)));
            line.setAttribute("x1", format(0));
            line.setAttribute("x2", format(drawing_width));
            line.setAttribute("y1", format(ry));
            line.setAttribute("y2", format(ry));
            final Element label = element("text", StringUtils.niceInt(pos));
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", "1");
            label.setAttribute("y", format(ry));
            ruler_gv.appendChild(label);
        }
    }
    /**
     * vertical lines of exons
     */
    final Element exon_v = element("g");
    final Element covPath = element("path");
    covPath.setAttribute("class", "coverage");
    maing.appendChild(covPath);
    final StringBuilder sb = new StringBuilder();
    sb.append("M 0 " + format(y + coverageHeight));
    for (int k = 0; k < coverage.length; k++) {
        // if(k+1< coverage.length && coverage[k]==coverage[k+1]) continue;
        final double dpy = y + coverageHeight - coverageHeight * (coverage[k] / (double) max_coverage);
        sb.append(" L " + format(pixelperbase * k) + " " + format(dpy));
    }
    sb.append(" L " + format(drawing_width) + " " + format(y + coverageHeight));
    sb.append(" Z");
    covPath.setAttribute("d", sb.toString());
    covPath.appendChild(element("title", "Coverage. Max:" + StringUtils.niceInt(max_coverage)));
    int next_y = y + coverageHeight;
    /* plot arc */
    if (!gaps.isEmpty()) {
        boolean drawAbove = true;
        int max_occurence = (int) gaps.count(gaps.getMostFrequent());
        for (final SimpleInterval intron : gaps.keySet()) {
            final int occurence = (int) gaps.count(intron);
            boolean is_known_intron = genes.stream().flatMap(G -> G.getTranscripts().stream()).flatMap(T -> T.getIntrons().stream()).filter(E -> E.overlaps(intron)).anyMatch(I -> I.getStart() == intron.getStart() && I.getEnd() == intron.getEnd());
            final int junctionStart = intron.getStart() - 1;
            final int junctionEnd = intron.getEnd() + 1;
            if (!CoordMath.encloses(interval.getStart(), interval.getEnd(), junctionStart, junctionEnd))
                continue;
            final double xstart = pos2pixel.apply(junctionStart);
            final double xend = pos2pixel.apply(junctionEnd);
            double ystart = y + coverageHeight - coverageHeight * (coverage[junctionStart - interval.getStart()] / (double) max_coverage);
            double yend = y + coverageHeight - coverageHeight * (coverage[junctionEnd - interval.getStart()] / (double) max_coverage);
            final Element arc = element("path");
            sb.setLength(0);
            double x_mid = (xend - xstart) / 2.0;
            double x2 = xstart + x_mid;
            final double y2;
            // small gap: always print it under xaxis
            if (xend - xstart < 30)
                drawAbove = true;
            if (drawAbove) {
                ystart = y + coverageHeight;
                yend = y + coverageHeight;
                y2 = y + coverageHeight + x_mid;
                next_y = (int) Math.max(next_y, y + coverageHeight + x_mid / 2 + 10);
            } else {
                y2 = Math.max(0, ystart + (yend - ystart) / 2.0 - x_mid);
            }
            sb.append("M " + format(xstart) + " " + format(ystart));
            sb.append(" Q " + format(x2) + " " + format(y2) + " " + format(xend) + " " + format(yend));
            arc.setAttribute("d", sb.toString());
            arc.setAttribute("class", "arc" + (is_known_intron ? "K" : "U"));
            final double stroke_width = 1 + /* show very small one */
            (occurence / (double) max_occurence) * 10;
            arc.setAttribute("style", "stroke-width:" + format(stroke_width) + "px;");
            arc.appendChild(element("title", new SimpleInterval(interval.getContig(), junctionStart, junctionEnd).toNiceString() + " (" + StringUtils.niceInt(occurence) + ") " + (is_known_intron ? "known" : "unknown")));
            maing.appendChild(wrapLoc(arc, intron));
            drawAbove = !drawAbove;
        }
    }
    y = next_y;
    y += 2;
    /**
     * pileup transcripts
     */
    final List<List<Transcript>> transcriptRows = new ArrayList<>();
    for (final Transcript transcript : genes.stream().flatMap(L -> L.getTranscripts().stream()).filter(T -> T.overlaps(interval)).sorted((A, B) -> Integer.compare(A.getStart(), B.getStart())).collect(Collectors.toList())) {
        int rowidx = 0;
        while (rowidx < transcriptRows.size()) {
            final List<Transcript> row = transcriptRows.get(rowidx);
            final Transcript last = row.get(row.size() - 1);
            if (!last.overlaps(transcript)) {
                row.add(transcript);
                break;
            }
            rowidx++;
        }
        if (rowidx == transcriptRows.size()) {
            final List<Transcript> row = new ArrayList<>();
            row.add(transcript);
            transcriptRows.add(row);
        }
    }
    /**
     * plot transcripts
     */
    final Element transcripts_g = element("g");
    maing.appendChild(transcripts_g);
    final int transcript_height = Math.max(10, Integer.parseInt(this.dynamicParams.getOrDefault("transcript.height", "12")));
    for (final List<Transcript> row : transcriptRows) {
        final Element grow = element("g");
        transcripts_g.appendChild(grow);
        for (final Transcript transcript : row) {
            final Element transcript_g = element("g");
            grow.appendChild(transcript_g);
            final Element tr = element("line");
            final double midy = y + transcript_height / 2.0;
            tr.setAttribute("class", "transcript");
            double tr_x1 = Math.max(1.0, pos2pixel.apply(transcript.getStart()));
            tr.setAttribute("x1", format(tr_x1));
            tr.setAttribute("y1", format(midy));
            tr.setAttribute("x2", format(Math.min(drawing_width, pos2pixel.apply(transcript.getEnd()))));
            tr.setAttribute("y2", format(midy));
            tr.appendChild(element("title", transcript.getId() + " " + transcript.getGene().getGeneName()));
            transcript_g.appendChild(wrapLoc(tr, transcript));
            final Element label = element("text", transcript.getId() + " " + transcript.getGene().getGeneName());
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", format(tr_x1));
            label.setAttribute("y", format(midy));
            transcript_g.appendChild(label);
            for (final Exon exon : transcript.getExons()) {
                if (!exon.overlaps(interval))
                    continue;
                final Element exon_rect = element("rect");
                exon_rect.setAttribute("class", "exon");
                final double exonx1 = Math.max(1.0, pos2pixel.apply(exon.getStart()));
                final double exonx2 = Math.min(drawing_width, pos2pixel.apply(exon.getEnd()));
                exon_rect.setAttribute("x", format(exonx1));
                exon_rect.setAttribute("y", format(y));
                exon_rect.setAttribute("height", format(transcript_height));
                exon_rect.setAttribute("width", format(exonx2 - exonx1));
                exon_rect.appendChild(element("title", exon.getName() + " " + transcript.getId() + " " + transcript.getGene().getGeneName()));
                transcript_g.appendChild(wrapLoc(exon_rect, exon));
                for (int side = 0; side < 2; ++side) {
                    final double x = pos2pixel.apply(side == 0 ? exon.getStart() : exon.getEnd());
                    final Element line = element("line");
                    exon_v.appendChild(line);
                    line.setAttribute("class", "exonline");
                    line.setAttribute("x1", format(x));
                    line.setAttribute("x2", format(x));
                    line.setAttribute("y2", format(y + transcript_height));
                    line.setAttribute("y1", format(prev_y));
                }
            }
        }
        y += transcript_height + 2;
    }
    maing.appendChild(exon_v);
    /* final frame */
    final Element frame_rect = element("rect");
    frame_rect.setAttribute("class", "frame");
    frame_rect.setAttribute("x", "0");
    frame_rect.setAttribute("y", "0");
    frame_rect.setAttribute("width", format(drawing_width));
    frame_rect.setAttribute("height", format(y));
    svgRoot.appendChild(frame_rect);
    svgRoot.setAttribute("width", format(drawing_width + 1));
    svgRoot.setAttribute("height", format(y + 1));
    try {
        final Transformer tr = TransformerFactory.newInstance().newTransformer();
        final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + ":" + interval.getEnd() + ":" + bamPath.toString());
        final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig() + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(sampleName) ? "" : "." + sampleName.replaceAll("[/\\:]", "_")) + ".svg" + (this.compressed_svg ? ".gz" : "");
        if (this.compressed_svg) {
            try (final OutputStream pw = archive.openOuputStream(filename)) {
                try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                    gzout.finish();
                    gzout.flush();
                }
                pw.flush();
            }
        } else {
            try (final PrintWriter pw = archive.openWriter(filename)) {
                tr.transform(new DOMSource(this.document), new StreamResult(pw));
                pw.flush();
            }
        }
        manifest.print(interval.getContig());
        manifest.print('\t');
        manifest.print(interval.getStart() - 1);
        manifest.print('\t');
        manifest.print(interval.getEnd());
        manifest.print('\t');
        manifest.print(bamPath.toString());
        manifest.print('\t');
        manifest.print(geneNames.isEmpty() ? "." : String.join(",", geneNames));
        manifest.print('\t');
        manifest.print(StringUtils.isBlank(sampleName) ? "." : sampleName);
        manifest.print('\t');
        manifest.print((archive.isTarOrZipArchive() ? "" : this.outputFile.toString() + File.separator) + filename);
        manifest.println();
    } catch (final Exception err) {
        throw new RuntimeException(err);
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) StreamResult(javax.xml.transform.stream.StreamResult) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) CoordMath(htsjdk.samtools.util.CoordMath) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) DOMSource(javax.xml.transform.dom.DOMSource) Cigar(htsjdk.samtools.Cigar) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) Function(java.util.function.Function) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) OutputStream(java.io.OutputStream) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) DecimalFormat(java.text.DecimalFormat) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) File(java.io.File) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) CigarElement(htsjdk.samtools.CigarElement) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) Counter(com.github.lindenb.jvarkit.util.Counter) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) GZIPOutputStream(java.util.zip.GZIPOutputStream) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) List(java.util.List) ArrayList(java.util.ArrayList) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) StreamResult(javax.xml.transform.stream.StreamResult) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 9 with Exon

use of com.github.lindenb.jvarkit.util.bio.structure.Exon in project jvarkit by lindenb.

the class BackLocate method doWork.

@Override
public int doWork(final List<String> args) {
    PrintStream out = null;
    BufferedReader in = null;
    try {
        this.referenceGenome = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
            final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceGenome);
            final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
            gtfReader.setContigNameConverter(contigNameConverter);
            gtfReader.getAllGenes().stream().filter(G -> !StringUtils.isBlank(G.getGeneName())).flatMap(G -> G.getTranscripts().stream()).filter(T -> T.isCoding() && T.hasCDS()).forEach(T -> {
                final String gn = T.getGene().getGeneName().toUpperCase();
                List<Transcript> L = this.name2transcripts.get(gn);
                if (L == null) {
                    L = new ArrayList<>();
                    this.name2transcripts.put(gn, L);
                }
                L.add(T);
            });
        }
        out = this.openPathOrStdoutAsPrintStream(this.outputFile);
        out.print("#User.Gene");
        out.print('\t');
        out.print("AA1");
        out.print('\t');
        out.print("petide.pos.1");
        out.print('\t');
        out.print("AA2");
        out.print('\t');
        out.print("transcript.name");
        out.print('\t');
        out.print("transcript.id");
        out.print('\t');
        out.print("transcript.strand");
        out.print('\t');
        out.print("transcript.AA");
        out.print('\t');
        out.print("index0.in.rna");
        out.print('\t');
        out.print("wild.codon");
        out.print('\t');
        out.print("potential.var.codons");
        out.print('\t');
        out.print("base.in.rna");
        out.print('\t');
        out.print("chromosome");
        out.print('\t');
        out.print("index0.in.genomic");
        out.print('\t');
        out.print("exon");
        if (this.printSequences) {
            out.print('\t');
            out.print("mRNA");
            out.print('\t');
            out.print("protein");
        }
        out.print('\t');
        out.print("messages");
        out.print('\t');
        out.print("extra.user.data");
        out.println();
        if (args.isEmpty()) {
            in = super.openBufferedReader(null);
            this.run(out, in);
            CloserUtil.close(in);
        } else {
            for (final String filename : args) {
                in = super.openBufferedReader(filename);
                this.run(out, in);
                CloserUtil.close(in);
            }
        }
        return 0;
    } catch (final Exception e) {
        LOG.severe(e);
        return -1;
    } finally {
        CloserUtil.close(this.referenceGenome);
        this.referenceGenome = null;
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : AminoAcid(com.github.lindenb.jvarkit.util.bio.AminoAcids.AminoAcid) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) PeptideSequence(com.github.lindenb.jvarkit.util.bio.structure.PeptideSequence) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) RNASequenceFactory(com.github.lindenb.jvarkit.util.bio.structure.RNASequenceFactory) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) AminoAcids(com.github.lindenb.jvarkit.util.bio.AminoAcids) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) Path(java.nio.file.Path) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintStream(java.io.PrintStream) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) RNASequence(com.github.lindenb.jvarkit.util.bio.structure.RNASequence) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) PrintStream(java.io.PrintStream) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) BufferedReader(java.io.BufferedReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Aggregations

Exon (com.github.lindenb.jvarkit.util.bio.structure.Exon)9 Parameter (com.beust.jcommander.Parameter)8 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)8 GtfReader (com.github.lindenb.jvarkit.util.bio.structure.GtfReader)8 Transcript (com.github.lindenb.jvarkit.util.bio.structure.Transcript)8 Program (com.github.lindenb.jvarkit.util.jcommander.Program)8 Logger (com.github.lindenb.jvarkit.util.log.Logger)8 Path (java.nio.file.Path)8 List (java.util.List)8 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)7 Gene (com.github.lindenb.jvarkit.util.bio.structure.Gene)7 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)7 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)7 Collectors (java.util.stream.Collectors)7 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)6 CloserUtil (htsjdk.samtools.util.CloserUtil)6 Interval (htsjdk.samtools.util.Interval)6 Locatable (htsjdk.samtools.util.Locatable)6 PrintWriter (java.io.PrintWriter)6 IntervalTreeMap (htsjdk.samtools.util.IntervalTreeMap)5