Search in sources :

Example 51 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class DepthOfCoverage method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    if (this.auto_mask && this.faidx == null) {
        LOG.error("Cannot auto mask if REF is not defined");
        return -1;
    }
    if (this.maskBed != null && this.includeBed != null) {
        LOG.error("both --mask and --bed both defined");
        return -1;
    }
    ReferenceSequenceFile referenceSequenceFile = null;
    try {
        final Predicate<String> isRejectContigRegex;
        if (!StringUtils.isBlank(this.skipContigExpr)) {
            final Pattern pat = Pattern.compile(this.skipContigExpr);
            isRejectContigRegex = S -> pat.matcher(S).matches();
        } else {
            isRejectContigRegex = S -> false;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            srf.referenceSequence(this.faidx);
            srf.setUseAsyncIo(this.asyncIo);
            referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        }
        out = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        out.print("#BAM\tSample\tContig\tContig-Length\tMasked-Contig-Length\tCount\tDepth\tMedian\tMin\tMax\tMaxPos");
        for (RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
            if (r.getMinInclusive() == null)
                continue;
            out.print("\t");
            out.print(r.toString());
        }
        out.println();
        for (final Path path : IOUtils.unrollPaths(args)) {
            try (final SamReader sr = srf.open(path)) {
                if (!sr.hasIndex()) {
                    LOG.error("File " + path + " is not indexed.");
                    return -1;
                }
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Set<String> rejectContigSet = dict.getSequences().stream().map(SSR -> SSR.getSequenceName()).filter(isRejectContigRegex).collect(Collectors.toCollection(HashSet::new));
                rejectContigSet.addAll(dict.getSequences().stream().filter(SSR -> SSR.getSequenceLength() < this.skipContigLength).map(SSR -> SSR.getSequenceName()).collect(Collectors.toCollection(HashSet::new)));
                if (!header.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
                    LOG.error("file is not sorted on coordinate :" + header.getSortOrder() + " " + path);
                    return -1;
                }
                final QueryInterval[] intervals;
                if (this.useBamIndexFlag && this.includeBed != null) {
                    if (!sr.hasIndex()) {
                        LOG.error("Bam is not indexed. " + path);
                        return -1;
                    }
                    final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                    final List<QueryInterval> L = new ArrayList<>();
                    try (BedLineReader br = new BedLineReader(this.includeBed)) {
                        while (br.hasNext()) {
                            final BedLine bed = br.next();
                            final String ctg = contigNameConverter.apply(bed.getContig());
                            if (StringUtils.isBlank(ctg))
                                continue;
                            final int tid = dict.getSequenceIndex(ctg);
                            if (tid < 0)
                                continue;
                            L.add(new QueryInterval(tid, bed.getStart(), bed.getEnd()));
                        }
                    }
                    intervals = QueryInterval.optimizeIntervals(L.toArray(new QueryInterval[L.size()]));
                } else {
                    intervals = null;
                }
                Integer minCov = null;
                Integer maxCov = null;
                ContigPos maxCovPosition = null;
                long count_raw_bases = 0L;
                long count_bases = 0L;
                long sum_coverage = 0L;
                final DiscreteMedian<Integer> discreteMedian_wg = new DiscreteMedian<>();
                final Counter<RangeOfIntegers.Range> countMap_wg = new Counter<>();
                final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(path.toString());
                int[] coverage = null;
                String prevContig = null;
                BitSet mask = null;
                final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
                try (CloseableIterator<SAMRecord> iter = intervals == null ? sr.iterator() : sr.queryOverlapping(intervals)) {
                    for (; ; ) {
                        final SAMRecord rec = iter.hasNext() ? progress.apply(iter.next()) : null;
                        if (rec != null) {
                            if (!SAMRecordDefaultFilter.accept(rec, this.mapping_quality))
                                continue;
                            if (rejectContigSet.contains(rec.getContig()))
                                continue;
                        }
                        if (rec == null || !rec.getContig().equals(prevContig)) {
                            if (coverage != null) {
                                // DUMP
                                long count_bases_ctg = 0L;
                                long sum_coverage_ctg = 0L;
                                Integer minV_ctg = null;
                                Integer maxV_ctg = null;
                                ContigPos maxPos_ctg = null;
                                final DiscreteMedian<Integer> discreteMedian_ctg = new DiscreteMedian<>();
                                final Counter<RangeOfIntegers.Range> countMap_ctg = new Counter<>();
                                for (int i = 0; i < coverage.length; i++) {
                                    if (mask.get(i))
                                        continue;
                                    final int covi = coverage[i];
                                    if (covi > this.max_depth)
                                        continue;
                                    if (minV_ctg == null || minV_ctg.intValue() > covi)
                                        minV_ctg = covi;
                                    if (maxV_ctg == null || maxV_ctg.intValue() < covi) {
                                        maxV_ctg = covi;
                                        maxPos_ctg = new ContigPos(prevContig, i + 1);
                                    }
                                    countMap_ctg.incr(this.summaryCov.getRange(covi));
                                    count_bases_ctg++;
                                    sum_coverage_ctg += covi;
                                    discreteMedian_ctg.add(covi);
                                }
                                out.print(path);
                                out.print("\t");
                                out.print(sample);
                                out.print("\t");
                                out.print(prevContig);
                                out.print("\t");
                                out.print(coverage.length);
                                out.print("\t");
                                out.print(count_bases_ctg);
                                out.print("\t");
                                out.print(sum_coverage_ctg);
                                out.print("\t");
                                if (count_bases_ctg > 0) {
                                    out.printf("%.2f", sum_coverage_ctg / (double) count_bases_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                final OptionalDouble median = discreteMedian_ctg.getMedian();
                                if (median.isPresent()) {
                                    out.print(median.getAsDouble());
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (minV_ctg != null) {
                                    out.print(minV_ctg);
                                } else {
                                    out.print("N/A");
                                }
                                out.print("\t");
                                if (maxV_ctg != null) {
                                    out.print(maxV_ctg);
                                    out.print("\t");
                                    out.print(maxPos_ctg);
                                } else {
                                    out.print("N/A\tN/A");
                                }
                                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                                    if (r.getMinInclusive() == null)
                                        continue;
                                    out.print("\t");
                                    out.print(countMap_ctg.count(r));
                                    if (!countMap_ctg.isEmpty()) {
                                        out.print(" ");
                                        out.printf("(%.2f%%)", (countMap_ctg.count(r) / (countMap_ctg.getTotal() * 1.0)) * 100.0);
                                    }
                                }
                                out.println();
                                if (minCov == null || (minV_ctg != null && minV_ctg.compareTo(minCov) < 0))
                                    minCov = minV_ctg;
                                if (maxCov == null || (maxV_ctg != null && maxV_ctg.compareTo(maxCov) > 0)) {
                                    maxCov = maxV_ctg;
                                    maxCovPosition = maxPos_ctg;
                                }
                                count_bases += count_bases_ctg;
                                sum_coverage += sum_coverage_ctg;
                                count_raw_bases += coverage.length;
                                discreteMedian_wg.add(discreteMedian_ctg);
                                countMap_wg.putAll(countMap_ctg);
                            }
                            coverage = null;
                            mask = null;
                            // /
                            System.gc();
                            if (rec == null)
                                break;
                            final SAMSequenceRecord ssr = Objects.requireNonNull(dict.getSequence(rec.getContig()));
                            coverage = new int[ssr.getSequenceLength()];
                            mask = new BitSet(ssr.getSequenceLength());
                            if (this.auto_mask && referenceSequenceFile != null) {
                                final byte[] refSeq = Objects.requireNonNull(referenceSequenceFile.getSequence(ssr.getSequenceName())).getBases();
                                for (int i = 0; i < refSeq.length; i++) {
                                    if (AcidNucleics.isATGC(refSeq[i]))
                                        continue;
                                    mask.set(i);
                                }
                            }
                            /* read mask */
                            if (this.maskBed != null) {
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.maskBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        for (int p1 = bed.getStart(); p1 <= bed.getEnd() && p1 <= coverage.length; ++p1) {
                                            mask.set(p1 - 1);
                                        }
                                    }
                                }
                            } else if (this.includeBed != null) {
                                final List<Locatable> list = new ArrayList<>();
                                final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
                                try (BedLineReader br = new BedLineReader(this.includeBed)) {
                                    while (br.hasNext()) {
                                        final BedLine bed = br.next();
                                        if (bed == null)
                                            continue;
                                        final String ctg = contigNameConverter.apply(bed.getContig());
                                        if (StringUtils.isBlank(ctg))
                                            continue;
                                        if (!rec.getContig().equals(ctg))
                                            continue;
                                        list.add(new SimpleInterval(ctg, bed.getStart(), bed.getEnd()));
                                    }
                                }
                                // sort on starts
                                Collections.sort(list, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
                                int p1 = 1;
                                while (p1 <= coverage.length) {
                                    while (!list.isEmpty() && list.get(0).getEnd() < p1) {
                                        list.remove(0);
                                    }
                                    if (!list.isEmpty() && list.get(0).getStart() <= p1 && p1 <= list.get(0).getEnd()) {
                                        ++p1;
                                        continue;
                                    }
                                    mask.set(p1 - 1);
                                    p1++;
                                }
                            }
                            prevContig = rec.getContig();
                        }
                        int max_end1 = coverage.length;
                        if (!this.disable_paired_overlap_flag && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && rec.getAlignmentStart() < rec.getMateAlignmentStart() && rec.getAlignmentEnd() > rec.getMateAlignmentStart()) {
                            max_end1 = rec.getMateAlignmentStart() - 1;
                        }
                        for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
                            final int pos1 = block.getReferenceStart();
                            final int len = block.getLength();
                            for (int i = 0; i < len; i++) {
                                if (pos1 + i - 1 >= 0 && pos1 + i <= max_end1) {
                                    coverage[pos1 + i - 1]++;
                                }
                            }
                        }
                    }
                /* end rec */
                }
                /* end iter */
                progress.close();
                out.print(path);
                out.print("\t");
                out.print(sample);
                out.print("\t");
                out.print(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
                out.print("\t");
                out.print(count_raw_bases);
                out.print("\t");
                out.print(count_bases);
                out.print("\t");
                out.print(sum_coverage);
                out.print("\t");
                if (count_bases > 0) {
                    out.printf("%.2f", sum_coverage / (double) count_bases);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                final OptionalDouble median = discreteMedian_wg.getMedian();
                if (median.isPresent()) {
                    out.print(median.getAsDouble());
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (minCov != null) {
                    out.print(minCov);
                } else {
                    out.print("N/A");
                }
                out.print("\t");
                if (maxCov != null) {
                    out.print(maxCov + "\t" + maxCovPosition);
                } else {
                    out.print("N/A\tN/A");
                }
                for (final RangeOfIntegers.Range r : this.summaryCov.getRanges()) {
                    if (r.getMinInclusive() == null)
                        continue;
                    out.print("\t");
                    out.print(countMap_wg.count(r));
                    if (!countMap_wg.isEmpty()) {
                        out.print(" ");
                        out.printf("(%.2f%%)", (countMap_wg.count(r) / (countMap_wg.getTotal() * 1.0)) * 100.0);
                    }
                }
                out.println();
            }
        }
        out.flush();
        out.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(referenceSequenceFile);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) OptionalDouble(java.util.OptionalDouble) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) AlignmentBlock(htsjdk.samtools.AlignmentBlock) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Counter(com.github.lindenb.jvarkit.util.Counter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) SAMRecord(htsjdk.samtools.SAMRecord) Objects(java.util.Objects) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) QueryInterval(htsjdk.samtools.QueryInterval) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) BitSet(java.util.BitSet) Pattern(java.util.regex.Pattern) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) Path(java.nio.file.Path) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) ContigPos(com.github.lindenb.jvarkit.util.vcf.ContigPos) BitSet(java.util.BitSet) OptionalDouble(java.util.OptionalDouble) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) RangeOfIntegers(com.github.lindenb.jvarkit.math.RangeOfIntegers) SAMFileHeader(htsjdk.samtools.SAMFileHeader) AlignmentBlock(htsjdk.samtools.AlignmentBlock)

Example 52 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class PlotSashimi method plotSashimi.

/**
 * create the SVG itself
 */
private void plotSashimi(final ArchiveFactory archive, final SamReader samReader, final Locatable interval, final Path bamPath, final PrintWriter manifest) {
    final int drawing_width = Math.max(100, this.image_width_pixel);
    final int coverageHeight = Math.max(100, Integer.parseInt(this.dynamicParams.getOrDefault("coverage.height", "300")));
    final double pixelperbase = drawing_width / (double) interval.getLengthOnReference();
    final SAMFileHeader header = samReader.getFileHeader();
    final Collection<Gene> genes = this.geneMap.getOverlapping(interval);
    final Set<String> geneNames = genes.stream().map(G -> G.getGeneName()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new));
    /**
     * extract the sample name or just use the filename
     */
    final String sampleName = StringUtils.ifBlank(header.getReadGroups().stream().map(G -> this.partition.apply(G)).filter(S -> !StringUtils.isBlank(S)).sorted().collect(Collectors.joining(";")), bamPath.getFileName().toString());
    final Function<Integer, Double> pos2pixel = POS -> (POS - interval.getStart()) / (double) interval.getLengthOnReference() * drawing_width;
    final Counter<SimpleInterval> gaps = new Counter<>();
    final int[] coverage = new int[interval.getLengthOnReference()];
    try (SAMRecordIterator iter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd())) {
        /**
         * no read here, skip
         */
        boolean got_one = false;
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (rec.getReadUnmappedFlag())
                continue;
            if (rec.getReadFailsVendorQualityCheckFlag())
                continue;
            if (rec.isSecondaryOrSupplementary())
                continue;
            if (rec.getDuplicateReadFlag())
                continue;
            if (rec.getMappingQuality() < this.min_mapq)
                continue;
            final Cigar cigar = rec.getCigar();
            if (cigar == null || cigar.isEmpty())
                continue;
            got_one = true;
            int ref = rec.getAlignmentStart();
            for (final CigarElement ce : cigar) {
                if (ref > interval.getEnd())
                    break;
                final CigarOperator op = ce.getOperator();
                if (op.equals(CigarOperator.N) || (use_D_operator && op.equals(CigarOperator.D))) {
                    gaps.incr(new SimpleInterval(rec.getContig(), ref, ref + ce.getLength() - 1));
                }
                if (op.consumesReferenceBases()) {
                    if (op.consumesReadBases()) {
                        for (int x = 0; x < ce.getLength(); ++x) {
                            final int pos1 = ref + x;
                            if (pos1 < interval.getStart())
                                continue;
                            if (pos1 > interval.getEnd())
                                break;
                            coverage[pos1 - interval.getStart()]++;
                        }
                    }
                    ref += ce.getLength();
                }
            }
        }
        if (!got_one && this.skip_region_without_read)
            return;
    }
    final int max_coverage;
    if (this.force_max_coverage > 0) {
        max_coverage = this.force_max_coverage;
    } else {
        max_coverage = Math.max(1, Arrays.stream(coverage).max().orElse(0));
    }
    while (this.document.hasChildNodes()) {
        this.document.removeChild(this.document.getFirstChild());
    }
    final Element svgRoot = element("svg");
    this.document.appendChild(svgRoot);
    /* SVG title */
    {
        final Element title = element("title");
        svgRoot.appendChild(title);
        title.appendChild(text(interval.toString() + (!geneNames.isEmpty() && geneNames.size() < 3 ? " " + String.join(" ", geneNames) : "")));
    }
    /* SVG style */
    {
        final Element style = element("style");
        svgRoot.appendChild(style);
        style.appendChild(text(this.cssPath == null ? ".coverage { fill:red;fill:url('#grad01')} " + ".maintitle {text-anchor:middle;fill:blue} " + ".sample {fill:blue;font-size:7px;} " + ".frame { fill:none; stroke: darkgray;} " + ".arcK { fill:none; stroke: blue; stroke-linecap:round;opacity:0.8;} " + ".arcU { fill:none; stroke: red; stroke-linecap:round;opacity:0.8;} " + ".transcript { fill:darkgray; stroke: darkgray;} " + ".exon { fill:green; stroke: darkgray;} " + ".frame { fill:none; stroke: darkgray;} " + ".rulerline {stroke:lightgray;stroke-width:0.5px;}\n" + ".exonline {stroke:green;stroke-width:0.5px;opacity:0.5;}\n" + ".rulerlabel {stroke:gray;stroke-width:0.5px;font-size:7px;}\n" + "a {cursor: pointer;}\n" : IOUtils.slurpPath(this.cssPath)));
    }
    // SVG def
    {
        final Element defs = element("defs");
        svgRoot.appendChild(defs);
        // linear gradient
        {
            Element grad = element("linearGradient");
            defs.appendChild(grad);
            grad.setAttribute("id", "grad01");
            grad.setAttribute("gradientTransform", "rotate(90)");
            Element stop = element("stop");
            grad.appendChild(stop);
            stop.setAttribute("offset", "0%");
            stop.setAttribute("stop-color", (max_coverage > 50 ? "red" : max_coverage > 20 ? "green" : "blue"));
            stop = element("stop");
            grad.appendChild(stop);
            stop.setAttribute("offset", "100%");
            stop.setAttribute("stop-color", "darkblue");
        }
    }
    final Element descr = element("desc");
    svgRoot.appendChild(descr);
    descr.appendChild(text("Author: Pierre Lindenbaum\n" + JVarkitVersion.getInstance().getCompilationDate() + "\n" + JVarkitVersion.getInstance().getGitHash()));
    final Element maing = element("g");
    svgRoot.appendChild(maing);
    int y = 0;
    // main title
    Element gtitle = element("text", new SimpleInterval(interval).toNiceString() + (StringUtils.isBlank(sampleName) ? "" : " " + sampleName) + (geneNames.isEmpty() ? "" : " " + String.join(" ", geneNames)));
    gtitle.setAttribute("class", "maintitle");
    gtitle.setAttribute("x", format(drawing_width / 2));
    gtitle.setAttribute("y", "15");
    svgRoot.appendChild(gtitle);
    y += 20;
    // sample name
    if (!StringUtils.isBlank(sampleName)) {
        gtitle = element("text", sampleName);
        gtitle.setAttribute("class", "sample");
        gtitle.setAttribute("x", "5");
        gtitle.setAttribute("y", "20");
        svgRoot.appendChild(gtitle);
    }
    y += 50;
    final int prev_y = y;
    /**
     * horizontal ruler
     */
    {
        final Element ruler_gh = element("g");
        maing.appendChild(ruler_gh);
        final int sep = bestTicks(interval.getLengthOnReference());
        for (int pos = interval.getStart(); pos <= interval.getEnd(); ++pos) {
            if (pos % sep != 0)
                continue;
            double x = pos2pixel.apply(pos);
            final Element line = element("line");
            ruler_gh.appendChild(line);
            line.setAttribute("class", "rulerline");
            line.appendChild(element("title", StringUtils.niceInt(pos)));
            line.setAttribute("x1", format(x));
            line.setAttribute("x2", format(x));
            line.setAttribute("y1", format(y));
            line.setAttribute("y2", format(y + coverageHeight));
            final Element label = element("text", StringUtils.niceInt(pos));
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", "0");
            label.setAttribute("y", "0");
            label.setAttribute("transform", "translate(" + format(x) + "," + y + ") rotate(90) ");
            ruler_gh.appendChild(label);
        }
    }
    /**
     * vertical ruler
     */
    {
        final Element ruler_gv = element("g");
        maing.appendChild(ruler_gv);
        final int sep = bestTicks(max_coverage);
        for (int pos = 0; pos <= max_coverage; ++pos) {
            if (pos % sep != 0)
                continue;
            double ry = (int) (y + coverageHeight - (pos / (double) max_coverage) * coverageHeight);
            final Element line = element("line");
            ruler_gv.appendChild(line);
            line.setAttribute("class", "rulerline");
            line.appendChild(element("title", StringUtils.niceInt(pos)));
            line.setAttribute("x1", format(0));
            line.setAttribute("x2", format(drawing_width));
            line.setAttribute("y1", format(ry));
            line.setAttribute("y2", format(ry));
            final Element label = element("text", StringUtils.niceInt(pos));
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", "1");
            label.setAttribute("y", format(ry));
            ruler_gv.appendChild(label);
        }
    }
    /**
     * vertical lines of exons
     */
    final Element exon_v = element("g");
    final Element covPath = element("path");
    covPath.setAttribute("class", "coverage");
    maing.appendChild(covPath);
    final StringBuilder sb = new StringBuilder();
    sb.append("M 0 " + format(y + coverageHeight));
    for (int k = 0; k < coverage.length; k++) {
        // if(k+1< coverage.length && coverage[k]==coverage[k+1]) continue;
        final double dpy = y + coverageHeight - coverageHeight * (coverage[k] / (double) max_coverage);
        sb.append(" L " + format(pixelperbase * k) + " " + format(dpy));
    }
    sb.append(" L " + format(drawing_width) + " " + format(y + coverageHeight));
    sb.append(" Z");
    covPath.setAttribute("d", sb.toString());
    covPath.appendChild(element("title", "Coverage. Max:" + StringUtils.niceInt(max_coverage)));
    int next_y = y + coverageHeight;
    /* plot arc */
    if (!gaps.isEmpty()) {
        boolean drawAbove = true;
        int max_occurence = (int) gaps.count(gaps.getMostFrequent());
        for (final SimpleInterval intron : gaps.keySet()) {
            final int occurence = (int) gaps.count(intron);
            boolean is_known_intron = genes.stream().flatMap(G -> G.getTranscripts().stream()).flatMap(T -> T.getIntrons().stream()).filter(E -> E.overlaps(intron)).anyMatch(I -> I.getStart() == intron.getStart() && I.getEnd() == intron.getEnd());
            final int junctionStart = intron.getStart() - 1;
            final int junctionEnd = intron.getEnd() + 1;
            if (!CoordMath.encloses(interval.getStart(), interval.getEnd(), junctionStart, junctionEnd))
                continue;
            final double xstart = pos2pixel.apply(junctionStart);
            final double xend = pos2pixel.apply(junctionEnd);
            double ystart = y + coverageHeight - coverageHeight * (coverage[junctionStart - interval.getStart()] / (double) max_coverage);
            double yend = y + coverageHeight - coverageHeight * (coverage[junctionEnd - interval.getStart()] / (double) max_coverage);
            final Element arc = element("path");
            sb.setLength(0);
            double x_mid = (xend - xstart) / 2.0;
            double x2 = xstart + x_mid;
            final double y2;
            // small gap: always print it under xaxis
            if (xend - xstart < 30)
                drawAbove = true;
            if (drawAbove) {
                ystart = y + coverageHeight;
                yend = y + coverageHeight;
                y2 = y + coverageHeight + x_mid;
                next_y = (int) Math.max(next_y, y + coverageHeight + x_mid / 2 + 10);
            } else {
                y2 = Math.max(0, ystart + (yend - ystart) / 2.0 - x_mid);
            }
            sb.append("M " + format(xstart) + " " + format(ystart));
            sb.append(" Q " + format(x2) + " " + format(y2) + " " + format(xend) + " " + format(yend));
            arc.setAttribute("d", sb.toString());
            arc.setAttribute("class", "arc" + (is_known_intron ? "K" : "U"));
            final double stroke_width = 1 + /* show very small one */
            (occurence / (double) max_occurence) * 10;
            arc.setAttribute("style", "stroke-width:" + format(stroke_width) + "px;");
            arc.appendChild(element("title", new SimpleInterval(interval.getContig(), junctionStart, junctionEnd).toNiceString() + " (" + StringUtils.niceInt(occurence) + ") " + (is_known_intron ? "known" : "unknown")));
            maing.appendChild(wrapLoc(arc, intron));
            drawAbove = !drawAbove;
        }
    }
    y = next_y;
    y += 2;
    /**
     * pileup transcripts
     */
    final List<List<Transcript>> transcriptRows = new ArrayList<>();
    for (final Transcript transcript : genes.stream().flatMap(L -> L.getTranscripts().stream()).filter(T -> T.overlaps(interval)).sorted((A, B) -> Integer.compare(A.getStart(), B.getStart())).collect(Collectors.toList())) {
        int rowidx = 0;
        while (rowidx < transcriptRows.size()) {
            final List<Transcript> row = transcriptRows.get(rowidx);
            final Transcript last = row.get(row.size() - 1);
            if (!last.overlaps(transcript)) {
                row.add(transcript);
                break;
            }
            rowidx++;
        }
        if (rowidx == transcriptRows.size()) {
            final List<Transcript> row = new ArrayList<>();
            row.add(transcript);
            transcriptRows.add(row);
        }
    }
    /**
     * plot transcripts
     */
    final Element transcripts_g = element("g");
    maing.appendChild(transcripts_g);
    final int transcript_height = Math.max(10, Integer.parseInt(this.dynamicParams.getOrDefault("transcript.height", "12")));
    for (final List<Transcript> row : transcriptRows) {
        final Element grow = element("g");
        transcripts_g.appendChild(grow);
        for (final Transcript transcript : row) {
            final Element transcript_g = element("g");
            grow.appendChild(transcript_g);
            final Element tr = element("line");
            final double midy = y + transcript_height / 2.0;
            tr.setAttribute("class", "transcript");
            double tr_x1 = Math.max(1.0, pos2pixel.apply(transcript.getStart()));
            tr.setAttribute("x1", format(tr_x1));
            tr.setAttribute("y1", format(midy));
            tr.setAttribute("x2", format(Math.min(drawing_width, pos2pixel.apply(transcript.getEnd()))));
            tr.setAttribute("y2", format(midy));
            tr.appendChild(element("title", transcript.getId() + " " + transcript.getGene().getGeneName()));
            transcript_g.appendChild(wrapLoc(tr, transcript));
            final Element label = element("text", transcript.getId() + " " + transcript.getGene().getGeneName());
            label.setAttribute("class", "rulerlabel");
            label.setAttribute("x", format(tr_x1));
            label.setAttribute("y", format(midy));
            transcript_g.appendChild(label);
            for (final Exon exon : transcript.getExons()) {
                if (!exon.overlaps(interval))
                    continue;
                final Element exon_rect = element("rect");
                exon_rect.setAttribute("class", "exon");
                final double exonx1 = Math.max(1.0, pos2pixel.apply(exon.getStart()));
                final double exonx2 = Math.min(drawing_width, pos2pixel.apply(exon.getEnd()));
                exon_rect.setAttribute("x", format(exonx1));
                exon_rect.setAttribute("y", format(y));
                exon_rect.setAttribute("height", format(transcript_height));
                exon_rect.setAttribute("width", format(exonx2 - exonx1));
                exon_rect.appendChild(element("title", exon.getName() + " " + transcript.getId() + " " + transcript.getGene().getGeneName()));
                transcript_g.appendChild(wrapLoc(exon_rect, exon));
                for (int side = 0; side < 2; ++side) {
                    final double x = pos2pixel.apply(side == 0 ? exon.getStart() : exon.getEnd());
                    final Element line = element("line");
                    exon_v.appendChild(line);
                    line.setAttribute("class", "exonline");
                    line.setAttribute("x1", format(x));
                    line.setAttribute("x2", format(x));
                    line.setAttribute("y2", format(y + transcript_height));
                    line.setAttribute("y1", format(prev_y));
                }
            }
        }
        y += transcript_height + 2;
    }
    maing.appendChild(exon_v);
    /* final frame */
    final Element frame_rect = element("rect");
    frame_rect.setAttribute("class", "frame");
    frame_rect.setAttribute("x", "0");
    frame_rect.setAttribute("y", "0");
    frame_rect.setAttribute("width", format(drawing_width));
    frame_rect.setAttribute("height", format(y));
    svgRoot.appendChild(frame_rect);
    svgRoot.setAttribute("width", format(drawing_width + 1));
    svgRoot.setAttribute("height", format(y + 1));
    try {
        final Transformer tr = TransformerFactory.newInstance().newTransformer();
        final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + ":" + interval.getEnd() + ":" + bamPath.toString());
        final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig() + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(sampleName) ? "" : "." + sampleName.replaceAll("[/\\:]", "_")) + ".svg" + (this.compressed_svg ? ".gz" : "");
        if (this.compressed_svg) {
            try (final OutputStream pw = archive.openOuputStream(filename)) {
                try (GZIPOutputStream gzout = new GZIPOutputStream(pw)) {
                    tr.transform(new DOMSource(this.document), new StreamResult(gzout));
                    gzout.finish();
                    gzout.flush();
                }
                pw.flush();
            }
        } else {
            try (final PrintWriter pw = archive.openWriter(filename)) {
                tr.transform(new DOMSource(this.document), new StreamResult(pw));
                pw.flush();
            }
        }
        manifest.print(interval.getContig());
        manifest.print('\t');
        manifest.print(interval.getStart() - 1);
        manifest.print('\t');
        manifest.print(interval.getEnd());
        manifest.print('\t');
        manifest.print(bamPath.toString());
        manifest.print('\t');
        manifest.print(geneNames.isEmpty() ? "." : String.join(",", geneNames));
        manifest.print('\t');
        manifest.print(StringUtils.isBlank(sampleName) ? "." : sampleName);
        manifest.print('\t');
        manifest.print((archive.isTarOrZipArchive() ? "" : this.outputFile.toString() + File.separator) + filename);
        manifest.println();
    } catch (final Exception err) {
        throw new RuntimeException(err);
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) StreamResult(javax.xml.transform.stream.StreamResult) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) CoordMath(htsjdk.samtools.util.CoordMath) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) DOMSource(javax.xml.transform.dom.DOMSource) Cigar(htsjdk.samtools.Cigar) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) Function(java.util.function.Function) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) OutputStream(java.io.OutputStream) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) DecimalFormat(java.text.DecimalFormat) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) File(java.io.File) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) CigarElement(htsjdk.samtools.CigarElement) Element(org.w3c.dom.Element) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) Counter(com.github.lindenb.jvarkit.util.Counter) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) GZIPOutputStream(java.util.zip.GZIPOutputStream) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) List(java.util.List) ArrayList(java.util.ArrayList) PrintWriter(java.io.PrintWriter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) StreamResult(javax.xml.transform.stream.StreamResult) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 53 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class CoverageServer method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.image_width < 10) {
        LOG.error("low image width");
        return -1;
    }
    if (this.image_height < 10) {
        LOG.error("low image height");
        return -1;
    }
    if (this.images_per_row < 1) {
        LOG.error("low images_per_row");
        return -1;
    }
    if (this.extend_factor <= 0) {
        LOG.error("bad extend_factor " + this.extend_factor);
        return -1;
    }
    try {
        this.bamInput.addAll(IOUtils.unrollPaths(args).stream().map(F -> new BamInput(F)).collect(Collectors.toList()));
        if (this.bamInput.isEmpty()) {
            LOG.error("No BAM defined.");
            return -1;
        }
        this.dictionary = SequenceDictionaryUtils.extractRequired(this.faidxRef);
        for (final BamInput bi : this.bamInput) {
            final SamReaderFactory srf = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidxRef);
            try (SamReader sr = srf.open(bi.bamPath)) {
                final SAMFileHeader header = sr.getFileHeader();
                SequenceUtil.assertSequenceDictionariesEqual(this.dictionary, SequenceDictionaryUtils.extractRequired(header));
                bi.sample = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bi.bamPath));
            }
        }
        if (this.pedigreePath != null) {
            this.pedigree = new PedigreeParser().parse(this.pedigreePath);
        }
        if (this.intervalsource != null) {
            final ContigNameConverter cvt = ContigNameConverter.fromOneDictionary(this.dictionary);
            final BedLineCodec codec = new BedLineCodec();
            try (BufferedReader br = IOUtils.openPathForBufferedReading(this.intervalsource)) {
                br.lines().filter(L -> !BedLine.isBedHeader(L)).map(L -> codec.decode(L)).filter(B -> B != null).map(B -> new ReviewedInterval(new SimpleInterval(B.getContig(), B.getStart(), B.getEnd()), B.getOrDefault(3, ""))).map(B -> {
                    final String ctg = cvt.apply(B.getContig());
                    if (StringUtils.isBlank(ctg))
                        return null;
                    if (ctg.equals(B.getContig()))
                        return B;
                    return new ReviewedInterval(new SimpleInterval(ctg, B.getStart(), B.getEnd()), B.getName());
                }).filter(B -> B != null).forEach(B -> named_intervals.add(B));
            }
        }
        this.intervalListProvider.dictionary(this.dictionary).skipUnknownContigs().stream().map(L -> new Interval(L)).forEach(B -> named_intervals.add(new ReviewedInterval(B, "")));
        final Server server = new Server(this.serverPort);
        final ServletContextHandler context = new ServletContextHandler();
        context.addServlet(new ServletHolder(new CoverageServlet()), "/*");
        context.setContextPath("/");
        context.setResourceBase(".");
        server.setHandler(context);
        LOG.info("Starting server " + getProgramName() + " on port " + this.serverPort);
        server.start();
        LOG.info("Server started. Press Ctrl-C to stop. Check your proxy settings ." + " Open a web browser at http://localhost:" + this.serverPort + "/coverage .");
        server.join();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : Color(java.awt.Color) ServletContextHandler(org.eclipse.jetty.servlet.ServletContextHandler) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) ServletException(javax.servlet.ServletException) Rectangle2D(java.awt.geom.Rectangle2D) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) RenderingHints(java.awt.RenderingHints) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) Vector(java.util.Vector) XMLStreamException(javax.xml.stream.XMLStreamException) ImageIO(javax.imageio.ImageIO) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) HttpStatus(org.eclipse.jetty.http.HttpStatus) Path(java.nio.file.Path) Server(org.eclipse.jetty.server.Server) CloserUtil(htsjdk.samtools.util.CloserUtil) Shape(java.awt.Shape) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) HttpServlet(javax.servlet.http.HttpServlet) Composite(java.awt.Composite) BufferedImage(java.awt.image.BufferedImage) Logger(com.github.lindenb.jvarkit.util.log.Logger) StandardOpenOption(java.nio.file.StandardOpenOption) ServletHolder(org.eclipse.jetty.servlet.ServletHolder) Set(java.util.Set) Collectors(java.util.stream.Collectors) GTFCodec(com.github.lindenb.jvarkit.util.bio.gtf.GTFCodec) SAMRecord(htsjdk.samtools.SAMRecord) DoubleStream(java.util.stream.DoubleStream) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) Stream(java.util.stream.Stream) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) ToDoubleFunction(java.util.function.ToDoubleFunction) BasicStroke(java.awt.BasicStroke) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) GeneralPath(java.awt.geom.GeneralPath) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IntStream(java.util.stream.IntStream) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) IterableAdapter(htsjdk.samtools.util.IterableAdapter) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) GTFLine(com.github.lindenb.jvarkit.util.bio.gtf.GTFLine) Interval(htsjdk.samtools.util.Interval) AlphaComposite(java.awt.AlphaComposite) HttpServletRequest(javax.servlet.http.HttpServletRequest) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Graphics2D(java.awt.Graphics2D) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) TabixReader(htsjdk.tribble.readers.TabixReader) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) StreamSupport(java.util.stream.StreamSupport) IntFunction(java.util.function.IntFunction) VCFConstants(htsjdk.variant.vcf.VCFConstants) Stroke(java.awt.Stroke) Line2D(java.awt.geom.Line2D) Counter(com.github.lindenb.jvarkit.util.Counter) AbstractIterator(htsjdk.samtools.util.AbstractIterator) Locatable(htsjdk.samtools.util.Locatable) IntToDoubleFunction(java.util.function.IntToDoubleFunction) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) HttpServletResponse(javax.servlet.http.HttpServletResponse) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) Pileup(com.github.lindenb.jvarkit.samtools.util.Pileup) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Sample(com.github.lindenb.jvarkit.pedigree.Sample) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Server(org.eclipse.jetty.server.Server) ServletHolder(org.eclipse.jetty.servlet.ServletHolder) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) SamReader(htsjdk.samtools.SamReader) BufferedReader(java.io.BufferedReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ServletContextHandler(org.eclipse.jetty.servlet.ServletContextHandler) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval)

Example 54 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class HtsFileServer method dumpData.

/**
 * dumpData
 */
private void dumpData(final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException {
    final String[] fileids = request.getParameterValues(KEY_FILEID);
    final List<AbstractInput> selected = new ArrayList<>();
    if (fileids != null) {
        for (final String fid : fileids) {
            final AbstractInput input = this.htsMap.get(fid);
            if (input != null)
                selected.add(input);
        }
    }
    // default: add all
    if (selected.isEmpty()) {
        selected.addAll(this.htsMap.values());
    }
    Locatable loc = null;
    /* parse user interval */
    final String intervalstr = request.getParameter(KEY_INTERVAL);
    if (!StringUtils.isBlank(intervalstr)) {
        loc = IntervalParserFactory.newInstance().dictionary(this.dictionary).make().apply(intervalstr).orElse(null);
        if (loc == null && this.gtfFile != null) {
            final ContigNameConverter cvt = ContigNameConverter.fromOneDictionary(this.dictionary);
            final String geneName = intervalstr.trim();
            final GTFCodec codec = new GTFCodec();
            try (BufferedReader br = IOUtils.openPathForBufferedReading(gtfFile)) {
                br.lines().map(line -> {
                    if (StringUtils.isBlank(line) || line.startsWith("#"))
                        return null;
                    final String[] tokens = CharSplitter.TAB.split(line);
                    if (tokens.length < 9)
                        return null;
                    if (!(tokens[2].equals("gene") || tokens[2].equals("transcript")))
                        return null;
                    if (StringUtils.indexOfIgnoreCase(tokens[8], geneName) == -1)
                        return null;
                    final GTFLine gtfLine = codec.decode(line);
                    if (gtfLine == null)
                        return null;
                    if (tokens[2].equals("gene")) {
                        if (!(geneName.equals(gtfLine.getAttribute("gene_id")) || geneName.equals(gtfLine.getAttribute("gene_name"))))
                            return null;
                    } else if (tokens[2].equals("transcript")) {
                        if (!(geneName.equals(gtfLine.getAttribute("transcript_id"))))
                            return null;
                    }
                    final String ctg = cvt.apply(gtfLine.getContig());
                    if (StringUtils.isBlank(ctg))
                        return null;
                    return new SimpleInterval(ctg, gtfLine.getStart(), gtfLine.getEnd());
                }).filter(R -> R != null).findFirst().orElse(null);
            }
        }
        if (loc == null)
            loc = new SimpleInterval("undefined_interval", 1, 1);
    }
    String prefix = StringUtils.now() + ".";
    if (loc != null) {
        prefix += loc.getContig() + "_" + loc.getStart() + "_" + loc.getEnd() + ".";
    }
    final CRAMReferenceSource referenceSource = (selected.stream().anyMatch(I -> I instanceof BamInput) ? new ReferenceSource(faidxRef) : null);
    try (PrintStream out = new PrintStream(response.getOutputStream())) {
        final String charset = StringUtils.ifBlank(request.getCharacterEncoding(), "UTF-8");
        response.setCharacterEncoding(charset);
        if (selected.size() == 1) {
            final AbstractInput first = selected.get(0);
            final String fname = prefix + first.getOutputName();
            response.addHeader("Content-Type", first.getContentType());
            response.addHeader("Content-Disposition", "attachment; name=\"" + fname + "\"; filename=\"" + fname + "\"");
            response.setContentType("data/binary; charset=" + charset.toLowerCase());
            first.dump(loc, out, referenceSource);
        } else {
            final String fname = prefix + HtsFileServer.class.getSimpleName() + ".zip";
            response.addHeader("Content-Type", "application/zip");
            response.addHeader("Content-Disposition", "attachment; name=\"" + fname + "\"; filename=\"" + fname + "\"");
            response.setContentType("data/binary; charset=" + charset.toLowerCase());
            final ZipOutputStream zout = new ZipOutputStream(out, Charset.forName(charset));
            zout.setLevel(0);
            for (AbstractInput input : selected) {
                final ZipEntry zipEntry = new ZipEntry(prefix + input.getOutputName());
                zout.putNextEntry(zipEntry);
                OutputStream uos = IOUtils.uncloseableOutputStream(zout);
                input.dump(loc, uos, referenceSource);
                zout.closeEntry();
                if (out.checkError())
                    break;
            }
            zout.finish();
        }
        out.flush();
    }
}
Also used : ServletContextHandler(org.eclipse.jetty.servlet.ServletContextHandler) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) ServletException(javax.servlet.ServletException) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) CRAMReferenceSource(htsjdk.samtools.cram.ref.CRAMReferenceSource) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) Map(java.util.Map) XMLStreamException(javax.xml.stream.XMLStreamException) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) Server(org.eclipse.jetty.server.Server) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) HttpServlet(javax.servlet.http.HttpServlet) Logger(com.github.lindenb.jvarkit.util.log.Logger) ServletHolder(org.eclipse.jetty.servlet.ServletHolder) Set(java.util.Set) SAMFileWriter(htsjdk.samtools.SAMFileWriter) GTFCodec(com.github.lindenb.jvarkit.util.bio.gtf.GTFCodec) SAMRecord(htsjdk.samtools.SAMRecord) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ZipOutputStream(java.util.zip.ZipOutputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) HashMap(java.util.HashMap) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ReferenceSource(htsjdk.samtools.cram.ref.ReferenceSource) GTFLine(com.github.lindenb.jvarkit.util.bio.gtf.GTFLine) HttpServletRequest(javax.servlet.http.HttpServletRequest) Charset(java.nio.charset.Charset) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) TabixReader(htsjdk.tribble.readers.TabixReader) OutputStream(java.io.OutputStream) PrintStream(java.io.PrintStream) Locatable(htsjdk.samtools.util.Locatable) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) HttpServletResponse(javax.servlet.http.HttpServletResponse) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) PrintStream(java.io.PrintStream) ZipEntry(java.util.zip.ZipEntry) ZipOutputStream(java.util.zip.ZipOutputStream) BlockCompressedOutputStream(htsjdk.samtools.util.BlockCompressedOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) GTFCodec(com.github.lindenb.jvarkit.util.bio.gtf.GTFCodec) CRAMReferenceSource(htsjdk.samtools.cram.ref.CRAMReferenceSource) ReferenceSource(htsjdk.samtools.cram.ref.ReferenceSource) CRAMReferenceSource(htsjdk.samtools.cram.ref.CRAMReferenceSource) ZipOutputStream(java.util.zip.ZipOutputStream) BufferedReader(java.io.BufferedReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) GTFLine(com.github.lindenb.jvarkit.util.bio.gtf.GTFLine) Locatable(htsjdk.samtools.util.Locatable)

Example 55 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class SetFileTools method sortAndMerge.

private List<Locatable> sortAndMerge(final List<? extends Locatable> L0) {
    final List<Locatable> L = new ArrayList<>(L0);
    // merge overlapping
    Collections.sort(L, theSorter);
    int i = 0;
    while (i + 1 < L.size()) {
        final Locatable xi = L.get(i);
        final Locatable xj = L.get(i + 1);
        if (xi.overlaps(xj)) {
            L.set(i, new SimpleInterval(xi.getContig(), Math.min(xi.getStart(), xj.getStart()), Math.max(xi.getEnd(), xj.getEnd())));
            L.remove(i + 1);
        } else {
            i++;
        }
    }
    return L;
}
Also used : ArrayList(java.util.ArrayList) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32