Search in sources :

Example 6 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class CoveragePlotter method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archive = null;
    PrintWriter manifest = null;
    try {
        if (extend < 1.0) {
            LOG.error("extend is lower than 1 :" + this.extend);
            return -1;
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(CoveragePlotter.this.refPath).validationStringency(ValidationStringency.LENIENT);
        final List<Path> inputBams = IOUtils.unrollPaths(this.bamsPath);
        if (inputBams.isEmpty()) {
            LOG.error("input bam file missing.");
            return -1;
        }
        Iterator<? extends Locatable> iter;
        final String input = oneFileOrNull(args);
        if (input == null) {
            final BedLineCodec codec = new BedLineCodec();
            final LineIterator liter = new LineIterator(stdin());
            iter = new AbstractIterator<Locatable>() {

                @Override
                protected Locatable advance() {
                    while (liter.hasNext()) {
                        final String line = liter.next();
                        final BedLine bed = codec.decode(line);
                        if (bed == null) {
                            continue;
                        }
                        return bed;
                    }
                    liter.close();
                    return null;
                }
            };
        } else {
            iter = IntervalListProvider.from(input).dictionary(dict).stream().iterator();
        }
        final BufferedImage image = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
        final BufferedImage offscreen = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
        final double y_mid = this.dimension.getHeight() / 2.0;
        final ToDoubleFunction<Double> normToPixelY = NORM -> this.dimension.getHeight() - NORM * y_mid;
        final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
        manifest = (this.manifestPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestPath));
        archive = ArchiveFactory.open(this.outputFile);
        while (iter.hasNext()) {
            final Locatable the_locatable = iter.next();
            String label = "";
            if (the_locatable instanceof BedLine) {
                final BedLine bedline = BedLine.class.cast(the_locatable);
                label = bedline.getOrDefault(3, label);
            }
            final SimpleInterval rawRegion = new SimpleInterval(the_locatable);
            final SimpleInterval extendedRegion;
            /* extend interval */
            if (this.extend > 1) {
                final int L1 = rawRegion.getLengthOnReference();
                final int L2 = (int) Math.ceil(L1 * this.extend);
                final int mid = rawRegion.getCenter().getPosition();
                int x0 = mid - L2 / 2;
                if (x0 < 0)
                    x0 = 1;
                int x1 = mid + L2 / 2;
                final SAMSequenceRecord ssr = dict.getSequence(rawRegion.getContig());
                if (ssr != null)
                    x1 = Math.min(ssr.getSequenceLength(), x1);
                if (x0 > x1)
                    continue;
                extendedRegion = new SimpleInterval(rawRegion.getContig(), x0, x1);
            } else {
                extendedRegion = rawRegion;
            }
            final ToDoubleFunction<Integer> pos2pixel = POS -> (POS - extendedRegion.getStart()) / (double) extendedRegion.getLengthOnReference() * this.dimension.getWidth();
            final String theGeneName = this.getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(null);
            final String outputFilename = this.prefix + extendedRegion.getContig() + "_" + extendedRegion.getStart() + "_" + extendedRegion.getEnd() + (StringUtils.isBlank(label) ? "" : "." + label.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + (StringUtils.isBlank(theGeneName) || !StringUtils.isBlank(label) ? "" : "." + theGeneName.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + ".png";
            final Graphics2D g2 = offscreen.createGraphics();
            g2.setColor(Color.BLACK);
            g2.setComposite(AlphaComposite.getInstance(AlphaComposite.CLEAR));
            g2.fillRect(0, 0, this.dimension.width, this.dimension.height);
            g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER));
            g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g2.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            final Graphics2D g = image.createGraphics();
            g.setColor(Color.WHITE);
            g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            g.fillRect(0, 0, this.dimension.width, this.dimension.height);
            // draw exons
            getGenes(extendedRegion).filter(G -> G.getType().equals("exon")).forEach(EX -> {
                g.setColor(new Color(240, 240, 240));
                double x1 = pos2pixel.applyAsDouble(EX.getStart());
                double x2 = pos2pixel.applyAsDouble(EX.getEnd());
                g.fill(new Rectangle2D.Double(x1, 0, (x2 - x1), this.dimension.height));
            });
            int y = (int) (this.dimension.height / 2.0);
            g.setColor(Color.BLUE);
            g.drawLine(0, y, image.getWidth(), y);
            y = (int) (this.dimension.height / 4.0);
            g.setColor(Color.CYAN);
            g.drawLine(0, y, image.getWidth(), y);
            y = (int) (3.0 * this.dimension.height / 4.0);
            g.drawLine(0, y, image.getWidth(), y);
            g.setColor(Color.DARK_GRAY);
            g.drawRect(0, 0, this.dimension.width - 1, this.dimension.height - 1);
            drawGenes(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
            drawKnownCnv(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
            if (this.extend > 1) {
                g.setColor(Color.GREEN);
                int x = (int) pos2pixel.applyAsDouble(rawRegion.getStart());
                g.drawLine(x, 0, x, image.getHeight());
                x = (int) pos2pixel.applyAsDouble(rawRegion.getEnd());
                g.drawLine(x, 0, x, image.getHeight());
            }
            final int[] depth = new int[extendedRegion.getLengthOnReference()];
            final int[] copy = new int[depth.length];
            final BitSet blackListedPositions = new BitSet(depth.length);
            final Map<String, Point2D> sample2maxPoint = new HashMap<>(inputBams.size());
            boolean drawAbove = false;
            // fill black listed regions
            if (this.blackListedPath != null) {
                try (TabixReader tbr = new TabixReader(this.blackListedPath.toString())) {
                    final ContigNameConverter cvt = ContigNameConverter.fromContigSet(tbr.getChromosomes());
                    final String ctg = cvt.apply(extendedRegion.getContig());
                    if (!StringUtils.isBlank(ctg)) {
                        final BedLineCodec codec = new BedLineCodec();
                        final TabixReader.Iterator tbxr = tbr.query(ctg, extendedRegion.getStart(), extendedRegion.getEnd());
                        for (; ; ) {
                            final String line = tbxr.next();
                            if (line == null)
                                break;
                            final BedLine bed = codec.decode(line);
                            if (bed == null)
                                continue;
                            int p1 = Math.max(bed.getStart(), extendedRegion.getStart());
                            while (p1 <= extendedRegion.getEnd() && p1 <= bed.getEnd()) {
                                blackListedPositions.set(p1 - extendedRegion.getStart());
                                ++p1;
                            }
                        }
                    }
                } catch (Throwable err) {
                    LOG.warn(err);
                }
            }
            if (this.skip_original_interval_for_median && this.extend > 1) {
                for (int i = the_locatable.getStart(); i <= the_locatable.getEnd(); i++) {
                    blackListedPositions.set(i - extendedRegion.getStart());
                }
            }
            final Set<String> sampleNames = new TreeSet<>();
            for (final Path path : inputBams) {
                try (SamReader sr = samReaderFactory.open(path)) {
                    final SAMFileHeader header = sr.getFileHeader();
                    final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
                    sampleNames.add(sample);
                    /* sample has deletion */
                    int count_has_dp_le_0_5 = 0;
                    /* sample has dup */
                    int count_has_dp_ge_1_5 = 0;
                    /* longest arc */
                    int longest_arc = 0;
                    SequenceUtil.assertSequenceDictionariesEqual(dict, header.getSequenceDictionary());
                    Arrays.fill(depth, 0);
                    try (CloseableIterator<SAMRecord> siter = sr.queryOverlapping(extendedRegion.getContig(), extendedRegion.getStart(), extendedRegion.getEnd())) {
                        while (siter.hasNext()) {
                            final SAMRecord rec = siter.next();
                            if (rec.getReadUnmappedFlag())
                                continue;
                            if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
                                continue;
                            int ref = rec.getStart();
                            final Cigar cigar = rec.getCigar();
                            if (cigar == null)
                                continue;
                            if (this.alpha_arc > 0.0 && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && !rec.getProperPairFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && (!this.arc_only_rr_ff || (rec.getReadNegativeStrandFlag() == rec.getMateNegativeStrandFlag()))) {
                                longest_arc = Math.max(longest_arc, Math.abs(rec.getMateAlignmentStart() - rec.getAlignmentStart()));
                                final double xstart = pos2pixel.applyAsDouble(rec.getAlignmentStart());
                                final double xend = pos2pixel.applyAsDouble(rec.getMateAlignmentStart());
                                final double len = (xend - xstart);
                                if (Math.abs(len) > this.min_invert && Math.abs(len) < this.max_invert) {
                                    final double y2 = y_mid + (drawAbove ? -1 : 1) * Math.min(y_mid, Math.abs(len / 2.0));
                                    final Composite oldComposite = g2.getComposite();
                                    g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha_arc));
                                    g2.setColor(getColor("arc", Color.ORANGE));
                                    final GeneralPath curve = new GeneralPath();
                                    curve.moveTo(xstart, y_mid);
                                    curve.quadTo(xstart + len / 2.0, y2, xend, y_mid);
                                    g2.draw(curve);
                                    g2.setComposite(oldComposite);
                                    drawAbove = !drawAbove;
                                }
                            }
                            for (CigarElement ce : cigar) {
                                final CigarOperator op = ce.getOperator();
                                final int len = ce.getLength();
                                if (op.consumesReferenceBases()) {
                                    if (op.consumesReadBases()) {
                                        for (int i = 0; i < len; i++) {
                                            final int pos = ref + i;
                                            if (pos < extendedRegion.getStart())
                                                continue;
                                            if (pos > extendedRegion.getEnd())
                                                break;
                                            depth[pos - extendedRegion.getStart()]++;
                                        }
                                    }
                                    ref += len;
                                }
                            }
                        }
                    // loop cigar
                    }
                    if (extendedRegion.getLengthOnReference() > image.getWidth() && extendedRegion.getLengthOnReference() > this.window_smooth_size) {
                        // smooth
                        final int bases_per_pixel = this.window_smooth_size;
                        System.arraycopy(depth, 0, copy, 0, depth.length);
                        for (int i = 0; i < depth.length && bases_per_pixel > 1; i++) {
                            double t = 0;
                            int count = 0;
                            for (int j = Math.max(0, i - bases_per_pixel); j <= i + bases_per_pixel && j < depth.length; j++) {
                                t += copy[j];
                                count++;
                            }
                            if (count == 0)
                                continue;
                            depth[i] = (int) (t / count);
                        }
                    }
                    discreteMedian.clear();
                    for (int i = 0; i < depth.length; i++) {
                        if (depth[i] > this.max_depth)
                            continue;
                        if (blackListedPositions.get(i))
                            continue;
                        discreteMedian.add(depth[i]);
                    }
                    final double median = discreteMedian.getMedian().orElse(0);
                    if (median <= 0) {
                        final String msg = "Skipping " + sample + " " + extendedRegion + " because median is 0";
                        LOG.warning(msg);
                        manifest.println("#" + msg);
                        continue;
                    }
                    Point2D max_position = null;
                    double max_distance_to_1 = 0.0;
                    final GeneralPath line = new GeneralPath();
                    for (int x = 0; x < image.getWidth(); x++) {
                        discreteMedian.clear();
                        int pos1 = (int) Math.floor(((x + 0) / (double) image.getWidth()) * depth.length);
                        final int pos2 = (int) Math.ceil(((x + 0) / (double) image.getWidth()) * depth.length);
                        while (pos1 <= pos2 && pos1 < depth.length) {
                            discreteMedian.add(depth[pos1]);
                            pos1++;
                        }
                        final double average = discreteMedian.getMedian().orElse(0);
                        final double normDepth = (average / median);
                        if (normDepth <= 0.5)
                            count_has_dp_le_0_5++;
                        if (normDepth >= 1.5)
                            count_has_dp_ge_1_5++;
                        final double y2 = normToPixelY.applyAsDouble(normDepth);
                        double distance_to_1 = Math.abs(normDepth - 1.0);
                        if (distance_to_1 > 0.3 && (max_position == null || distance_to_1 > max_distance_to_1)) {
                            max_distance_to_1 = distance_to_1;
                            max_position = new Point2D.Double(x, y2);
                        }
                        if (x == 0) {
                            line.moveTo(x, y2);
                        } else {
                            line.lineTo(x, y2);
                        }
                    }
                    g.setColor(max_distance_to_1 < 0.1 ? Color.lightGray : Color.GRAY);
                    final Stroke oldStroke = g.getStroke();
                    final Composite oldComposite = g.getComposite();
                    g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha));
                    g.setStroke(new BasicStroke(0.5f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_ROUND));
                    g.draw(line);
                    g.setStroke(oldStroke);
                    g.setComposite(oldComposite);
                    if (max_position != null)
                        sample2maxPoint.put(sample, max_position);
                    // fill manifest
                    manifest.print(rawRegion.getContig());
                    manifest.print("\t");
                    manifest.print(rawRegion.getStart() - 1);
                    manifest.print("\t");
                    manifest.print(rawRegion.getEnd());
                    manifest.print("\t");
                    manifest.print(extendedRegion.getStart() - 1);
                    manifest.print("\t");
                    manifest.print(extendedRegion.getEnd());
                    manifest.print("\t");
                    manifest.print(outputFilename);
                    manifest.print("\t");
                    manifest.print(StringUtils.isBlank(theGeneName) ? "." : theGeneName);
                    manifest.print("\t");
                    manifest.print(sample);
                    manifest.print("\t");
                    manifest.print(count_has_dp_le_0_5);
                    manifest.print("\t");
                    manifest.print((int) ((count_has_dp_le_0_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
                    manifest.print("\t");
                    manifest.print(count_has_dp_ge_1_5);
                    manifest.print("\t");
                    manifest.print((int) ((count_has_dp_ge_1_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
                    manifest.print("\t");
                    manifest.print(median);
                    manifest.print("\t");
                    manifest.print(discreteMedian.getStandardDeviation().orElse(-99999));
                    manifest.print("\t");
                    manifest.print(longest_arc);
                    manifest.println();
                }
            // end loop over samples
            }
            g2.dispose();
            g.drawImage(offscreen, 0, 0, null);
            g.setColor(Color.BLACK);
            final String label_samples = sampleNames.size() > 10 ? "N=" + StringUtils.niceInt(sampleNames.size()) : String.join(";", sampleNames);
            final Set<String> all_genes = getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(F -> !StringUtils.isBlank(F)).collect(Collectors.toCollection(TreeSet::new));
            g.drawString(extendedRegion.toNiceString() + " Length:" + StringUtils.niceInt(extendedRegion.getLengthOnReference()) + " Sample(s):" + label_samples + " " + label + " Gene(s):" + (all_genes.size() > 20 ? "N=" + StringUtils.niceInt(all_genes.size()) : String.join(";", all_genes)), 10, 10);
            if (!sample2maxPoint.isEmpty()) {
                /**
                 * draw sample names
                 */
                g.setColor(getColor("sample", Color.BLUE));
                final int sampleFontSize = 7;
                final Font oldFont = g.getFont();
                g.setFont(new Font(oldFont.getName(), Font.PLAIN, sampleFontSize));
                for (final String sample : sample2maxPoint.keySet()) {
                    final Point2D pt = sample2maxPoint.get(sample);
                    double sny = pt.getY();
                    if (sny > y_mid)
                        sny += sampleFontSize;
                    g.drawString(sample, (int) pt.getX(), (int) Math.min(this.dimension.height - sampleFontSize, Math.max(sampleFontSize, sny)));
                }
                g.setFont(oldFont);
            }
            g.dispose();
            try (OutputStream out = archive.openOuputStream(outputFilename)) {
                ImageIO.write(image, "PNG", out);
                out.flush();
            }
        }
        // end while iter
        archive.close();
        archive = null;
        manifest.flush();
        manifest.close();
        manifest = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(archive);
    }
}
Also used : Color(java.awt.Color) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Point2D(java.awt.geom.Point2D) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Rectangle2D(java.awt.geom.Rectangle2D) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) RenderingHints(java.awt.RenderingHints) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DimensionConverter(com.github.lindenb.jvarkit.jcommander.converter.DimensionConverter) Map(java.util.Map) ImageIO(javax.imageio.ImageIO) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Composite(java.awt.Composite) BufferedImage(java.awt.image.BufferedImage) Font(java.awt.Font) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) GTFCodec(com.github.lindenb.jvarkit.util.bio.gtf.GTFCodec) SAMRecord(htsjdk.samtools.SAMRecord) Dimension(java.awt.Dimension) List(java.util.List) Stream(java.util.stream.Stream) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) ToDoubleFunction(java.util.function.ToDoubleFunction) ColorUtils(com.github.lindenb.jvarkit.util.swing.ColorUtils) BasicStroke(java.awt.BasicStroke) GeneralPath(java.awt.geom.GeneralPath) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Rectangle(java.awt.Rectangle) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) IterableAdapter(htsjdk.samtools.util.IterableAdapter) HashMap(java.util.HashMap) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) GTFLine(com.github.lindenb.jvarkit.util.bio.gtf.GTFLine) Interval(htsjdk.samtools.util.Interval) AlphaComposite(java.awt.AlphaComposite) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Graphics2D(java.awt.Graphics2D) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) TabixReader(htsjdk.tribble.readers.TabixReader) StreamSupport(java.util.stream.StreamSupport) GraphicsState(com.github.lindenb.jvarkit.swing.GraphicsState) VCFConstants(htsjdk.variant.vcf.VCFConstants) Stroke(java.awt.Stroke) OutputStream(java.io.OutputStream) Line2D(java.awt.geom.Line2D) AbstractIterator(htsjdk.samtools.util.AbstractIterator) Locatable(htsjdk.samtools.util.Locatable) IntToDoubleFunction(java.util.function.IntToDoubleFunction) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) Pileup(com.github.lindenb.jvarkit.samtools.util.Pileup) DynamicParameter(com.beust.jcommander.DynamicParameter) BitSet(java.util.BitSet) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) Rectangle(java.awt.Rectangle) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Point2D(java.awt.geom.Point2D) TreeSet(java.util.TreeSet) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Color(java.awt.Color) BitSet(java.util.BitSet) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Locatable(htsjdk.samtools.util.Locatable) BasicStroke(java.awt.BasicStroke) GeneralPath(java.awt.geom.GeneralPath) TabixReader(htsjdk.tribble.readers.TabixReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) BufferedImage(java.awt.image.BufferedImage) Font(java.awt.Font) SamReader(htsjdk.samtools.SamReader) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) GeneralPath(java.awt.geom.GeneralPath) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) BasicStroke(java.awt.BasicStroke) Stroke(java.awt.Stroke) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Composite(java.awt.Composite) AlphaComposite(java.awt.AlphaComposite) Rectangle2D(java.awt.geom.Rectangle2D) Graphics2D(java.awt.Graphics2D) Cigar(htsjdk.samtools.Cigar)

Example 7 with DiscreteMedian

use of com.github.lindenb.jvarkit.math.DiscreteMedian in project jvarkit by lindenb.

the class WGSCoveragePlotter method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        if (!StringUtils.isBlank(this.skipContigExpr) && !StringUtils.isBlank(this.includeContigExpr)) {
            LOG.error("Both include/exclude patterns are not blank.");
            return -1;
        }
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        final DocumentBuilder db = dbf.newDocumentBuilder();
        this.document = db.newDocument();
        final Element svgRoot = element("svg");
        this.document.appendChild(svgRoot);
        svgRoot.setAttribute("width", format(dimension.width));
        svgRoot.setAttribute("height", format(dimension.height));
        final Element maintitle = element("title");
        svgRoot.appendChild(maintitle);
        final Element descr = element("desc");
        svgRoot.appendChild(descr);
        descr.appendChild(text("Made with " + getProgramName() + " " + getVersion() + " Author: Pierre Lindenbaum"));
        final Element style = element("style");
        svgRoot.appendChild(style);
        style.appendChild(text("g.maing {stroke:black;stroke-width:0.5px;fill:none;}\n" + "text.title {stroke:none;fill:black;stroke-width:1px;text-anchor:middle;}\n" + "rect.frame {stroke:darkgray;fill:none;stroke-width:0.5px;}\n" + "text.ruler {stroke:none;fill:darkgray;stroke-width:1px;text-anchor:end;}\n" + "text.chromName {stroke:none;fill:darkgray;stroke-width:1px;text-anchor:middle;}\n" + "text.xLabel {stroke:none;fill:black;stroke-width:1px;text-anchor:middle;}\n" + "text.yLabel {stroke:none;fill:black;stroke-width:1px;text-anchor:middle;}\n" + ".cov0 {stroke:" + dynaParams.getOrDefault("stroke-cov0", plot_using_points ? "slategray" : "gray") + ";fill:" + dynaParams.getOrDefault("fill-cov0", "antiquewhite") + ";stroke-width:0.5px;}\n" + ".cov1 {stroke:" + dynaParams.getOrDefault("stroke-cov1", plot_using_points ? "#61666B" : "gray") + ";fill:" + dynaParams.getOrDefault("fill-cov0", "beige") + ";stroke-width:0.5px;}\n" + "rect.average {stroke:green;fill:green;stroke-width:0.5px;opacity:" + dynaParams.getOrDefault("average-opacity", "0.3") + ";}\n" + "rect.median {stroke:red;fill:red;stroke-width:0.5px;opacity:" + dynaParams.getOrDefault("median-opacity", "0.3") + ";}\n" + "line.ruler {stroke:darkgray;stroke-dasharray:4;fill:none;stroke-width:1;}\n" + ""));
        if (this.plot_using_points) {
            final int dz = Integer.parseInt(dynaParams.getOrDefault("point-size", "5"));
            final Element defsE = element("defs");
            svgRoot.appendChild(defsE);
            final Element g = element("g");
            defsE.appendChild(g);
            g.setAttribute("id", "cross");
            Element L = element("line");
            L.setAttribute("x1", format(-dz));
            L.setAttribute("x2", format(dz));
            L.setAttribute("y1", format(0));
            L.setAttribute("y2", format(0));
            g.appendChild(L);
            L = element("line");
            L.setAttribute("x1", format(0));
            L.setAttribute("x2", format(0));
            L.setAttribute("y1", format(-dz));
            L.setAttribute("y2", format(dz));
            g.appendChild(L);
        }
        final Element mainG = element("g");
        mainG.setAttribute("class", "maing");
        svgRoot.appendChild(mainG);
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
        final List<ChromInfo> chromInfos = dict.getSequences().stream().filter(SR -> SR.getSequenceLength() >= this.min_contig_length).filter(SR -> StringUtils.isBlank(this.skipContigExpr) || !SR.getSequenceName().matches(this.skipContigExpr)).filter(SR -> StringUtils.isBlank(this.includeContigExpr) || SR.getSequenceName().matches(this.includeContigExpr)).map(SR -> new ChromInfo(SR)).collect(Collectors.toList());
        if (chromInfos.isEmpty()) {
            LOG.info("no valid chromosome was found in " + this.refPath);
            return -1;
        }
        final double pixelsBetweenChromosomes = Double.parseDouble(dynaParams.getOrDefault("distanceBetweenChromosomes", "1"));
        final double marginLeft = Double.parseDouble(dynaParams.getOrDefault("margin-left", "100"));
        final double marginRight = Double.parseDouble(dynaParams.getOrDefault("margin-right", "10"));
        final double marginTop = Double.parseDouble(dynaParams.getOrDefault("margin-top", "80"));
        final double marginBottom = Double.parseDouble(dynaParams.getOrDefault("margin-bottom", "50"));
        final double drawingWidth = this.dimension.width - (marginLeft + marginRight);
        final double drawingHeight = this.dimension.height - (marginTop + marginBottom);
        if (drawingWidth <= 0) {
            LOG.error("Bad image dimension");
            return -1;
        }
        final Element g_chroms = element("g");
        g_chroms.setAttribute("transform", "translate(" + marginLeft + "," + marginTop + ")");
        g_chroms.setAttribute("id", "chromosomes");
        mainG.appendChild(g_chroms);
        final long genomeLength = chromInfos.stream().mapToLong(CI -> CI.ssr.getSequenceLength()).sum();
        if (genomeLength <= 0L) {
            throw new IllegalStateException("genome-length<=0 :" + genomeLength);
        }
        final double pixelsPerBase = (drawingWidth - (pixelsBetweenChromosomes * (chromInfos.size() - 1))) / (double) genomeLength;
        if (pixelsPerBase <= 0) {
            throw new IllegalStateException("pixel per base <=0 with drawing-width:" + drawingWidth + " distance-between-contigs:" + pixelsBetweenChromosomes + " n-chromosomes:" + chromInfos.size() + " genome-length:" + genomeLength);
        }
        double x = marginRight;
        for (int i = 0; i < chromInfos.size(); i++) {
            final ChromInfo ci = chromInfos.get(i);
            ci.idx = i;
            ci.x = x;
            ci.width = ci.ssr.getSequenceLength() * pixelsPerBase;
            x = (ci.x + ci.width) + pixelsBetweenChromosomes;
        }
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(this.refPath).validationStringency(ValidationStringency.LENIENT);
        final Path input = Paths.get(oneAndOnlyOneFile(args));
        maintitle.appendChild(text(input.getFileName().toString()));
        try (SamReader sr = samReaderFactory.open(input)) {
            if (!sr.hasIndex()) {
                LOG.error("input bam " + input + " is not indexed.");
                return -1;
            }
            final SAMFileHeader header = sr.getFileHeader();
            final Set<String> limitRGIds;
            if (StringUtils.isBlank(this.limitSamples)) {
                limitRGIds = null;
            } else {
                final Set<String> sampleSet = Arrays.stream(this.limitSamples.split("[ ,]+")).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet());
                limitRGIds = header.getReadGroups().stream().filter(RG -> sampleSet.contains(this.groupBy.apply(RG, ""))).map(RG -> RG.getId()).collect(Collectors.toSet());
                if (limitRGIds.isEmpty()) {
                    LOG.error("samples are specified " + groupBy.name() + ":(" + this.limitSamples + ") but I cannot find any in the read groups of " + input);
                    return -1;
                }
            }
            final SamRecordFilter samReadFilter = new SamRecordFilter() {

                @Override
                public boolean filterOut(final SAMRecord rec) {
                    if (!SAMRecordDefaultFilter.accept(rec, min_mapq))
                        return true;
                    if (limitRGIds != null) {
                        final SAMReadGroupRecord rg = rec.getReadGroup();
                        if (rg == null)
                            return true;
                        if (!limitRGIds.contains(rg.getId()))
                            return true;
                    }
                    return false;
                }

                @Override
                public boolean filterOut(SAMRecord first, SAMRecord second) {
                    return filterOut(first) && filterOut(second);
                }
            };
            // title is path + sample(s)
            final Element g_label = element("text", input.getFileName().toString() + " " + header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toCollection(TreeSet::new)).stream().collect(Collectors.joining(" ")));
            mainG.appendChild(g_label);
            g_label.setAttribute("class", "title");
            g_label.setAttribute("x", format(marginLeft + drawingWidth / 2.0));
            g_label.setAttribute("y", format(marginTop / 2.0));
            g_label.appendChild(element("title", input.toString()));
            SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(header));
            if (this.max_depth == -1) {
                LOG.info("Computing " + this.percentile.name() + " depth...");
                final ProgressFactory.Watcher<SAMSequenceRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
                final DiscreteMedian<Integer> median = new DiscreteMedian<>();
                for (final ChromInfo ci : chromInfos) {
                    int[] coverage = getContigCoverage(sr, progress.apply(ci.ssr), samReadFilter);
                    for (int i = 0; i < coverage.length; i++) {
                        median.add(coverage[i]);
                    }
                    coverage = null;
                    System.gc();
                }
                progress.close();
                final DiscreteMedian.Tendency t;
                if (this.percentile.equals(DiscreteMedian.Tendency.median)) {
                    t = this.percentile;
                } else {
                    // if percentile is min, everything will be at 0...
                    t = DiscreteMedian.Tendency.average;
                }
                max_depth = median.isEmpty() ? 0 : (int) (Double.parseDouble(dynaParams.getOrDefault("factor-y", "2.0")) * median.getTendency(t).orElse(0.0));
                if (max_depth <= 0)
                    max_depth = 1;
                LOG.info("Now using max depth for y axis=" + this.max_depth);
            }
            if (max_depth < 1) {
                LOG.error("Bad user 'max-depth':" + max_depth);
                return -1;
            }
            final ProgressFactory.Watcher<SAMSequenceRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
            for (final ChromInfo ci : chromInfos) {
                progress.apply(ci.ssr);
                int[] coverage = getContigCoverage(sr, ci.ssr, samReadFilter);
                final Element g = element("g");
                g.setAttribute("transform", "translate(" + ci.x + ",0)");
                g.setAttribute("id", "chrom-" + ci.ssr.getSequenceName());
                g_chroms.appendChild(g);
                final DiscreteMedian<Integer> ctg_median = new DiscreteMedian<>();
                final Element polyline = element(this.plot_using_points ? "g" : "polygon");
                g.appendChild(polyline);
                final List<Point2D> points = new ArrayList<>((int) ci.width);
                polyline.setAttribute("class", "cov" + (ci.idx % 2));
                if (!plot_using_points)
                    points.add(new Point2D.Double(0, drawingHeight));
                for (int i = 0; i < coverage.length; ++i) {
                    ctg_median.add(coverage[i]);
                }
                for (x = 0; x < ci.width; x++) {
                    int x1 = (int) ((x + 0) * (1.0 / pixelsPerBase));
                    final int x2 = (int) Math.min(ci.ssr.getSequenceLength(), (x + 1) * (1.0 / pixelsPerBase));
                    final DiscreteMedian<Integer> median = new DiscreteMedian<>();
                    while (x1 < x2 && x1 < coverage.length) {
                        median.add(coverage[x1]);
                        ++x1;
                    }
                    final OptionalDouble dbl = median.getTendency(this.percentile);
                    if (dbl.isPresent()) {
                        double dbl2 = dbl.getAsDouble();
                        if (dbl2 > this.max_depth && this.cap_depth) {
                            if (this.plot_using_points)
                                continue;
                            dbl2 = this.max_depth;
                        }
                        points.add(new Point2D.Double(x, drawingHeight - (dbl2 / this.max_depth) * drawingHeight));
                    }
                }
                if (!plot_using_points)
                    points.add(new Point2D.Double(ci.width, drawingHeight));
                if (this.plot_using_points) {
                    for (final Point2D pt : points) {
                        final Element use = element("use");
                        use.setAttribute("href", "#cross");
                        use.setAttribute("x", format(pt.getX()));
                        use.setAttribute("y", format(pt.getY()));
                        polyline.appendChild(use);
                    }
                } else {
                    // simplify, remove points on the same line y
                    int i = 1;
                    while (i + 3 < points.size()) /* ignore first and last */
                    {
                        final Point2D p1 = points.get(i + 0);
                        final Point2D p2 = points.get(i + 1);
                        final Point2D p3 = points.get(i + 2);
                        if (p1.getY() == p2.getY() && p2.getY() == p3.getY()) {
                            points.remove(i + 1);
                        } else {
                            i++;
                        }
                    }
                    polyline.setAttribute("points", points.stream().map(P -> format(P.getX()) + "," + format(P.getY())).collect(Collectors.joining(" ")));
                    polyline.appendChild(element("title", ci.ssr.getSequenceName()));
                }
                for (int side = 0; side < 2; ++side) {
                    final OptionalDouble g_dbl = (side == 0 ? ctg_median.getAverage() : ctg_median.getMedian());
                    if (g_dbl.isPresent() && g_dbl.orElse(0) < this.max_depth) {
                        double h = drawingHeight - ((g_dbl.getAsDouble() / this.max_depth) * drawingHeight);
                        final Element hline = element("rect");
                        hline.appendChild(element("title", (side == 0 ? "average:" : "median:") + format(g_dbl.orElse(0))));
                        g.appendChild(hline);
                        hline.setAttribute("class", (side == 0 ? "average" : "median"));
                        hline.setAttribute("x", "0");
                        hline.setAttribute("y", format(h));
                        hline.setAttribute("width", format(ci.width));
                        hline.setAttribute("height", "1");
                    }
                }
                final Element title = element("text", ci.ssr.getSequenceName());
                title.setAttribute("x", format(ci.width / 2.0));
                title.setAttribute("y", format(-12.0));
                title.setAttribute("class", "chromName");
                title.appendChild(element("title", ci.ssr.getSequenceName() + " " + StringUtils.niceInt(ci.ssr.getSequenceLength()) + " bp"));
                g.appendChild(title);
                final Element rect = element("rect");
                rect.setAttribute("x", "0");
                rect.setAttribute("y", "0");
                rect.setAttribute("width", format(ci.width));
                rect.setAttribute("height", format(drawingHeight));
                rect.setAttribute("class", "frame");
                g.appendChild(rect);
                coverage = null;
                System.gc();
            }
            progress.close();
            final Element rulers = element("g");
            g_chroms.appendChild(rulers);
            int prev_y = -1;
            for (int i = 1; i <= 10.0; i++) {
                final int y = (int) ((i / 10.0) * this.max_depth);
                if (y == prev_y || y == 0)
                    continue;
                final double h = drawingHeight - ((i / 10.0) * drawingHeight);
                final Element hline = element("line");
                rulers.appendChild(hline);
                hline.setAttribute("class", "ruler");
                hline.setAttribute("x1", "-2");
                hline.setAttribute("y1", format(h));
                hline.setAttribute("x2", format(drawingWidth));
                hline.setAttribute("y2", format(h));
                hline.appendChild(element("title", StringUtils.niceInt(y)));
                final Element label = element("text", StringUtils.niceInt(y));
                label.setAttribute("class", "ruler");
                label.setAttribute("x", "-5");
                label.setAttribute("y", format(h));
                rulers.appendChild(label);
                prev_y = y;
            }
            final Element xLabel = element("text", this.refPath.getFileName().toString() + " " + SequenceDictionaryUtils.getBuildName(dict).orElse("") + " (" + StringUtils.niceInt(genomeLength) + " bp)");
            xLabel.setAttribute("class", "xLabel");
            xLabel.setAttribute("x", format(marginLeft + drawingWidth / 2.0));
            xLabel.setAttribute("y", format(this.dimension.height - marginBottom / 2.0));
            xLabel.appendChild(element("title", this.refPath.toString()));
            mainG.appendChild(xLabel);
            final Element yLabel = element("text", "Coverage bp (" + this.percentile.name() + ")");
            yLabel.setAttribute("class", "yLabel");
            yLabel.setAttribute("x", "0");
            yLabel.setAttribute("y", "0");
            yLabel.setAttribute("transform", "translate(" + format(marginLeft / 4.0) + "," + format(marginTop + drawingHeight / 2.0) + ") rotate(90)");
            mainG.appendChild(yLabel);
        }
        final Transformer tr = TransformerFactory.newInstance().newTransformer();
        final Result result;
        if (this.outputFile != null) {
            result = new StreamResult(this.outputFile.toFile());
        } else {
            result = new StreamResult(stdout());
        }
        tr.transform(new DOMSource(this.document), result);
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        document = null;
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Point2D(java.awt.geom.Point2D) Program(com.github.lindenb.jvarkit.util.jcommander.Program) StreamResult(javax.xml.transform.stream.StreamResult) Result(javax.xml.transform.Result) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) Document(org.w3c.dom.Document) DimensionConverter(com.github.lindenb.jvarkit.jcommander.converter.DimensionConverter) Map(java.util.Map) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) Dimension(java.awt.Dimension) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) DOMSource(javax.xml.transform.dom.DOMSource) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Parameter(com.beust.jcommander.Parameter) OptionalDouble(java.util.OptionalDouble) HashMap(java.util.HashMap) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) Element(org.w3c.dom.Element) Paths(java.nio.file.Paths) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) TransformerFactory(javax.xml.transform.TransformerFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Element(org.w3c.dom.Element) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) StreamResult(javax.xml.transform.stream.StreamResult) Result(javax.xml.transform.Result) SamReader(htsjdk.samtools.SamReader) Point2D(java.awt.geom.Point2D) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) StreamResult(javax.xml.transform.stream.StreamResult) OptionalDouble(java.util.OptionalDouble) OptionalDouble(java.util.OptionalDouble) DocumentBuilder(javax.xml.parsers.DocumentBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

DiscreteMedian (com.github.lindenb.jvarkit.math.DiscreteMedian)7 Parameter (com.beust.jcommander.Parameter)6 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)6 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)6 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)6 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)6 Program (com.github.lindenb.jvarkit.util.jcommander.Program)6 Logger (com.github.lindenb.jvarkit.util.log.Logger)6 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)6 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)5 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)5 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 SAMRecord (htsjdk.samtools.SAMRecord)5 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)5 SamReader (htsjdk.samtools.SamReader)5 SamReaderFactory (htsjdk.samtools.SamReaderFactory)5 CloseableIterator (htsjdk.samtools.util.CloseableIterator)5 Path (java.nio.file.Path)5