Search in sources :

Example 61 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class CoveragePlotter method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archive = null;
    PrintWriter manifest = null;
    try {
        if (extend < 1.0) {
            LOG.error("extend is lower than 1 :" + this.extend);
            return -1;
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
        final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(CoveragePlotter.this.refPath).validationStringency(ValidationStringency.LENIENT);
        final List<Path> inputBams = IOUtils.unrollPaths(this.bamsPath);
        if (inputBams.isEmpty()) {
            LOG.error("input bam file missing.");
            return -1;
        }
        Iterator<? extends Locatable> iter;
        final String input = oneFileOrNull(args);
        if (input == null) {
            final BedLineCodec codec = new BedLineCodec();
            final LineIterator liter = new LineIterator(stdin());
            iter = new AbstractIterator<Locatable>() {

                @Override
                protected Locatable advance() {
                    while (liter.hasNext()) {
                        final String line = liter.next();
                        final BedLine bed = codec.decode(line);
                        if (bed == null) {
                            continue;
                        }
                        return bed;
                    }
                    liter.close();
                    return null;
                }
            };
        } else {
            iter = IntervalListProvider.from(input).dictionary(dict).stream().iterator();
        }
        final BufferedImage image = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
        final BufferedImage offscreen = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
        final double y_mid = this.dimension.getHeight() / 2.0;
        final ToDoubleFunction<Double> normToPixelY = NORM -> this.dimension.getHeight() - NORM * y_mid;
        final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
        manifest = (this.manifestPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestPath));
        archive = ArchiveFactory.open(this.outputFile);
        while (iter.hasNext()) {
            final Locatable the_locatable = iter.next();
            String label = "";
            if (the_locatable instanceof BedLine) {
                final BedLine bedline = BedLine.class.cast(the_locatable);
                label = bedline.getOrDefault(3, label);
            }
            final SimpleInterval rawRegion = new SimpleInterval(the_locatable);
            final SimpleInterval extendedRegion;
            /* extend interval */
            if (this.extend > 1) {
                final int L1 = rawRegion.getLengthOnReference();
                final int L2 = (int) Math.ceil(L1 * this.extend);
                final int mid = rawRegion.getCenter().getPosition();
                int x0 = mid - L2 / 2;
                if (x0 < 0)
                    x0 = 1;
                int x1 = mid + L2 / 2;
                final SAMSequenceRecord ssr = dict.getSequence(rawRegion.getContig());
                if (ssr != null)
                    x1 = Math.min(ssr.getSequenceLength(), x1);
                if (x0 > x1)
                    continue;
                extendedRegion = new SimpleInterval(rawRegion.getContig(), x0, x1);
            } else {
                extendedRegion = rawRegion;
            }
            final ToDoubleFunction<Integer> pos2pixel = POS -> (POS - extendedRegion.getStart()) / (double) extendedRegion.getLengthOnReference() * this.dimension.getWidth();
            final String theGeneName = this.getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(null);
            final String outputFilename = this.prefix + extendedRegion.getContig() + "_" + extendedRegion.getStart() + "_" + extendedRegion.getEnd() + (StringUtils.isBlank(label) ? "" : "." + label.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + (StringUtils.isBlank(theGeneName) || !StringUtils.isBlank(label) ? "" : "." + theGeneName.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + ".png";
            final Graphics2D g2 = offscreen.createGraphics();
            g2.setColor(Color.BLACK);
            g2.setComposite(AlphaComposite.getInstance(AlphaComposite.CLEAR));
            g2.fillRect(0, 0, this.dimension.width, this.dimension.height);
            g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER));
            g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g2.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            final Graphics2D g = image.createGraphics();
            g.setColor(Color.WHITE);
            g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            g.fillRect(0, 0, this.dimension.width, this.dimension.height);
            // draw exons
            getGenes(extendedRegion).filter(G -> G.getType().equals("exon")).forEach(EX -> {
                g.setColor(new Color(240, 240, 240));
                double x1 = pos2pixel.applyAsDouble(EX.getStart());
                double x2 = pos2pixel.applyAsDouble(EX.getEnd());
                g.fill(new Rectangle2D.Double(x1, 0, (x2 - x1), this.dimension.height));
            });
            int y = (int) (this.dimension.height / 2.0);
            g.setColor(Color.BLUE);
            g.drawLine(0, y, image.getWidth(), y);
            y = (int) (this.dimension.height / 4.0);
            g.setColor(Color.CYAN);
            g.drawLine(0, y, image.getWidth(), y);
            y = (int) (3.0 * this.dimension.height / 4.0);
            g.drawLine(0, y, image.getWidth(), y);
            g.setColor(Color.DARK_GRAY);
            g.drawRect(0, 0, this.dimension.width - 1, this.dimension.height - 1);
            drawGenes(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
            drawKnownCnv(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
            if (this.extend > 1) {
                g.setColor(Color.GREEN);
                int x = (int) pos2pixel.applyAsDouble(rawRegion.getStart());
                g.drawLine(x, 0, x, image.getHeight());
                x = (int) pos2pixel.applyAsDouble(rawRegion.getEnd());
                g.drawLine(x, 0, x, image.getHeight());
            }
            final int[] depth = new int[extendedRegion.getLengthOnReference()];
            final int[] copy = new int[depth.length];
            final BitSet blackListedPositions = new BitSet(depth.length);
            final Map<String, Point2D> sample2maxPoint = new HashMap<>(inputBams.size());
            boolean drawAbove = false;
            // fill black listed regions
            if (this.blackListedPath != null) {
                try (TabixReader tbr = new TabixReader(this.blackListedPath.toString())) {
                    final ContigNameConverter cvt = ContigNameConverter.fromContigSet(tbr.getChromosomes());
                    final String ctg = cvt.apply(extendedRegion.getContig());
                    if (!StringUtils.isBlank(ctg)) {
                        final BedLineCodec codec = new BedLineCodec();
                        final TabixReader.Iterator tbxr = tbr.query(ctg, extendedRegion.getStart(), extendedRegion.getEnd());
                        for (; ; ) {
                            final String line = tbxr.next();
                            if (line == null)
                                break;
                            final BedLine bed = codec.decode(line);
                            if (bed == null)
                                continue;
                            int p1 = Math.max(bed.getStart(), extendedRegion.getStart());
                            while (p1 <= extendedRegion.getEnd() && p1 <= bed.getEnd()) {
                                blackListedPositions.set(p1 - extendedRegion.getStart());
                                ++p1;
                            }
                        }
                    }
                } catch (Throwable err) {
                    LOG.warn(err);
                }
            }
            if (this.skip_original_interval_for_median && this.extend > 1) {
                for (int i = the_locatable.getStart(); i <= the_locatable.getEnd(); i++) {
                    blackListedPositions.set(i - extendedRegion.getStart());
                }
            }
            final Set<String> sampleNames = new TreeSet<>();
            for (final Path path : inputBams) {
                try (SamReader sr = samReaderFactory.open(path)) {
                    final SAMFileHeader header = sr.getFileHeader();
                    final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
                    sampleNames.add(sample);
                    /* sample has deletion */
                    int count_has_dp_le_0_5 = 0;
                    /* sample has dup */
                    int count_has_dp_ge_1_5 = 0;
                    /* longest arc */
                    int longest_arc = 0;
                    SequenceUtil.assertSequenceDictionariesEqual(dict, header.getSequenceDictionary());
                    Arrays.fill(depth, 0);
                    try (CloseableIterator<SAMRecord> siter = sr.queryOverlapping(extendedRegion.getContig(), extendedRegion.getStart(), extendedRegion.getEnd())) {
                        while (siter.hasNext()) {
                            final SAMRecord rec = siter.next();
                            if (rec.getReadUnmappedFlag())
                                continue;
                            if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
                                continue;
                            int ref = rec.getStart();
                            final Cigar cigar = rec.getCigar();
                            if (cigar == null)
                                continue;
                            if (this.alpha_arc > 0.0 && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && !rec.getProperPairFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && (!this.arc_only_rr_ff || (rec.getReadNegativeStrandFlag() == rec.getMateNegativeStrandFlag()))) {
                                longest_arc = Math.max(longest_arc, Math.abs(rec.getMateAlignmentStart() - rec.getAlignmentStart()));
                                final double xstart = pos2pixel.applyAsDouble(rec.getAlignmentStart());
                                final double xend = pos2pixel.applyAsDouble(rec.getMateAlignmentStart());
                                final double len = (xend - xstart);
                                if (Math.abs(len) > this.min_invert && Math.abs(len) < this.max_invert) {
                                    final double y2 = y_mid + (drawAbove ? -1 : 1) * Math.min(y_mid, Math.abs(len / 2.0));
                                    final Composite oldComposite = g2.getComposite();
                                    g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha_arc));
                                    g2.setColor(getColor("arc", Color.ORANGE));
                                    final GeneralPath curve = new GeneralPath();
                                    curve.moveTo(xstart, y_mid);
                                    curve.quadTo(xstart + len / 2.0, y2, xend, y_mid);
                                    g2.draw(curve);
                                    g2.setComposite(oldComposite);
                                    drawAbove = !drawAbove;
                                }
                            }
                            for (CigarElement ce : cigar) {
                                final CigarOperator op = ce.getOperator();
                                final int len = ce.getLength();
                                if (op.consumesReferenceBases()) {
                                    if (op.consumesReadBases()) {
                                        for (int i = 0; i < len; i++) {
                                            final int pos = ref + i;
                                            if (pos < extendedRegion.getStart())
                                                continue;
                                            if (pos > extendedRegion.getEnd())
                                                break;
                                            depth[pos - extendedRegion.getStart()]++;
                                        }
                                    }
                                    ref += len;
                                }
                            }
                        }
                    // loop cigar
                    }
                    if (extendedRegion.getLengthOnReference() > image.getWidth() && extendedRegion.getLengthOnReference() > this.window_smooth_size) {
                        // smooth
                        final int bases_per_pixel = this.window_smooth_size;
                        System.arraycopy(depth, 0, copy, 0, depth.length);
                        for (int i = 0; i < depth.length && bases_per_pixel > 1; i++) {
                            double t = 0;
                            int count = 0;
                            for (int j = Math.max(0, i - bases_per_pixel); j <= i + bases_per_pixel && j < depth.length; j++) {
                                t += copy[j];
                                count++;
                            }
                            if (count == 0)
                                continue;
                            depth[i] = (int) (t / count);
                        }
                    }
                    discreteMedian.clear();
                    for (int i = 0; i < depth.length; i++) {
                        if (depth[i] > this.max_depth)
                            continue;
                        if (blackListedPositions.get(i))
                            continue;
                        discreteMedian.add(depth[i]);
                    }
                    final double median = discreteMedian.getMedian().orElse(0);
                    if (median <= 0) {
                        final String msg = "Skipping " + sample + " " + extendedRegion + " because median is 0";
                        LOG.warning(msg);
                        manifest.println("#" + msg);
                        continue;
                    }
                    Point2D max_position = null;
                    double max_distance_to_1 = 0.0;
                    final GeneralPath line = new GeneralPath();
                    for (int x = 0; x < image.getWidth(); x++) {
                        discreteMedian.clear();
                        int pos1 = (int) Math.floor(((x + 0) / (double) image.getWidth()) * depth.length);
                        final int pos2 = (int) Math.ceil(((x + 0) / (double) image.getWidth()) * depth.length);
                        while (pos1 <= pos2 && pos1 < depth.length) {
                            discreteMedian.add(depth[pos1]);
                            pos1++;
                        }
                        final double average = discreteMedian.getMedian().orElse(0);
                        final double normDepth = (average / median);
                        if (normDepth <= 0.5)
                            count_has_dp_le_0_5++;
                        if (normDepth >= 1.5)
                            count_has_dp_ge_1_5++;
                        final double y2 = normToPixelY.applyAsDouble(normDepth);
                        double distance_to_1 = Math.abs(normDepth - 1.0);
                        if (distance_to_1 > 0.3 && (max_position == null || distance_to_1 > max_distance_to_1)) {
                            max_distance_to_1 = distance_to_1;
                            max_position = new Point2D.Double(x, y2);
                        }
                        if (x == 0) {
                            line.moveTo(x, y2);
                        } else {
                            line.lineTo(x, y2);
                        }
                    }
                    g.setColor(max_distance_to_1 < 0.1 ? Color.lightGray : Color.GRAY);
                    final Stroke oldStroke = g.getStroke();
                    final Composite oldComposite = g.getComposite();
                    g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha));
                    g.setStroke(new BasicStroke(0.5f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_ROUND));
                    g.draw(line);
                    g.setStroke(oldStroke);
                    g.setComposite(oldComposite);
                    if (max_position != null)
                        sample2maxPoint.put(sample, max_position);
                    // fill manifest
                    manifest.print(rawRegion.getContig());
                    manifest.print("\t");
                    manifest.print(rawRegion.getStart() - 1);
                    manifest.print("\t");
                    manifest.print(rawRegion.getEnd());
                    manifest.print("\t");
                    manifest.print(extendedRegion.getStart() - 1);
                    manifest.print("\t");
                    manifest.print(extendedRegion.getEnd());
                    manifest.print("\t");
                    manifest.print(outputFilename);
                    manifest.print("\t");
                    manifest.print(StringUtils.isBlank(theGeneName) ? "." : theGeneName);
                    manifest.print("\t");
                    manifest.print(sample);
                    manifest.print("\t");
                    manifest.print(count_has_dp_le_0_5);
                    manifest.print("\t");
                    manifest.print((int) ((count_has_dp_le_0_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
                    manifest.print("\t");
                    manifest.print(count_has_dp_ge_1_5);
                    manifest.print("\t");
                    manifest.print((int) ((count_has_dp_ge_1_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
                    manifest.print("\t");
                    manifest.print(median);
                    manifest.print("\t");
                    manifest.print(discreteMedian.getStandardDeviation().orElse(-99999));
                    manifest.print("\t");
                    manifest.print(longest_arc);
                    manifest.println();
                }
            // end loop over samples
            }
            g2.dispose();
            g.drawImage(offscreen, 0, 0, null);
            g.setColor(Color.BLACK);
            final String label_samples = sampleNames.size() > 10 ? "N=" + StringUtils.niceInt(sampleNames.size()) : String.join(";", sampleNames);
            final Set<String> all_genes = getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(F -> !StringUtils.isBlank(F)).collect(Collectors.toCollection(TreeSet::new));
            g.drawString(extendedRegion.toNiceString() + " Length:" + StringUtils.niceInt(extendedRegion.getLengthOnReference()) + " Sample(s):" + label_samples + " " + label + " Gene(s):" + (all_genes.size() > 20 ? "N=" + StringUtils.niceInt(all_genes.size()) : String.join(";", all_genes)), 10, 10);
            if (!sample2maxPoint.isEmpty()) {
                /**
                 * draw sample names
                 */
                g.setColor(getColor("sample", Color.BLUE));
                final int sampleFontSize = 7;
                final Font oldFont = g.getFont();
                g.setFont(new Font(oldFont.getName(), Font.PLAIN, sampleFontSize));
                for (final String sample : sample2maxPoint.keySet()) {
                    final Point2D pt = sample2maxPoint.get(sample);
                    double sny = pt.getY();
                    if (sny > y_mid)
                        sny += sampleFontSize;
                    g.drawString(sample, (int) pt.getX(), (int) Math.min(this.dimension.height - sampleFontSize, Math.max(sampleFontSize, sny)));
                }
                g.setFont(oldFont);
            }
            g.dispose();
            try (OutputStream out = archive.openOuputStream(outputFilename)) {
                ImageIO.write(image, "PNG", out);
                out.flush();
            }
        }
        // end while iter
        archive.close();
        archive = null;
        manifest.flush();
        manifest.close();
        manifest = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(archive);
    }
}
Also used : Color(java.awt.Color) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Point2D(java.awt.geom.Point2D) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Rectangle2D(java.awt.geom.Rectangle2D) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) RenderingHints(java.awt.RenderingHints) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) DimensionConverter(com.github.lindenb.jvarkit.jcommander.converter.DimensionConverter) Map(java.util.Map) ImageIO(javax.imageio.ImageIO) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Composite(java.awt.Composite) BufferedImage(java.awt.image.BufferedImage) Font(java.awt.Font) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) GTFCodec(com.github.lindenb.jvarkit.util.bio.gtf.GTFCodec) SAMRecord(htsjdk.samtools.SAMRecord) Dimension(java.awt.Dimension) List(java.util.List) Stream(java.util.stream.Stream) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) ToDoubleFunction(java.util.function.ToDoubleFunction) ColorUtils(com.github.lindenb.jvarkit.util.swing.ColorUtils) BasicStroke(java.awt.BasicStroke) GeneralPath(java.awt.geom.GeneralPath) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Rectangle(java.awt.Rectangle) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) IterableAdapter(htsjdk.samtools.util.IterableAdapter) HashMap(java.util.HashMap) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) GTFLine(com.github.lindenb.jvarkit.util.bio.gtf.GTFLine) Interval(htsjdk.samtools.util.Interval) AlphaComposite(java.awt.AlphaComposite) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Graphics2D(java.awt.Graphics2D) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) TabixReader(htsjdk.tribble.readers.TabixReader) StreamSupport(java.util.stream.StreamSupport) GraphicsState(com.github.lindenb.jvarkit.swing.GraphicsState) VCFConstants(htsjdk.variant.vcf.VCFConstants) Stroke(java.awt.Stroke) OutputStream(java.io.OutputStream) Line2D(java.awt.geom.Line2D) AbstractIterator(htsjdk.samtools.util.AbstractIterator) Locatable(htsjdk.samtools.util.Locatable) IntToDoubleFunction(java.util.function.IntToDoubleFunction) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) Pileup(com.github.lindenb.jvarkit.samtools.util.Pileup) DynamicParameter(com.beust.jcommander.DynamicParameter) BitSet(java.util.BitSet) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) Rectangle(java.awt.Rectangle) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Point2D(java.awt.geom.Point2D) TreeSet(java.util.TreeSet) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Color(java.awt.Color) BitSet(java.util.BitSet) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Locatable(htsjdk.samtools.util.Locatable) BasicStroke(java.awt.BasicStroke) GeneralPath(java.awt.geom.GeneralPath) TabixReader(htsjdk.tribble.readers.TabixReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LineIterator(com.github.lindenb.jvarkit.util.iterator.LineIterator) BufferedImage(java.awt.image.BufferedImage) Font(java.awt.Font) SamReader(htsjdk.samtools.SamReader) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) GeneralPath(java.awt.geom.GeneralPath) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) BasicStroke(java.awt.BasicStroke) Stroke(java.awt.Stroke) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Composite(java.awt.Composite) AlphaComposite(java.awt.AlphaComposite) Rectangle2D(java.awt.geom.Rectangle2D) Graphics2D(java.awt.Graphics2D) Cigar(htsjdk.samtools.Cigar)

Example 62 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class SvToSVG method doWork.

@Override
public int doWork(final List<String> args) {
    /* parse interval */
    if (this.intervalStrList.isEmpty()) {
        LOG.error("interval(s) undefined");
        return -1;
    }
    this.drawinAreaWidth = Math.max(100, this.drawinAreaWidth);
    try {
        if (this.fastaFile != null) {
            this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.fastaFile);
        }
        if (this.vcfFile != null) {
            this.vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
        }
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        final DocumentBuilder db = dbf.newDocumentBuilder();
        this.document = db.newDocument();
        final List<Path> bamFiles = IOUtils.unrollPaths(args);
        if (bamFiles.isEmpty()) {
            LOG.error("bam(s) undefined");
            return -1;
        }
        final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.fastaFile);
        /* loop over each bam file */
        for (final Path bamFile : bamFiles) {
            try (final SamReader sr = srf.open(bamFile)) {
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
                final Set<String> samples = header.getReadGroups().stream().map(G -> G.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
                if (samples.size() != 1) {
                    LOG.error("expected on sample in " + bamFile + " but got " + samples.size() + " " + samples);
                    return -1;
                }
                final Sample sample = new Sample(samples.iterator().next());
                this.sampleList.add(sample);
                /* loop over each region for that sample */
                for (final String intervalStr : this.intervalStrList) {
                    LOG.info("scanning " + intervalStr + " for " + bamFile);
                    final SimpleInterval interval = parser.apply(intervalStr).orElseThrow(IntervalParserFactory.exception(intervalStr));
                    if (interval == null) {
                        LOG.error("Cannot parse " + intervalStr + " for " + bamFile);
                        return -1;
                    }
                    /* create new region */
                    final Sample.Region region = sample.new Region(interval);
                    if (!sample.regions.isEmpty() && region.getLengthOnReference() != sample.regions.get(0).getLengthOnReference()) {
                        LOG.warning("intervals should all have the same length");
                    }
                    sample.regions.add(region);
                    try (CloseableIterator<SAMRecord> iter = sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false)) {
                        while (iter.hasNext()) {
                            final SAMRecord record = iter.next();
                            boolean trace = record.getReadName().equals(DEBUG_READ);
                            if (!SAMRecordDefaultFilter.accept(record, this.min_mapq))
                                continue;
                            if (trace)
                                LOG.debug("got1");
                            final Cigar cigar = record.getCigar();
                            if (cigar == null || cigar.isEmpty())
                                continue;
                            if (trace)
                                LOG.debug("got2");
                            if (remove_simple_reads) {
                                if ((!cigar.isClipped() && SAMUtils.getOtherCanonicalAlignments(record).isEmpty() && (record.getReadPairedFlag() && record.getProperPairFlag())))
                                    continue;
                            }
                            if (!record.getContig().equals(interval.getContig()))
                                continue;
                            if (trace)
                                LOG.debug("got3");
                            final Sample.ShortRead shortRead = sample.new DefaultShortRead(region, record);
                            if (shortRead.getEnd() < interval.getStart())
                                continue;
                            if (shortRead.getStart() > interval.getEnd())
                                continue;
                            if (trace)
                                LOG.debug("got4" + record.getSAMString());
                            if (!sample.sampleName.equals(shortRead.getSampleName()))
                                continue;
                            region.beforePileup.add(shortRead);
                        }
                    }
                /* en iterator */
                }
            /* end loop interval */
            }
        /* end open SAM */
        }
        this.sampleList.forEach(S -> S.regions.forEach(R -> R.addSplitReads()));
        this.sampleList.forEach(S -> S.regions.forEach(R -> R.pileup()));
        /* sort per sample */
        this.sampleList.sort((A, B) -> A.sampleName.compareTo(B.sampleName));
        /* sort per region */
        this.sampleList.stream().forEach(SL -> SL.regions.sort((A, B) -> {
            final SmartComparator sc = new SmartComparator();
            int i = sc.compare(A.getContig(), B.getContig());
            if (i != 0)
                return i;
            return A.getStart() - B.getStart();
        }));
        buildDocument();
        final Transformer tr = TransformerFactory.newInstance().newTransformer();
        final Result result;
        if (this.outputFile != null) {
            result = new StreamResult(this.outputFile);
        } else {
            result = new StreamResult(stdout());
        }
        tr.transform(new DOMSource(this.document), result);
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.vcfFileReader);
        this.document = null;
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) Result(javax.xml.transform.Result) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) StaticCodeExtractor(com.github.lindenb.jvarkit.lang.StaticCodeExtractor) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) DocumentFragment(org.w3c.dom.DocumentFragment) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) SmartComparator(com.github.lindenb.jvarkit.lang.SmartComparator) Stream(java.util.stream.Stream) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) CoordMath(htsjdk.samtools.util.CoordMath) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IntStream(java.util.stream.IntStream) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) SamReader(htsjdk.samtools.SamReader) File(java.io.File) Element(org.w3c.dom.Element) TreeMap(java.util.TreeMap) DocumentBuilder(javax.xml.parsers.DocumentBuilder) TransformerFactory(javax.xml.transform.TransformerFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) StreamResult(javax.xml.transform.stream.StreamResult) Result(javax.xml.transform.Result) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Optional(java.util.Optional) StreamResult(javax.xml.transform.stream.StreamResult) SmartComparator(com.github.lindenb.jvarkit.lang.SmartComparator) Cigar(htsjdk.samtools.Cigar) DocumentBuilder(javax.xml.parsers.DocumentBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 63 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class SvToSVG method buildSample.

private Element buildSample(final Sample sample, final DocumentFragment animationLayer) {
    final Element sampleRoot = element("g");
    double curr_y = sample.getY();
    final Element sampleLabel = element("text", sample.sampleName);
    sampleLabel.setAttribute("x", "5");
    sampleLabel.setAttribute("y", format(curr_y + 14));
    sampleLabel.setAttribute("class", "samplename");
    sampleRoot.appendChild(sampleLabel);
    curr_y += 20;
    // loop over regions
    for (final Sample.Region region : sample.regions) {
        region.y = curr_y;
        // genomic sequence will be used to test if there is a mismatch. No garantee it exists.
        final GenomicSequence genomicSequence;
        if (this.indexedFastaSequenceFile != null) {
            final ContigNameConverter ctgConver = ContigNameConverter.fromOneDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
            final String sname = ctgConver.apply(region.getContig());
            if (StringUtil.isBlank(sname)) {
                genomicSequence = null;
            } else {
                genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, sname);
            }
        } else {
            genomicSequence = null;
        }
        final Element regionRoot = element("g");
        sampleRoot.appendChild(regionRoot);
        final Element rgnLabel = element("text", new SimpleInterval(region).toNiceString() + " Length:" + this.niceIntFormat.format((region.getLengthOnReference())));
        rgnLabel.setAttribute("x", "5");
        rgnLabel.setAttribute("y", format(curr_y + 12));
        rgnLabel.setAttribute("class", "samplename");
        regionRoot.appendChild(rgnLabel);
        curr_y += 20;
        final double y_top_region = curr_y;
        if (this.coverageHeight > 0) {
            /* draw coverage */
            final TreeMap<Integer, Long> pos2cov = region.shortReadStream().filter(R -> R.isDefaultShortRead()).flatMapToInt(R -> R.getRecord().getAlignmentBlocks().stream().flatMapToInt(RB -> java.util.stream.IntStream.rangeClosed(RB.getReferenceStart(), RB.getReferenceStart() + RB.getLength()))).filter(P -> P >= region.getStart()).filter(P -> P <= region.getEnd()).mapToObj(P -> P).collect(Collectors.groupingBy(Function.identity(), () -> new TreeMap<>(), Collectors.counting()));
            ;
            final long max_cov = pos2cov.values().stream().mapToLong(L -> L.longValue()).max().orElse(1L);
            final Element covPath = element("path");
            covPath.setAttribute("class", "coverage");
            regionRoot.appendChild(covPath);
            final StringBuilder sb = new StringBuilder();
            sb.append("M 0 " + format(curr_y + this.coverageHeight));
            for (int k = 0; k < this.drawinAreaWidth; k++) {
                final int pos1 = region.getStart() + (int) (((k + 0) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
                final int pos2 = region.getStart() + (int) (((k + 1) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
                final double dp = IntStream.range(pos1, pos2).filter(P -> pos2cov.containsKey(P)).mapToLong(P -> pos2cov.get(P)).max().orElseGet(() -> 0L);
                final double dpy = curr_y + this.coverageHeight - (dp / (double) max_cov) * (double) this.coverageHeight;
                sb.append(" L " + format(k) + " " + format(dpy));
            }
            sb.append(" L " + format(this.drawinAreaWidth) + " " + format(curr_y + this.coverageHeight));
            sb.append(" Z");
            covPath.setAttribute("d", sb.toString());
            covPath.appendChild(element("title", "Coverage. Max:" + niceIntFormat.format(max_cov)));
            curr_y += this.coverageHeight;
            curr_y += 2;
        }
        /* print all lines */
        for (int nLine = 0; nLine < region.lines.size(); ++nLine) {
            final List<Sample.ShortRead> line = region.lines.get(nLine);
            // loop over records on that line
            for (final Sample.ShortRead shortRead : line) {
                boolean trace = shortRead.getRecord().getReadName().equals(DEBUG_READ);
                shortRead.y = curr_y;
                int ref1 = shortRead.getStart();
                final double leftX = shortRead.getPixelStart();
                final double midy = this.featureHeight / 2.0;
                final double maxy = this.featureHeight;
                final Element g1 = element("g");
                final Element g3;
                if (shortRead.isSplitRead()) {
                    final Sample.SplitRead splitRead = Sample.SplitRead.class.cast(shortRead);
                    // final double dx = -0.5*shortRead.getPixelLength();
                    // final double dy = -0.5*shortRead.getHeight();
                    splitRead.g_rotate = element("g");
                    splitRead.g_rotate.setAttribute("transform", "rotate(0)");
                    g1.appendChild(splitRead.g_rotate);
                    g3 = element("g");
                    g3.setAttribute("transform", "translate(0,0)");
                    splitRead.g_rotate.appendChild(g3);
                    splitRead.g_translate2 = g3;
                } else {
                    g3 = g1;
                }
                g1.setAttribute("transform", "translate(" + format(shortRead.getPixelStart()) + "," + format(shortRead.getY()) + ")");
                // readElement.setAttribute("style","transform-origin:center");
                if (shortRead.isSplitRead()) {
                    // always in front
                    animationLayer.appendChild(g1);
                } else {
                    regionRoot.insertBefore(g1, regionRoot.getFirstChild());
                }
                final BiPredicate<Integer, Integer> isMismatch = (readpos0, refpos1) -> {
                    if (genomicSequence == null)
                        return false;
                    if (refpos1 < 1 || refpos1 > genomicSequence.length())
                        return false;
                    char refC = Character.toUpperCase(genomicSequence.charAt(refpos1 - 1));
                    if (refC == 'N')
                        return false;
                    final byte[] bases = shortRead.getRecord().getReadBases();
                    if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
                        return false;
                    if (readpos0 < 0 || readpos0 >= bases.length) {
                        LOG.warn("P=" + readpos0 + " 0<x<" + bases.length);
                        return false;
                    }
                    char readC = (char) Character.toUpperCase(bases[readpos0]);
                    if (readC == 'N')
                        return false;
                    return readC != refC;
                };
                shortRead.g_translate1 = g1;
                int readpos0 = 0;
                int readpos0_clipped = 0;
                final Element title = element("title", shortRead.getRecord().getPairedReadName() + " " + shortRead.getRecord().getCigarString() + " " + (shortRead.getRecord().getReadNegativeStrandFlag() ? "-" : "+"));
                g3.appendChild(title);
                final DocumentFragment insertionsFragment = this.document.createDocumentFragment();
                final Cigar cigar = shortRead.getRecord().getCigar();
                for (int cigarIdx = 0; cigarIdx < cigar.numCigarElements(); cigarIdx++) {
                    final CigarElement ce = cigar.getCigarElement(cigarIdx);
                    final CigarOperator op = ce.getOperator();
                    int next_ref = ref1;
                    int next_read = readpos0;
                    switch(op) {
                        case I:
                            {
                                next_read += ce.getLength();
                                readpos0_clipped += ce.getLength();
                                if (!(ref1 + ce.getLength() < region.getStart() || ref1 > region.getEnd())) {
                                    final double ce_length = region.baseToPixel(region.getStart() + ce.getLength());
                                    final Element rectInsert = element("rect");
                                    rectInsert.setAttribute("class", "insert");
                                    rectInsert.setAttribute("x", format(region.baseToPixel(ref1) - leftX));
                                    rectInsert.setAttribute("y", format(0));
                                    rectInsert.setAttribute("width", format(this.svgDuration > 0 && ce_length > 1 ? ce_length : 1));
                                    rectInsert.setAttribute("height", format(this.featureHeight));
                                    rectInsert.appendChild(element("title", this.niceIntFormat.format(ce.getLength())));
                                    if (this.svgDuration > 0 && ce_length > 1) {
                                        final Element anim = element("animate");
                                        rectInsert.appendChild(anim);
                                        anim.setAttribute("attributeType", "XML");
                                        anim.setAttribute("attributeName", "width");
                                        anim.setAttribute("begin", "0s");
                                        anim.setAttribute("from", format(ce_length));
                                        anim.setAttribute("to", "1");
                                        anim.setAttribute("dur", String.valueOf(this.svgDuration) + "s");
                                        anim.setAttribute("repeatCount", this.svgRepeatCount);
                                        anim.setAttribute("fill", "freeze");
                                    }
                                    insertionsFragment.appendChild(rectInsert);
                                }
                                break;
                            }
                        case P:
                            continue;
                        case S:
                        case H:
                        case M:
                        case X:
                        case EQ:
                            {
                                next_ref += ce.getLength();
                                if (!op.equals(CigarOperator.H)) {
                                    next_read += ce.getLength();
                                }
                                if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
                                    final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
                                    final StringBuilder sb = new StringBuilder();
                                    final Element path = element("path");
                                    path.setAttribute("class", "op" + op.name() + (shortRead.isDefaultShortRead() ? "" : "x"));
                                    if (trace)
                                        path.setAttribute("style", "fill:blue;");
                                    if (!op.isClipping() && shortRead.isDefaultShortRead() && shortRead.getRecord().getReadPairedFlag() && !shortRead.getRecord().getProperPairFlag()) {
                                        if (!shortRead.getContig().equals(shortRead.getRecord().getMateReferenceName())) {
                                            path.setAttribute("style", "fill:orchid;");
                                        } else {
                                            path.setAttribute("style", "fill:lightblue;");
                                        }
                                    }
                                    // arrow <--
                                    if (cigarIdx == 0 && shortRead.isNegativeStrand()) {
                                        sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
                                        sb.append(" h ").append(format(distance_pix));
                                        sb.append(" v ").append(format(maxy));
                                        sb.append(" h ").append(format(-(distance_pix)));
                                        sb.append(" l ").append(format(-arrow_w)).append(',').append(-featureHeight / 2.0);
                                        sb.append(" Z");
                                    } else // arrow -->
                                    if (cigarIdx + 1 == cigar.numCigarElements() && !shortRead.isNegativeStrand()) {
                                        sb.append("M ").append(format(region.baseToPixel(ref1) - leftX + distance_pix)).append(',').append(0);
                                        sb.append(" h ").append(format(-(distance_pix)));
                                        sb.append(" v ").append(format(maxy));
                                        sb.append(" h ").append(format(distance_pix));
                                        sb.append(" l ").append(format(arrow_w)).append(',').append(-featureHeight / 2.0);
                                        sb.append(" Z");
                                    } else {
                                        sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
                                        sb.append(" h ").append(format(distance_pix));
                                        sb.append(" v ").append(format(maxy));
                                        sb.append(" h ").append(format(-(distance_pix)));
                                        sb.append(" Z");
                                    }
                                    path.setAttribute("d", sb.toString());
                                    g3.appendChild(path);
                                    if (op.isAlignment() && genomicSequence != null && !hideMismatch) {
                                        for (int x = 0; x < ce.getLength(); ++x) {
                                            if (!isMismatch.test(readpos0_clipped + x, ref1 + x))
                                                continue;
                                            if (ref1 + x < region.getStart())
                                                continue;
                                            if (ref1 + x > region.getEnd())
                                                continue;
                                            final Element rectMismatch = element("rect");
                                            rectMismatch.setAttribute("class", "mismatch");
                                            rectMismatch.setAttribute("x", format(region.baseToPixel(ref1 + x) - leftX));
                                            rectMismatch.setAttribute("y", format(0));
                                            rectMismatch.setAttribute("width", format((region.baseToPixel(ref1 + x + 1) - region.baseToPixel(ref1 + x))));
                                            rectMismatch.setAttribute("height", format(this.featureHeight));
                                            g3.appendChild(rectMismatch);
                                        }
                                    }
                                }
                                if (!op.isClipping()) {
                                    readpos0_clipped += ce.getLength();
                                }
                                break;
                            }
                        case D:
                        case N:
                            {
                                next_ref += ce.getLength();
                                if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
                                    final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
                                    final Element lineE = element("line");
                                    lineE.setAttribute("class", "opD");
                                    lineE.setAttribute("x1", format(region.baseToPixel(ref1) - leftX));
                                    lineE.setAttribute("y1", format(midy));
                                    lineE.setAttribute("x2", format(region.baseToPixel(ref1) - leftX + distance_pix));
                                    lineE.setAttribute("y2", format(midy));
                                    g3.insertBefore(lineE, g3.getFirstChild());
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException(op.name());
                    }
                    ref1 = next_ref;
                    readpos0 = next_read;
                }
                g3.appendChild(insertionsFragment);
            }
            curr_y += (this.featureHeight + 3);
        }
        for (int x = 1; x < 10; ++x) {
            final double xx = (this.drawinAreaWidth / 10.0) * x;
            final Element line = element("line");
            line.setAttribute("class", "ruler");
            line.setAttribute("x1", format(xx));
            line.setAttribute("y1", format(y_top_region));
            line.setAttribute("x2", format(xx));
            line.setAttribute("y2", format(curr_y));
            line.appendChild(element("title", niceIntFormat.format(region.getStart() + (int) (region.getLengthOnReference() / 10.0) * x)));
            regionRoot.insertBefore(line, regionRoot.getFirstChild());
        }
        /**
         * print variants
         */
        if (this.vcfFileReader != null) {
            final VCFHeader header = this.vcfFileReader.getHeader();
            final SAMSequenceDictionary vcfdict = header.getSequenceDictionary();
            final ContigNameConverter ctgConver = (vcfdict == null ? null : ContigNameConverter.fromOneDictionary(vcfdict));
            final String sname = ctgConver == null ? null : ctgConver.apply(region.getContig());
            if (!StringUtil.isBlank(sname)) {
                final CloseableIterator<VariantContext> iter = this.vcfFileReader.query(region.getContig(), region.getStart(), region.getEnd());
                while (iter.hasNext()) {
                    final VariantContext ctx = iter.next();
                    if (!ctx.isVariant())
                        continue;
                    final double x1 = Math.max(0, region.baseToPixel(ctx.getStart()));
                    final double x2 = Math.min(region.baseToPixel(ctx.getEnd() + 1), this.drawinAreaWidth);
                    final Genotype g = ctx.getGenotype(sample.sampleName);
                    final Element rect = element("rect");
                    rect.setAttribute("class", "variant" + (g == null ? "" : g.getType().name()));
                    rect.setAttribute("x", format(x1));
                    rect.setAttribute("y", format(y_top_region));
                    rect.setAttribute("width", format(x2 - x1));
                    rect.setAttribute("height", format(curr_y - y_top_region));
                    rect.appendChild(element("title", niceIntFormat.format(ctx.getStart()) + "-" + niceIntFormat.format(ctx.getEnd()) + " " + ctx.getReference().getDisplayString()));
                    regionRoot.insertBefore(rect, regionRoot.getFirstChild());
                }
                iter.close();
            }
        }
        region.height = curr_y - region.y;
    }
    sample.height = curr_y - sample.y;
    final Element frame = element("rect");
    frame.setAttribute("class", "frame");
    frame.setAttribute("x", format(0));
    frame.setAttribute("y", format(sample.getY()));
    frame.setAttribute("width", format(drawinAreaWidth));
    frame.setAttribute("height", format(sample.getHeight()));
    sampleRoot.appendChild(frame);
    return sampleRoot;
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) Result(javax.xml.transform.Result) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) StaticCodeExtractor(com.github.lindenb.jvarkit.lang.StaticCodeExtractor) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) DocumentFragment(org.w3c.dom.DocumentFragment) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) SmartComparator(com.github.lindenb.jvarkit.lang.SmartComparator) Stream(java.util.stream.Stream) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) CoordMath(htsjdk.samtools.util.CoordMath) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IntStream(java.util.stream.IntStream) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) SamReader(htsjdk.samtools.SamReader) File(java.io.File) Element(org.w3c.dom.Element) TreeMap(java.util.TreeMap) DocumentBuilder(javax.xml.parsers.DocumentBuilder) TransformerFactory(javax.xml.transform.TransformerFactory) CigarElement(htsjdk.samtools.CigarElement) Element(org.w3c.dom.Element) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) DocumentFragment(org.w3c.dom.DocumentFragment) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) TreeMap(java.util.TreeMap) CigarElement(htsjdk.samtools.CigarElement) Cigar(htsjdk.samtools.Cigar)

Example 64 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class BamStats05 method readBedFile.

private Map<String, List<SimpleInterval>> readBedFile(final Path bedFile) throws IOException {
    final Map<String, List<SimpleInterval>> gene2interval = new TreeMap<String, List<SimpleInterval>>();
    BufferedReader bedIn = null;
    try {
        bedIn = IOUtils.openPathForBufferedReading(bedFile);
        final BedLineCodec codec = new BedLineCodec();
        String line = null;
        while ((line = bedIn.readLine()) != null) {
            if (StringUtils.isBlank(line) || line.startsWith("#"))
                continue;
            final BedLine bedLine = codec.decode(line);
            if (bedLine == null)
                continue;
            if (bedLine.getColumnCount() < 4) {
                throw new IOException("bad bed line  (expected 4 columns CHROM/START/END/GENE) in " + line + " " + bedFile);
            }
            final String chrom = bedLine.getContig();
            final int chromStart1 = bedLine.getStart();
            final int chromEnd1 = bedLine.getEnd();
            final String gene = bedLine.get(3);
            if (StringUtils.isBlank(gene))
                throw new IOException("bad bed gene in " + line + " " + bedFile);
            List<SimpleInterval> intervals = gene2interval.get(gene);
            if (intervals == null) {
                intervals = new ArrayList<>();
                gene2interval.put(gene, intervals);
            } else if (!intervals.get(0).getContig().equals(chrom)) {
                throw new IOException("more than one chromosome for gene:" + gene + " " + intervals.get(0) + " and " + bedLine.toInterval());
            } else {
                if (!this.mergeOverlapping) {
                    intervals.stream().filter(R -> R.overlaps(bedLine)).findFirst().ifPresent(R -> {
                        throw new IllegalArgumentException("overlapping region: " + bedLine + " and " + R + "\nUse option --merge to merge overlapping regions.");
                    });
                }
            }
            intervals.add(new SimpleInterval(chrom, chromStart1, chromEnd1));
            if (this.mergeOverlapping) {
                intervals.sort((A, B) -> Integer.compare(A.getStart(), B.getStart()));
                int x = 0;
                while (x + 1 < intervals.size()) {
                    final SimpleInterval i1 = intervals.get(x + 0);
                    final SimpleInterval i2 = intervals.get(x + 1);
                    if (i1.overlaps(i2)) {
                        intervals.remove(x + 1);
                        intervals.set(x + 0, i1.merge(i2));
                    } else {
                        x++;
                    }
                }
            }
        }
        bedIn.close();
        return gene2interval;
    } finally {
        CloserUtil.close(bedIn);
    }
}
Also used : ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) DiscreteMedian(com.github.lindenb.jvarkit.math.DiscreteMedian) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) TreeMap(java.util.TreeMap) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IOException(java.io.IOException) TreeMap(java.util.TreeMap) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 65 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class BamXtremDepth method bitSetToLocatables.

private List<Locatable> bitSetToLocatables(final SAMSequenceRecord ssr, final BitSet bitSet) {
    final List<Locatable> L = new ArrayList<>();
    int i = 0;
    while (i < bitSet.size()) {
        i = bitSet.nextSetBit(i);
        if (i == -1)
            break;
        int j = i + 1;
        while (j < bitSet.size() && bitSet.get(j)) {
            j++;
        }
        // bitSet is zero based
        L.add(new SimpleInterval(ssr.getContig(), i + 1, j));
        i = j;
    }
    return L;
}
Also used : ArrayList(java.util.ArrayList) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32