Search in sources :

Example 36 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class Biostar480685 method doWork.

/**
 * Trims each mapped read pair down to the genomic segment covered by both mates and writes the result.
 *
 * <p>Input is read from the single file named in {@code args}, or from stdin when no file is given.
 * The header sort order must be {@code unsorted} or {@code queryname}; anything else is rejected.
 * Records are consumed in groups compared equal by read name. A group is written only when it
 * contains a primary, mapped, paired R1 and R2 (with mapped mates) that overlap each other and that
 * the clipper can clip to the shared region; every other group is dropped entirely.
 *
 * @param args command line arguments: zero or one input path
 * @return 0 on success, -1 when the sort order is unsupported or any error is thrown (it is logged)
 */
@Override
public int doWork(final List<String> args) {
    SamReader in = null;
    SAMFileWriter out = null;
    try {
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            // the same reference is used for decoding the input and for writing the output
            srf.referenceSequence(this.faidx);
            writingBamArgs.setReferencePath(this.faidx);
        }
        final String input = oneFileOrNull(args);
        if (input == null) {
            in = srf.open(SamInputResource.of(stdin()));
        } else {
            in = srf.open(SamInputResource.of(input));
        }
        final SAMFileHeader header = in.getFileHeader();
        // both mates of a pair must arrive in the same name group: coordinate-sorted input is refused
        if (!(header.getSortOrder().equals(SAMFileHeader.SortOrder.unsorted) || header.getSortOrder().equals(SAMFileHeader.SortOrder.queryname))) {
            LOG.error("input should be sorted with 'samtools sort -n' or 'samtools collate' but got " + header.getSortOrder());
            return -1;
        }
        final ReadClipper clipper = new ReadClipper();
        // alignment starts are rewritten below, so the output header is declared unsorted
        header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
        final SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getGitHash());
        JVarkitVersion.getInstance().addMetaData(this, header);
        out = this.writingBamArgs.openSamWriter(this.outputFile, header, true);
        try (CloseableIterator<List<SAMRecord>> iter = new EqualIterator<>(in.iterator(), (A, B) -> A.getReadName().compareTo(B.getReadName()))) {
            while (iter.hasNext()) {
                // one group of records that compare equal on read name
                final List<SAMRecord> buffer = iter.next();
                int read1_idx = -1;
                int read2_idx = -1;
                // locate the primary, mapped R1 and R2 whose mate is mapped too
                for (int i = 0; i < buffer.size(); i++) {
                    final SAMRecord rec = buffer.get(i);
                    if (!rec.getReadPairedFlag())
                        continue;
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.getMateUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getFirstOfPairFlag()) {
                        read1_idx = i;
                    } else if (rec.getSecondOfPairFlag()) {
                        read2_idx = i;
                    }
                }
                // incomplete pair: the whole group is skipped, nothing is written for it
                if (read1_idx == -1 || read2_idx == -1 || read1_idx == read2_idx)
                    continue;
                final SAMRecord rec1a = buffer.get(read1_idx);
                final SAMRecord rec2a = buffer.get(read2_idx);
                if (!rec1a.overlaps(rec2a))
                    continue;
                // segment shared by both mates
                final int chromStart = Math.max(rec1a.getStart(), rec2a.getStart());
                final int chromEnd = Math.min(rec1a.getEnd(), rec2a.getEnd());
                if (chromStart > chromEnd)
                    continue;
                final SimpleInterval rgn = new SimpleInterval(rec1a.getContig(), chromStart, chromEnd);
                // NOTE(review): clip() is defined elsewhere; it is treated here as returning null or an
                // unmapped record when nothing usable remains - confirm against ReadClipper
                final SAMRecord rec1b = clipper.clip(rec1a, rgn);
                if (rec1b == null || rec1b.getReadUnmappedFlag())
                    continue;
                final SAMRecord rec2b = clipper.clip(rec2a, rgn);
                if (rec2b == null || rec2b.getReadUnmappedFlag())
                    continue;
                rec1b.setAttribute("PG", prg.getId());
                rec2b.setAttribute("PG", prg.getId());
                // Set BOTH alignment starts before copying them into the mate fields: reading the
                // mate's start before it has been overwritten could leave a stale mate position.
                rec1b.setAlignmentStart(chromStart);
                rec2b.setAlignmentStart(chromStart);
                rec1b.setMateAlignmentStart(rec2b.getAlignmentStart());
                rec2b.setMateAlignmentStart(rec1b.getAlignmentStart());
                rec1b.setAttribute("MC", rec2b.getCigarString());
                rec2b.setAttribute("MC", rec1b.getCigarString());
                // the edit distance is no longer valid for the clipped alignment
                rec1b.setAttribute("NM", null);
                rec2b.setAttribute("NM", null);
                buffer.set(read1_idx, rec1b);
                buffer.set(read2_idx, rec2b);
                // write the whole group, including records that were not clipped
                for (SAMRecord rec : buffer) {
                    out.addAlignment(rec);
                }
            }
        }
        in.close();
        in = null;
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        // no-op for resources already closed and nulled above
        CloserUtil.close(in);
        CloserUtil.close(out);
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) ReadClipper(com.github.lindenb.jvarkit.tools.pcr.ReadClipper) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 37 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class BedCluster method mergeBedRecords.

/**
 * Collapses overlapping bed records.
 *
 * <p>When {@code merge_bed_records} is off the input list itself is returned. Otherwise a copy is
 * sorted with {@code defaultIntervalCmp} and walked once: each record is merged into the running
 * interval while the two overlap, and the running interval is emitted as soon as they do not.
 *
 * @param src the records to merge; never modified
 * @return {@code src} when merging is disabled, else a new list of merged intervals
 */
private List<SimpleInterval> mergeBedRecords(final List<SimpleInterval> src) {
    if (this.merge_bed_records) {
        final List<SimpleInterval> sorted = new ArrayList<>(src);
        Collections.sort(sorted, defaultIntervalCmp);
        final List<SimpleInterval> merged = new ArrayList<>(sorted.size());
        if (sorted.isEmpty()) {
            return merged;
        }
        // interval currently being extended
        SimpleInterval pending = sorted.get(0);
        for (int k = 1; k < sorted.size(); k++) {
            final SimpleInterval next = sorted.get(k);
            if (pending.overlaps(next)) {
                pending = pending.merge(next);
            } else {
                merged.add(pending);
                pending = next;
            }
        }
        merged.add(pending);
        return merged;
    }
    return src;
}
Also used : ArrayList(java.util.ArrayList) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 38 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class BamMatrix method doWork.

/**
 * Computes, for two genomic regions, how many read names are shared between every pair of bins and
 * renders the resulting matrix (plus optional coverage, gene and highlight tracks) as an image.
 *
 * <p>{@code args} holds one or two indexed alignment files. With one file it is used for both axes;
 * with two, the second is opened separately and must have the same sequence dictionary as the first.
 * When the two requested regions differ in length, the shorter one is replaced by a region of the
 * longer one's length centred on it. The picture is written to {@code outputFile} (PNG when the name
 * ends with ".png", JPG otherwise) or to stdout as PNG.
 *
 * @param args one or two indexed alignment files (X, then optional Y)
 * @return 0 on success, -1 on invalid parameters or when any error is thrown (it is logged)
 */
@Override
public int doWork(final List<String> args) {
    if (pixel_size < 1) {
        LOG.error("pixel size is too small (" + this.pixel_size + ")");
        return -1;
    }
    // the second region defaults to the first one
    if (StringUtils.isBlank(region2Str)) {
        this.region2Str = region1Str;
    }
    try {
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        if (this.faidx != null)
            srf.referenceSequence(this.faidx);
        final String inputX;
        final String inputY;
        if (args.size() == 1) {
            inputX = args.get(0);
            inputY = null;
        } else if (args.size() == 2) {
            inputX = args.get(0);
            inputY = args.get(1);
        } else {
            LOG.error("illegal number of arguments.");
            return -1;
        }
        this.samReaderX = srf.open(SamInputResource.of(inputX));
        if (!this.samReaderX.hasIndex()) {
            LOG.error("Input " + inputX + " is not indexed");
            return -1;
        }
        this.dict = SequenceDictionaryUtils.extractRequired(this.samReaderX.getFileHeader());
        // A second file is opened only when one was supplied; otherwise X serves both axes.
        if (inputY != null) {
            this.samReaderY = srf.open(SamInputResource.of(inputY));
            if (!this.samReaderY.hasIndex()) {
                LOG.error("Input " + inputY + " is not indexed");
                return -1;
            }
            SequenceUtil.assertSequenceDictionariesEqual(SequenceDictionaryUtils.extractRequired(this.samReaderY.getFileHeader()), this.dict);
        } else {
            this.samReaderY = this.samReaderX;
        }
        final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(this.dict);
        final Function<String, Optional<SimpleInterval>> intervalParser = IntervalParserFactory.newInstance().dictionary(dict).enableWholeContig().make();
        this.userIntervalX = intervalParser.apply(this.region1Str).orElseThrow(IntervalParserFactory.exception(this.region1Str));
        this.userIntervalY = intervalParser.apply(this.region2Str).orElseThrow(IntervalParserFactory.exception(this.region2Str));
        // adjust intervals so they have the same length; intervals are closed, so a region of
        // 'len' bases starting at 'start' ends at start + len - 1
        if (this.userIntervalX.getLengthOnReference() > this.userIntervalY.getLengthOnReference()) {
            final int mid = this.userIntervalY.getStart() + this.userIntervalY.getLengthOnReference() / 2;
            final int start = Math.max(1, mid - this.userIntervalX.getLengthOnReference() / 2);
            this.userIntervalY = new SimpleInterval(this.userIntervalY.getContig(), start, start + this.userIntervalX.getLengthOnReference() - 1);
            LOG.warn("Adjusting interval Y to " + this.userIntervalY + " so both intervals have the same length");
        } else if (this.userIntervalY.getLengthOnReference() > this.userIntervalX.getLengthOnReference()) {
            final int mid = this.userIntervalX.getStart() + this.userIntervalX.getLengthOnReference() / 2;
            final int start = Math.max(1, mid - this.userIntervalY.getLengthOnReference() / 2);
            this.userIntervalX = new SimpleInterval(this.userIntervalX.getContig(), start, start + this.userIntervalY.getLengthOnReference() - 1);
            LOG.warn("Adjusting interval X to " + this.userIntervalX + " so both intervals have the same length");
        }
        LOG.info("One pixel is " + (this.userIntervalX.getLengthOnReference() / (double) matrix_size) + " bases");
        final int distance = Math.max(this.userIntervalX.getLengthOnReference(), this.userIntervalY.getLengthOnReference());
        // number of bases covered by one matrix cell
        final double pixel2base = distance / (double) matrix_size;
        short max_count = 1;
        // row-major matrix: counts[pixY * matrix_size + pixX], each value capped at Short.MAX_VALUE
        final short[] counts = new short[this.matrix_size * this.matrix_size];
        final ReadCounter counter = new MemoryReadCounter();
        /* loop over each pixel 1st axis */
        for (int pixY = 0; pixY < this.matrix_size; pixY++) {
            final int start1 = (int) (this.userIntervalY.getStart() + pixY * pixel2base);
            final int end1 = start1 + (int) pixel2base;
            final Interval qy = new Interval(this.userIntervalY.getContig(), start1, end1);
            if (!qy.overlaps(this.userIntervalY))
                continue;
            // NOTE(review): getNamesMatching is defined elsewhere; presumably the first argument
            // selects the reader (1 = Y, 0 = X) - confirm against ReadCounter
            final Set<String> set1 = counter.getNamesMatching(1, qy);
            if (set1.isEmpty())
                continue;
            /* loop over each pixel 2nd axis */
            for (int pixX = 0; pixX < this.matrix_size; pixX++) {
                final int start2 = (int) (this.userIntervalX.getStart() + pixX * pixel2base);
                final int end2 = start2 + (int) pixel2base;
                final Interval qx = new Interval(this.userIntervalX.getContig(), start2, end2);
                if (!qx.overlaps(this.userIntervalX))
                    continue;
                if (!validateDisance(qy, qx))
                    continue;
                final int count_common;
                if (qx.compareTo(qy) == 0) {
                    // same bin on both axes: no intersection needed
                    count_common = set1.size();
                } else {
                    final HashSet<String> common = new HashSet<>(set1);
                    common.retainAll(counter.getNamesMatching(0, qx));
                    count_common = common.size();
                }
                final short count = count_common > Short.MAX_VALUE ? Short.MAX_VALUE : (short) count_common;
                max_count = (short) Math.max(count, max_count);
                counts[pixY * this.matrix_size + pixX] = count;
            }
        }
        counter.dispose();
        final int font_size = 10;
        final int cov_height = (this.hide_coverage ? 0 : 50);
        final int gene_height = 25;
        // room for the labels, the coverage track and (when a GTF is given) the gene track
        final int margin = font_size + cov_height + (this.gtfPath == null ? 0 : gene_height);
        final Insets margins = new Insets(margin, margin, 10, 10);
        final Dimension drawingAreaDim = new Dimension(this.matrix_size + margins.left + margins.right, this.matrix_size + margins.top + margins.bottom);
        final BufferedImage img = new BufferedImage(drawingAreaDim.width, drawingAreaDim.height, BufferedImage.TYPE_INT_RGB);
        final Graphics2D g = img.createGraphics();
        g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
        g.setColor(Color.WHITE);
        g.fillRect(0, 0, drawingAreaDim.width, drawingAreaDim.height);
        // draw sample
        final Hershey herschey = new Hershey();
        // sample name = first non-blank read-group sample, falling back to the input name
        final String sampleX = samReaderX.getFileHeader().getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(inputX);
        // the Y sample must come from the Y reader's own header
        final String sampleY = (samReaderX == samReaderY ? sampleX : samReaderY.getFileHeader().getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(inputY));
        final String sample = (sampleX.equals(sampleY) ? sampleX : String.join(" ", sampleX, sampleY));
        g.setColor(Color.DARK_GRAY);
        herschey.paint(g, sample, new Rectangle2D.Double(0, 1, margins.left - 1, font_size));
        // highlight user-supplied BED regions, once per axis (side 0 = X, side 1 = Y)
        for (int side = 0; side < 2 && !StringUtils.isBlank(this.highlightPath); ++side) {
            final int curr_side = side;
            final SimpleInterval r = (side == 0 ? this.userIntervalX : this.userIntervalY);
            final BedLineCodec bedCodec = new BedLineCodec();
            final Composite oldComposite = g.getComposite();
            g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.3f));
            try (BufferedReader br = IOUtils.openURIForBufferedReading(this.highlightPath)) {
                br.lines()
                    .filter(L -> !(StringUtils.isBlank(L) || L.startsWith("#")))
                    .map(L -> bedCodec.decode(L))
                    .filter(B -> B != null)
                    .filter(K -> converter.apply(K.getContig()) != null && r.getContig().equals(converter.apply(K.getContig())))
                    .filter(K -> CoordMath.overlaps(K.getStart(), K.getEnd(), r.getStart(), r.getEnd()))
                    // NOTE(review): the +1 presumably converts a 0-based BED start - confirm against BedLine
                    .map(E -> new Interval(converter.apply(E.getContig()), E.getStart() + 1, E.getEnd()))
                    .filter(E -> CoordMath.overlaps(E.getStart(), E.getEnd(), r.getStart(), r.getEnd()))
                    // clamp the record to the displayed region
                    .map(E -> new Interval(E.getContig(), Math.max(r.getStart(), E.getStart()), Math.min(r.getEnd(), E.getEnd())))
                    .forEach(E -> {
                        double d = ((E.getStart() - r.getStart()) / (double) r.getLengthOnReference()) * matrix_size;
                        double dL = ((E.getLengthOnReference()) / (double) r.getLengthOnReference()) * matrix_size;
                        g.setColor(Color.YELLOW);
                        if (curr_side == 0) {
                            g.fill(new Rectangle2D.Double(d, 0, dL, margins.left));
                        } else {
                            g.fill(new Rectangle2D.Double(0, d, margins.top, dL));
                        }
                    });
            }
            g.setComposite(oldComposite);
        }
        // paint the matrix cells, origin moved to the top-left corner of the matrix
        g.translate(margins.left, margins.top);
        final double logMaxV = Math.log(max_count);
        for (int pix1 = 0; pix1 < this.matrix_size; pix1++) {
            for (int pix2 = 0; pix2 < this.matrix_size; pix2++) {
                final short count = counts[pix1 * this.matrix_size + pix2];
                if (count == 0 || count < this.min_common_names)
                    continue;
                final int gray;
                switch(color_scale) {
                    case LINEAR:
                        gray = 255 - (int) (255 * (count / (double) max_count));
                        break;
                    case LOG:
                        gray = 255 - (int) (255 * ((Math.log(count)) / logMaxV));
                        break;
                    default:
                        throw new IllegalStateException(color_scale.name());
                }
                g.setColor(new Color(gray, 0, 0));
                g.fill(new Rectangle2D.Double(pix1 - pixel_size / 2.0, pix2 - pixel_size / 2.0, pixel_size, pixel_size));
            }
        }
        // draw frame
        g.setColor(Color.GRAY);
        g.drawRect(0, 0, this.matrix_size, this.matrix_size);
        g.translate(-margins.left, -margins.top);
        // used to plot depth
        final double[] coverage = new double[matrix_size];
        // distinct exons overlapping either region (empty when no GTF was given)
        final List<SimpleInterval> exonsList;
        if (this.gtfPath == null) {
            exonsList = Collections.emptyList();
        } else {
            try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
                gtfReader.setContigNameConverter(converter);
                exonsList = gtfReader.getAllGenes().stream()
                    .filter(K -> K.overlaps(this.userIntervalX) || K.overlaps(this.userIntervalY))
                    .flatMap(G -> G.getTranscripts().stream())
                    .filter(T -> T.hasExon())
                    .flatMap(K -> K.getExons().stream())
                    .filter(E -> E.overlaps(this.userIntervalX) || E.overlaps(this.userIntervalY))
                    .map(E -> new SimpleInterval(E))
                    .collect(Collectors.toSet())
                    .stream()
                    .collect(Collectors.toList());
            }
        }
        // draw the axis decorations, once per axis (side 0 = horizontal/X, side 1 = vertical/Y)
        for (int side = 0; side < 2; ++side) {
            final SimpleInterval r = (side == 0 ? this.userIntervalX : this.userIntervalY);
            final AffineTransform oldtr = g.getTransform();
            AffineTransform tr;
            if (side == 0) {
                // horizonal axis
                tr = AffineTransform.getTranslateInstance(margins.left, 1);
            } else {
                // vertical
                tr = AffineTransform.getTranslateInstance(margins.left, margins.top);
                tr.concatenate(AffineTransform.getRotateInstance(Math.PI / 2.0));
            }
            g.setTransform(tr);
            // calculate coverage , do this only once if regionX==regionY
            if (!hide_coverage && !(side == 1 && this.userIntervalX.equals(this.userIntervalY))) {
                Arrays.fill(coverage, 0);
                // number of positions accumulated per cell, used to average the depth
                final int[] count = new int[this.matrix_size];
                final IntervalList intervalList = new IntervalList(this.dict);
                intervalList.add(new Interval(r));
                try (final SamLocusIterator sli = new SamLocusIterator(this.samReaderX, intervalList, true)) {
                    while (sli.hasNext()) {
                        final LocusInfo locusInfo = sli.next();
                        final int pos = locusInfo.getPosition();
                        if (pos < r.getStart() || pos > r.getEnd())
                            continue;
                        final int depth = locusInfo.getRecordAndOffsets().size();
                        final int array_index = (int) (((pos - r.getStart()) / (double) r.getLengthOnReference()) * matrix_size);
                        coverage[array_index] += depth;
                        count[array_index]++;
                    }
                }
                for (int i = 0; i < coverage.length; ++i) {
                    if (count[i] == 0)
                        continue;
                    coverage[i] /= count[i];
                }
            }
            // draw ruler
            int y = 0;
            if (!this.hide_coverage) {
                final double max_cov = Arrays.stream(coverage).max().orElse(1);
                final GeneralPath gp = new GeneralPath();
                gp.moveTo(0, cov_height);
                for (int x = 0; x < coverage.length; ++x) {
                    gp.lineTo(x, y + cov_height - (coverage[x] / max_cov) * cov_height);
                }
                gp.lineTo(coverage.length, cov_height);
                gp.closePath();
                g.setColor(Color.GRAY);
                g.fill(gp);
                // string for max cov
                String label = StringUtils.niceInt((int) Arrays.stream(coverage).max().orElse(9));
                g.setColor(Color.DARK_GRAY);
                herschey.paint(g, label, new Rectangle2D.Double(matrix_size - label.length() * font_size, y, label.length() * font_size, font_size));
                y += cov_height;
            }
            // draw label
            g.setColor(Color.DARK_GRAY);
            // label is 'start position'
            String label = StringUtils.niceInt(r.getStart());
            herschey.paint(g, label, new Rectangle2D.Double(0, y, label.length() * font_size, font_size));
            // label is 'end position'
            label = StringUtils.niceInt(r.getEnd());
            herschey.paint(g, label, new Rectangle2D.Double(matrix_size - (label.length() * font_size), y, label.length() * font_size, font_size));
            // label is 'chromosome and length'
            label = r.getContig() + " ( " + StringUtils.niceInt(r.getLengthOnReference()) + " bp )";
            herschey.paint(g, label, new Rectangle2D.Double(matrix_size / 2.0 - (label.length() * font_size) / 2.0, y, label.length() * font_size, font_size));
            y += font_size;
            // draw genes
            if (this.gtfPath != null) {
                final double curr_y = y;
                double midy = y + gene_height / 2.0;
                g.setColor(Color.CYAN);
                g.draw(new Line2D.Double(0, midy, matrix_size, midy));
                exonsList.stream()
                    .filter(E -> E.overlaps(r))
                    // clamp each exon to the displayed region
                    .map(E -> new SimpleInterval(E.getContig(), Math.max(r.getStart(), E.getStart()), Math.min(r.getEnd(), E.getEnd())))
                    .forEach(E -> {
                        final double x = ((E.getStart() - r.getStart()) / (double) r.getLengthOnReference()) * matrix_size;
                        final double width = ((E.getLengthOnReference()) / (double) r.getLengthOnReference()) * matrix_size;
                        g.setColor(Color.BLUE);
                        g.fill(new Rectangle2D.Double(x, curr_y, width, gene_height));
                    });
            }
            g.setTransform(oldtr);
        }
        g.dispose();
        try {
            if (this.outputFile == null) {
                ImageIO.write(img, "PNG", stdout());
            } else {
                ImageIO.write(img, this.outputFile.getName().endsWith(".png") ? "PNG" : "JPG", this.outputFile);
            }
        } catch (final IOException err) {
            throw new RuntimeIOException(err);
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.samReaderX);
        CloserUtil.close(this.samReaderY);
    }
}
Also used : Color(java.awt.Color) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Rectangle2D(java.awt.geom.Rectangle2D) RenderingHints(java.awt.RenderingHints) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) ImageIO(javax.imageio.ImageIO) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Composite(java.awt.Composite) BufferedImage(java.awt.image.BufferedImage) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) LocusInfo(htsjdk.samtools.util.SamLocusIterator.LocusInfo) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) AffineTransform(java.awt.geom.AffineTransform) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) Dimension(java.awt.Dimension) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) CoordMath(htsjdk.samtools.util.CoordMath) Optional(java.util.Optional) GeneralPath(java.awt.geom.GeneralPath) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Insets(java.awt.Insets) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) AlphaComposite(java.awt.AlphaComposite) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Graphics2D(java.awt.Graphics2D) 
RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Line2D(java.awt.geom.Line2D) Locatable(htsjdk.samtools.util.Locatable) Hershey(com.github.lindenb.jvarkit.util.hershey.Hershey) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) IntervalList(htsjdk.samtools.util.IntervalList) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) File(java.io.File) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) SamInputResource(htsjdk.samtools.SamInputResource) QueryInterval(htsjdk.samtools.QueryInterval) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) Insets(java.awt.Insets) GeneralPath(java.awt.geom.GeneralPath) Line2D(java.awt.geom.Line2D) BufferedImage(java.awt.image.BufferedImage) IntervalList(htsjdk.samtools.util.IntervalList) LocusInfo(htsjdk.samtools.util.SamLocusIterator.LocusInfo) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Optional(java.util.Optional) Composite(java.awt.Composite) AlphaComposite(java.awt.AlphaComposite) Color(java.awt.Color) Hershey(com.github.lindenb.jvarkit.util.hershey.Hershey) Rectangle2D(java.awt.geom.Rectangle2D) Dimension(java.awt.Dimension) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) Graphics2D(java.awt.Graphics2D) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SamLocusIterator(htsjdk.samtools.util.SamLocusIterator) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) BufferedReader(java.io.BufferedReader) AffineTransform(java.awt.geom.AffineTransform) 
SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 39 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class VcfLoopOverGenes method doWork.

/**
 * Entry point of the tool. Exactly one VCF path is expected in {@code args}; the method then runs in
 * one of two modes depending on {@code this.geneFile}:
 * <ul>
 * <li><b>no gene file</b>: the VCF (optionally restricted to {@code this.regionStr}) is scanned, every
 * variant is associated with one or more identifiers (ANN/VEP annotations, sliding window of variants,
 * or sliding window over the contig, according to {@code this.splitMethod}) and a BED-like table is
 * written to {@code this.outputFile} or stdout. Columns are: contig, start-1, end, generated identifier,
 * name, source type, number of records.</li>
 * <li><b>gene file</b>: each line of that table is read back, the VCF is queried for the interval, the
 * matching variants are written to one VCF per line in the output archive, and the optional command
 * {@code this.exec} is run on each generated VCF (in a thread pool when {@code this.nJobs > 1}).</li>
 * </ul>
 *
 * @param args command line arguments, one and only one VCF file
 * @return 0 on success, -1 on error (or the non-zero status returned by a failed command when the
 *         commands are run without a thread pool)
 */
@SuppressWarnings("resource")
@Override
public int doWork(final List<String> args) {
    PrintWriter pw = null;
    VCFReader vcfFileReader = null;
    CloseableIterator<VariantContext> iter = null;
    CloseableIterator<GeneLoc> iter2 = null;
    BufferedReader br = null;
    ArchiveFactory archive = null;
    try {
        final Path vcf = Paths.get(oneAndOnlyOneFile(args));
        // second argument is true when the reader will be queried by interval
        vcfFileReader = VCFReaderFactory.makeDefault().open(vcf, (this.geneFile != null || !StringUtil.isBlank(this.regionStr)));
        this.dictionary = vcfFileReader.getHeader().getSequenceDictionary();
        if (this.dictionary == null) {
            throw new JvarkitException.VcfDictionaryMissing(vcf);
        }
        final VcfTools tools = new VcfTools(vcfFileReader.getHeader());
        // make sure a non-empty prefix ends with a dot
        if (!this.prefix.isEmpty() && !this.prefix.endsWith(".")) {
            this.prefix += ".";
        }
        if (this.geneFile == null) {
            /* ---- mode 1: build the table of genes / windows ---- */
            final SortingCollection<GeneLoc> sortingCollection = SortingCollection.newInstance(GeneLoc.class, new GeneLocCodec(), (A, B) -> A.compareTo(B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sortingCollection.setDestructiveIteration(true);
            if (StringUtil.isBlank(this.regionStr)) {
                iter = vcfFileReader.iterator();
            } else {
                final SimpleInterval interval = IntervalParserFactory.newInstance().dictionary(this.dictionary).enableWholeContig().make().apply(this.regionStr).orElseThrow(IntervalParserFactory.exception(this.regionStr));
                iter = vcfFileReader.query(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(vcfFileReader.getHeader()).logger(LOG);
            if (this.splitMethod.equals(SplitMethod.Annotations)) {
                /* one record per (variant, non-blank annotation identifier) */
                while (iter.hasNext()) {
                    final VariantContext ctx = progress.watch(iter.next());
                    for (final AnnPredictionParser.AnnPrediction pred : tools.getAnnPredictionParser().getPredictions(ctx)) {
                        if (this.snpEffNoIntergenic && pred.isIntergenicRegion()) {
                            continue;
                        }
                        if (!StringUtil.isBlank(pred.getGeneName())) {
                            sortingCollection.add(create(ctx, pred.getGeneName(), SourceType.ANN_GeneName));
                        }
                        if (!StringUtil.isBlank(pred.getGeneId())) {
                            sortingCollection.add(create(ctx, pred.getGeneId(), SourceType.ANN_GeneID));
                        }
                        if (!StringUtil.isBlank(pred.getFeatureId())) {
                            sortingCollection.add(create(ctx, pred.getFeatureId(), SourceType.ANN_FeatureID));
                        }
                    }
                    for (final VepPredictionParser.VepPrediction pred : tools.getVepPredictionParser().getPredictions(ctx)) {
                        if (!StringUtil.isBlank(pred.getGene())) {
                            sortingCollection.add(create(ctx, pred.getGene(), SourceType.VEP_Gene));
                        }
                        if (!StringUtil.isBlank(pred.getFeature())) {
                            sortingCollection.add(create(ctx, pred.getFeature(), SourceType.VEP_Feature));
                        }
                        if (!StringUtil.isBlank(pred.getSymbol())) {
                            sortingCollection.add(create(ctx, pred.getSymbol(), SourceType.VEP_Symbol));
                        }
                        if (!StringUtil.isBlank(pred.getHgncId())) {
                            sortingCollection.add(create(ctx, pred.getHgncId(), SourceType.VEP_HgncId));
                        }
                    }
                }
            } else if (this.splitMethod.equals(SplitMethod.VariantSlidingWindow)) {
                /* split VCF per sliding window of variants */
                if (this.variantsWinCount < 1) {
                    LOG.error("Bad value for variantsWinCount");
                    return -1;
                }
                if (this.variantsWinShift < 1 || this.variantsWinShift > this.variantsWinCount) {
                    LOG.error("Bad value for variantsWinShift");
                    return -1;
                }
                final List<VariantContext> buffer = new ArrayList<>(this.variantsWinCount);
                /**
                 * routine to dump buffer into sorting collection
                 */
                final Runnable dumpBuffer = () -> {
                    if (buffer.isEmpty())
                        return;
                    final String contig = buffer.get(0).getContig();
                    final int chromStart = buffer.stream().mapToInt(CTX -> CTX.getStart()).min().getAsInt();
                    // use last of start too
                    final int chromEnd0 = buffer.stream().mapToInt(CTX -> CTX.getStart()).max().getAsInt();
                    final String identifier = contig + "_" + String.format(NUM_FORMAT, chromStart) + "_" + String.format(NUM_FORMAT, chromEnd0);
                    for (final VariantContext ctx : buffer) {
                        sortingCollection.add(create(ctx, identifier, SourceType.SlidingVariants));
                    }
                };
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    /* reduce the memory footprint for this context */
                    ctx = new VariantContextBuilder(ctx).genotypes(Collections.emptyList()).unfiltered().rmAttributes(new ArrayList<>(ctx.getAttributes().keySet())).make();
                    // a window never spans two contigs
                    if (!buffer.isEmpty() && !buffer.get(0).getContig().equals(ctx.getContig())) {
                        dumpBuffer.run();
                        buffer.clear();
                    }
                    buffer.add(ctx);
                    if (buffer.size() >= this.variantsWinCount) {
                        dumpBuffer.run();
                        // slide the window: drop the first 'variantsWinShift' variants
                        final int fromIndex = Math.min(this.variantsWinShift, buffer.size());
                        buffer.subList(0, fromIndex).clear();
                    }
                }
                dumpBuffer.run();
                buffer.clear();
            } else if (this.splitMethod.equals(SplitMethod.ContigSlidingWindow)) {
                /* split VCF per sliding window over the contig coordinates */
                if (this.contigWinLength < 1) {
                    LOG.error("Bad value for contigWinCount");
                    return -1;
                }
                if (this.contigWinShift < 1 || this.contigWinShift > this.contigWinLength) {
                    LOG.error("Bad value for contigWinShift");
                    return -1;
                }
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    /* reduce the memory footprint for this context */
                    ctx = new VariantContextBuilder(ctx).genotypes(Collections.emptyList()).unfiltered().rmAttributes(new ArrayList<>(ctx.getAttributes().keySet())).make();
                    // walk every window starting at or before the variant and keep those reaching it
                    int start = 0;
                    while (start <= ctx.getStart()) {
                        if (start + this.contigWinLength >= ctx.getStart()) {
                            final int chromStart = start;
                            final int chromEnd0 = start + this.contigWinLength;
                            final String identifier = ctx.getContig() + "_" + String.format(NUM_FORMAT, chromStart) + "_" + String.format(NUM_FORMAT, chromEnd0);
                            sortingCollection.add(create(ctx, identifier, SourceType.SlidingContig));
                        }
                        start += this.contigWinShift;
                    }
                }
            } else {
                throw new IllegalStateException("No such method: " + this.splitMethod);
            }
            sortingCollection.doneAdding();
            progress.finish();
            iter.close();
            iter = null;
            pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
            iter2 = sortingCollection.iterator();
            // group consecutive records considered equal by compareGeneName: one output line per group
            final EqualRangeIterator<GeneLoc> eqiter = new EqualRangeIterator<>(iter2, this.compareGeneName);
            int geneIdentifierId = 0;
            while (eqiter.hasNext()) {
                final List<GeneLoc> gene = eqiter.next();
                pw.print(gene.get(0).contig);
                pw.print('\t');
                // -1 for BED
                pw.print(gene.stream().mapToInt(G -> G.start).min().getAsInt() - 1);
                pw.print('\t');
                pw.print(gene.stream().mapToInt(G -> G.end).max().getAsInt());
                pw.print('\t');
                pw.print(this.prefix + String.format("%09d", ++geneIdentifierId));
                pw.print('\t');
                pw.print(gene.get(0).geneName);
                pw.print('\t');
                pw.print(gene.get(0).sourceType);
                pw.print('\t');
                pw.print(gene.size());
                pw.println();
            }
            pw.flush();
            pw.close();
            pw = null;
            eqiter.close();
            iter2.close();
            iter2 = null;
            sortingCollection.cleanup();
        } else {
            /* ---- mode 2: read the table back and write one VCF per line ---- */
            if (this.nJobs < 1) {
                this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
                LOG.info("setting njobs to " + this.nJobs);
            }
            final ExecutorService executorService;
            final List<Future<Integer>> futureResults;
            if (this.nJobs > 1) {
                executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
                futureResults = new ArrayList<>();
            } else {
                executorService = null;
                futureResults = Collections.emptyList();
            }
            if (this.outputFile == null) {
                LOG.error("When scanning a VCF with " + this.geneFile + ". Output file must be defined");
                // stop here: everything below dereferences outputFile
                return -1;
            }
            if (!this.exec.isEmpty()) {
                if (this.outputFile.getName().endsWith(".zip")) {
                    LOG.error("Cannot execute " + this.exec + " when saving to a zip.");
                    return -1;
                }
            }
            archive = ArchiveFactory.open(this.outputFile);
            PrintWriter manifest = this.deleteAfterCommand && !this.exec.isEmpty() ? // all files will be deleted, no manifest needed
            new PrintWriter(new NullOuputStream()) : archive.openWriter(this.prefix + "manifest.txt");
            br = IOUtils.openFileForBufferedReading(this.geneFile);
            final BedLineCodec bedCodec = new BedLineCodec();
            for (; ; ) {
                /* poll the background commands that are already finished */
                if (!futureResults.isEmpty()) {
                    int i = 0;
                    while (i < futureResults.size()) {
                        final Future<Integer> r = futureResults.get(i);
                        if (r.isCancelled()) {
                            LOG.error("Task was canceled. Break.");
                            // futureResults is only filled when the pool exists; let its threads terminate
                            executorService.shutdown();
                            return -1;
                        } else if (r.isDone()) {
                            futureResults.remove(i);
                            int rez = r.get();
                            if (rez != 0) {
                                LOG.error("Task Failed (" + rez + "). Break");
                                // a failed command is an error for the whole run, not just a log line
                                executorService.shutdown();
                                return -1;
                            }
                        } else {
                            i++;
                        }
                    }
                }
                final String line = br.readLine();
                if (line == null)
                    break;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                final BedLine bedLine = bedCodec.decode(line);
                if (bedLine == null)
                    continue;
                // ID
                final String geneIdentifier = bedLine.get(3);
                // name
                final String geneName = bedLine.get(4);
                final SourceType sourceType = SourceType.valueOf(bedLine.get(5));
                final String filename = geneIdentifier;
                final String outputVcfName = (filename.startsWith(this.prefix) ? "" : this.prefix) + filename + ".vcf" + (this.compress ? ".gz" : "");
                LOG.info(bedLine.getContig() + ":" + bedLine.getStart() + "-" + bedLine.getEnd() + " length :" + (bedLine.getEnd() - bedLine.getStart()));
                if (bedLine.getEnd() - bedLine.getStart() > 1E6) {
                    LOG.warn("That's a large region ! " + bedLine);
                }
                // the output VCF is opened lazily, on the first variant that is kept
                OutputStream vcfOutputStream = null;
                VariantContextWriter vw = null;
                int countVariants = 0;
                final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(vcfFileReader.getHeader()).logger(LOG).prefix(geneName + " " + bedLine.getContig() + ":" + bedLine.getStart() + "-" + bedLine.getEnd());
                iter = vcfFileReader.query(bedLine.getContig(), bedLine.getStart(), bedLine.getEnd());
                while (iter.hasNext()) {
                    VariantContext ctx = progress.watch(iter.next());
                    switch(sourceType) {
                        case SlidingVariants:
                            {
                                // nothing
                                break;
                            }
                        case SlidingContig:
                            {
                                // nothing
                                break;
                            }
                        case ANN_GeneName:
                        case ANN_FeatureID:
                        case ANN_GeneID:
                            {
                                // keep only the ANN predictions whose identifier is the current name
                                final List<String> preds = new ArrayList<>();
                                for (final AnnPredictionParser.AnnPrediction pred : tools.getAnnPredictionParser().getPredictions(ctx)) {
                                    final String predictionIdentifier;
                                    switch(sourceType) {
                                        case ANN_GeneName:
                                            predictionIdentifier = pred.getGeneName();
                                            break;
                                        case ANN_FeatureID:
                                            predictionIdentifier = pred.getFeatureId();
                                            break;
                                        case ANN_GeneID:
                                            predictionIdentifier = pred.getGeneId();
                                            break;
                                        default:
                                            throw new IllegalStateException(bedLine.toString());
                                    }
                                    if (StringUtil.isBlank(predictionIdentifier))
                                        continue;
                                    if (!geneName.equals(predictionIdentifier))
                                        continue;
                                    preds.add(pred.getOriginalAttributeAsString());
                                }
                                if (preds.isEmpty()) {
                                    // no prediction for this name: the variant is skipped
                                    ctx = null;
                                } else {
                                    ctx = new VariantContextBuilder(ctx).rmAttribute(tools.getAnnPredictionParser().getTag()).attribute(tools.getAnnPredictionParser().getTag(), preds).make();
                                }
                                break;
                            }
                        case VEP_Gene:
                        case VEP_Feature:
                        case VEP_Symbol:
                        case VEP_HgncId:
                            {
                                // keep only the VEP predictions whose identifier is the current name
                                final List<String> preds = new ArrayList<>();
                                for (final VepPredictionParser.VepPrediction pred : tools.getVepPredictions(ctx)) {
                                    final String predictionIdentifier;
                                    switch(sourceType) {
                                        case VEP_Gene:
                                            predictionIdentifier = pred.getGene();
                                            break;
                                        case VEP_Feature:
                                            predictionIdentifier = pred.getFeature();
                                            break;
                                        case VEP_Symbol:
                                            predictionIdentifier = pred.getSymbol();
                                            break;
                                        case VEP_HgncId:
                                            predictionIdentifier = pred.getHgncId();
                                            break;
                                        default:
                                            throw new IllegalStateException(bedLine.toString());
                                    }
                                    if (StringUtil.isBlank(predictionIdentifier))
                                        continue;
                                    if (!geneName.equals(predictionIdentifier))
                                        continue;
                                    preds.add(pred.getOriginalAttributeAsString());
                                }
                                if (preds.isEmpty()) {
                                    // no prediction for this name: the variant is skipped
                                    ctx = null;
                                } else {
                                    ctx = new VariantContextBuilder(ctx).rmAttribute(tools.getVepPredictionParser().getTag()).attribute(tools.getVepPredictionParser().getTag(), preds).make();
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException(bedLine.toString());
                    }
                    if (ctx == null)
                        continue;
                    if (vcfOutputStream == null) {
                        LOG.info(filename);
                        manifest.println(outputVcfName);
                        final VCFHeader header = new VCFHeader(vcfFileReader.getHeader());
                        header.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, filename));
                        vcfOutputStream = archive.openOuputStream(outputVcfName);
                        vw = VCFUtils.createVariantContextWriterToOutputStream(vcfOutputStream);
                        vw.writeHeader(header);
                    }
                    countVariants++;
                    vw.add(ctx);
                    if (countVariants % 1000 == 0) {
                        LOG.info("Loading : " + geneIdentifier + " N=" + countVariants);
                    }
                }
                progress.finish();
                LOG.info(geneIdentifier + " N=" + countVariants);
                if (vcfOutputStream != null) {
                    vw.close();
                    vcfOutputStream.flush();
                    vcfOutputStream.close();
                    vw = null;
                    if (!this.exec.isEmpty()) {
                        /* run the user command on the VCF that was just written */
                        final Callable<Integer> callable = () -> {
                            final File vcfOutFile = new File(this.outputFile, outputVcfName);
                            IOUtil.assertFileIsReadable(vcfOutFile);
                            final String vcfPath = vcfOutFile.getPath();
                            final StringTokenizer st = new StringTokenizer(this.exec);
                            final List<String> command = new ArrayList<>(1 + st.countTokens());
                            while (st.hasMoreTokens()) {
                                // String.replace is a literal substitution: the values (paths, gene names...)
                                // may contain '$' or '\' which replaceAll would interpret in the replacement
                                final String token = st.nextToken()
                                    .replace("__PREFIX__", this.prefix)
                                    .replace("__CONTIG__", bedLine.getContig())
                                    .replace("__CHROM__", bedLine.getContig())
                                    .replace("__ID__", geneIdentifier)
                                    .replace("__NAME__", geneName)
                                    .replace("__START__", String.valueOf(bedLine.getStart()))
                                    .replace("__END__", String.valueOf(bedLine.getEnd()))
                                    .replace("__SOURCE__", sourceType.name())
                                    .replace("__VCF__", vcfPath);
                                command.add(token);
                            }
                            LOG.info(command.stream().map(S -> "'" + S + "'").collect(Collectors.joining(" ")));
                            final ProcessBuilder pb = new ProcessBuilder(command);
                            pb.redirectErrorStream(true);
                            final Process p = pb.start();
                            // forward the (merged) output of the child process to stdout
                            final Thread stdoutThread = new Thread(() -> {
                                try {
                                    InputStream in = p.getInputStream();
                                    IOUtils.copyTo(in, stdout());
                                } catch (Exception err) {
                                    LOG.error(err);
                                }
                            });
                            stdoutThread.start();
                            int exitValue = p.waitFor();
                            if (exitValue != 0) {
                                LOG.error("Command failed (" + exitValue + "):" + String.join(" ", command));
                                return -1;
                            } else {
                                if (deleteAfterCommand) {
                                    if (!vcfOutFile.delete()) {
                                        LOG.warn("Cannot delete " + vcfOutFile);
                                    }
                                }
                                return 0;
                            }
                        };
                        if (executorService != null) {
                            final Future<Integer> rez = executorService.submit(callable);
                            futureResults.add(rez);
                        } else {
                            final int ret = callable.call();
                            if (ret != 0) {
                                LOG.error("Error with process (" + ret + ")");
                                return ret;
                            }
                        }
                    }
                } else {
                    manifest.println("#" + filename);
                    LOG.warn("No Variant Found for " + line);
                }
                iter.close();
            }
            if (executorService != null) {
                LOG.info("shutdown");
                executorService.shutdown();
                executorService.awaitTermination(365, TimeUnit.DAYS);
            }
            br.close();
            br = null;
            manifest.close();
            archive.close();
            archive = null;
            LOG.info("Done");
        }
        vcfFileReader.close();
        vcfFileReader = null;
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        {
            // whatever is still open at this point was left open by an early return or an exception
            CloserUtil.close(iter2);
            CloserUtil.close(iter);
            CloserUtil.close(pw);
            CloserUtil.close(vcfFileReader);
            CloserUtil.close(br);
            CloserUtil.close(archive);
        }
    }
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) Future(java.util.concurrent.Future) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Logger(com.github.lindenb.jvarkit.util.log.Logger) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) Callable(java.util.concurrent.Callable) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) 
StringTokenizer(java.util.StringTokenizer) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) SortingCollection(htsjdk.samtools.util.SortingCollection) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Paths(java.nio.file.Paths) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) List(java.util.List) ArrayList(java.util.ArrayList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) File(java.io.File) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFReader(htsjdk.variant.vcf.VCFReader) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) 
Path(java.nio.file.Path) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) IOException(java.io.IOException) StringTokenizer(java.util.StringTokenizer) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)

Example 40 with SimpleInterval

use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.

the class Biostar9501110 method findVariants.

/**
 * Tells whether a read must be kept, according to the number of variants for which the read
 * carries a non-reference allele.
 * <p>
 * The VCF is queried over the read (or over its unclipped interval when {@code use_clip} is set)
 * and the scan stops as soon as {@code min_num_variants} matching variants have been seen. When
 * {@code attribute} is not blank, a {@code start|[id|]ref|allele} description of each matching
 * variant is collected and the comma-joined set is stored in the read under that attribute.
 *
 * @param record the read to test; its attribute may be set as a side effect
 * @return {@code true} when the read reaches {@code min_num_variants} matching variants, the
 *         answer being negated when {@code inverse_selection} is set; an unmapped read never matches
 */
private boolean findVariants(final SAMRecord record) {
    if (record.getReadUnmappedFlag()) {
        // an unmapped read counts as "no match": it is kept only when the selection is inverted
        return this.inverse_selection;
    }
    final Locatable region;
    if (this.use_clip) {
        region = new SimpleInterval(record.getContig(), record.getUnclippedStart(), record.getUnclippedEnd());
    } else {
        region = record;
    }
    final Set<String> descriptions = new HashSet<>();
    int matched = 0;
    try (CloseableIterator<VariantContext> variants = this.bufferedVCFReader.query(region)) {
        while (variants.hasNext() && matched < this.min_num_variants) {
            final VariantContext variant = variants.next();
            final FindVariantInSamRecord.Match hit = this.findVariantInSamRecord.apply(record, variant);
            // only an allele that was found and differs from the reference counts
            if (!hit.getAllele().isPresent() || hit.getAllele().get().isReference()) {
                continue;
            }
            matched++;
            if (StringUtils.isBlank(this.attribute)) {
                continue;
            }
            final char separator = '|';
            final StringBuilder description = new StringBuilder();
            description.append(variant.getStart());
            description.append(separator);
            if (variant.hasID()) {
                description.append(variant.getID());
                description.append(separator);
            }
            description.append(variant.getReference().getDisplayString());
            description.append(separator);
            description.append(hit.getAllele().get().getDisplayString());
            descriptions.add(description.toString());
        }
    }
    if (!descriptions.isEmpty()) {
        record.setAttribute(this.attribute, String.join(",", descriptions));
    }
    final boolean enough = matched >= min_num_variants;
    return this.inverse_selection ? !enough : enough;
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) FindVariantInSamRecord(com.github.lindenb.jvarkit.samtools.FindVariantInSamRecord) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Locatable(htsjdk.samtools.util.Locatable) HashSet(java.util.HashSet)

Aggregations

SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)71 ArrayList (java.util.ArrayList)49 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)47 List (java.util.List)47 Locatable (htsjdk.samtools.util.Locatable)46 Path (java.nio.file.Path)46 Parameter (com.beust.jcommander.Parameter)43 Program (com.github.lindenb.jvarkit.util.jcommander.Program)43 Logger (com.github.lindenb.jvarkit.util.log.Logger)43 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)39 Collectors (java.util.stream.Collectors)38 Set (java.util.Set)37 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)36 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)35 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)35 SAMFileHeader (htsjdk.samtools.SAMFileHeader)34 CloserUtil (htsjdk.samtools.util.CloserUtil)34 CloseableIterator (htsjdk.samtools.util.CloseableIterator)33 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)32 SamReader (htsjdk.samtools.SamReader)32