Search in sources :

Example 76 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class SvToSVG method doWork.

@Override
public int doWork(final List<String> args) {
    /* parse interval */
    if (this.intervalStrList.isEmpty()) {
        LOG.error("interval(s) undefined");
        return -1;
    }
    this.drawinAreaWidth = Math.max(100, this.drawinAreaWidth);
    try {
        if (this.fastaFile != null) {
            this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.fastaFile);
        }
        if (this.vcfFile != null) {
            this.vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
        }
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        final DocumentBuilder db = dbf.newDocumentBuilder();
        this.document = db.newDocument();
        final List<Path> bamFiles = IOUtils.unrollPaths(args);
        if (bamFiles.isEmpty()) {
            LOG.error("bam(s) undefined");
            return -1;
        }
        final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.fastaFile);
        /* loop over each bam file */
        for (final Path bamFile : bamFiles) {
            try (final SamReader sr = srf.open(bamFile)) {
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
                final Set<String> samples = header.getReadGroups().stream().map(G -> G.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
                if (samples.size() != 1) {
                    LOG.error("expected on sample in " + bamFile + " but got " + samples.size() + " " + samples);
                    return -1;
                }
                final Sample sample = new Sample(samples.iterator().next());
                this.sampleList.add(sample);
                /* loop over each region for that sample */
                for (final String intervalStr : this.intervalStrList) {
                    LOG.info("scanning " + intervalStr + " for " + bamFile);
                    final SimpleInterval interval = parser.apply(intervalStr).orElseThrow(IntervalParserFactory.exception(intervalStr));
                    if (interval == null) {
                        LOG.error("Cannot parse " + intervalStr + " for " + bamFile);
                        return -1;
                    }
                    /* create new region */
                    final Sample.Region region = sample.new Region(interval);
                    if (!sample.regions.isEmpty() && region.getLengthOnReference() != sample.regions.get(0).getLengthOnReference()) {
                        LOG.warning("intervals should all have the same length");
                    }
                    sample.regions.add(region);
                    try (CloseableIterator<SAMRecord> iter = sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false)) {
                        while (iter.hasNext()) {
                            final SAMRecord record = iter.next();
                            boolean trace = record.getReadName().equals(DEBUG_READ);
                            if (!SAMRecordDefaultFilter.accept(record, this.min_mapq))
                                continue;
                            if (trace)
                                LOG.debug("got1");
                            final Cigar cigar = record.getCigar();
                            if (cigar == null || cigar.isEmpty())
                                continue;
                            if (trace)
                                LOG.debug("got2");
                            if (remove_simple_reads) {
                                if ((!cigar.isClipped() && SAMUtils.getOtherCanonicalAlignments(record).isEmpty() && (record.getReadPairedFlag() && record.getProperPairFlag())))
                                    continue;
                            }
                            if (!record.getContig().equals(interval.getContig()))
                                continue;
                            if (trace)
                                LOG.debug("got3");
                            final Sample.ShortRead shortRead = sample.new DefaultShortRead(region, record);
                            if (shortRead.getEnd() < interval.getStart())
                                continue;
                            if (shortRead.getStart() > interval.getEnd())
                                continue;
                            if (trace)
                                LOG.debug("got4" + record.getSAMString());
                            if (!sample.sampleName.equals(shortRead.getSampleName()))
                                continue;
                            region.beforePileup.add(shortRead);
                        }
                    }
                /* en iterator */
                }
            /* end loop interval */
            }
        /* end open SAM */
        }
        this.sampleList.forEach(S -> S.regions.forEach(R -> R.addSplitReads()));
        this.sampleList.forEach(S -> S.regions.forEach(R -> R.pileup()));
        /* sort per sample */
        this.sampleList.sort((A, B) -> A.sampleName.compareTo(B.sampleName));
        /* sort per region */
        this.sampleList.stream().forEach(SL -> SL.regions.sort((A, B) -> {
            final SmartComparator sc = new SmartComparator();
            int i = sc.compare(A.getContig(), B.getContig());
            if (i != 0)
                return i;
            return A.getStart() - B.getStart();
        }));
        buildDocument();
        final Transformer tr = TransformerFactory.newInstance().newTransformer();
        final Result result;
        if (this.outputFile != null) {
            result = new StreamResult(this.outputFile);
        } else {
            result = new StreamResult(stdout());
        }
        tr.transform(new DOMSource(this.document), result);
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.vcfFileReader);
        this.document = null;
    }
}
Also used : Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) Result(javax.xml.transform.Result) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) StaticCodeExtractor(com.github.lindenb.jvarkit.lang.StaticCodeExtractor) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) DocumentFragment(org.w3c.dom.DocumentFragment) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) SmartComparator(com.github.lindenb.jvarkit.lang.SmartComparator) Stream(java.util.stream.Stream) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) CoordMath(htsjdk.samtools.util.CoordMath) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IntStream(java.util.stream.IntStream) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SAMUtils(htsjdk.samtools.SAMUtils) Parameter(com.beust.jcommander.Parameter) Function(java.util.function.Function) ArrayList(java.util.ArrayList) BiPredicate(java.util.function.BiPredicate) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) SamReader(htsjdk.samtools.SamReader) File(java.io.File) Element(org.w3c.dom.Element) TreeMap(java.util.TreeMap) DocumentBuilder(javax.xml.parsers.DocumentBuilder) TransformerFactory(javax.xml.transform.TransformerFactory) DOMSource(javax.xml.transform.dom.DOMSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) StreamResult(javax.xml.transform.stream.StreamResult) Result(javax.xml.transform.Result) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Optional(java.util.Optional) StreamResult(javax.xml.transform.stream.StreamResult) SmartComparator(com.github.lindenb.jvarkit.lang.SmartComparator) Cigar(htsjdk.samtools.Cigar) DocumentBuilder(javax.xml.parsers.DocumentBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 77 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class WesCnvSvg method doWork.

@Override
public int doWork(final List<String> args) {
    XMLStreamWriter w = null;
    BufferedReader r = null;
    OutputStream fout = null;
    VCFReader vcfReader = null;
    try {
        this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
        final IntervalExtender extender = IntervalExtender.of(refDict, this.extendWhat);
        if (extender.isShriking()) {
            LOG.error("extend factor <1.0");
            return -1;
        }
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(refDict);
        final ContigDictComparator contigDictCompare = new ContigDictComparator(refDict);
        final List<CaptureInterval> userIntervals = this.intervalListProvider.stream().map(loc -> contigNameConverter.convertToSimpleInterval(loc).<RuntimeException>orElseThrow(() -> new RuntimeException(new JvarkitException.ContigNotFoundInDictionary(loc.getContig(), refDict)))).map(extender).collect(HtsCollectors.mergeIntervals()).map(L -> new CaptureInterval(L)).sorted(contigDictCompare.createLocatableComparator()).collect(Collectors.toCollection(ArrayList::new));
        if (userIntervals.isEmpty()) {
            LOG.error("no interval or bed defined");
            return -1;
        }
        this.countBasesToBeDisplayed = userIntervals.stream().mapToInt(R -> R.getLengthOnReference()).sum();
        if (this.countBasesToBeDisplayed < 1) {
            LOG.error("Nothing to display. BED count==0");
            return -1;
        } else {
            double x1 = 0;
            for (int i = 0; i < userIntervals.size(); ++i) {
                final CaptureInterval ci = userIntervals.get(i);
                ci.pixelx = x1;
                x1 += ci.getPixelWidth();
            }
        }
        /* distinct ordered contigs */
        final List<String> distinctContigs = userIntervals.stream().map(R -> R.getContig()).collect(Collectors.toSet()).stream().sorted(contigDictCompare).collect(Collectors.toList());
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidx);
        for (final Path bamFile : IOUtils.unrollPaths(args)) {
            final BamInput bi = new BamInput();
            bi.bamPath = bamFile;
            try (SamReader samReader = srf.open(bamFile)) {
                final SAMSequenceDictionary samDict = SequenceDictionaryUtils.extractRequired(samReader.getFileHeader());
                if (!SequenceUtil.areSequenceDictionariesEqual(refDict, samDict)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(refDict, samDict);
                }
                bi.sample = samReader.getFileHeader().getReadGroups().stream().map(V -> V.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bamFile));
                final CoverageFactory covFactory = new CoverageFactory().setMappingQuality(this.minMappingQuality);
                for (final String contig : distinctContigs) {
                    final List<CaptureInterval> contig_intervals = userIntervals.stream().filter(R -> R.getContig().equals(contig)).collect(Collectors.toList());
                    final CoverageFactory.SimpleCoverage coverage = covFactory.getSimpleCoverage(samReader, contig_intervals, null);
                    for (CaptureInterval rgn : contig_intervals) {
                        final double[] array = coverage.getSubCoverage(rgn).scale(this.scaleType, (int) rgn.getPixelWidth());
                        bi.coverages.add(array);
                    }
                }
            }
            this.bamInputs.add(bi);
        }
        if (this.bamInputs.isEmpty()) {
            LOG.error("no bam input");
            return -1;
        }
        if (this.vcfFile != null) {
            vcfReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
            final SAMSequenceDictionary vcfDict = vcfReader.getHeader().getSequenceDictionary();
            if (vcfDict != null)
                SequenceUtil.assertSequenceDictionariesEqual(refDict, vcfDict);
        }
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile == null) {
            w = xof.createXMLStreamWriter(stdout(), "UTF-8");
        } else {
            fout = Files.newOutputStream(this.outputFile);
            w = xof.createXMLStreamWriter(fout, "UTF-8");
        }
        final Function<List<Point2D.Double>, String> points2str = (L) -> L.stream().map(S -> format(S.getX()) + "," + format(S.getY())).collect(Collectors.joining(" "));
        final Consumer<List<Point2D.Double>> simplifyPoints = (L) -> {
            for (int z = 0; z + 1 < L.size(); ++z) {
                if (L.get(z).getY() == L.get(z + 1).getY()) {
                    L.get(z).x = L.get(z + 1).x;
                    L.remove(z + 1);
                }
            }
        };
        w.writeStartDocument("UTF-8", "1.0");
        final Dimension dim = new Dimension(this.drawinAreaWidth, 0);
        final int bed_header_height = 20;
        dim.height += bed_header_height;
        dim.height += (int) this.bamInputs.stream().mapToDouble(B -> B.getPixelHeight()).sum();
        LOG.debug("drawing area: " + dim);
        w.writeStartElement("svg");
        w.writeAttribute("width", String.valueOf(dim.width));
        w.writeAttribute("height", String.valueOf(dim.height));
        w.writeDefaultNamespace(SVG.NS);
        w.writeNamespace("xlink", XLINK.NS);
        // https://stackoverflow.com/questions/15717970
        w.writeStartElement("style");
        if (this.cssFile != null) {
            w.writeCharacters(IOUtil.slurp(this.cssFile));
        } else {
            w.writeCharacters("g.maing {stroke:black;stroke-width:0.5px;fill:whitesmoke;font-size:10pt;}\n" + "text.sampleLabel {stroke:none;stroke-width:0.5px;fill:blue;}" + "text.captureLabel {stroke:none;stroke-width:0.5px;fill:slategrey;text-anchor:middle;}" + "polygon.area {stroke:darkgray;stroke-width:0.5px;fill:url('#grad01');}" + "line.linedp {stroke:darkcyan;stroke-width:0.3px;opacity:0.4;}" + "text.linedp {fill-opacity:0.6;font-size:7px;stroke:none;stroke-width:0.5px;fill:darkcyan;}" + "rect.sampleFrame { fill:none;stroke:slategray;stroke-width:0.3px;}" + "rect.clickRgn {fill:none;stroke:none;pointer-events:all;}" + "polyline.gc {stroke:lightcoral;stroke-width:0.3px;fill:none;}" + "polyline.clipping {stroke:orange;stroke-width:0.8px;fill:none;}" + "circle.ar {fill:orange;stroke:none;}" + "circle.aa {fill:red;stroke:none;}" + "circle.rr {fill:green;stroke:none;}");
        }
        // style
        w.writeEndElement();
        w.writeStartElement("title");
        w.writeCharacters(this.domSvgTitle);
        w.writeEndElement();
        w.writeStartElement("defs");
        // alleles
        final double genotype_radius = Double.parseDouble(this.dynaParams.getOrDefault("gt.radius", "1.5"));
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "rr");
        w.writeAttribute("class", "rr");
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "ar");
        w.writeAttribute("class", "ar");
        w.writeEmptyElement("circle");
        w.writeAttribute("r", format(genotype_radius));
        w.writeAttribute("id", "aa");
        w.writeAttribute("class", "aa");
        // gradient
        w.writeStartElement("linearGradient");
        w.writeAttribute("id", "grad01");
        w.writeAttribute("x1", "50%");
        w.writeAttribute("x2", "50%");
        w.writeAttribute("y1", "0%");
        w.writeAttribute("y2", "100%");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "0%");
        w.writeAttribute("style", "stop-color:lightgray;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "100%");
        w.writeAttribute("style", "stop-color:gray;stop-opacity:1;");
        w.writeEndElement();
        // gc percent
        for (final CaptureInterval ci : userIntervals) {
            final GenomicSequence genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, ci.getContig());
            final int gc_percent_width = (int) ci.getPixelWidth();
            final List<Point2D.Double> points = new ArrayList<>(gc_percent_width);
            for (int x = 0; x < gc_percent_width; ++x) {
                int pos1 = ci.getStart() + (int) (((x + 0) / ci.getPixelWidth()) * ci.getLengthOnReference());
                int pos2 = ci.getStart() + (int) (((x + 1) / ci.getPixelWidth()) * ci.getLengthOnReference());
                double gc_percent = getGcPercent(genomicSequence, pos1, pos2);
                double y = this.sampleTrackHeight - this.sampleTrackHeight * gc_percent;
                points.add(new Point2D.Double(x, y));
            }
            simplifyPoints.accept(points);
            w.writeStartElement("polyline");
            w.writeAttribute("class", "gc");
            w.writeAttribute("id", "z" + ci.getId());
            w.writeAttribute("points", points2str.apply(points));
            w.writeStartElement("title");
            w.writeCharacters("GC %");
            w.writeEndElement();
            w.writeEndElement();
        }
        // defs
        w.writeEndElement();
        w.writeStartElement("script");
        final StringBuilder openBrowserFunction = new StringBuilder("function openGenomeBrowser(contig,chromStart,chromEnd) {\n");
        if (!this.hyperlinkType.isEmpty()) {
            openBrowserFunction.append("var url=\"" + this.hyperlinkType.getPattern() + "\".replace(/__CHROM__/g,contig).replace(/__START__/g,chromStart).replace(/__END__/g,chromEnd);\n");
            openBrowserFunction.append("window.open(url,'_blank');\n");
        } else {
        // nothing
        }
        openBrowserFunction.append("}\n");
        w.writeCData(openBrowserFunction.toString() + "function clicked(evt,contig,chromStart,chromEnd){\n" + "    var e = evt.target;\n" + "    var dim = e.getBoundingClientRect();\n" + "    var x = 1.0 * evt.clientX - dim.left;\n" + "    var cLen = 1.0* (chromEnd - chromStart); if(cLen<1) cLen=1.0;\n" + "    var pos1 = chromStart + parseInt(((x+0)/dim.width)*cLen);\n" + "    var pos2 = chromStart + parseInt(((x+1)/dim.width)*cLen);\n" + "   openGenomeBrowser(contig,pos1,pos2);\n" + "}\n");
        // script
        w.writeEndElement();
        w.writeStartElement("g");
        w.writeAttribute("class", "maing");
        int y = 0;
        w.writeStartElement("g");
        w.writeComment("interval background");
        for (final CaptureInterval ci : userIntervals) {
            w.writeStartElement("text");
            w.writeAttribute("class", "captureLabel");
            w.writeAttribute("textLength", String.valueOf(ci.getPixelWidth() * 0.8));
            w.writeAttribute("lengthAdjust", "spacing");
            w.writeAttribute("x", String.valueOf(ci.getPixelX1() + ci.getPixelWidth() / 2.0));
            w.writeAttribute("y", String.valueOf(bed_header_height - 2));
            w.writeCharacters(ci.getName());
            w.writeStartElement("title");
            w.writeCharacters(ci.toNiceString());
            // title
            w.writeEndElement();
            // text
            w.writeEndElement();
            w.writeStartElement("rect");
            w.writeAttribute("style", "stroke:black;fill:none;");
            w.writeAttribute("x", String.valueOf(ci.getPixelX1()));
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
            w.writeAttribute("height", String.valueOf(dim.height));
            w.writeStartElement("title");
            w.writeCharacters(ci.getName());
            w.writeEndElement();
            w.writeEndElement();
        }
        // interval background
        w.writeEndElement();
        y += bed_header_height;
        for (final BamInput bi : this.bamInputs) {
            w.writeComment(bi.bamPath.toString());
            w.writeStartElement("g");
            w.writeAttribute("transform", "translate(0," + y + ")");
            if (normalize_on_median_flag) {
                final double medianDepth = Math.max(1.0, Percentile.median().evaluate(bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).toArray()).orElse(1.0));
                LOG.info("median" + medianDepth);
                for (final double[] coverage_array : bi.coverages) {
                    for (int px = 0; px < coverage_array.length; px++) {
                        coverage_array[px] /= medianDepth;
                    }
                }
            }
            final double maxDepth = bi.coverages.stream().flatMapToDouble(B -> Arrays.stream(B)).max().orElse(1.0);
            for (int ridx = 0; ridx < userIntervals.size(); ridx++) {
                final CaptureInterval ci = userIntervals.get(ridx);
                final String clickedAttribute = "clicked(evt,\"" + ci.getContig() + "\"," + ci.getStart() + "," + ci.getEnd() + ")";
                final double[] coverage_array = bi.coverages.get(ridx);
                final double leftx = ci.getPixelX1();
                w.writeStartElement("g");
                w.writeAttribute("transform", "translate(" + leftx + ",0)");
                final int segment_width = (int) ci.getPixelWidth();
                // coverage
                {
                    final List<Point2D.Double> points = new ArrayList<>(segment_width);
                    points.add(new Point2D.Double(0, bi.getPixelHeight()));
                    for (int px = 0; px < coverage_array.length; px++) {
                        final double y_avg_cov = coverage_array[px];
                        final double new_y = bi.getPixelHeight() - (y_avg_cov / maxDepth) * bi.getPixelHeight();
                        points.add(new Point2D.Double(px, new_y));
                    }
                    points.add(new Point2D.Double(ci.getPixelWidth(), bi.getPixelHeight()));
                    simplifyPoints.accept(points);
                    // close
                    points.add(new Point2D.Double(leftx, bi.getPixelHeight()));
                    w.writeEmptyElement("polygon");
                    w.writeAttribute("class", "area");
                    w.writeAttribute("title", ci.toNiceString());
                    // w.writeAttribute("onclick", clickedAttribute);
                    w.writeAttribute("points", points2str.apply(points));
                }
                // w.writeEndElement();//g
                int depthshift = 1;
                for (; ; ) {
                    final int numdiv = (int) (maxDepth / depthshift);
                    if (numdiv <= 10)
                        break;
                    depthshift *= 10;
                }
                int depth = depthshift;
                while (depth < maxDepth) {
                    double new_y = bi.getPixelHeight() - (depth / maxDepth) * bi.getPixelHeight();
                    w.writeStartElement("text");
                    w.writeAttribute("class", "linedp");
                    w.writeAttribute("x", "1");
                    w.writeAttribute("y", String.valueOf(new_y + 1));
                    w.writeCharacters(String.valueOf(depth));
                    // text
                    w.writeEndElement();
                    w.writeStartElement("line");
                    w.writeAttribute("class", "linedp");
                    w.writeAttribute("stroke-dasharray", "4");
                    w.writeAttribute("x1", "0");
                    w.writeAttribute("x2", String.valueOf(ci.getPixelWidth()));
                    w.writeAttribute("y1", String.valueOf(new_y));
                    w.writeAttribute("y2", String.valueOf(new_y));
                    w.writeStartElement("title");
                    w.writeCharacters(String.valueOf(depth));
                    w.writeEndElement();
                    // line
                    w.writeEndElement();
                    depth += depthshift;
                }
                // polyline
                w.writeEmptyElement("use");
                w.writeAttribute("xlink", XLINK.NS, "href", "#z" + ci.getId());
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                // click
                w.writeStartElement("rect");
                w.writeAttribute("class", "clickRgn");
                w.writeAttribute("onclick", clickedAttribute);
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("width", String.valueOf(ci.getPixelWidth()));
                w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
                w.writeEndElement();
                // genotype
                if (vcfReader != null) {
                    try (CloseableIterator<VariantContext> iter = vcfReader.query(ci)) {
                        while (iter.hasNext()) {
                            final VariantContext ctx = iter.next();
                            final Genotype gt = ctx.getGenotype(bi.sample);
                            if (gt == null)
                                break;
                            String allele_id = null;
                            switch(gt.getType()) {
                                case HET:
                                    allele_id = "ar";
                                    break;
                                case HOM_REF:
                                    allele_id = "rr";
                                    break;
                                case HOM_VAR:
                                    allele_id = "aa";
                                    break;
                                default:
                                    allele_id = null;
                                    break;
                            }
                            if (allele_id != null) {
                                w.writeEmptyElement("use");
                                w.writeAttribute("xlink", XLINK.NS, "href", "#" + allele_id);
                                w.writeAttribute("x", format(((ctx.getStart() - ci.getStart()) / (double) ci.getLengthOnReference()) * ci.getPixelWidth()));
                                w.writeAttribute("y", format(bi.getPixelHeight() - 2 * genotype_radius));
                            }
                        }
                    }
                }
                // g
                w.writeEndElement();
            }
            // frame for this sample
            w.writeStartElement("rect");
            w.writeAttribute("class", "sampleFrame");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(dim.width));
            w.writeAttribute("height", String.valueOf(bi.getPixelHeight()));
            // rect
            w.writeEndElement();
            w.writeStartElement("text");
            w.writeAttribute("class", "sampleLabel");
            w.writeAttribute("x", "5");
            w.writeAttribute("y", "12");
            w.writeStartElement("title");
            w.writeCharacters(bi.bamPath.toString());
            w.writeEndElement();
            w.writeCharacters(bi.sample);
            // text
            w.writeEndElement();
            // g
            w.writeEndElement();
            y += bi.getPixelHeight();
        }
        w.writeStartElement("g");
        w.writeComment("interval lines");
        for (int n = 0; n <= userIntervals.size(); n++) {
            w.writeEmptyElement("line");
            String x1 = n < userIntervals.size() ? String.valueOf(userIntervals.get(n).getPixelX1()) : String.valueOf(dim.width);
            w.writeAttribute("x1", x1);
            w.writeAttribute("y1", "0");
            w.writeAttribute("x2", x1);
            w.writeAttribute("y2", String.valueOf(dim.height));
        }
        // interval lines
        w.writeEndElement();
        // g
        w.writeEndElement();
        // svg
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        w.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcfReader);
        CloserUtil.close(w);
        CloserUtil.close(fout);
        CloserUtil.close(r);
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.bamInputs);
    }
}
Also used : Arrays(java.util.Arrays) XLINK(com.github.lindenb.jvarkit.util.ns.XLINK) Point2D(java.awt.geom.Point2D) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SVG(com.github.lindenb.jvarkit.util.svg.SVG) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) Dimension(java.awt.Dimension) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Genotype(htsjdk.variant.variantcontext.Genotype) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) HashMap(java.util.HashMap) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) SamReader(htsjdk.samtools.SamReader) File(java.io.File) Consumer(java.util.function.Consumer) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) SamReader(htsjdk.samtools.SamReader) Point2D(java.awt.geom.Point2D) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) VCFReader(htsjdk.variant.vcf.VCFReader) IntervalExtender(com.github.lindenb.jvarkit.samtools.util.IntervalExtender) List(java.util.List) ArrayList(java.util.ArrayList) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Path(java.nio.file.Path) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Genotype(htsjdk.variant.variantcontext.Genotype) Dimension(java.awt.Dimension) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BufferedReader(java.io.BufferedReader)

Example 78 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class Biostar404363 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader in = null;
    SAMFileWriter out = null;
    final IntervalTreeMap<PosToChange> intervalTreeMap = new IntervalTreeMap<>();
    try {
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.faidx != null) {
            srf.referenceSequence(this.faidx);
            writingBamArgs.setReferencePath(this.faidx);
        }
        final String input = oneFileOrNull(args);
        if (input == null) {
            in = srf.open(SamInputResource.of(stdin()));
        } else {
            in = srf.open(SamInputResource.of(input));
        }
        final SAMFileHeader header = in.getFileHeader();
        final SAMProgramRecord prg = header.createProgramRecord();
        prg.setCommandLine(this.getProgramCommandLine());
        prg.setProgramName(this.getProgramName());
        prg.setProgramVersion(this.getGitHash());
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
        try (VCFReader vcfReader = VCFReaderFactory.makeDefault().open(this.vcfPath, false)) {
            try (CloseableIterator<VariantContext> iter = vcfReader.iterator()) {
                while (iter.hasNext()) {
                    final VariantContext ctx = iter.next();
                    final List<Allele> alts = ctx.getAlternateAlleles();
                    if (alts.size() != 1) {
                        LOG.error("Expected one ALT allele in " + ctx);
                        return -1;
                    }
                    final Allele alt = alts.get(0);
                    if (alt.isSymbolic() || alt.length() != 1 || !AcidNucleics.isATGCN(alt)) {
                        LOG.error("Bad ALT allele in " + ctx);
                        return -1;
                    }
                    final String theContig = contigNameConverter.apply(ctx.getContig());
                    if (StringUtils.isBlank(theContig))
                        throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
                    final PosToChange pos2change = new PosToChange(theContig, ctx.getStart());
                    pos2change.base = (byte) Character.toUpperCase(alt.getBases()[0]);
                    if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
                        pos2change.proba = ctx.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 1.0);
                    }
                    final Interval key = new Interval(pos2change);
                    if (intervalTreeMap.containsKey(key)) {
                        LOG.error("Duplicate key " + key + " for " + ctx);
                        return -1;
                    }
                    intervalTreeMap.put(key, pos2change);
                }
            }
        }
        JVarkitVersion.getInstance().addMetaData(this, header);
        out = this.writingBamArgs.openSamWriter(this.outputFile, header, true);
        final SAMRecordIterator iter = in.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            final byte[] bases = rec.getReadBases();
            if (rec.getReadUnmappedFlag() || bases == SAMRecord.NULL_SEQUENCE) {
                out.addAlignment(rec);
                continue;
            }
            final List<PosToChange> changes = intervalTreeMap.getOverlapping(new SimpleInterval(rec.getContig(), rec.getUnclippedStart(), rec.getUnclippedEnd())).stream().collect(Collectors.toList());
            if (changes.isEmpty()) {
                out.addAlignment(rec);
                continue;
            }
            int NM = 0;
            if (rec.hasAttribute("NM"))
                NM = rec.getIntegerAttribute("NM");
            int readpos = 0;
            int ref = rec.getUnclippedStart();
            boolean changed = false;
            final Cigar cigar = rec.getCigar();
            for (final CigarElement ce : cigar) {
                final CigarOperator op = ce.getOperator();
                if ((op.equals(CigarOperator.S) && !this.ignore_clip) || (op.consumesReadBases() && op.consumesReferenceBases())) {
                    for (int i = 0; i < ce.getLength(); i++) {
                        final int the_pos = ref + i;
                        final byte the_base = bases[readpos + i];
                        final PosToChange pos2change = changes.stream().filter(P -> P.getPosition() == the_pos).filter(P -> P.base != the_base).filter(P -> Math.random() <= P.proba).findFirst().orElse(null);
                        if (pos2change == null)
                            continue;
                        bases[readpos + i] = pos2change.base;
                        if (op.isAlignment())
                            NM++;
                        changed = true;
                    }
                }
                if (op.equals(CigarOperator.S) || op.consumesReadBases()) {
                    readpos += ce.getLength();
                }
                if (op.isClipping() || op.consumesReferenceBases()) {
                    ref += ce.getLength();
                }
            }
            if (changed) {
                rec.setReadBases(bases);
                if (!keep_original_nm)
                    rec.setAttribute("NM", NM);
                rec.setAttribute("PG", prg.getId());
            }
            out.addAlignment(rec);
        }
        in.close();
        in = null;
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(out);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) Interval(htsjdk.samtools.util.Interval) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) SimplePosition(com.github.lindenb.jvarkit.samtools.util.SimplePosition) List(java.util.List) SamInputResource(htsjdk.samtools.SamInputResource) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) VCFReader(htsjdk.variant.vcf.VCFReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Example 79 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class Biostar322664 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.use_bam_index && !this.input_is_not_sorted_on_queryname) {
        LOG.error("Cannot use option -index without option -nm");
        return -1;
    }
    if (this.both_pair_mode && this.input_is_not_sorted_on_queryname) {
        LOG.error("Cannot use option -and without sorting on queryname");
        return -1;
    }
    /* will be used as a SAMRecord attribute 'X'+meta_char containing the data about the variants */
    final char meta_char;
    if (this.meta_tag_str != null) {
        if (this.meta_tag_str.length() != 1) {
            LOG.error("Meta tag should be only one character, got " + meta_tag_str);
            return -1;
        }
        meta_char = this.meta_tag_str.charAt(0);
    } else {
        meta_char = '\0';
    }
    if (this.output_all && meta_char == '\0') {
        LOG.error("Cannot output all if not meta char -X defined");
        return -1;
    }
    final IntervalTreeMap<VariantContext> variantMap = new IntervalTreeMap<>();
    VCFReader vcfFileReader = null;
    CloseableIterator<VariantContext> viter = null;
    SamReader samReader = null;
    SAMFileWriter samFileWriter = null;
    try {
        LOG.info("reading VCF " + this.vcfFile + " in memory...");
        vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, false);
        viter = vcfFileReader.iterator();
        viter.stream().filter(V -> V.isVariant() && V.getReference().length() == 1 && V.getAlternateAlleles().stream().anyMatch(A -> A.length() == 1)).map(V -> new VariantContextBuilder(V).attributes(Collections.emptyMap()).noID().unfiltered().noGenotypes().make()).forEach(V -> variantMap.put(new Interval(V.getContig(), V.getStart(), V.getEnd()), V));
        CloserUtil.close(viter);
        viter = null;
        vcfFileReader.close();
        vcfFileReader = null;
        LOG.info("Done Reading: " + this.vcfFile);
        samReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samReader.getFileHeader();
        if (!this.input_is_not_sorted_on_queryname && header.getSortOrder() != SAMFileHeader.SortOrder.queryname) {
            LOG.error("Expected SAM input to be sorted on " + SAMFileHeader.SortOrder.queryname + " but got " + header.getSortOrder() + " See the options to work without 'mate'.");
            return -1;
        }
        samFileWriter = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final CloseableIterator<SAMRecord> iter;
        if (!use_bam_index) {
            iter = samReader.iterator();
        } else {
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict == null)
                throw new JvarkitException.BamDictionaryMissing("input");
            final List<QueryInterval> intervals = new ArrayList<>();
            for (final VariantContext ctx : variantMap.values()) {
                int tid = dict.getSequenceIndex(ctx.getContig());
                if (tid < 0)
                    throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
                intervals.add(new QueryInterval(tid, ctx.getStart(), ctx.getEnd()));
            }
            QueryInterval[] intervalArray = intervals.toArray(new QueryInterval[intervals.size()]);
            intervalArray = QueryInterval.optimizeIntervals(intervalArray);
            iter = samReader.query(intervalArray, false);
        }
        Iterator<List<SAMRecord>> eq_range = null;
        if (this.input_is_not_sorted_on_queryname) {
            eq_range = new AbstractIterator<List<SAMRecord>>() {

                @Override
                protected List<SAMRecord> advance() {
                    return iter.hasNext() ? Collections.singletonList(iter.next()) : null;
                }
            };
        } else {
            eq_range = new EqualRangeIterator<>(iter, (R1, R2) -> R1.getReadName().compareTo(R2.getReadName()));
        }
        while (eq_range.hasNext()) {
            final List<SAMRecord> array = eq_range.next();
            final boolean[] carry_variant = new boolean[array.size()];
            Arrays.fill(carry_variant, false);
            for (int record_index = 0; record_index < array.size(); ++record_index) {
                final SAMRecord rec = array.get(record_index);
                // if(debug==true) LOG.info("got read! "+rec+" array.size="+array.size());
                if (rec.getReadUnmappedFlag()) {
                    continue;
                }
                final Set<String> meta_attribute = (meta_char == '\0' ? null : new HashSet<>());
                final Collection<VariantContext> variants = variantMap.getOverlapping(rec);
                if (variants.isEmpty()) {
                    continue;
                }
                final Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.isEmpty())
                    continue;
                final byte[] bases = rec.getReadBases();
                if (bases == null || bases.length == 0 || bases == SAMRecord.NULL_SEQUENCE)
                    continue;
                for (final VariantContext ctx : variants) {
                    if (ctx.getReference().length() != 1)
                        continue;
                    for (final Allele alt : ctx.getAlternateAlleles()) {
                        if (alt.isNoCall() || alt.isSymbolic())
                            continue;
                        if (alt.length() != 1)
                            continue;
                        final char altbase = (char) Character.toUpperCase(alt.getBases()[0]);
                        int ref1 = rec.getUnclippedStart();
                        int readpos = 0;
                        for (final CigarElement ce : cigar) {
                            if (ref1 > ctx.getEnd()) {
                                break;
                            }
                            final CigarOperator op = ce.getOperator();
                            switch(op) {
                                case P:
                                    break;
                                case S:
                                    {
                                        readpos += ce.getLength();
                                        ref1 += ce.getLength();
                                        break;
                                    }
                                case H:
                                    {
                                        ref1 += ce.getLength();
                                        break;
                                    }
                                case I:
                                    readpos += ce.getLength();
                                    break;
                                case D:
                                case N:
                                    ref1 += ce.getLength();
                                    break;
                                case M:
                                case EQ:
                                case X:
                                    {
                                        for (int x = 0; x < ce.getLength(); ++x) {
                                            final int rp2 = readpos + x;
                                            if (rp2 >= 0 && rp2 < bases.length && ref1 + x == ctx.getStart()) {
                                                final char readbase = Character.toUpperCase((char) bases[rp2]);
                                                if (readbase == altbase) {
                                                    // if(debug) LOG.debug(rec.getReadName()+" read["+rp2+"]="+readbase+" ref["+ctx.getStart()+"]="+altbase);
                                                    carry_variant[record_index] = true;
                                                    if (meta_attribute != null) {
                                                        meta_attribute.add(ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString() + "|" + ctx.getAlternateAlleles().stream().filter(A -> A.length() == 1).map(A -> A.getDisplayString()).collect(Collectors.joining(",")));
                                                    }
                                                    break;
                                                }
                                            }
                                        }
                                        readpos += ce.getLength();
                                        ref1 += ce.getLength();
                                        break;
                                    }
                                default:
                                    throw new IllegalStateException("bad cigar operator " + op);
                            }
                        }
                    }
                    // end of cigar loop
                    if (meta_attribute != null && !meta_attribute.isEmpty()) {
                        rec.setAttribute("X" + meta_char, String.join(";", meta_attribute));
                    }
                }
            // end of loop over variants
            }
            // end of for(Samrecord in array)
            boolean any_match = false;
            for (boolean m : carry_variant) {
                if (m) {
                    any_match = true;
                    break;
                }
            }
            if (any_match && this.both_pair_mode) {
                // reset
                any_match = false;
                for (int x = 0; x + 1 < array.size() && !any_match; ++x) {
                    if (!carry_variant[x])
                        continue;
                    final SAMRecord r1 = array.get(x);
                    if (!r1.getReadPairedFlag())
                        continue;
                    for (int y = x + 1; y < array.size() && !any_match; ++y) {
                        if (!carry_variant[y])
                            continue;
                        // check r2 is mate of r1
                        final SAMRecord r2 = array.get(x);
                        if (!r2.getReadPairedFlag())
                            continue;
                        if (r1.getFirstOfPairFlag() && r2.getFirstOfPairFlag())
                            continue;
                        if (!r1.getFirstOfPairFlag() && !r2.getFirstOfPairFlag())
                            continue;
                        if (r1.getAlignmentStart() != r2.getMateAlignmentStart())
                            continue;
                        if (r2.getAlignmentStart() != r1.getMateAlignmentStart())
                            continue;
                        if (!r1.getReferenceName().equals(r2.getMateReferenceName()))
                            continue;
                        any_match = true;
                    }
                }
            }
            if (any_match || this.output_all) {
                for (final SAMRecord rec : array) samFileWriter.addAlignment(rec);
            }
        }
        CloserUtil.close(eq_range);
        iter.close();
        samFileWriter.close();
        samFileWriter = null;
        samReader.close();
        samReader = null;
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samReader);
        CloserUtil.close(samFileWriter);
        CloserUtil.close(vcfFileReader);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) AbstractIterator(htsjdk.samtools.util.AbstractIterator) Iterator(java.util.Iterator) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) IOException(java.io.IOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) QueryInterval(htsjdk.samtools.QueryInterval) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VCFReader(htsjdk.variant.vcf.VCFReader) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet) SAMFileWriter(htsjdk.samtools.SAMFileWriter) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 80 with CloseableIterator

use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.

the class VcfGnomadExomeVsGenome method isVariantIn.

boolean isVariantIn(final VariantContext ctx, final BufferedVCFReader reader) {
    final String ctg = this.ctgNameConverter.apply(ctx.getContig());
    if (StringUtils.isBlank(ctg))
        return false;
    try (CloseableIterator<VariantContext> iterE = reader.query(new SimpleInterval(ctg, ctx.getStart(), ctx.getEnd()))) {
        while (iterE.hasNext()) {
            final VariantContext ctx2 = iterE.next();
            if (ctx.getStart() != ctx2.getStart())
                continue;
            if (!ctx.getReference().equals(ctx2.getReference()))
                continue;
            final Set<Allele> alt2 = ctx2.getAlternateAlleles().stream().filter(A -> AcidNucleics.isATGC(A)).collect(Collectors.toSet());
            if (ctx.getAlternateAlleles().stream().anyMatch(A -> alt2.contains(A)))
                return true;
        }
    }
    return false;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) BufferedVCFReader(com.github.lindenb.jvarkit.variant.vcf.BufferedVCFReader) UnaryOperator(java.util.function.UnaryOperator) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Allele(htsjdk.variant.variantcontext.Allele) VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval)

Aggregations

CloseableIterator (htsjdk.samtools.util.CloseableIterator)103 List (java.util.List)86 Logger (com.github.lindenb.jvarkit.util.log.Logger)85 Parameter (com.beust.jcommander.Parameter)82 Program (com.github.lindenb.jvarkit.util.jcommander.Program)78 ArrayList (java.util.ArrayList)73 Collectors (java.util.stream.Collectors)71 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)69 Path (java.nio.file.Path)69 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)66 CloserUtil (htsjdk.samtools.util.CloserUtil)64 Set (java.util.Set)64 VCFHeader (htsjdk.variant.vcf.VCFHeader)59 VariantContext (htsjdk.variant.variantcontext.VariantContext)54 IOException (java.io.IOException)53 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)51 SAMRecord (htsjdk.samtools.SAMRecord)51 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)51 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)50 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)49