Search in sources :

Example 1 with GtfReader

use of com.github.lindenb.jvarkit.util.bio.structure.GtfReader in project jvarkit by lindenb.

the class VcfToSvg method doWork.

@Override
public int doWork(final List<String> args) {
    VCFReader r = null;
    OutputStream outputStream = null;
    XMLStreamWriter w = null;
    PrintWriter manifestW = null;
    ArchiveFactory archiveFactory = null;
    try {
        r = VCFReaderFactory.makeDefault().open(Paths.get(oneAndOnlyOneFile(args)), true);
        final VCFHeader header = r.getHeader();
        final List<String> samples = new ArrayList<>(header.getSampleNamesInOrder());
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
        intervalListProvider.dictionary(dict);
        /* read gtf if any */
        final IntervalTreeMap<Gene> geneMap = new IntervalTreeMap<>();
        if (this.gtfPath != null) {
            try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
                gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
                gtfReader.getAllGenes().stream().filter(G -> !this.removeNonCoding || G.getTranscripts().stream().anyMatch(T -> T.isCoding())).forEach(G -> geneMap.put(new Interval(G), G));
            }
        }
        archiveFactory = ArchiveFactory.open(this.outputPath);
        if (manifestFile != null) {
            manifestW = IOUtils.openPathForPrintWriter(this.manifestFile);
        } else {
            manifestW = new PrintWriter(new NullOuputStream());
        }
        final Pedigree pedigree;
        if (this.pedPath == null) {
            pedigree = PedigreeParser.empty();
        } else {
            pedigree = new PedigreeParser().parse(this.pedPath);
        }
        final Path tmpSvg = Files.createTempFile("vcf.", ".svg");
        final XMLOutputFactory xof = XMLOutputFactory.newInstance();
        for (final Locatable interval : intervalListProvider.dictionary(dict).stream().collect(Collectors.toList())) {
            final List<VariantContext> variants = r.query(interval).stream().filter(V -> this.variantFILTEREDOpacity > 0 || !V.isFiltered()).filter(V -> this.variantIndelOpacity > 0 || !V.isIndel()).collect(Collectors.toCollection(ArrayList::new));
            if (variants.isEmpty())
                continue;
            final List<Transcript> transcripts = geneMap.getOverlapping(interval).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> !this.removeNonCoding || T.isCoding()).collect(Collectors.toList());
            variants.removeIf(V -> this.gtfPath != null && this.variantsInExonOnly && transcripts.stream().flatMap(T -> T.getExons().stream()).noneMatch(EX -> EX.overlaps(V)));
            if (variants.isEmpty())
                continue;
            final String geneId = transcripts.stream().map(T -> T.getGene().getId()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
            final String geneName = transcripts.stream().map(T -> T.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(HtsCollectors.oneAndOnlyOne()).orElse(null);
            outputStream = IOUtils.openPathForWriting(tmpSvg);
            w = xof.createXMLStreamWriter(outputStream);
            double featureHeight = 10;
            double TRANSCRIPT_HEIGHT = featureHeight;
            final int all_genotypes_width = variants.size() * this.genotype_width;
            final int drawinAreaWidth = Math.max(all_genotypes_width, 1000);
            final int interline_weight = 6;
            final int margin_top = 10;
            final int margin_bottom = 10;
            final int margin_right = 100;
            final int margin_left = 100;
            w.writeStartDocument("UTF-8", "1.0");
            w.writeStartElement("svg");
            w.writeDefaultNamespace(SVG.NS);
            w.writeNamespace("xlink", XLINK.NS);
            w.writeAttribute("version", "1.1");
            w.writeAttribute("width", String.valueOf(margin_right + margin_right + drawinAreaWidth));
            w.writeAttribute("height", String.valueOf(margin_top + margin_bottom + transcripts.size() * TRANSCRIPT_HEIGHT + interline_weight * featureHeight + samples.size() * this.genotype_width));
            title(w, interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
            w.writeStartElement("desc");
            w.writeCharacters("generated with " + getProgramName() + "\n" + "Author: Pierre Lindenbaum PhD. @yokofakun .");
            w.writeEndElement();
            // defs
            w.writeStartElement("defs");
            // genotypes
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HOM_REF);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:lime;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.NO_CALL);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:silver;stroke;gray;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HOM_VAR);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:crimson;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.MIXED);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:pink;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.UNAVAILABLE);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:gray;stroke;none;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(this.genotype_width));
            w.writeAttribute("height", String.valueOf(this.genotype_width));
            w.writeEndElement();
            w.writeStartElement("g");
            // 
            w.writeAttribute("id", "g_" + GenotypeType.HET);
            w.writeEmptyElement("rect");
            w.writeAttribute("style", "fill:lime;stroke;black;");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", "0");
            w.writeAttribute("width", String.valueOf(genotype_width));
            w.writeAttribute("height", String.valueOf(genotype_width));
            w.writeEmptyElement("polygon");
            w.writeAttribute("style", "fill:crimson;stroke;black;");
            w.writeAttribute("points", "0,0 " + genotype_width + ",0 0," + genotype_width + " 0,0");
            w.writeEndElement();
            // strand
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandF");
            w.writeAttribute("points", "-5,-5 0,0 -5,5");
            w.writeEmptyElement("polyline");
            w.writeAttribute("id", "strandR");
            w.writeAttribute("points", "5,-5 0,0 5,5");
            // gradients
            w.writeStartElement("linearGradient");
            w.writeAttribute("id", "grad01");
            w.writeAttribute("x1", "50%");
            w.writeAttribute("x2", "50%");
            w.writeAttribute("y1", "0%");
            w.writeAttribute("y2", "100%");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "0%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "50%");
            w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "100%");
            w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
            w.writeEndElement();
            // defs
            w.writeEndElement();
            w.writeStartElement("style");
            w.writeCharacters("svg {fill:none; stroke:black;}\n" + "text {fill:black;stroke:none;font-size:" + (featureHeight / 1.5) + "px;}\n" + ".ruler-label { stroke:red;}\n" + ".frame { stroke:black;fill:none;}\n" + ".kgexon {fill:url(#grad01);stroke:black;}\n" + ".gcpercent {fill:url(#grad02);stroke:black;}" + ".coverage {fill:url(#grad03);stroke:black;}" + ".kgcds {fill:yellow;stroke:black;opacity:0.7;}\n" + ".variant{stroke:none;fill:red;opacity:0.2;}\n" + ".xaxis{stroke:gray;fill:none;opacity:0.2;}\n" + ".postick{font-size:9px;stroke:black;stroke-width:1;}");
            // style
            w.writeEndElement();
            final IntFunction<Integer> trim = t -> Math.max(interval.getStart(), Math.min(interval.getEnd(), t));
            final IntFunction<Double> baseToPixel = t -> margin_left + drawinAreaWidth * (t - (double) interval.getStart()) / ((double) interval.getLengthOnReference());
            final IntFunction<Double> variantIndexToPixel = idx -> {
                final double variant_width = drawinAreaWidth / (double) variants.size();
                final double midx = variant_width * idx + variant_width / 2.0;
                return margin_left + midx - genotype_width / 2.0;
            };
            final Function<VariantContext, String> variantTitle = V -> (V.getContig().startsWith("chr") ? V.getContig().substring(3) : V.getContig()) + ":" + V.getStart() + " " + V.getReference().getDisplayString();
            /**
             * title
             */
            double y = 0;
            w.writeStartElement("text");
            w.writeAttribute("x", "0");
            w.writeAttribute("y", String.valueOf(featureHeight));
            w.writeCharacters(interval.toString());
            w.writeEndElement();
            y += featureHeight;
            for (final Transcript g : transcripts) {
                int cdsHeigh = 5;
                double exonHeight = TRANSCRIPT_HEIGHT - 5;
                double midY = TRANSCRIPT_HEIGHT / 2;
                w.writeStartElement("g");
                w.writeAttribute("transform", "translate(0," + y + ")");
                title(w, g.getId());
                w.writeStartElement("text");
                w.writeAttribute("x", String.valueOf(margin_left - 10));
                w.writeAttribute("y", String.valueOf(featureHeight));
                w.writeAttribute("style", "text-anchor:end;");
                w.writeCharacters(g.getId());
                w.writeEndElement();
                /* transcript line */
                w.writeEmptyElement("line");
                w.writeAttribute("class", "kgtr");
                w.writeAttribute("x1", String.valueOf(baseToPixel.apply(trim.apply(g.getTxStart()))));
                w.writeAttribute("y1", String.valueOf(midY));
                w.writeAttribute("x2", String.valueOf(baseToPixel.apply(trim.apply(g.getTxEnd()))));
                w.writeAttribute("y2", String.valueOf(midY));
                /* strand symbols */
                for (double pixX = 0; pixX < drawinAreaWidth; pixX += 30) {
                    double pos0 = interval.getStart() + (pixX / (double) drawinAreaWidth) * interval.getLengthOnReference();
                    if (pos0 + 1 < g.getTxStart())
                        continue;
                    if (pos0 > g.getTxEnd())
                        break;
                    w.writeEmptyElement("use");
                    w.writeAttribute("class", "kgstrand");
                    w.writeAttribute("xlink", XLINK.NS, "href", "#strand" + (g.isPositiveStrand() ? "F" : "R"));
                    w.writeAttribute("x", String.valueOf(margin_left + pixX));
                    w.writeAttribute("y", String.valueOf(midY));
                }
                /* exons */
                for (final Exon exon : g.getExons()) {
                    if (exon.getStart() + 1 >= interval.getEnd())
                        continue;
                    if (exon.getEnd() <= interval.getStart())
                        continue;
                    w.writeStartElement("rect");
                    w.writeAttribute("class", "kgexon");
                    w.writeAttribute("x", String.valueOf(baseToPixel.apply(trim.apply(exon.getStart()))));
                    w.writeAttribute("y", String.valueOf(midY - exonHeight / 2));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply(trim.apply(exon.getEnd())) - baseToPixel.apply((trim.apply(exon.getStart())))));
                    w.writeAttribute("height", String.valueOf(exonHeight));
                    title(w, exon.getName());
                    w.writeEndElement();
                }
                /* coding line */
                if (!g.isNonCoding() && g.hasCodonStartDefined() && g.hasCodonStopDefined()) {
                    final double codonx1 = baseToPixel.apply(trim.apply(g.getLeftmostCodon().get().getStart()));
                    final double codonx2 = baseToPixel.apply(trim.apply(g.getRightmostCodon().get().getEnd()));
                    w.writeEmptyElement("rect");
                    w.writeAttribute("class", "kgcds");
                    w.writeAttribute("x", String.valueOf(codonx1));
                    w.writeAttribute("y", String.valueOf(midY - cdsHeigh / 4.0));
                    w.writeAttribute("width", String.valueOf(baseToPixel.apply((int) (codonx2 - codonx1))));
                    w.writeAttribute("height", String.valueOf(cdsHeigh / 2.0));
                }
                // String label=String.format("%15s", g.getName());
                // w.writeEmptyElement("path");
                // double fontHeight=Math.min(10,0.8*TRANSCRIPT_HEIGHT);
                // w.writeAttribute("d",this.hershey.svgPath(label,-insets.left,midY-fontHeight/2,insets.left*0.9,fontHeight));
                w.writeEndElement();
                w.writeCharacters("\n");
                y += featureHeight;
            }
            /* draw lines to variants */
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                double x1 = baseToPixel.apply(vc.getStart());
                double x2 = baseToPixel.apply(vc.getEnd());
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("polygon");
                w.writeAttribute("style", "fill:" + (vidx % 2 == 0 ? "ghostwhite" : "lavender") + ";stroke:black;opacity:0.6;stroke-width:0.5;");
                w.writeAttribute("points", "" + x1 + "," + (y - featureHeight / 2.0) + " " + x2 + "," + (y - featureHeight / 2.0) + " " + variantIndexToPixel.apply(vidx) + "," + y2 + " " + (variantIndexToPixel.apply(vidx) + this.genotype_width) + "," + y2);
                title(w, variantTitle.apply(vc));
                w.writeEndElement();
            }
            for (int vidx = 0; vidx < variants.size(); ++vidx) {
                final VariantContext vc = variants.get(vidx);
                final double y2 = y + featureHeight * interline_weight;
                w.writeStartElement("text");
                w.writeAttribute("transform", "translate(" + (String.valueOf(variantIndexToPixel.apply(vidx) + genotype_width / 2.0)) + "," + String.valueOf(y2 - 5) + ") " + "rotate(-45)");
                w.writeAttribute("x", "0");
                w.writeAttribute("y", "0");
                w.writeAttribute("class", "postick");
                w.writeCharacters(variantTitle.apply(vc));
                w.writeEndElement();
                w.writeCharacters("\n");
            }
            y += featureHeight * interline_weight;
            w.writeStartElement("g");
            /* step 0: affected, 1: unaffected, 2: others */
            for (int step = 0; step < 3; ++step) {
                for (final String sample : samples) {
                    final Sample individual = pedigree.getSampleById(sample);
                    if (step == 0 && (individual == null || !individual.isAffected()))
                        continue;
                    if (step == 1 && (individual == null || !individual.isUnaffected()))
                        continue;
                    if (step == 2 && individual != null && individual.isStatusSet())
                        continue;
                    // sample
                    w.writeStartElement("g");
                    switch(step) {
                        case 0:
                            w.writeAttribute("style", "hue-rotate(195deg);");
                            break;
                        case 1:
                            w.writeAttribute("style", "hue-rotate(45deg);");
                            break;
                        default:
                            break;
                    }
                    for (int vidx = 0; vidx < variants.size(); ++vidx) {
                        final VariantContext vc = variants.get(vidx);
                        final Genotype g = vc.getGenotype(sample);
                        double opacity = 1.0;
                        if (vc.isIndel())
                            opacity *= this.variantIndelOpacity;
                        if (vc.isFiltered())
                            opacity *= this.variantFILTEREDOpacity;
                        if (opacity > 1)
                            opacity = 1;
                        if (opacity <= 0)
                            continue;
                        if (opacity < 1) {
                            w.writeStartElement("g");
                            w.writeAttribute("style", "opacity:" + opacity + ";");
                        }
                        w.writeEmptyElement("use");
                        w.writeAttribute("x", String.valueOf(variantIndexToPixel.apply(vidx)));
                        w.writeAttribute("y", String.valueOf(y));
                        w.writeAttribute("xlink", XLINK.NS, "href", "#g_" + g.getType());
                        if (opacity < 1) {
                            w.writeEndElement();
                        }
                    }
                    w.writeCharacters("\n");
                    w.writeStartElement("text");
                    w.writeAttribute("x", String.valueOf(margin_left - 10));
                    w.writeAttribute("y", String.valueOf(y + this.genotype_width / 2.0));
                    w.writeAttribute("style", "text-anchor:end;");
                    w.writeCharacters(sample);
                    // text
                    w.writeEndElement();
                    // g for sample
                    w.writeEndElement();
                    y += this.genotype_width;
                }
            }
            w.writeCharacters("\n");
            w.writeEndDocument();
            w.writeCharacters("\n");
            w.flush();
            w.close();
            final String md5 = StringUtils.md5(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
            final String filename = md5.substring(0, 2) + File.separatorChar + md5.substring(2) + File.separator + interval.getContig().replaceAll("[/\\:]", "_") + "_" + interval.getStart() + "_" + interval.getEnd() + (StringUtils.isBlank(geneName) ? "" : "." + geneName.replaceAll("[/\\:]", "")) + (StringUtils.isBlank(geneId) ? "" : "." + geneId.replaceAll("[/\\:]", "")) + ".svg";
            OutputStream os = archiveFactory.openOuputStream(filename);
            IOUtils.copyTo(tmpSvg, os);
            os.flush();
            os.close();
            Files.delete(tmpSvg);
            manifestW.print(interval.getContig());
            manifestW.print('\t');
            manifestW.print(interval.getStart() - 1);
            manifestW.print('\t');
            manifestW.print(interval.getEnd());
            manifestW.print('\t');
            manifestW.print(transcripts.stream().map(G -> G.getGene().getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
            manifestW.print('\t');
            manifestW.print(transcripts.stream().map(G -> G.getGene().getGeneName()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
            manifestW.print('\t');
            manifestW.print(transcripts.stream().map(G -> G.getId()).collect(Collectors.toSet()).stream().collect(Collectors.joining(";")));
            manifestW.print('\t');
            manifestW.print((archiveFactory.isTarOrZipArchive() ? "" : this.outputPath.toString() + File.separator) + filename);
            manifestW.print('\t');
            manifestW.println(variants.size());
        }
        r.close();
        manifestW.flush();
        manifestW.close();
        manifestW = null;
        archiveFactory.close();
        archiveFactory = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(archiveFactory);
        CloserUtil.close(r);
        CloserUtil.close(outputStream);
        CloserUtil.close(manifestW);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) XLINK(com.github.lindenb.jvarkit.util.ns.XLINK) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) Function(java.util.function.Function) SVG(com.github.lindenb.jvarkit.util.svg.SVG) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Path(java.nio.file.Path) IntFunction(java.util.function.IntFunction) CloserUtil(htsjdk.samtools.util.CloserUtil) OutputStream(java.io.OutputStream) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Files(java.nio.file.Files) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) VCFReader(htsjdk.variant.vcf.VCFReader) Collectors(java.util.stream.Collectors) File(java.io.File) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) Paths(java.nio.file.Paths) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) Sample(com.github.lindenb.jvarkit.pedigree.Sample) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) VCFReader(htsjdk.variant.vcf.VCFReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval) Locatable(htsjdk.samtools.util.Locatable)

Example 2 with GtfReader

use of com.github.lindenb.jvarkit.util.bio.structure.GtfReader in project jvarkit by lindenb.

the class Biostar81455 method doWork.

@Override
public int doWork(final List<String> args) {
    BufferedReader r = null;
    String line;
    PrintStream out = null;
    final CharSplitter tab = CharSplitter.TAB;
    try {
        try (final GtfReader gtfReader = new GtfReader(this.gtfPath)) {
            gtfReader.getAllGenes().stream().forEach(G -> this.geneMap.put(new Interval(G), G));
        }
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
    final ContigNameConverter contigNameConverter = ContigNameConverter.fromIntervalTreeMap(this.geneMap);
    try {
        r = super.openBufferedReader(oneFileOrNull(args));
        out = openPathOrStdoutAsPrintStream(this.outputFile);
        while ((line = r.readLine()) != null) {
            if (line.startsWith("#")) {
                out.println(line);
                continue;
            }
            boolean found = false;
            final String[] tokens = tab.split(line);
            final int pos1 = Integer.parseInt(tokens[1]) + (this.one_based ? 0 : 1);
            final String convertCtg = contigNameConverter.apply(tokens[0]);
            if (convertCtg == null) {
                LOG.error("CANNOT FIND contig " + tokens[0]);
                out.println("##UNKNOWN CONTIG " + line);
                continue;
            }
            final SimplePosition position = new SimplePosition(convertCtg, pos1);
            final List<Transcript> transcripts = this.geneMap.getOverlapping(position).stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.overlaps(position)).collect(Collectors.toList());
            if (transcripts.isEmpty()) {
                LOG.info("no gene found in chromosome " + tokens[0] + " (check chrom prefix?)");
            } else {
                for (final Transcript kg : transcripts) {
                    Exon bestExon = null;
                    for (final Exon exon : kg.getExons()) {
                        if (bestExon == null || Math.abs(distance(position.getPosition(), exon)) < Math.abs(distance(position.getPosition(), bestExon))) {
                            bestExon = exon;
                        }
                    }
                    if (bestExon != null) {
                        out.print(line);
                        out.print("\t");
                        out.print(bestExon.getTranscript().getId());
                        out.print("\t");
                        out.print(kg.getStart() - 1);
                        out.print("\t");
                        out.print(kg.getEnd());
                        out.print("\t");
                        out.print(kg.getStrand());
                        out.print("\t");
                        out.print(bestExon.getName());
                        out.print("\t");
                        out.print(bestExon.getStart() - 1);
                        out.print("\t");
                        out.print(bestExon.getEnd());
                        out.print("\t");
                        out.print(distance(position.getPosition(), bestExon));
                        out.println();
                        found = true;
                    }
                }
            }
            if (!found) {
                out.println(line + "\tNULL");
            }
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(out);
    }
}
Also used : PrintStream(java.io.PrintStream) Locatable(htsjdk.samtools.util.Locatable) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) Logger(com.github.lindenb.jvarkit.util.log.Logger) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) Collectors(java.util.stream.Collectors) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) SimplePosition(com.github.lindenb.jvarkit.samtools.util.SimplePosition) Interval(htsjdk.samtools.util.Interval) List(java.util.List) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) BufferedReader(java.io.BufferedReader) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintStream(java.io.PrintStream) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) BufferedReader(java.io.BufferedReader) SimplePosition(com.github.lindenb.jvarkit.samtools.util.SimplePosition) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Interval(htsjdk.samtools.util.Interval)

Example 3 with GtfReader

use of com.github.lindenb.jvarkit.util.bio.structure.GtfReader in project jvarkit by lindenb.

the class SamFindClippedRegions method doWork.

@Override
public int doWork(final List<String> args) {
    final int bad_mapq = 30;
    try {
        if (this.min_clip_depth > this.min_depth) {
            LOG.error("this.min_clip_depth>this.min_depth");
            return -1;
        }
        if (this.fraction < 0 || this.fraction > 1.0) {
            LOG.error("bad ratio: " + fraction);
            return -1;
        }
        final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceFai);
        final List<Path> inputs = IOUtils.unrollPaths(args);
        if (inputs.isEmpty()) {
            LOG.error("input is missing");
            return -1;
        }
        final IntervalTreeMap<Interval> intronMap = new IntervalTreeMap<>();
        if (this.gtfPath != null) {
            try (GtfReader gtfReader = new GtfReader(this.gtfPath)) {
                gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
                gtfReader.getAllGenes().stream().flatMap(G -> G.getTranscripts().stream()).filter(T -> T.hasIntron()).flatMap(T -> T.getIntrons().stream()).map(I -> new Interval(I.getContig(), I.getStart(), I.getEnd(), I.isNegativeStrand(), I.getTranscript().getId())).forEach(I -> intronMap.put(I, I));
            }
        }
        final List<String> sample_list = new ArrayList<>();
        final QueryInterval[] queryIntervals;
        if (this.bedPath == null) {
            queryIntervals = null;
        } else {
            try (BedLineReader br = new BedLineReader(this.bedPath).setValidationStringency(ValidationStringency.LENIENT).setContigNameConverter(ContigNameConverter.fromOneDictionary(dict))) {
                queryIntervals = br.optimizeIntervals(dict);
            }
        }
        SortingCollection<Base> sortingCollection = SortingCollection.newInstance(Base.class, new BaseCodec(), (A, B) -> A.compare1(B), writingSortingCollection.getMaxRecordsInRam(), writingSortingCollection.getTmpPath());
        sortingCollection.setDestructiveIteration(true);
        final Predicate<Base> acceptBase = B -> {
            return B.clip() > 0;
        };
        final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.referenceFai);
        for (final Path path : inputs) {
            try (SamReader sr = srf.open(path)) {
                final SAMFileHeader header = sr.getFileHeader();
                SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(header));
                final String sample_name = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
                if (sample_list.contains(sample_name)) {
                    LOG.error("duplicate sample " + sample_name + " in " + path);
                    return -1;
                }
                final int sample_idx = sample_list.size();
                sample_list.add(sample_name);
                int prev_tid = -1;
                final SortedMap<Integer, Base> pos2base = new TreeMap<>();
                /* get base in pos2base, create it if needed */
                final BiFunction<Integer, Integer, Base> baseAt = (TID, POS) -> {
                    Base b = pos2base.get(POS);
                    if (b == null) {
                        b = new Base(sample_idx, TID, POS);
                        pos2base.put(POS, b);
                    }
                    return b;
                };
                try (CloseableIterator<SAMRecord> iter = queryIntervals == null ? sr.iterator() : sr.query(queryIntervals, false)) {
                    for (; ; ) {
                        final SAMRecord rec = (iter.hasNext() ? iter.next() : null);
                        if (rec != null && !SAMRecordDefaultFilter.accept(rec, this.min_mapq))
                            continue;
                        if (rec == null || prev_tid != rec.getReferenceIndex().intValue()) {
                            for (final Integer pos : pos2base.keySet()) {
                                final Base b = pos2base.get(pos);
                                if (acceptBase.test(b))
                                    sortingCollection.add(b);
                            }
                            if (rec == null)
                                break;
                            pos2base.clear();
                            prev_tid = rec.getReferenceIndex().intValue();
                        }
                        /* pop back previous bases */
                        for (Iterator<Integer> rpos = pos2base.keySet().iterator(); rpos.hasNext(); ) {
                            final Integer pos = rpos.next();
                            if (pos.intValue() + this.max_clip_length >= rec.getUnclippedStart())
                                break;
                            final Base b = pos2base.get(pos);
                            if (acceptBase.test(b))
                                sortingCollection.add(b);
                            rpos.remove();
                        }
                        final Cigar cigar = rec.getCigar();
                        int refPos = rec.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (op.consumesReferenceBases()) {
                                if (op.consumesReadBases()) {
                                    for (int x = 0; x < ce.getLength(); ++x) {
                                        final Base gt = baseAt.apply(prev_tid, refPos + x);
                                        gt.noClip++;
                                        gt.noClip_sum_mapq += rec.getMappingQuality();
                                    }
                                } else if (op.equals(CigarOperator.D) || op.equals(CigarOperator.N)) {
                                    baseAt.apply(prev_tid, refPos).del++;
                                    baseAt.apply(prev_tid, refPos + ce.getLength() - 1).del++;
                                }
                                refPos += ce.getLength();
                            }
                        }
                        CigarElement ce = cigar.getFirstCigarElement();
                        if (ce != null && ce.getOperator().isClipping() && ce.getLength() >= this.min_clip_operator_length) {
                            baseAt.apply(prev_tid, rec.getStart() - 1).leftClip++;
                        }
                        ce = cigar.getLastCigarElement();
                        if (ce != null && ce.getOperator().isClipping() && ce.getLength() >= this.min_clip_operator_length) {
                            baseAt.apply(prev_tid, rec.getEnd() + 1).rightClip++;
                        }
                    }
                }
                /* write last bases */
                for (final Integer pos : pos2base.keySet()) {
                    final Base b = pos2base.get(pos);
                    if (acceptBase.test(b))
                        sortingCollection.add(b);
                }
            }
        // end open reader
        }
        // end loop sam files
        sortingCollection.doneAdding();
        /* build VCF header */
        final Allele reference_allele = Allele.create("N", true);
        final Allele alt_allele = Allele.create("<CLIP>", false);
        final Set<VCFHeaderLine> vcfHeaderLines = new HashSet<>();
        VCFStandardHeaderLines.addStandardFormatLines(vcfHeaderLines, true, VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS);
        VCFStandardHeaderLines.addStandardInfoLines(vcfHeaderLines, true, VCFConstants.DEPTH_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_FREQUENCY_KEY);
        final VCFFormatHeaderLine leftClip = new VCFFormatHeaderLine("CL", 1, VCFHeaderLineType.Integer, "Left Clip");
        vcfHeaderLines.add(leftClip);
        final VCFFormatHeaderLine rightClip = new VCFFormatHeaderLine("RL", 1, VCFHeaderLineType.Integer, "Right Clip");
        vcfHeaderLines.add(rightClip);
        final VCFFormatHeaderLine totalCip = new VCFFormatHeaderLine("TL", 1, VCFHeaderLineType.Integer, "Total Clip");
        vcfHeaderLines.add(totalCip);
        final VCFFormatHeaderLine totalDel = new VCFFormatHeaderLine("DL", 1, VCFHeaderLineType.Integer, "Total Deletions");
        vcfHeaderLines.add(totalDel);
        final VCFFormatHeaderLine noClipMAPQ = new VCFFormatHeaderLine("MQ", 1, VCFHeaderLineType.Integer, "Average MAPQ for reads without clip at this position.");
        vcfHeaderLines.add(noClipMAPQ);
        final VCFInfoHeaderLine averageMAPQ = new VCFInfoHeaderLine("AVG_MAPQ", 1, VCFHeaderLineType.Integer, "Average MAPQ for called genotypes");
        vcfHeaderLines.add(averageMAPQ);
        final VCFInfoHeaderLine infoRetrogene = new VCFInfoHeaderLine("RETROGENE", 1, VCFHeaderLineType.String, "transcript name for Possible retrogene.");
        vcfHeaderLines.add(infoRetrogene);
        final VCFFilterHeaderLine filterRetrogene = new VCFFilterHeaderLine("POSSIBLE_RETROGENE", "Junction is a possible Retrogene.");
        vcfHeaderLines.add(filterRetrogene);
        final VCFFilterHeaderLine filterlowMapq = new VCFFilterHeaderLine("LOW_MAPQ", "Low average mapq (< " + bad_mapq + ")");
        vcfHeaderLines.add(filterlowMapq);
        final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample_list);
        vcfHeader.setSequenceDictionary(dict);
        JVarkitVersion.getInstance().addMetaData(this, vcfHeader);
        this.writingVcfConfig.dictionary(dict);
        try (VariantContextWriter w = this.writingVcfConfig.open(this.outputFile)) {
            w.writeHeader(vcfHeader);
            try (CloseableIterator<Base> r1 = sortingCollection.iterator()) {
                try (EqualRangeIterator<Base> r2 = new EqualRangeIterator<>(r1, (A, B) -> A.compare2(B))) {
                    while (r2.hasNext()) {
                        final List<Base> array = r2.next();
                        final Base first = array.get(0);
                        if (first.pos < 1)
                            continue;
                        // no clip
                        if (array.stream().mapToInt(G -> G.clip()).sum() == 0)
                            continue;
                        if (array.stream().allMatch(G -> G.clip() < min_clip_depth))
                            continue;
                        if (array.stream().allMatch(G -> G.dp() < min_depth))
                            continue;
                        if (array.stream().allMatch(G -> G.ratio() < fraction))
                            continue;
                        final VariantContextBuilder vcb = new VariantContextBuilder();
                        final String chrom = dict.getSequence(first.tid).getSequenceName();
                        vcb.chr(chrom);
                        vcb.start(first.pos);
                        vcb.stop(first.pos);
                        vcb.alleles(Arrays.asList(reference_allele, alt_allele));
                        vcb.attribute(VCFConstants.DEPTH_KEY, array.stream().mapToInt(G -> G.dp()).sum());
                        final List<Genotype> genotypes = new ArrayList<>(array.size());
                        int AC = 0;
                        int AN = 0;
                        int max_clip = 1;
                        double sum_mapq = 0.0;
                        int count_mapq = 0;
                        for (final Base gt : array) {
                            final GenotypeBuilder gb = new GenotypeBuilder(sample_list.get(gt.sample_idx));
                            if (gt.clip() == 0 && gt.noClip == 0) {
                                gb.alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                            } else if (gt.noClip == 0) {
                                gb.alleles(Arrays.asList(alt_allele, alt_allele));
                                AC += 2;
                                sum_mapq += gt.noClipMapq();
                                count_mapq++;
                                AN += 2;
                            } else if (gt.clip() == 0) {
                                gb.alleles(Arrays.asList(reference_allele, reference_allele));
                                AN += 2;
                            } else {
                                gb.alleles(Arrays.asList(reference_allele, alt_allele));
                                AC++;
                                sum_mapq += gt.noClipMapq();
                                count_mapq++;
                                AN += 2;
                            }
                            gb.DP(gt.dp());
                            gb.attribute(leftClip.getID(), gt.leftClip);
                            gb.attribute(rightClip.getID(), gt.rightClip);
                            gb.attribute(totalCip.getID(), gt.clip());
                            gb.attribute(totalDel.getID(), gt.del);
                            gb.attribute(noClipMAPQ.getID(), gt.noClipMapq());
                            gb.AD(new int[] { gt.noClip, gt.clip() });
                            genotypes.add(gb.make());
                            max_clip = Math.max(max_clip, gt.clip());
                        }
                        if (count_mapq > 0) {
                            final int avg_mapq = (int) (sum_mapq / count_mapq);
                            vcb.attribute(averageMAPQ.getID(), avg_mapq);
                            if (avg_mapq < bad_mapq)
                                vcb.filter(filterlowMapq.getID());
                        }
                        if (gtfPath != null) {
                            final Locatable bounds1 = new SimpleInterval(chrom, Math.max(1, first.pos - max_intron_distance), first.pos + max_intron_distance);
                            intronMap.getOverlapping(bounds1).stream().filter(I -> Math.abs(I.getStart() - first.pos) <= this.max_intron_distance || Math.abs(I.getEnd() - first.pos) <= this.max_intron_distance).map(I -> I.getName()).findFirst().ifPresent(transcript_id -> {
                                vcb.attribute(infoRetrogene.getID(), transcript_id);
                                vcb.filter(filterRetrogene.getID());
                            });
                            ;
                        }
                        vcb.log10PError(max_clip / -10.0);
                        vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, AC);
                        vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, AN);
                        if (AN > 0)
                            vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, AC / (float) AN);
                        vcb.genotypes(genotypes);
                        w.add(vcb.make());
                    }
                // end while r2
                }
            // end r2
            }
        // end r1
        }
        // end writer
        sortingCollection.cleanup();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) BiFunction(java.util.function.BiFunction) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Path(java.nio.file.Path) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) EOFException(java.io.EOFException) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SortedMap(java.util.SortedMap) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) Cigar(htsjdk.samtools.Cigar) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) ValidationStringency(htsjdk.samtools.ValidationStringency) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFConstants(htsjdk.variant.vcf.VCFConstants) Locatable(htsjdk.samtools.util.Locatable) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) TreeMap(java.util.TreeMap) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) QueryInterval(htsjdk.samtools.QueryInterval) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) CigarOperator(htsjdk.samtools.CigarOperator) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval) Locatable(htsjdk.samtools.util.Locatable) QueryInterval(htsjdk.samtools.QueryInterval) BedLineReader(com.github.lindenb.jvarkit.bed.BedLineReader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) TreeMap(java.util.TreeMap) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)

Example 4 with GtfReader

use of com.github.lindenb.jvarkit.util.bio.structure.GtfReader in project jvarkit by lindenb.

the class VcfStrechToSvg method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        this.afExtractor = new AFExtractorFactory().parseFieldExtractor(this.afExtractorFactoryStr);
        final String input = super.oneAndOnlyOneFile(args);
        if (!this.bamListPath.isEmpty()) {
            LOG.info("reading bam list");
            for (Path bamPath : IOUtils.unrollPaths(this.bamListPath)) {
                try (SamReader sr = openSamReader(bamPath)) {
                    final SAMFileHeader hdr = sr.getFileHeader();
                    for (final String sn : hdr.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).collect(Collectors.toSet())) {
                        if (this.sample2bam.containsKey(sn)) {
                            LOG.info("duplicate sample in bam " + bamPath + " and " + this.sample2bam.get(sn));
                            return -1;
                        }
                        this.sample2bam.put(sn, bamPath);
                    }
                }
            }
        }
        try (VCFReader r = VCFReaderFactory.makeDefault().open(input, true)) {
            final VCFHeader header = r.getHeader();
            this.afExtractor.validateHeader(header);
            final String searchFormat;
            switch(this.useFormat) {
                case PL:
                    searchFormat = VCFConstants.GENOTYPE_PL_KEY;
                    break;
                // through
                case AD:
                default:
                    searchFormat = VCFConstants.GENOTYPE_ALLELE_DEPTHS;
                    break;
            }
            if (header.getFormatHeaderLine(searchFormat) == null) {
                LOG.error("FORMAT/" + searchFormat + " undefined in " + input);
                return -1;
            }
            if (!header.hasGenotypingData()) {
                LOG.error("No genotype in input");
                return -1;
            }
            final SAMSequenceDictionary dict = header.getSequenceDictionary();
            if (dict != null && this.refFaidx != null) {
                SequenceUtil.assertSequenceDictionariesEqual(dict, SequenceDictionaryUtils.extractRequired(this.refFaidx));
            }
            if (this.gff3Path != null) {
                LOG.info("reading gtf" + this.gff3Path);
                try (GtfReader gtfReader = new GtfReader(this.gff3Path)) {
                    if (dict != null)
                        gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
                    gtfReader.getAllGenes().forEach(G -> {
                        this.all_genes.put(new Interval(G), G);
                    });
                }
            }
            try (BufferedReader br = IOUtils.openPathForBufferedReading(this.bedFile)) {
                try (PrintWriter manifest = (this.manifestPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestPath))) {
                    try (ArchiveFactory out = ArchiveFactory.open(this.outputFile)) {
                        final BedLineCodec codec = new BedLineCodec();
                        br.lines().map(L -> codec.decode(L)).filter(B -> B != null).forEach(B -> {
                            run(out, B, header, r, manifest);
                        });
                    }
                    manifest.flush();
                }
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Path(java.nio.file.Path) Transformer(javax.xml.transform.Transformer) Text(org.w3c.dom.Text) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) SVG(com.github.lindenb.jvarkit.util.svg.SVG) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Hyperlink(com.github.lindenb.jvarkit.net.Hyperlink) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) CoverageFactory(com.github.lindenb.jvarkit.samtools.CoverageFactory) Document(org.w3c.dom.Document) Map(java.util.Map) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) SAMException(htsjdk.samtools.SAMException) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) ToDoubleFunction(java.util.function.ToDoubleFunction) VariantContext(htsjdk.variant.variantcontext.VariantContext) GZIPOutputStream(java.util.zip.GZIPOutputStream) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Genotype(htsjdk.variant.variantcontext.Genotype) DOMSource(javax.xml.transform.dom.DOMSource) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SequenceUtil(htsjdk.samtools.util.SequenceUtil) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) OptionalDouble(java.util.OptionalDouble) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) Node(org.w3c.dom.Node) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) VCFConstants(htsjdk.variant.vcf.VCFConstants) OutputStream(java.io.OutputStream) Locatable(htsjdk.samtools.util.Locatable) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DecimalFormat(java.text.DecimalFormat) VCFReader(htsjdk.variant.vcf.VCFReader) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Element(org.w3c.dom.Element) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DynamicParameter(com.beust.jcommander.DynamicParameter) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SamReader(htsjdk.samtools.SamReader) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) VCFReader(htsjdk.variant.vcf.VCFReader) BufferedReader(java.io.BufferedReader) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) SAMFileHeader(htsjdk.samtools.SAMFileHeader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) Interval(htsjdk.samtools.util.Interval) PrintWriter(java.io.PrintWriter)

Example 5 with GtfReader

use of com.github.lindenb.jvarkit.util.bio.structure.GtfReader in project jvarkit by lindenb.

the class GtfUpstreamOrf method doWork.

@Override
public int doWork(final List<String> args) {
    GtfReader gtfReader = null;
    PrintWriter pw = null;
    try {
        this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidx);
        final SAMSequenceDictionary refDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
        this.refCtgNameConverter = ContigNameConverter.fromOneDictionary(refDict);
        final ContigDictComparator ctgDictComparator = new ContigDictComparator(refDict);
        final String input = oneFileOrNull(args);
        gtfReader = input == null ? new GtfReader(stdin()) : new GtfReader(input);
        gtfReader.setContigNameConverter(this.refCtgNameConverter);
        final List<Gene> genes = gtfReader.getAllGenes().stream().filter(G -> G.hasStrand()).sorted((A, B) -> {
            int i = ctgDictComparator.compare(A.getContig(), B.getContig());
            if (i != 0)
                return i;
            i = Integer.compare(A.getStart(), B.getStart());
            if (i != 0)
                return i;
            return Integer.compare(A.getEnd(), B.getEnd());
        }).collect(Collectors.toList());
        gtfReader.close();
        gtfReader = null;
        pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        for (final KozakSequence.Strength f : KozakSequence.Strength.values()) {
            pw.println("#kozak." + f.name() + "=" + kozakStrengthToScore(f));
        }
        final String gtfSource = getProgramName().toLowerCase();
        for (final SAMSequenceRecord ssr : refDict.getSequences()) {
            pw.println("##contig " + ssr.getSequenceName() + ": length:" + ssr.getSequenceLength());
        }
        pw.println("#" + gtfSource + ":" + JVarkitVersion.getInstance().toString());
        if (!StringUtils.isBlank(input)) {
            pw.println("#gtf:" + input);
        }
        final ProgressFactory.Watcher<Gene> progress = ProgressFactory.newInstance().dictionary(refDict).logger(LOG).build();
        for (final Gene gene : genes) {
            progress.apply(gene);
            /* new reference sequence */
            if (this.genomicSequence == null || !this.genomicSequence.getChrom().equals(gene.getContig())) {
                this.genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, gene.getContig());
            }
            final List<RNASequence> rnas = gene.getTranscripts().stream().filter(T -> T.isCoding()).filter(T -> T.hasStrand()).filter(T -> T.hasCodonStartDefined()).filter(T -> T.getTranscriptUTR5().isPresent()).map(T -> new RNASequence(T)).collect(Collectors.toList());
            if (rnas.isEmpty())
                continue;
            final Set<OpenReadingFrame> orfs = rnas.stream().flatMap(R -> R.getUpstreamOpenReadingFrames().stream()).collect(Collectors.toSet());
            if (orfs.isEmpty())
                continue;
            boolean gene_printed = false;
            for (final OpenReadingFrame uORF : orfs) {
                /* is there any other RNA containing this uORF ?*/
                if (rnas.stream().filter(other -> !other.getTranscript().getId().equals(uORF.mRNA.getTranscript().getId())).anyMatch(other -> {
                    // other must have atg in frame and ATG before the observed one
                    final int other_atg0 = other.getATG0InRNA();
                    if (uORF.in_rna_atg0 % 3 != other_atg0 % 3)
                        return false;
                    return uORF.in_rna_atg0 >= other_atg0;
                }))
                    continue;
                final String transcript_id = uORF.getTranscript().getId() + ".uorf" + (1 + uORF.in_rna_atg0);
                if (!gene_printed) {
                    gene_printed = true;
                    pw.print(gene.getContig());
                    pw.print("\t");
                    // source
                    pw.print(gtfSource);
                    pw.print("\t");
                    pw.print("gene");
                    pw.print("\t");
                    // start
                    pw.print(gene.getStart());
                    pw.print("\t");
                    // end
                    pw.print(gene.getEnd());
                    pw.print("\t");
                    // score
                    pw.print(".");
                    pw.print("\t");
                    // strand
                    pw.print(gene.getStrand());
                    pw.print("\t");
                    // phase
                    pw.print(".");
                    pw.print("\t");
                    pw.print(keyvalue("gene_id", gene.getId()));
                    pw.println();
                }
                // TRANSCRIPT
                final UTR utr_5_prime = uORF.getTranscript().getTranscriptUTR5().get();
                pw.print(gene.getContig());
                pw.print("\t");
                pw.print(gtfSource);
                pw.print("\t");
                pw.print("transcript");
                pw.print("\t");
                if (gene.isPositiveStrand()) {
                    pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_atg0] + 1);
                    pw.print("\t");
                    // pw.print(transcriptSequence.mrnaIndex0ToBase[uORF.in_rna_stop0]+1);
                    if (uORF.in_rna_stop0 == NPOS) {
                        pw.print(uORF.mRNA.getTranscript().getTxEnd());
                    } else {
                        pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_stop0] + 1);
                    }
                } else {
                    if (uORF.in_rna_stop0 == NPOS) {
                        pw.print(uORF.mRNA.getTranscript().getTxStart());
                    } else {
                        pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_stop0] + 1);
                    }
                    pw.print("\t");
                    pw.print(uORF.mRNA.mrnaIndex0ToGenomic0[uORF.in_rna_atg0] + 1);
                }
                pw.print("\t");
                // score
                pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
                pw.print("\t");
                // strand
                pw.print(gene.getStrand());
                pw.print("\t");
                // phase
                pw.print("0");
                pw.print("\t");
                pw.print(keyvalue("gene_id", gene.getId()));
                pw.print(keyvalue("transcript_id", transcript_id));
                pw.print(keyvalue("transcript_biotype", "uORF"));
                pw.print(keyvalue("kozak-seq", uORF.kozak.getString()));
                pw.print(keyvalue("kozak-strength", uORF.kozak.getStrength()));
                pw.print(keyvalue("translation", uORF.peptide));
                pw.print(keyvalue("uORF-atg-in-frame-with-transcript-atg", uORF.uorf_atg_in_frame));
                pw.print(keyvalue("utr", utr_5_prime.toString() + " " + utr_5_prime.getStart() + "-" + utr_5_prime.getEnd()));
                pw.println();
                // Exon
                for (final Exon exon : uORF.getTranscript().getExons()) {
                    pw.print(exon.getContig());
                    pw.print("\t");
                    pw.print(gtfSource);
                    pw.print("\t");
                    pw.print("exon");
                    pw.print("\t");
                    pw.print(exon.getStart());
                    pw.print("\t");
                    pw.print(exon.getEnd());
                    pw.print("\t");
                    // score
                    pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
                    pw.print("\t");
                    // strand
                    pw.print(exon.getStrand());
                    pw.print("\t");
                    // phase
                    pw.print(0);
                    pw.print("\t");
                    pw.print(keyvalue("gene_id", gene.getId()));
                    pw.print(keyvalue("transcript_id", transcript_id));
                    pw.println();
                }
                final List<Interval> startBlocks = uORF.mRNA.getCodonBlocks(uORF.in_rna_atg0, uORF.in_rna_atg0 + 1, uORF.in_rna_atg0 + 2);
                final List<Interval> stopBlocks = uORF.in_rna_stop0 != NPOS ? uORF.mRNA.getCodonBlocks(uORF.in_rna_stop0, uORF.in_rna_stop0 + 1, uORF.in_rna_stop0 + 2) : Collections.emptyList();
                // CDS
                if (!stopBlocks.isEmpty()) {
                    final int cdsStart = startBlocks.stream().mapToInt(B -> B.getStart()).min().orElseThrow(IllegalStateException::new);
                    final int cdsEnd = stopBlocks.stream().mapToInt(B -> B.getEnd()).max().orElseThrow(IllegalStateException::new);
                    for (final Exon exon : uORF.getTranscript().getExons()) {
                        if (exon.getEnd() < cdsStart)
                            continue;
                        if (exon.getStart() > cdsEnd)
                            break;
                        pw.print(exon.getContig());
                        pw.print("\t");
                        pw.print(gtfSource);
                        pw.print("\t");
                        pw.print("CDS");
                        pw.print("\t");
                        pw.print(Math.max(cdsStart, exon.getStart()));
                        pw.print("\t");
                        pw.print(Math.min(cdsEnd, exon.getEnd()));
                        pw.print("\t");
                        // score
                        pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
                        pw.print("\t");
                        // strand
                        pw.print(exon.getStrand());
                        pw.print("\t");
                        // phase
                        pw.print(uORF.getFrameAt(Math.max(cdsStart, exon.getStart())));
                        pw.print("\t");
                        pw.print(keyvalue("gene_id", gene.getId()));
                        pw.print(keyvalue("transcript_id", transcript_id));
                        pw.println();
                    }
                }
                // CODON START
                for (final Interval startc : startBlocks) {
                    PARANOID.assertLe(startc.getStart(), startc.getEnd());
                    pw.print(startc.getContig());
                    pw.print("\t");
                    pw.print(gtfSource);
                    pw.print("\t");
                    pw.print("start_codon");
                    pw.print("\t");
                    pw.print(startc.getStart());
                    pw.print("\t");
                    pw.print(startc.getEnd());
                    pw.print("\t");
                    // score
                    pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
                    pw.print("\t");
                    // strand
                    pw.print(gene.getStrand());
                    pw.print("\t");
                    // phase
                    pw.print(uORF.getFrameAt(startc.getStart()));
                    pw.print("\t");
                    pw.print(keyvalue("gene_id", gene.getId()));
                    pw.print(keyvalue("transcript_id", transcript_id));
                    pw.print(keyvalue("distance-mrna-atg", uORF.mRNA.getATG0InRNA() - uORF.in_rna_atg0));
                    pw.print(keyvalue("pos0-in-mrna", uORF.in_rna_atg0));
                    pw.print(keyvalue("spliced", startBlocks.size() > 1));
                    pw.println();
                }
                // CODON END
                for (final Interval stopc : stopBlocks) /* might be empty */
                {
                    PARANOID.assertLe(stopc.getStart(), stopc.getEnd());
                    pw.print(stopc.getContig());
                    pw.print("\t");
                    pw.print(gtfSource);
                    pw.print("\t");
                    pw.print("stop_codon");
                    pw.print("\t");
                    pw.print(stopc.getStart());
                    pw.print("\t");
                    pw.print(stopc.getEnd());
                    pw.print("\t");
                    // score
                    pw.print(kozakStrengthToScore(uORF.kozak.getStrength()));
                    pw.print("\t");
                    // strand
                    pw.print(gene.getStrand());
                    pw.print("\t");
                    // phase
                    pw.print(uORF.getFrameAt(stopc.getStart()));
                    pw.print("\t");
                    pw.print(keyvalue("gene_id", gene.getId()));
                    pw.print(keyvalue("transcript_id", transcript_id));
                    pw.print(keyvalue("spliced", stopBlocks.size() > 1));
                    pw.println();
                }
            }
        }
        progress.close();
        pw.flush();
        pw.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
    }
}
Also used : Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Transcript(com.github.lindenb.jvarkit.util.bio.structure.Transcript) AbstractCharSequence(com.github.lindenb.jvarkit.lang.AbstractCharSequence) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) HashMap(java.util.HashMap) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) List(java.util.List) KozakSequence(com.github.lindenb.jvarkit.util.bio.KozakSequence) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) Paranoid(com.github.lindenb.jvarkit.lang.Paranoid) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) UTR(com.github.lindenb.jvarkit.util.bio.structure.UTR) Exon(com.github.lindenb.jvarkit.util.bio.structure.Exon) Gene(com.github.lindenb.jvarkit.util.bio.structure.Gene) KozakSequence(com.github.lindenb.jvarkit.util.bio.KozakSequence) PrintWriter(java.io.PrintWriter) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) GtfReader(com.github.lindenb.jvarkit.util.bio.structure.GtfReader) Interval(htsjdk.samtools.util.Interval)

Aggregations

GtfReader (com.github.lindenb.jvarkit.util.bio.structure.GtfReader)23 Path (java.nio.file.Path)22 Parameter (com.beust.jcommander.Parameter)20 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)20 Program (com.github.lindenb.jvarkit.util.jcommander.Program)20 Logger (com.github.lindenb.jvarkit.util.log.Logger)20 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)20 List (java.util.List)20 Collectors (java.util.stream.Collectors)18 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)15 Interval (htsjdk.samtools.util.Interval)15 ArrayList (java.util.ArrayList)15 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)14 Transcript (com.github.lindenb.jvarkit.util.bio.structure.Transcript)14 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)14 CloserUtil (htsjdk.samtools.util.CloserUtil)14 IntervalTreeMap (htsjdk.samtools.util.IntervalTreeMap)13 Locatable (htsjdk.samtools.util.Locatable)13 Set (java.util.Set)13 VariantContext (htsjdk.variant.variantcontext.VariantContext)12