Search in sources :

Example 6 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class CytobandToSvg method doWork.

@Override
public int doWork(final List<String> args) {
    XMLStreamWriter w = null;
    FileOutputStream fout = null;
    try {
        final List<Cytoband> cytobands;
        if (StringUtil.isBlank(this.cytobandsUri) || this.cytobandsUri.equals("-")) {
            cytobands = parseCytobands(IOUtils.openStdinForBufferedReader());
        } else {
            cytobands = parseCytobands(IOUtils.openURIForBufferedReading(this.cytobandsUri));
        }
        cytobands.stream().map(C -> C.getContig()).collect(Collectors.toCollection(LinkedHashSet::new)).stream().map(C -> new Contig(cytobands.stream().filter(T -> T.getContig().equals(C)).sorted((A, B) -> Integer.compare(A.getStart(), B.getStart())).collect(Collectors.toList()))).forEach(C -> this.name2contig.put(C.getContig(), C));
        ;
        this.max_contig_length = this.name2contig.values().stream().mapToLong(C -> C.getSequenceLength()).max().orElse(0L);
        if (this.name2contig.isEmpty() || this.max_contig_length <= 0) {
            LOG.error("no contig found");
            return -1;
        }
        final Rectangle drawingArea = new Rectangle(0, 0, this.width, this.height);
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile == null) {
            w = xof.createXMLStreamWriter(stdout(), "UTF-8");
        } else {
            fout = new FileOutputStream(this.outputFile);
            w = xof.createXMLStreamWriter(fout, "UTF-8");
        }
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("svg");
        w.writeAttribute("width", format(drawingArea.width));
        w.writeAttribute("height", format(drawingArea.height));
        w.writeAttribute("version", "1.1");
        w.writeDefaultNamespace(SVG.NS);
        w.writeNamespace("xlink", XLINK.NS);
        w.writeStartElement("title");
        w.writeCharacters(StringUtil.isBlank(this.title) ? CytobandToSvg.class.getName() : this.title);
        w.writeEndElement();
        w.writeStartElement("description");
        w.writeCharacters("Cmd:" + getProgramCommandLine() + "\n");
        w.writeCharacters("Version:" + getVersion() + "\n");
        w.writeCharacters("Author: Pierre Lindenbaum\n");
        w.writeEndElement();
        w.writeStartElement("defs");
        w.writeStartElement("linearGradient");
        w.writeAttribute("id", "grad01");
        w.writeAttribute("x1", "0%");
        w.writeAttribute("x2", "100%");
        w.writeAttribute("y1", "50%");
        w.writeAttribute("y2", "50%");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "0%");
        w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "50%");
        w.writeAttribute("style", "stop-color:white;stop-opacity:0;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "75%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:0;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "100%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:0.9;");
        w.writeEndElement();
        w.writeStartElement("filter");
        w.writeAttribute("id", "filter01");
        w.writeEmptyElement("feGaussianBlur");
        w.writeAttribute("in", "SourceGraphic");
        w.writeAttribute("stdDeviation", "10");
        // filter
        w.writeEndElement();
        double x = drawingArea.getX();
        double contig_width = drawingArea.getWidth() / (double) this.name2contig.size();
        for (final Contig contig : this.name2contig.values()) {
            contig.bounds.x = x;
            contig.bounds.y = drawingArea.getY();
            contig.bounds.width = contig_width;
            contig.bounds.height = drawingArea.getHeight();
            contig.def(w);
            x += contig_width;
        }
        w.writeEndElement();
        w.writeStartElement("style");
        w.writeCharacters(".ctgborderp {fill:url(#grad01);stroke:green;}" + ".ctgborderq {fill:url(#grad01);stroke:green;}" + ".ctglabel {text-anchor:middle;stroke:none;fill:darkgrey;font: bold 10px Verdana, Helvetica, Arial, sans-serif;}" + ".cytoband {fill:silver;stroke:none;}" + ".bedlabel {stroke:red;fill:none;text-anchor:start;font: normal 7px Verdana, Helvetica, Arial, sans-serif;}" + ".maintitle {stroke:none;fill:darkgrey;text-anchor:middle;font: normal 12px Verdana, Helvetica, Arial, sans-serif;}" + ".ctgback {fill:gainsboro;stroke:none;filter:url(#filter01);}");
        // style
        w.writeEndElement();
        w.writeStartElement("g");
        w.writeAttribute("id", "body");
        /* title */
        if (!StringUtil.isBlank(this.title)) {
            w.writeStartElement("text");
            w.writeAttribute("class", "maintitle");
            w.writeAttribute("x", format(this.width / 2.0));
            w.writeAttribute("y", format(12));
            w.writeCharacters(this.title);
            w.writeEndElement();
        }
        w.writeStartElement("g");
        w.writeAttribute("id", "karyotype");
        for (final Contig contig : this.name2contig.values()) {
            contig.paint(w);
        }
        // g@id=karyotype
        w.writeEndElement();
        final ContigNameConverter ctgNameConverter = ContigNameConverter.fromContigSet(this.name2contig.keySet());
        ctgNameConverter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
        for (final String filename : args) {
            final Pattern tab = Pattern.compile("[\t]");
            w.writeStartElement("g");
            w.writeAttribute("id", "input" + filename);
            BufferedReader br = IOUtils.openURIForBufferedReading(filename);
            String line;
            while ((line = br.readLine()) != null) {
                if (line == null || StringUtil.isBlank(line) || BedLine.isBedHeader(line))
                    continue;
                final String[] tokens = tab.split(line, 4);
                if (tokens.length < 3)
                    continue;
                final String ctgName = ctgNameConverter.apply(tokens[0]);
                if (StringUtil.isBlank(ctgName))
                    continue;
                final Contig ctg = this.name2contig.get(ctgName);
                if (ctg == null)
                    continue;
                final Rectangle2D.Double r = ctg.getContigBounds();
                final int chromStart = Integer.parseInt(tokens[1]);
                final int chromEnd = Integer.parseInt(tokens[2]);
                final Map<String, String> attributes = new HashMap<>();
                if (tokens.length > 3) {
                    final StreamTokenizer st = new StreamTokenizer(new StringReader(tokens[3]));
                    st.wordChars('_', '_');
                    String key = null;
                    while (st.nextToken() != StreamTokenizer.TT_EOF) {
                        String s = null;
                        switch(st.ttype) {
                            case StreamTokenizer.TT_NUMBER:
                                s = String.valueOf(st.nval);
                                break;
                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                                s = st.sval;
                                break;
                            case ';':
                                break;
                            default:
                                break;
                        }
                        if (s == null)
                            continue;
                        if (key == null) {
                            key = s;
                        } else {
                            attributes.put(key, s);
                            key = null;
                        }
                    }
                }
                w.writeStartElement("g");
                if (attributes.containsKey("href")) {
                    w.writeStartElement("a");
                    w.writeAttribute("href", attributes.get("href"));
                }
                w.writeStartElement("rect");
                w.writeAttribute("style", "stroke:red;");
                w.writeAttribute("x", format(r.getMaxX() + 2));
                w.writeAttribute("width", format(2));
                w.writeAttribute("y", format(ctg.pos2pixel(chromStart, r)));
                w.writeAttribute("height", format(ctg.pos2pixel(chromEnd, r) - ctg.pos2pixel(chromStart, r)));
                if (attributes.containsKey("title")) {
                    w.writeStartElement("title");
                    w.writeCharacters(attributes.get("title"));
                    w.writeEndElement();
                }
                w.writeEndElement();
                if (attributes.containsKey("label")) {
                    w.writeStartElement("text");
                    w.writeAttribute("class", "bedlabel");
                    w.writeAttribute("x", format(r.getMaxX() + 4));
                    w.writeAttribute("y", format(3 + ctg.pos2pixel(chromStart, r)));
                    w.writeCharacters(attributes.get("label"));
                    w.writeEndElement();
                }
                if (attributes.containsKey("href")) {
                    w.writeEndElement();
                }
                // g
                w.writeEndElement();
            }
            br.close();
            w.writeEndElement();
        }
        // g@id=body
        w.writeEndElement();
        // svg
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        w.close();
        if (fout != null) {
            fout.flush();
            fout.close();
            fout = null;
        }
        return 0;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(fout);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Rectangle(java.awt.Rectangle) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) XLINK(com.github.lindenb.jvarkit.util.ns.XLINK) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) Rectangle2D(java.awt.geom.Rectangle2D) HashMap(java.util.HashMap) SVG(com.github.lindenb.jvarkit.util.svg.SVG) LinkedHashMap(java.util.LinkedHashMap) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) XMLStreamException(javax.xml.stream.XMLStreamException) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) StreamTokenizer(java.io.StreamTokenizer) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) Locatable(htsjdk.samtools.util.Locatable) Logger(com.github.lindenb.jvarkit.util.log.Logger) DecimalFormat(java.text.DecimalFormat) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) StringReader(java.io.StringReader) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) Pattern(java.util.regex.Pattern) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Rectangle(java.awt.Rectangle) Rectangle2D(java.awt.geom.Rectangle2D) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) StringReader(java.io.StringReader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) StreamTokenizer(java.io.StreamTokenizer)

Example 7 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method convertFromSamHeader.

private Mutation convertFromSamHeader(final File f, SAMFileHeader h, final Mutation src) {
    SAMSequenceDictionary dict = h.getSequenceDictionary();
    if (dict == null) {
        LOG.warn("No dictionary in " + h);
        return null;
    }
    final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
    converter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
    final String ctg = converter.apply(src.chrom);
    if (ctg == null)
        return null;
    if (ctg != null && ctg.equals(src.chrom))
        return src;
    return new Mutation(ctg, src.pos);
}
Also used : SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)

Example 8 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class VcfPeekVcf method doVcfToVcf.

/**
 * public for knime
 */
@Override
public int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
    try {
        final Set<String> unmatchedcontigs = new HashSet<>();
        final VCFHeader h = vcfIn.getHeader();
        final VCFHeader h2 = new VCFHeader(h);
        super.addMetaData(h2);
        final Map<String, VCFInfoHeaderLine> databaseTags = new HashMap<String, VCFInfoHeaderLine>();
        final VCFHeader databaseHeader = this.indexedVcfFileReader.getFileHeader();
        final ContigNameConverter nameConverter = (h.getSequenceDictionary() != null && !h.getSequenceDictionary().isEmpty() && databaseHeader.getSequenceDictionary() != null && !databaseHeader.getSequenceDictionary().isEmpty() ? ContigNameConverter.fromDictionaries(h.getSequenceDictionary(), databaseHeader.getSequenceDictionary()) : ContigNameConverter.getIdentity()).setOnNotFound(this.onContigNotFound);
        ;
        for (final String key : this.peek_info_tags) {
            VCFInfoHeaderLine hinfo = databaseHeader.getInfoHeaderLine(key);
            if (hinfo == null) {
                final String msg = "INFO name=" + key + " missing in " + this.resourceVcfFile;
                if (this.missingIdIsError) {
                    LOG.warn(msg);
                    continue;
                } else {
                    LOG.error(msg);
                    return -1;
                }
            }
            switch(hinfo.getCountType()) {
                case G:
                    throw new JvarkitException.UserError("Cannot handle VCFHeaderLineCount.G for " + hinfo.getID());
                default:
                    databaseTags.put(hinfo.getID(), hinfo);
                    break;
            }
            hinfo = VCFUtils.renameVCFInfoHeaderLine(hinfo, this.peekTagPrefix + key);
            if (h2.getInfoHeaderLine(hinfo.getID()) != null) {
                throw new JvarkitException.UserError("key " + this.peekTagPrefix + key + " already defined in VCF header");
            }
            h2.addMetaDataLine(hinfo);
            ;
        }
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(h).logger(LOG);
        while (vcfIn.hasNext()) {
            final VariantContext ctx = progress.watch(vcfIn.next());
            final String outContig = nameConverter.apply(ctx.getContig());
            if (outContig == null) {
                unmatchedcontigs.add(ctx.getContig());
                continue;
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            CloseableIterator<VariantContext> iter = this.indexedVcfFileReader.query(outContig, Math.max(0, ctx.getStart() - 1), (ctx.getEnd() + 1));
            while (iter.hasNext()) {
                final VariantContext ctx2 = iter.next();
                if (!outContig.equals(ctx2.getContig()))
                    continue;
                if (ctx.getStart() != ctx2.getStart())
                    continue;
                if (!ctx.getReference().equals(ctx2.getReference()))
                    continue;
                final boolean okAllele;
                switch(altAlleleMatcher) {
                    case all:
                        {
                            okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).count() == ctx.getAlternateAlleles().size();
                            break;
                        }
                    case at_least_one:
                        {
                            okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).findAny().isPresent();
                            break;
                        }
                    case none:
                        okAllele = true;
                        break;
                    default:
                        throw new IllegalStateException(altAlleleMatcher.name());
                }
                if (!okAllele)
                    continue;
                if (this.peekId && ctx2.hasID()) {
                    vcb.id(ctx2.getID());
                }
                boolean somethingWasChanged = false;
                for (final String key : databaseTags.keySet()) {
                    if (!ctx2.hasAttribute(key))
                        continue;
                    final VCFInfoHeaderLine dbHeader = databaseTags.get(key);
                    switch(dbHeader.getCountType()) {
                        case A:
                            {
                                final List<Object> newatt = new ArrayList<>();
                                final List<Object> ctx2att = ctx2.getAttributeAsList(key);
                                for (int i = 0; i < ctx.getAlternateAlleles().size(); ++i) {
                                    final Allele ctxalt = ctx.getAlternateAllele(i);
                                    int index2 = ctx2.getAlternateAlleles().indexOf(ctxalt);
                                    if (index2 == -1 || index2 >= ctx2att.size()) {
                                        newatt.add(null);
                                    } else {
                                        newatt.add(ctx2att.get(index2));
                                    }
                                }
                                if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
                                    vcb.attribute(this.peekTagPrefix + key, newatt);
                                    somethingWasChanged = true;
                                }
                                break;
                            }
                        case R:
                            {
                                final List<Object> newatt = new ArrayList<>();
                                final List<Object> ctx2att = ctx2.getAttributeAsList(key);
                                for (int i = 0; i < ctx.getAlleles().size(); ++i) {
                                    final Allele ctxalt = ctx.getAlleles().get(i);
                                    int index2 = ctx2.getAlleleIndex(ctxalt);
                                    if (index2 == -1 || index2 >= ctx2att.size()) {
                                        newatt.add(null);
                                    } else {
                                        newatt.add(ctx2att.get(index2));
                                    }
                                }
                                if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
                                    vcb.attribute(this.peekTagPrefix + key, newatt);
                                    somethingWasChanged = true;
                                }
                                break;
                            }
                        default:
                            {
                                final Object o = ctx2.getAttribute(key);
                                vcb.attribute(this.peekTagPrefix + key, o);
                                somethingWasChanged = true;
                                break;
                            }
                    }
                }
                if (somethingWasChanged)
                    break;
            }
            iter.close();
            iter = null;
            out.add(vcb.make());
            if (out.checkError())
                break;
        }
        progress.finish();
        if (!unmatchedcontigs.isEmpty()) {
            LOG.debug("Unmatched contigs: " + unmatchedcontigs.stream().collect(Collectors.joining("; ")));
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) PostponedVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.PostponedVariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Aggregations

ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)8 HashSet (java.util.HashSet)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Parameter (com.beust.jcommander.Parameter)2 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)2 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)2 Program (com.github.lindenb.jvarkit.util.jcommander.Program)2 Logger (com.github.lindenb.jvarkit.util.log.Logger)2 KnownGene (com.github.lindenb.jvarkit.util.ucsc.KnownGene)2 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)2 CloserUtil (htsjdk.samtools.util.CloserUtil)2 Interval (htsjdk.samtools.util.Interval)2 Allele (htsjdk.variant.variantcontext.Allele)2 VCFHeader (htsjdk.variant.vcf.VCFHeader)2 File (java.io.File)2