Search in sources :

Example 1 with GBQualifier

use of gov.nih.nlm.ncbi.gb.GBQualifier in project jvarkit by lindenb.

the class Biostar95652 method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        if (args.isEmpty()) {
            LOG.error("protein ID missing");
            return -1;
        }
        if (!this.ncbiApiKey.isApiKeyDefined()) {
            LOG.error("NCBI API key is not defined");
            return -1;
        }
        JAXBContext context = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb");
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        // https://stackoverflow.com/questions/31293624
        try {
            unmarshaller.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "all");
        } catch (final Throwable err) {
            LOG.warn("Ignoring :" + err.getMessage());
        }
        for (final String arg : args) {
            String uri = NcbiConstants.efetch() + "?db=protein&rettype=gb&retmode=xml&id=" + URLEncoder.encode(arg, "UTF-8") + this.ncbiApiKey.getAmpParamValue();
            LOG.info("Reading from " + uri);
            // https://stackoverflow.com/questions/24460892/
            SAXParserFactory spf = SAXParserFactory.newInstance();
            // Not required for JAXB/XInclude
            spf.setValidating(false);
            final XMLReader xr = spf.newSAXParser().getXMLReader();
            final SAXSource source = new SAXSource(xr, new InputSource(uri));
            GBSet gbset = (GBSet) unmarshaller.unmarshal(source);
            if (gbset.getGBSeq().isEmpty()) {
                LOG.info("Nothing in " + uri);
                continue;
            }
            GBSeq gbseq = gbset.getGBSeq().get(0);
            Protein protein = new Protein();
            protein.length = Integer.parseInt(gbseq.getGBSeqLength());
            protein.locus = gbseq.getGBSeqLocus();
            protein.definition = gbseq.getGBSeqDefinition();
            for (GBFeature feat : gbseq.getGBSeqFeatureTable().getGBFeature()) {
                if (feat.getGBFeatureIntervals().getGBInterval().isEmpty())
                    continue;
                String cdd = null;
                String region_name = null;
                for (GBQualifier qual : feat.getGBFeatureQuals().getGBQualifier()) {
                    if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("CDD:")) {
                        cdd = qual.getGBQualifierValue().substring(4);
                    } else if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("taxon:")) {
                        protein.taxon_id = qual.getGBQualifierValue().substring(6);
                    } else if (qual.getGBQualifierName().equals("region_name")) {
                        region_name = qual.getGBQualifierValue();
                    }
                }
                if (cdd == null || region_name == null) {
                    continue;
                }
                Domain domain = cdd2domain.get(cdd);
                if (domain == null) {
                    domain = new Domain();
                    domain.cdd = cdd;
                    domain.region_name = region_name;
                    domain.color = COLORS[cdd2domain.size() % COLORS.length];
                    cdd2domain.put(domain.cdd, domain);
                }
                for (GBInterval interval : feat.getGBFeatureIntervals().getGBInterval()) {
                    if (interval.getGBIntervalFrom() == null || interval.getGBIntervalTo() == null)
                        continue;
                    DomainRegion region = new DomainRegion();
                    region.domain = domain;
                    int start = Integer.parseInt(interval.getGBIntervalFrom());
                    int end = Integer.parseInt(interval.getGBIntervalTo());
                    if (start < end) {
                        region.start = start;
                        region.end = end;
                        region.strand = '+';
                    } else {
                        region.start = end;
                        region.end = start;
                        region.strand = '-';
                    }
                    protein.domains.add(region);
                }
                LinkedList<String> lineage = new LinkedList<String>(Arrays.asList(gbseq.getGBSeqTaxonomy().split("[;][ ]*")));
                lineage.add(gbseq.getGBSeqOrganism());
                Collections.sort(protein.domains, new Comparator<DomainRegion>() {

                    @Override
                    public int compare(DomainRegion o1, DomainRegion o2) {
                        return o2.length() - o1.length();
                    }
                });
                this.root.insert(lineage, protein);
            }
        }
        root.simplify();
        root.compile();
        root.x = 0;
        root.y = (this.leafList.size() * seqHeight) / 2.0;
        root.compileXY(0, this.leafList.size() * seqHeight);
        PrintStream ps = super.openFileOrStdoutAsPrintStream(outputFile);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
        XMLStreamWriter w = xof.createXMLStreamWriter(ps, "UTF-8");
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("svg");
        w.writeDefaultNamespace(SVG.NS);
        w.writeNamespace("xlink", XLINK);
        w.writeAttribute("version", "1.1");
        w.writeAttribute("width", String.valueOf(2 + this.treeWidth + this.organismWidth + this.acnWidth + this.seqWidth));
        w.writeAttribute("height", String.valueOf(2 + this.leafList.size() * seqHeight));
        w.writeComment(this.getProgramCommandLine());
        w.writeComment("Version:" + getVersion());
        w.writeComment("Author: Pierre lindenbaum Phd");
        w.writeStartElement("defs");
        w.writeStartElement("linearGradient");
        w.writeAttribute("id", "grad01");
        w.writeAttribute("x1", "50%");
        w.writeAttribute("x2", "50%");
        w.writeAttribute("y1", "0%");
        w.writeAttribute("y2", "100%");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "0%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "50%");
        w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "100%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
        w.writeEndElement();
        for (Domain cdd : this.cdd2domain.values()) {
            w.writeStartElement("linearGradient");
            w.writeAttribute("id", "grad" + cdd.cdd);
            w.writeAttribute("x1", "50%");
            w.writeAttribute("x2", "50%");
            w.writeAttribute("y1", "0%");
            w.writeAttribute("y2", "100%");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "0%");
            w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "50%");
            w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "100%");
            w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
            w.writeEndElement();
        }
        // defs
        w.writeEndElement();
        w.writeStartElement("style");
        w.writeCharacters("svg {fill:none; stroke:black;}\n" + ".protein { stroke:red;}\n" + ".tree { stroke:black;fill:none;stroke-width:2}\n" + ".organism { stroke:black;fill:none;stroke-width:2}\n" + ".acn { stroke:blue;fill:none;stroke-width:2}\n" + ".protein {fill:url(#grad01);stroke:black;}\n");
        for (Domain cdd : this.cdd2domain.values()) {
            w.writeCharacters(".cdd" + cdd.cdd + " {fill:url(#grad" + cdd.cdd + ");stroke:orange;stroke-width:3;fill-opacity:0.8;}\n");
        }
        // style
        w.writeEndElement();
        w.writeStartElement("g");
        this.root.paint(w);
        // g
        w.writeEndElement();
        // svg
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        w.close();
        ps.close();
        ps = null;
        LOG.info("Done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : GBSet(gov.nih.nlm.ncbi.gb.GBSet) InputSource(org.xml.sax.InputSource) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) JAXBContext(javax.xml.bind.JAXBContext) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Unmarshaller(javax.xml.bind.Unmarshaller) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval) XMLReader(org.xml.sax.XMLReader) PrintStream(java.io.PrintStream) GBQualifier(gov.nih.nlm.ncbi.gb.GBQualifier) LinkedList(java.util.LinkedList) XMLStreamException(javax.xml.stream.XMLStreamException) SAXSource(javax.xml.transform.sax.SAXSource) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Aggregations

GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBQualifier (gov.nih.nlm.ncbi.gb.GBQualifier)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 GBSet (gov.nih.nlm.ncbi.gb.GBSet)1 PrintStream (java.io.PrintStream)1 LinkedList (java.util.LinkedList)1 JAXBContext (javax.xml.bind.JAXBContext)1 Unmarshaller (javax.xml.bind.Unmarshaller)1 SAXParserFactory (javax.xml.parsers.SAXParserFactory)1 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)1 XMLStreamException (javax.xml.stream.XMLStreamException)1 XMLStreamWriter (javax.xml.stream.XMLStreamWriter)1 SAXSource (javax.xml.transform.sax.SAXSource)1 InputSource (org.xml.sax.InputSource)1 XMLReader (org.xml.sax.XMLReader)1