Search in sources :

Example 1 with GBSet

Use of gov.nih.nlm.ncbi.gb.GBSet in project jvarkit by lindenb.

The method doWork of the class Biostar95652.

/**
 * Fetches every protein listed in {@code args} from NCBI efetch as GenBank XML, collects the
 * features carrying both a {@code CDD:} cross-reference and a {@code region_name} qualifier,
 * inserts the protein into {@code this.root} under its taxonomic lineage, and finally writes
 * the resulting tree as an SVG document to {@code outputFile} (or stdout).
 *
 * @param args protein identifiers; at least one is required
 * @return 0 on success; -1 when no identifier was given, when the NCBI API key is not
 *         defined, or when any exception is raised while fetching, parsing or writing
 */
@Override
public int doWork(final List<String> args) {
    try {
        if (args.isEmpty()) {
            LOG.error("protein ID missing");
            return -1;
        }
        if (!this.ncbiApiKey.isApiKeyDefined()) {
            LOG.error("NCBI API key is not defined");
            return -1;
        }
        final JAXBContext context = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb");
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        // https://stackoverflow.com/questions/31293624
        try {
            unmarshaller.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "all");
        } catch (final Throwable err) {
            // best effort: not every JAXB implementation knows this property
            LOG.warn("Ignoring :" + err.getMessage());
        }
        for (final String arg : args) {
            // NOTE(review): getAmpParamValue() presumably appends the API key; only the
            // key-less URL is logged so that the secret does not end up in log files.
            final String loggableUri = NcbiConstants.efetch() + "?db=protein&rettype=gb&retmode=xml&id=" + URLEncoder.encode(arg, "UTF-8");
            final String uri = loggableUri + this.ncbiApiKey.getAmpParamValue();
            LOG.info("Reading from " + loggableUri);
            // https://stackoverflow.com/questions/24460892/
            final SAXParserFactory spf = SAXParserFactory.newInstance();
            // Not required for JAXB/XInclude
            spf.setValidating(false);
            final XMLReader xr = spf.newSAXParser().getXMLReader();
            final SAXSource source = new SAXSource(xr, new InputSource(uri));
            final GBSet gbset = (GBSet) unmarshaller.unmarshal(source);
            if (gbset.getGBSeq().isEmpty()) {
                LOG.info("Nothing in " + loggableUri);
                continue;
            }
            // only the first record of the set is used
            final GBSeq gbseq = gbset.getGBSeq().get(0);
            final Protein protein = new Protein();
            protein.length = Integer.parseInt(gbseq.getGBSeqLength());
            protein.locus = gbseq.getGBSeqLocus();
            protein.definition = gbseq.getGBSeqDefinition();
            // the feature table is an optional element: a record without it has nothing to draw
            if (gbseq.getGBSeqFeatureTable() == null) {
                LOG.warn("No feature table in " + loggableUri);
                continue;
            }
            for (final GBFeature feat : gbseq.getGBSeqFeatureTable().getGBFeature()) {
                // intervals are optional on a feature
                if (feat.getGBFeatureIntervals() == null || feat.getGBFeatureIntervals().getGBInterval().isEmpty()) {
                    continue;
                }
                String cdd = null;
                String region_name = null;
                // qualifiers (and their values) are optional as well
                if (feat.getGBFeatureQuals() != null) {
                    for (final GBQualifier qual : feat.getGBFeatureQuals().getGBQualifier()) {
                        final String qualName = qual.getGBQualifierName();
                        final String qualValue = qual.getGBQualifierValue();
                        if (qualValue == null) {
                            continue;
                        }
                        if ("db_xref".equals(qualName)) {
                            if (qualValue.startsWith("CDD:")) {
                                cdd = qualValue.substring(4);
                            } else if (qualValue.startsWith("taxon:")) {
                                protein.taxon_id = qualValue.substring(6);
                            }
                        } else if ("region_name".equals(qualName)) {
                            region_name = qualValue;
                        }
                    }
                }
                if (cdd == null || region_name == null) {
                    continue;
                }
                // one Domain (and one color) per distinct CDD identifier, shared by all proteins
                Domain domain = cdd2domain.get(cdd);
                if (domain == null) {
                    domain = new Domain();
                    domain.cdd = cdd;
                    domain.region_name = region_name;
                    domain.color = COLORS[cdd2domain.size() % COLORS.length];
                    cdd2domain.put(domain.cdd, domain);
                }
                for (final GBInterval interval : feat.getGBFeatureIntervals().getGBInterval()) {
                    if (interval.getGBIntervalFrom() == null || interval.getGBIntervalTo() == null) {
                        continue;
                    }
                    final DomainRegion region = new DomainRegion();
                    region.domain = domain;
                    final int start = Integer.parseInt(interval.getGBIntervalFrom());
                    final int end = Integer.parseInt(interval.getGBIntervalTo());
                    // '<=' so that a one-residue interval is not reported as reverse strand
                    if (start <= end) {
                        region.start = start;
                        region.end = end;
                        region.strand = '+';
                    } else {
                        region.start = end;
                        region.end = start;
                        region.strand = '-';
                    }
                    protein.domains.add(region);
                }
                // NOTE(review): the insertion below happens once per accepted feature, so a
                // protein without any CDD region is never inserted and one with several is
                // inserted several times; confirm against root.insert() whether this is intended.
                // The lineage is rebuilt each time because insert() may consume the list.
                final LinkedList<String> lineage = new LinkedList<String>();
                if (gbseq.getGBSeqTaxonomy() != null) {
                    lineage.addAll(Arrays.asList(gbseq.getGBSeqTaxonomy().split("[;][ ]*")));
                }
                lineage.add(gbseq.getGBSeqOrganism());
                // longest regions first
                Collections.sort(protein.domains, new Comparator<DomainRegion>() {

                    @Override
                    public int compare(final DomainRegion o1, final DomainRegion o2) {
                        // Integer.compare cannot overflow, unlike a subtraction
                        return Integer.compare(o2.length(), o1.length());
                    }
                });
                this.root.insert(lineage, protein);
            }
        }
        root.simplify();
        root.compile();
        root.x = 0;
        root.y = (this.leafList.size() * seqHeight) / 2.0;
        root.compileXY(0, this.leafList.size() * seqHeight);
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
        // try-with-resources: the output stream is closed even if writing the SVG fails
        try (PrintStream ps = super.openFileOrStdoutAsPrintStream(outputFile)) {
            final XMLStreamWriter w = xof.createXMLStreamWriter(ps, "UTF-8");
            try {
                w.writeStartDocument("UTF-8", "1.0");
                w.writeStartElement("svg");
                w.writeDefaultNamespace(SVG.NS);
                w.writeNamespace("xlink", XLINK);
                w.writeAttribute("version", "1.1");
                w.writeAttribute("width", String.valueOf(2 + this.treeWidth + this.organismWidth + this.acnWidth + this.seqWidth));
                w.writeAttribute("height", String.valueOf(2 + this.leafList.size() * seqHeight));
                w.writeComment(this.getProgramCommandLine());
                w.writeComment("Version:" + getVersion());
                w.writeComment("Author: Pierre lindenbaum Phd");
                w.writeStartElement("defs");
                // gradient referenced by the ".protein" style rule
                w.writeStartElement("linearGradient");
                w.writeAttribute("id", "grad01");
                w.writeAttribute("x1", "50%");
                w.writeAttribute("x2", "50%");
                w.writeAttribute("y1", "0%");
                w.writeAttribute("y2", "100%");
                w.writeEmptyElement("stop");
                w.writeAttribute("offset", "0%");
                w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
                w.writeEmptyElement("stop");
                w.writeAttribute("offset", "50%");
                w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
                w.writeEmptyElement("stop");
                w.writeAttribute("offset", "100%");
                w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
                w.writeEndElement();
                // one gradient per CDD domain, in that domain's color
                for (final Domain cdd : this.cdd2domain.values()) {
                    w.writeStartElement("linearGradient");
                    w.writeAttribute("id", "grad" + cdd.cdd);
                    w.writeAttribute("x1", "50%");
                    w.writeAttribute("x2", "50%");
                    w.writeAttribute("y1", "0%");
                    w.writeAttribute("y2", "100%");
                    w.writeEmptyElement("stop");
                    w.writeAttribute("offset", "0%");
                    w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
                    w.writeEmptyElement("stop");
                    w.writeAttribute("offset", "50%");
                    w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
                    w.writeEmptyElement("stop");
                    w.writeAttribute("offset", "100%");
                    w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
                    w.writeEndElement();
                }
                // defs
                w.writeEndElement();
                w.writeStartElement("style");
                w.writeCharacters("svg {fill:none; stroke:black;}\n" + ".protein { stroke:red;}\n" + ".tree { stroke:black;fill:none;stroke-width:2}\n" + ".organism { stroke:black;fill:none;stroke-width:2}\n" + ".acn { stroke:blue;fill:none;stroke-width:2}\n" + ".protein {fill:url(#grad01);stroke:black;}\n");
                for (final Domain cdd : this.cdd2domain.values()) {
                    w.writeCharacters(".cdd" + cdd.cdd + " {fill:url(#grad" + cdd.cdd + ");stroke:orange;stroke-width:3;fill-opacity:0.8;}\n");
                }
                // style
                w.writeEndElement();
                w.writeStartElement("g");
                this.root.paint(w);
                // g
                w.writeEndElement();
                // svg
                w.writeEndElement();
                w.writeEndDocument();
                w.flush();
            } finally {
                // XMLStreamWriter is not AutoCloseable, hence the explicit finally
                w.close();
            }
        }
        LOG.info("Done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : GBSet(gov.nih.nlm.ncbi.gb.GBSet) InputSource(org.xml.sax.InputSource) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) JAXBContext(javax.xml.bind.JAXBContext) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Unmarshaller(javax.xml.bind.Unmarshaller) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval) XMLReader(org.xml.sax.XMLReader) PrintStream(java.io.PrintStream) GBQualifier(gov.nih.nlm.ncbi.gb.GBQualifier) LinkedList(java.util.LinkedList) XMLStreamException(javax.xml.stream.XMLStreamException) SAXSource(javax.xml.transform.sax.SAXSource) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Example 2 with GBSet

Use of gov.nih.nlm.ncbi.gb.GBSet in project jvarkit by lindenb.

The method doWork of the class BlastMapAnnotations.

/**
 * Parses the annotated entry {@code IN}, whose root element must be {@code GBSet} or
 * {@code uniprot}, then parses a BLAST XML output taken from the single file argument or
 * from stdin, stores it in {@code this.blastOutput} and delegates to {@code printUniprot}
 * or {@code printGB}.
 *
 * @param args zero arguments (BLAST XML is read from stdin) or one file path
 * @return 0 on success; -1 on an unknown root element, on more than one argument, or when
 *         any exception is raised
 */
@Override
public int doWork(List<String> args) {
    try {
        // DOM parser shared by the annotated entry and the BLAST output
        final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setCoalescing(true);
        // namespace awareness stays off: turning it on was seen to break the parsing of uniprot
        domFactory.setValidating(false);
        domFactory.setExpandEntityReferences(true);
        final DocumentBuilder docBuilder = domFactory.newDocumentBuilder();
        docBuilder.setEntityResolver(new EntityResolver() {

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                // every entity is resolved to an empty document
                return new InputSource(new StringReader(""));
            }
        });
        // JAXB unmarshaller turning the DOM trees into GenBank / BLAST / UniProt objects
        final JAXBContext jaxbContext = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb:gov.nih.nlm.ncbi.blast:org.uniprot");
        final Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();

        LOG.info("reading entry " + IN);
        final Document entryDom = docBuilder.parse(IN);
        final String rootName = entryDom.getDocumentElement().getNodeName();
        GBSet gbSet = null;
        Uniprot uniprotSet = null;
        if ("GBSet".equals(rootName)) {
            LOG.info("parsing as GBSet");
            gbSet = unmarshaller.unmarshal(entryDom, GBSet.class).getValue();
        } else if ("uniprot".equals(rootName)) {
            LOG.info("parsing as Uniprot " + entryDom.getDocumentElement());
            uniprotSet = unmarshaller.unmarshal(entryDom, Uniprot.class).getValue();
        } else {
            LOG.info("unknown root element:" + rootName);
            return -1;
        }

        // the BLAST output comes from the single file argument, or from stdin when there is none
        final Document blastDom;
        switch (args.size()) {
            case 0:
                LOG.info("reading from stdin");
                blastDom = docBuilder.parse(stdin());
                break;
            case 1:
                LOG.info("reading " + args.get(0));
                blastDom = docBuilder.parse(new File(args.get(0)));
                break;
            default:
                LOG.error("Illegal number of args");
                return -1;
        }
        this.blastOutput = unmarshaller.unmarshal(blastDom, BlastOutput.class).getValue();

        // exactly one of the two entries was set above
        if (uniprotSet != null) {
            printUniprot(uniprotSet);
        }
        if (gbSet != null) {
            printGB(gbSet);
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : GBSet(gov.nih.nlm.ncbi.gb.GBSet) InputSource(org.xml.sax.InputSource) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) JAXBContext(javax.xml.bind.JAXBContext) EntityResolver(org.xml.sax.EntityResolver) IOException(java.io.IOException) Document(org.w3c.dom.Document) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) SAXException(org.xml.sax.SAXException) Uniprot(org.uniprot.Uniprot) DocumentBuilder(javax.xml.parsers.DocumentBuilder) StringReader(java.io.StringReader) Unmarshaller(javax.xml.bind.Unmarshaller) File(java.io.File)

Aggregations

GBSet (gov.nih.nlm.ncbi.gb.GBSet)2 JAXBContext (javax.xml.bind.JAXBContext)2 Unmarshaller (javax.xml.bind.Unmarshaller)2 InputSource (org.xml.sax.InputSource)2 GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)1 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)1 GBQualifier (gov.nih.nlm.ncbi.gb.GBQualifier)1 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)1 File (java.io.File)1 IOException (java.io.IOException)1 PrintStream (java.io.PrintStream)1 StringReader (java.io.StringReader)1 LinkedList (java.util.LinkedList)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)1 SAXParserFactory (javax.xml.parsers.SAXParserFactory)1 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)1 XMLStreamException (javax.xml.stream.XMLStreamException)1 XMLStreamWriter (javax.xml.stream.XMLStreamWriter)1 SAXSource (javax.xml.transform.sax.SAXSource)1