Search in sources :

Example 1 with GBInterval

use of gov.nih.nlm.ncbi.gb.GBInterval in project jvarkit by lindenb.

the class Biostar95652 method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        if (args.isEmpty()) {
            LOG.error("protein ID missing");
            return -1;
        }
        if (!this.ncbiApiKey.isApiKeyDefined()) {
            LOG.error("NCBI API key is not defined");
            return -1;
        }
        JAXBContext context = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb");
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        // https://stackoverflow.com/questions/31293624
        try {
            unmarshaller.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "all");
        } catch (final Throwable err) {
            LOG.warn("Ignoring :" + err.getMessage());
        }
        for (final String arg : args) {
            String uri = NcbiConstants.efetch() + "?db=protein&rettype=gb&retmode=xml&id=" + URLEncoder.encode(arg, "UTF-8") + this.ncbiApiKey.getAmpParamValue();
            LOG.info("Reading from " + uri);
            // https://stackoverflow.com/questions/24460892/
            SAXParserFactory spf = SAXParserFactory.newInstance();
            // Not required for JAXB/XInclude
            spf.setValidating(false);
            final XMLReader xr = spf.newSAXParser().getXMLReader();
            final SAXSource source = new SAXSource(xr, new InputSource(uri));
            GBSet gbset = (GBSet) unmarshaller.unmarshal(source);
            if (gbset.getGBSeq().isEmpty()) {
                LOG.info("Nothing in " + uri);
                continue;
            }
            GBSeq gbseq = gbset.getGBSeq().get(0);
            Protein protein = new Protein();
            protein.length = Integer.parseInt(gbseq.getGBSeqLength());
            protein.locus = gbseq.getGBSeqLocus();
            protein.definition = gbseq.getGBSeqDefinition();
            for (GBFeature feat : gbseq.getGBSeqFeatureTable().getGBFeature()) {
                if (feat.getGBFeatureIntervals().getGBInterval().isEmpty())
                    continue;
                String cdd = null;
                String region_name = null;
                for (GBQualifier qual : feat.getGBFeatureQuals().getGBQualifier()) {
                    if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("CDD:")) {
                        cdd = qual.getGBQualifierValue().substring(4);
                    } else if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("taxon:")) {
                        protein.taxon_id = qual.getGBQualifierValue().substring(6);
                    } else if (qual.getGBQualifierName().equals("region_name")) {
                        region_name = qual.getGBQualifierValue();
                    }
                }
                if (cdd == null || region_name == null) {
                    continue;
                }
                Domain domain = cdd2domain.get(cdd);
                if (domain == null) {
                    domain = new Domain();
                    domain.cdd = cdd;
                    domain.region_name = region_name;
                    domain.color = COLORS[cdd2domain.size() % COLORS.length];
                    cdd2domain.put(domain.cdd, domain);
                }
                for (GBInterval interval : feat.getGBFeatureIntervals().getGBInterval()) {
                    if (interval.getGBIntervalFrom() == null || interval.getGBIntervalTo() == null)
                        continue;
                    DomainRegion region = new DomainRegion();
                    region.domain = domain;
                    int start = Integer.parseInt(interval.getGBIntervalFrom());
                    int end = Integer.parseInt(interval.getGBIntervalTo());
                    if (start < end) {
                        region.start = start;
                        region.end = end;
                        region.strand = '+';
                    } else {
                        region.start = end;
                        region.end = start;
                        region.strand = '-';
                    }
                    protein.domains.add(region);
                }
                LinkedList<String> lineage = new LinkedList<String>(Arrays.asList(gbseq.getGBSeqTaxonomy().split("[;][ ]*")));
                lineage.add(gbseq.getGBSeqOrganism());
                Collections.sort(protein.domains, new Comparator<DomainRegion>() {

                    @Override
                    public int compare(DomainRegion o1, DomainRegion o2) {
                        return o2.length() - o1.length();
                    }
                });
                this.root.insert(lineage, protein);
            }
        }
        root.simplify();
        root.compile();
        root.x = 0;
        root.y = (this.leafList.size() * seqHeight) / 2.0;
        root.compileXY(0, this.leafList.size() * seqHeight);
        PrintStream ps = super.openFileOrStdoutAsPrintStream(outputFile);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
        XMLStreamWriter w = xof.createXMLStreamWriter(ps, "UTF-8");
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("svg");
        w.writeDefaultNamespace(SVG.NS);
        w.writeNamespace("xlink", XLINK);
        w.writeAttribute("version", "1.1");
        w.writeAttribute("width", String.valueOf(2 + this.treeWidth + this.organismWidth + this.acnWidth + this.seqWidth));
        w.writeAttribute("height", String.valueOf(2 + this.leafList.size() * seqHeight));
        w.writeComment(this.getProgramCommandLine());
        w.writeComment("Version:" + getVersion());
        w.writeComment("Author: Pierre lindenbaum Phd");
        w.writeStartElement("defs");
        w.writeStartElement("linearGradient");
        w.writeAttribute("id", "grad01");
        w.writeAttribute("x1", "50%");
        w.writeAttribute("x2", "50%");
        w.writeAttribute("y1", "0%");
        w.writeAttribute("y2", "100%");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "0%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "50%");
        w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
        w.writeEmptyElement("stop");
        w.writeAttribute("offset", "100%");
        w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
        w.writeEndElement();
        for (Domain cdd : this.cdd2domain.values()) {
            w.writeStartElement("linearGradient");
            w.writeAttribute("id", "grad" + cdd.cdd);
            w.writeAttribute("x1", "50%");
            w.writeAttribute("x2", "50%");
            w.writeAttribute("y1", "0%");
            w.writeAttribute("y2", "100%");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "0%");
            w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "50%");
            w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
            w.writeEmptyElement("stop");
            w.writeAttribute("offset", "100%");
            w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
            w.writeEndElement();
        }
        // defs
        w.writeEndElement();
        w.writeStartElement("style");
        w.writeCharacters("svg {fill:none; stroke:black;}\n" + ".protein { stroke:red;}\n" + ".tree { stroke:black;fill:none;stroke-width:2}\n" + ".organism { stroke:black;fill:none;stroke-width:2}\n" + ".acn { stroke:blue;fill:none;stroke-width:2}\n" + ".protein {fill:url(#grad01);stroke:black;}\n");
        for (Domain cdd : this.cdd2domain.values()) {
            w.writeCharacters(".cdd" + cdd.cdd + " {fill:url(#grad" + cdd.cdd + ");stroke:orange;stroke-width:3;fill-opacity:0.8;}\n");
        }
        // style
        w.writeEndElement();
        w.writeStartElement("g");
        this.root.paint(w);
        // g
        w.writeEndElement();
        // svg
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        w.close();
        ps.close();
        ps = null;
        LOG.info("Done");
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : GBSet(gov.nih.nlm.ncbi.gb.GBSet) InputSource(org.xml.sax.InputSource) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) JAXBContext(javax.xml.bind.JAXBContext) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Unmarshaller(javax.xml.bind.Unmarshaller) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval) XMLReader(org.xml.sax.XMLReader) PrintStream(java.io.PrintStream) GBQualifier(gov.nih.nlm.ncbi.gb.GBQualifier) LinkedList(java.util.LinkedList) XMLStreamException(javax.xml.stream.XMLStreamException) SAXSource(javax.xml.transform.sax.SAXSource) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Example 2 with GBInterval

use of gov.nih.nlm.ncbi.gb.GBInterval in project jvarkit by lindenb.

the class BlastMapAnnotations method printGB.

private void printGB(GBSet gbSet) {
    for (GBSeq gbSeq : gbSet.getGBSeq()) {
        BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
        for (Iteration iteration : iterations.getIteration()) {
            for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
                if (feature.getGBFeatureIntervals() == null)
                    continue;
                if (!acceptfeature(feature.getGBFeatureKey()))
                    continue;
                for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
                    for (Hit hit : iteration.getIterationHits().getHit()) {
                        for (Hsp hsp : hit.getHitHsps().getHsp()) {
                            GenbankInterval bi = new GenbankInterval();
                            bi.gbSeq = gbSeq;
                            bi.gbFeature = feature;
                            bi.gbInterval = interval;
                            bi.hit = hit;
                            bi.hsp = hsp;
                            LOG.debug("interval " + bi);
                            if (!bi.isGbForward())
                                LOG.info("CHECK INTERVAL REVERSE");
                            if (!bi.isFeatureOverlapHsp())
                                continue;
                            stdout().println(bi.toBedString());
                        }
                    }
                }
            }
            break;
        }
    }
// System.err.println("OK");
}
Also used : BlastOutputIterations(gov.nih.nlm.ncbi.blast.BlastOutputIterations) Hit(gov.nih.nlm.ncbi.blast.Hit) GBSeq(gov.nih.nlm.ncbi.gb.GBSeq) Hsp(gov.nih.nlm.ncbi.blast.Hsp) GBFeature(gov.nih.nlm.ncbi.gb.GBFeature) Iteration(gov.nih.nlm.ncbi.blast.Iteration) GBInterval(gov.nih.nlm.ncbi.gb.GBInterval)

Aggregations

GBFeature (gov.nih.nlm.ncbi.gb.GBFeature)2 GBInterval (gov.nih.nlm.ncbi.gb.GBInterval)2 GBSeq (gov.nih.nlm.ncbi.gb.GBSeq)2 BlastOutputIterations (gov.nih.nlm.ncbi.blast.BlastOutputIterations)1 Hit (gov.nih.nlm.ncbi.blast.Hit)1 Hsp (gov.nih.nlm.ncbi.blast.Hsp)1 Iteration (gov.nih.nlm.ncbi.blast.Iteration)1 GBQualifier (gov.nih.nlm.ncbi.gb.GBQualifier)1 GBSet (gov.nih.nlm.ncbi.gb.GBSet)1 PrintStream (java.io.PrintStream)1 LinkedList (java.util.LinkedList)1 JAXBContext (javax.xml.bind.JAXBContext)1 Unmarshaller (javax.xml.bind.Unmarshaller)1 SAXParserFactory (javax.xml.parsers.SAXParserFactory)1 XMLOutputFactory (javax.xml.stream.XMLOutputFactory)1 XMLStreamException (javax.xml.stream.XMLStreamException)1 XMLStreamWriter (javax.xml.stream.XMLStreamWriter)1 SAXSource (javax.xml.transform.sax.SAXSource)1 InputSource (org.xml.sax.InputSource)1 XMLReader (org.xml.sax.XMLReader)1