Use of gov.nih.nlm.ncbi.gb.GBSet in project jvarkit by lindenb.
The class Biostar95652, method doWork.
/**
 * Fetches each protein named in {@code args} from NCBI efetch as GenBank XML,
 * collects its CDD-annotated regions, inserts the protein into the taxonomy
 * tree under its lineage and finally writes the whole tree as an SVG document
 * to {@code outputFile} (or stdout).
 *
 * @param args protein identifiers; at least one is required
 * @return 0 on success, -1 if arguments/API key are missing or any exception occurs
 */
@Override
public int doWork(final List<String> args) {
    try {
        if (args.isEmpty()) {
            LOG.error("protein ID missing");
            return -1;
        }
        if (!this.ncbiApiKey.isApiKeyDefined()) {
            LOG.error("NCBI API key is not defined");
            return -1;
        }
        final JAXBContext context = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb");
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        // https://stackoverflow.com/questions/31293624
        try {
            unmarshaller.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "all");
        } catch (final Throwable err) {
            // Best effort: an unmarshaller that rejects the property is still used as-is.
            LOG.warn("Ignoring :" + err.getMessage());
        }
        // https://stackoverflow.com/questions/24460892/
        // The factory does not depend on the argument, so it is built once for all requests.
        final SAXParserFactory spf = SAXParserFactory.newInstance();
        // Not required for JAXB/XInclude
        spf.setValidating(false);

        for (final String arg : args) {
            final String uriNoKey = NcbiConstants.efetch() + "?db=protein&rettype=gb&retmode=xml&id=" + URLEncoder.encode(arg, "UTF-8");
            // NOTE(review): getAmpParamValue() presumably appends the API key as a query
            // parameter; only the key-less URI is logged so the credential stays out of logs.
            final String uri = uriNoKey + this.ncbiApiKey.getAmpParamValue();
            LOG.info("Reading from " + uriNoKey);
            final XMLReader xr = spf.newSAXParser().getXMLReader();
            final SAXSource source = new SAXSource(xr, new InputSource(uri));
            final GBSet gbset = (GBSet) unmarshaller.unmarshal(source);
            if (gbset.getGBSeq().isEmpty()) {
                LOG.info("Nothing in " + uriNoKey);
                continue;
            }
            // Only the first sequence of the returned set is used.
            final GBSeq gbseq = gbset.getGBSeq().get(0);
            final Protein protein = new Protein();
            protein.length = Integer.parseInt(gbseq.getGBSeqLength());
            protein.locus = gbseq.getGBSeqLocus();
            protein.definition = gbseq.getGBSeqDefinition();
            if (gbseq.getGBSeqFeatureTable() == null) {
                // No features at all: nothing can be drawn for this record.
                LOG.warn("No feature table in " + uriNoKey);
                continue;
            }
            for (final GBFeature feat : gbseq.getGBSeqFeatureTable().getGBFeature()) {
                // Features without intervals or without qualifiers cannot describe a CDD region.
                if (feat.getGBFeatureIntervals() == null || feat.getGBFeatureIntervals().getGBInterval().isEmpty()) {
                    continue;
                }
                if (feat.getGBFeatureQuals() == null) {
                    continue;
                }
                String cdd = null;
                String region_name = null;
                for (final GBQualifier qual : feat.getGBFeatureQuals().getGBQualifier()) {
                    final String qualName = qual.getGBQualifierName();
                    final String qualValue = qual.getGBQualifierValue();
                    if ("db_xref".equals(qualName)) {
                        if (qualValue == null) {
                            continue;
                        }
                        if (qualValue.startsWith("CDD:")) {
                            cdd = qualValue.substring(4);
                        } else if (qualValue.startsWith("taxon:")) {
                            protein.taxon_id = qualValue.substring(6);
                        }
                    } else if ("region_name".equals(qualName)) {
                        region_name = qualValue;
                    }
                }
                if (cdd == null || region_name == null) {
                    continue;
                }
                // One Domain per CDD identifier; its color is picked by order of first appearance.
                Domain domain = cdd2domain.get(cdd);
                if (domain == null) {
                    domain = new Domain();
                    domain.cdd = cdd;
                    domain.region_name = region_name;
                    domain.color = COLORS[cdd2domain.size() % COLORS.length];
                    cdd2domain.put(domain.cdd, domain);
                }
                for (final GBInterval interval : feat.getGBFeatureIntervals().getGBInterval()) {
                    if (interval.getGBIntervalFrom() == null || interval.getGBIntervalTo() == null) {
                        continue;
                    }
                    final DomainRegion region = new DomainRegion();
                    region.domain = domain;
                    final int start = Integer.parseInt(interval.getGBIntervalFrom());
                    final int end = Integer.parseInt(interval.getGBIntervalTo());
                    // Store the interval with start <= end and remember the original direction.
                    if (start < end) {
                        region.start = start;
                        region.end = end;
                        region.strand = '+';
                    } else {
                        region.start = end;
                        region.end = start;
                        region.strand = '-';
                    }
                    protein.domains.add(region);
                }
                // NOTE(review): this runs once per CDD-annotated feature, so a protein with
                // several domains is passed to root.insert() several times and a protein
                // without any is never inserted. Kept as in the original; confirm intent.
                final LinkedList<String> lineage = new LinkedList<String>(Arrays.asList(gbseq.getGBSeqTaxonomy().split("[;][ ]*")));
                lineage.add(gbseq.getGBSeqOrganism());
                // Longest regions first. Integer.compare avoids the overflow of a plain subtraction.
                Collections.sort(protein.domains, new Comparator<DomainRegion>() {
                    @Override
                    public int compare(final DomainRegion o1, final DomainRegion o2) {
                        return Integer.compare(o2.length(), o1.length());
                    }
                });
                this.root.insert(lineage, protein);
            }
        }

        root.simplify();
        root.compile();
        root.x = 0;
        root.y = (this.leafList.size() * seqHeight) / 2.0;
        root.compileXY(0, this.leafList.size() * seqHeight);

        // try-with-resources: the stream is closed even when writing the SVG fails.
        try (PrintStream ps = super.openFileOrStdoutAsPrintStream(outputFile)) {
            final XMLOutputFactory xof = XMLOutputFactory.newFactory();
            xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
            final XMLStreamWriter w = xof.createXMLStreamWriter(ps, "UTF-8");
            w.writeStartDocument("UTF-8", "1.0");
            w.writeStartElement("svg");
            w.writeDefaultNamespace(SVG.NS);
            w.writeNamespace("xlink", XLINK);
            w.writeAttribute("version", "1.1");
            w.writeAttribute("width", String.valueOf(2 + this.treeWidth + this.organismWidth + this.acnWidth + this.seqWidth));
            w.writeAttribute("height", String.valueOf(2 + this.leafList.size() * seqHeight));
            w.writeComment(this.getProgramCommandLine());
            w.writeComment("Version:" + getVersion());
            w.writeComment("Author: Pierre lindenbaum Phd");

            w.writeStartElement("defs");
            // Gradient used for the protein itself.
            writeVerticalGradient(w, "grad01", "stop-color:black;stop-opacity:1;");
            // One gradient per CDD domain, using the domain's color.
            for (final Domain domain : this.cdd2domain.values()) {
                writeVerticalGradient(w, "grad" + domain.cdd, "stop-color:" + domain.color + ";stop-opacity:1;");
            }
            // defs
            w.writeEndElement();

            w.writeStartElement("style");
            w.writeCharacters("svg {fill:none; stroke:black;}\n"
                    + ".protein { stroke:red;}\n"
                    + ".tree { stroke:black;fill:none;stroke-width:2}\n"
                    + ".organism { stroke:black;fill:none;stroke-width:2}\n"
                    + ".acn { stroke:blue;fill:none;stroke-width:2}\n"
                    + ".protein {fill:url(#grad01);stroke:black;}\n");
            for (final Domain domain : this.cdd2domain.values()) {
                w.writeCharacters(".cdd" + domain.cdd + " {fill:url(#grad" + domain.cdd + ");stroke:orange;stroke-width:3;fill-opacity:0.8;}\n");
            }
            // style
            w.writeEndElement();

            w.writeStartElement("g");
            this.root.paint(w);
            // g
            w.writeEndElement();
            // svg
            w.writeEndElement();
            w.writeEndDocument();
            w.flush();
            w.close();
        }
        LOG.info("Done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}

/**
 * Writes one {@code linearGradient} element with three stops (0%, 50%, 100%):
 * the two outer stops use {@code edgeStopStyle}, the middle stop is white.
 *
 * @param w             writer positioned inside the {@code defs} element
 * @param id            value of the gradient's {@code id} attribute
 * @param edgeStopStyle complete {@code style} attribute value for the 0% and 100% stops
 */
private static void writeVerticalGradient(final XMLStreamWriter w, final String id, final String edgeStopStyle) throws javax.xml.stream.XMLStreamException {
    w.writeStartElement("linearGradient");
    w.writeAttribute("id", id);
    w.writeAttribute("x1", "50%");
    w.writeAttribute("x2", "50%");
    w.writeAttribute("y1", "0%");
    w.writeAttribute("y2", "100%");
    w.writeEmptyElement("stop");
    w.writeAttribute("offset", "0%");
    w.writeAttribute("style", edgeStopStyle);
    w.writeEmptyElement("stop");
    w.writeAttribute("offset", "50%");
    w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
    w.writeEmptyElement("stop");
    w.writeAttribute("offset", "100%");
    w.writeAttribute("style", edgeStopStyle);
    w.writeEndElement();
}
Use of gov.nih.nlm.ncbi.gb.GBSet in project jvarkit by lindenb.
The class BlastMapAnnotations, method doWork.
/**
 * Parses the annotated entry {@code IN} (root element {@code GBSet} or
 * {@code uniprot}), then parses a BLAST XML output from the single file given
 * in {@code args} or from stdin, stores it in {@code this.blastOutput} and
 * prints the annotations with {@code printGB} / {@code printUniprot}.
 *
 * @param args empty (BLAST XML read from stdin) or exactly one BLAST XML file path
 * @return 0 on success, -1 on an unknown root element, a wrong number of
 *         arguments or any exception
 */
@Override
public int doWork(List<String> args) {
    try {
        // create a DOM parser
        final DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();
        f.setCoalescing(true);
        // f.setNamespaceAware(true); no, why does it break the parsing of uniprot ??
        f.setValidating(false);
        f.setExpandEntityReferences(true);
        /* xml parser */
        final DocumentBuilder docBuilder = f.newDocumentBuilder();
        // Every external entity (e.g. a DTD) is resolved to an empty document.
        docBuilder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                return new InputSource(new StringReader(""));
            }
        });

        // create a Unmarshaller for NCBI; transforms XML/DOM to GBC entry
        final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb:gov.nih.nlm.ncbi.blast:org.uniprot");
        final Unmarshaller unmarshaller = jc.createUnmarshaller();

        LOG.info("reading entry " + IN);
        final Document domEntry = docBuilder.parse(IN);
        final String rootName = domEntry.getDocumentElement().getNodeName();
        GBSet gbSet = null;
        Uniprot uniprotSet = null;
        if ("GBSet".equals(rootName)) {
            LOG.info("parsing as GBSet");
            gbSet = unmarshaller.unmarshal(domEntry, GBSet.class).getValue();
        } else if ("uniprot".equals(rootName)) {
            LOG.info("parsing as Uniprot " + domEntry.getDocumentElement());
            uniprotSet = unmarshaller.unmarshal(domEntry, Uniprot.class).getValue();
        } else {
            // This is a failure path (returns -1), so it is reported as an error
            // like the other failure in this method.
            LOG.error("unknown root element:" + rootName);
            return -1;
        }

        final Document blastDom;
        if (args.size() == 1) {
            LOG.info("reading " + args.get(0));
            blastDom = docBuilder.parse(new File(args.get(0)));
        } else if (args.isEmpty()) {
            LOG.info("reading from stdin");
            blastDom = docBuilder.parse(stdin());
        } else {
            LOG.error("Illegal number of args");
            return -1;
        }
        this.blastOutput = unmarshaller.unmarshal(blastDom, BlastOutput.class).getValue();

        // Exactly one of the two sets is non-null at this point.
        if (uniprotSet != null) {
            printUniprot(uniprotSet);
        }
        if (gbSet != null) {
            printGB(gbSet);
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    }
}
Aggregations