use of gov.nih.nlm.ncbi.gb.GBSeq in project jvarkit by lindenb.
the class Biostar95652 method doWork.
@Override
public int doWork(final List<String> args) {
try {
if (args.isEmpty()) {
LOG.error("protein ID missing");
return -1;
}
if (!this.ncbiApiKey.isApiKeyDefined()) {
LOG.error("NCBI API key is not defined");
return -1;
}
JAXBContext context = JAXBContext.newInstance("gov.nih.nlm.ncbi.gb");
final Unmarshaller unmarshaller = context.createUnmarshaller();
// https://stackoverflow.com/questions/31293624
try {
unmarshaller.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "all");
} catch (final Throwable err) {
LOG.warn("Ignoring :" + err.getMessage());
}
for (final String arg : args) {
String uri = NcbiConstants.efetch() + "?db=protein&rettype=gb&retmode=xml&id=" + URLEncoder.encode(arg, "UTF-8") + this.ncbiApiKey.getAmpParamValue();
LOG.info("Reading from " + uri);
// https://stackoverflow.com/questions/24460892/
SAXParserFactory spf = SAXParserFactory.newInstance();
// Not required for JAXB/XInclude
spf.setValidating(false);
final XMLReader xr = spf.newSAXParser().getXMLReader();
final SAXSource source = new SAXSource(xr, new InputSource(uri));
GBSet gbset = (GBSet) unmarshaller.unmarshal(source);
if (gbset.getGBSeq().isEmpty()) {
LOG.info("Nothing in " + uri);
continue;
}
GBSeq gbseq = gbset.getGBSeq().get(0);
Protein protein = new Protein();
protein.length = Integer.parseInt(gbseq.getGBSeqLength());
protein.locus = gbseq.getGBSeqLocus();
protein.definition = gbseq.getGBSeqDefinition();
for (GBFeature feat : gbseq.getGBSeqFeatureTable().getGBFeature()) {
if (feat.getGBFeatureIntervals().getGBInterval().isEmpty())
continue;
String cdd = null;
String region_name = null;
for (GBQualifier qual : feat.getGBFeatureQuals().getGBQualifier()) {
if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("CDD:")) {
cdd = qual.getGBQualifierValue().substring(4);
} else if (qual.getGBQualifierName().equals("db_xref") && qual.getGBQualifierValue().startsWith("taxon:")) {
protein.taxon_id = qual.getGBQualifierValue().substring(6);
} else if (qual.getGBQualifierName().equals("region_name")) {
region_name = qual.getGBQualifierValue();
}
}
if (cdd == null || region_name == null) {
continue;
}
Domain domain = cdd2domain.get(cdd);
if (domain == null) {
domain = new Domain();
domain.cdd = cdd;
domain.region_name = region_name;
domain.color = COLORS[cdd2domain.size() % COLORS.length];
cdd2domain.put(domain.cdd, domain);
}
for (GBInterval interval : feat.getGBFeatureIntervals().getGBInterval()) {
if (interval.getGBIntervalFrom() == null || interval.getGBIntervalTo() == null)
continue;
DomainRegion region = new DomainRegion();
region.domain = domain;
int start = Integer.parseInt(interval.getGBIntervalFrom());
int end = Integer.parseInt(interval.getGBIntervalTo());
if (start < end) {
region.start = start;
region.end = end;
region.strand = '+';
} else {
region.start = end;
region.end = start;
region.strand = '-';
}
protein.domains.add(region);
}
LinkedList<String> lineage = new LinkedList<String>(Arrays.asList(gbseq.getGBSeqTaxonomy().split("[;][ ]*")));
lineage.add(gbseq.getGBSeqOrganism());
Collections.sort(protein.domains, new Comparator<DomainRegion>() {
@Override
public int compare(DomainRegion o1, DomainRegion o2) {
return o2.length() - o1.length();
}
});
this.root.insert(lineage, protein);
}
}
root.simplify();
root.compile();
root.x = 0;
root.y = (this.leafList.size() * seqHeight) / 2.0;
root.compileXY(0, this.leafList.size() * seqHeight);
PrintStream ps = super.openFileOrStdoutAsPrintStream(outputFile);
XMLOutputFactory xof = XMLOutputFactory.newFactory();
xof.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);
XMLStreamWriter w = xof.createXMLStreamWriter(ps, "UTF-8");
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("svg");
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK);
w.writeAttribute("version", "1.1");
w.writeAttribute("width", String.valueOf(2 + this.treeWidth + this.organismWidth + this.acnWidth + this.seqWidth));
w.writeAttribute("height", String.valueOf(2 + this.leafList.size() * seqHeight));
w.writeComment(this.getProgramCommandLine());
w.writeComment("Version:" + getVersion());
w.writeComment("Author: Pierre lindenbaum Phd");
w.writeStartElement("defs");
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:black;stop-opacity:1;");
w.writeEndElement();
for (Domain cdd : this.cdd2domain.values()) {
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad" + cdd.cdd);
w.writeAttribute("x1", "50%");
w.writeAttribute("x2", "50%");
w.writeAttribute("y1", "0%");
w.writeAttribute("y2", "100%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:" + cdd.color + ";stop-opacity:1;");
w.writeEndElement();
}
// defs
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters("svg {fill:none; stroke:black;}\n" + ".protein { stroke:red;}\n" + ".tree { stroke:black;fill:none;stroke-width:2}\n" + ".organism { stroke:black;fill:none;stroke-width:2}\n" + ".acn { stroke:blue;fill:none;stroke-width:2}\n" + ".protein {fill:url(#grad01);stroke:black;}\n");
for (Domain cdd : this.cdd2domain.values()) {
w.writeCharacters(".cdd" + cdd.cdd + " {fill:url(#grad" + cdd.cdd + ");stroke:orange;stroke-width:3;fill-opacity:0.8;}\n");
}
// style
w.writeEndElement();
w.writeStartElement("g");
this.root.paint(w);
// g
w.writeEndElement();
// svg
w.writeEndElement();
w.writeEndDocument();
w.flush();
w.close();
ps.close();
ps = null;
LOG.info("Done");
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
}
}
use of gov.nih.nlm.ncbi.gb.GBSeq in project jvarkit by lindenb.
the class BlastMapAnnotations method printGB.
private void printGB(GBSet gbSet) {
for (GBSeq gbSeq : gbSet.getGBSeq()) {
BlastOutputIterations iterations = this.blastOutput.getBlastOutputIterations();
for (Iteration iteration : iterations.getIteration()) {
for (GBFeature feature : gbSeq.getGBSeqFeatureTable().getGBFeature()) {
if (feature.getGBFeatureIntervals() == null)
continue;
if (!acceptfeature(feature.getGBFeatureKey()))
continue;
for (GBInterval interval : feature.getGBFeatureIntervals().getGBInterval()) {
for (Hit hit : iteration.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
GenbankInterval bi = new GenbankInterval();
bi.gbSeq = gbSeq;
bi.gbFeature = feature;
bi.gbInterval = interval;
bi.hit = hit;
bi.hsp = hsp;
LOG.debug("interval " + bi);
if (!bi.isGbForward())
LOG.info("CHECK INTERVAL REVERSE");
if (!bi.isFeatureOverlapHsp())
continue;
stdout().println(bi.toBedString());
}
}
}
}
break;
}
}
// System.err.println("OK");
}
Aggregations