Use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
Class XsltStream, method doWork.
/**
 * Streams the XML input and runs the XSLT stylesheet on every element whose
 * qualified name is listed in {@code targetQnames}.
 *
 * Each matching element is turned into a DOM node by {@code parseDocument},
 * attached to a single reusable {@link Document}, transformed to the output
 * stream, and then detached again so the document is empty for the next match.
 *
 * @param args command-line arguments; at most one input file (stdin is read
 *             when {@code oneFileOrNull} returns {@code null})
 * @return 0 on success, -1 when no target name is defined or when any
 *         exception is raised
 */
@Override
public int doWork(final List<String> args) {
    if (this.targetQnames.isEmpty()) {
        LOG.error("No target name defined");
        return -1;
    }
    XMLEventReader eventReader = null;
    OutputStream sink = null;
    try {
        final String input = oneFileOrNull(args);
        // compile the stylesheet once; the same transformer is reused for every fragment
        final Transformer xslt = TransformerFactory.newInstance()
                .newTemplates(new StreamSource(this.templateFile))
                .newTransformer();
        sink = openFileOrStdoutAsStream(this.outputFile);
        final XMLInputFactory inputFactory = XMLInputFactory.newFactory();
        if (input == null) {
            eventReader = inputFactory.createXMLEventReader(stdin());
        } else {
            eventReader = inputFactory.createXMLEventReader(new StreamSource(new File(input)));
        }
        // scratch document: owns the nodes built for each fragment
        final Document scratch = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        while (eventReader.hasNext()) {
            final XMLEvent event = eventReader.nextEvent();
            if (event.isStartElement() && this.targetQnames.contains(event.asStartElement().getName())) {
                final StartElement start = event.asStartElement();
                scratch.appendChild(parseDocument(scratch, start, eventReader));
                xslt.transform(new DOMSource(scratch), new StreamResult(sink));
                // empty the document so that the next fragment becomes its only child
                while (scratch.hasChildNodes()) {
                    scratch.removeChild(scratch.getFirstChild());
                }
            }
        }
        sink.flush();
        sink.close();
        sink = null;
        eventReader.close();
        eventReader = null;
        return 0;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(eventReader);
        CloserUtil.close(sink);
    }
}
Use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
Class PubmedGraph, method eSummary.
/**
 * Queries the NCBI eSummary service for the article's pmid and fills in
 * {@code a.title} and {@code a.year} from the returned XML.
 *
 * The title is the text of the {@code Item} element with
 * {@code Name="Title" Type="String"}; the year field receives the text of the
 * {@code Item} element with {@code Name="PubDate" Type="Date"}. Scanning stops
 * as soon as both fields are set or the document ends.
 *
 * @param a the article to complete; its {@code pmid} is used to build the URL
 * @throws IOException declared by the signature
 * @throws XMLStreamException if the response cannot be parsed
 */
private void eSummary(final Article a) throws IOException, XMLStreamException {
    final QName attName = new QName("Name");
    final QName attType = new QName("Type");
    final String url = NcbiConstants.esummary() + "?retmode=xml&db=pubmed&" + "id=" + a.pmid + this.ncbiApiKey.getAmpParamValue();
    LOG.info(url);
    final StreamSource src = new StreamSource(url);
    final XMLEventReader reader = this.xmlInputFactory.createXMLEventReader(src);
    try {
        boolean inTitle = false;
        boolean inDate = false;
        while (reader.hasNext() && (a.title == null || a.year == null)) {
            final XMLEvent evt = reader.nextEvent();
            if (evt.isStartElement()) {
                final StartElement startE = evt.asStartElement();
                if (startE.getName().getLocalPart().equals("Item")) {
                    // getAttributeByName returns null when the attribute is absent
                    final Attribute name = startE.getAttributeByName(attName);
                    final Attribute type = startE.getAttributeByName(attType);
                    if (name == null || type == null) {
                        continue;
                    }
                    if (name.getValue().equals("Title") && type.getValue().equals("String")) {
                        inTitle = true;
                    } else if (name.getValue().equals("PubDate") && type.getValue().equals("Date")) {
                        inDate = true;
                    }
                }
            } else if (evt.isCharacters()) {
                if (inTitle) {
                    a.title = evt.asCharacters().getData();
                } else if (inDate) {
                    a.year = evt.asCharacters().getData();
                }
                inTitle = false;
                inDate = false;
            } else if (evt.isEndElement()) {
                // an empty Title/PubDate item must not capture the text that follows it
                inTitle = false;
                inDate = false;
            }
        }
    } finally {
        // release the parser even when parsing fails
        reader.close();
    }
}
Use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
Class PubmedMap, method doWork.
@Override
public int doWork(final List<String> args) {
final String inputName = oneFileOrNull(args);
OutputStream out = null;
XMLEventReader r = null;
InputStream in = null;
XMLEventWriter w = null;
try {
final QName attDomainSuffix = new QName("domain");
final QName attPlaceSuffix = new QName("place");
final XMLEventFactory xmlEventFactory = XMLEventFactory.newFactory();
final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setXMLResolver(new XMLResolver() {
@Override
public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws XMLStreamException {
LOG.debug("Ignoring resolve Entity");
return new ByteArrayInputStream(new byte[0]);
}
});
in = (inputName == null ? stdin() : IOUtils.openURIForReading(inputName));
r = xmlInputFactory.createXMLEventReader(in);
out = super.openFileOrStdoutAsStream(this.outFile);
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
w = xof.createXMLEventWriter(out, "UTF-8");
while (r.hasNext()) {
final XMLEvent evt = r.nextEvent();
if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Affiliation") && r.peek().isCharacters()) {
final List<Attribute> attributes = new ArrayList<>();
Iterator<?> t = evt.asStartElement().getAttributes();
while (t.hasNext()) {
final Attribute att = (Attribute) t.next();
if (att.getName().equals(attDomainSuffix))
continue;
if (att.getName().equals(attPlaceSuffix))
continue;
attributes.add(att);
}
final XMLEvent textEvt = r.nextEvent();
final String affiliation = textEvt.asCharacters().getData();
final Country country = decodeAffiliation(affiliation);
if (country != null) {
String suffix = country.suffix;
if (suffix.equals("gov"))
suffix = "us";
attributes.add(xmlEventFactory.createAttribute(attDomainSuffix, suffix));
attributes.add(xmlEventFactory.createAttribute(attPlaceSuffix, country.name));
}
w.add(xmlEventFactory.createStartElement(evt.asStartElement().getName(), attributes.iterator(), evt.asStartElement().getNamespaces()));
w.add(textEvt);
continue;
}
w.add(evt);
}
r.close();
r = null;
in.close();
in = null;
w.flush();
w.close();
w = null;
out.flush();
out.close();
out = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(in);
CloserUtil.close(w);
CloserUtil.close(out);
}
}
Use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
Class PubmedOrcidGraph, method scanArticle.
/**
 * Reads one article from the event stream and stores it, its authors and the
 * links between those authors in the Berkeley DB databases.
 *
 * Events are consumed until the end tag named {@code rootName} is met. Only
 * authors for which {@code parseAuthor} returned a non-null ORCID are kept;
 * when none is found nothing is written. Otherwise:
 * <ul>
 * <li>one link is written for every pair of kept authors (keyed by a fresh
 * numeric id when {@code all_links_between_authors} is set, else by
 * {@code orcid1~orcid2}, reusing an existing link for that pair);</li>
 * <li>the article is inserted unless its pmid is already present;</li>
 * <li>for a newly inserted article, each author is inserted unless already
 * present, in which case a "Conflict" line is printed when the last names
 * differ.</li>
 * </ul>
 *
 * @param rootName local name of the element whose end tag terminates the scan
 * @param r        event reader supplying the article's events
 * @param depth    current depth; {@code depth + 1} is passed to {@code parseAuthor}
 * @throws XMLStreamException on XML parsing errors
 * @throws IOException        declared by the signature
 */
private void scanArticle(final String rootName, final XMLEventReader r, int depth) throws XMLStreamException, IOException {
    final Article article = new Article();
    boolean inPubDate = false;
    final List<Author> authors = new ArrayList<>();
    while (r.hasNext()) {
        final XMLEvent evt = r.nextEvent();
        if (evt.isStartElement()) {
            final StartElement start = evt.asStartElement();
            final String eltName = start.getName().getLocalPart();
            // only the first occurrence of each field is recorded
            if (article.pmid == null && eltName.equals("PMID")) {
                article.pmid = r.getElementText();
            } else if (article.ISOAbbreviation == null && eltName.equals("ISOAbbreviation")) {
                article.ISOAbbreviation = r.getElementText();
            } else if (article.ArticleTitle == null && eltName.equals("ArticleTitle")) {
                article.ArticleTitle = this.textContent(r);
            } else if (eltName.equals("PubDate")) {
                inPubDate = true;
            } else if (article.doi == null && eltName.equals("ArticleId")) {
                final Attribute idType = start.getAttributeByName(new QName("IdType"));
                if (idType != null && idType.getValue().equalsIgnoreCase("doi")) {
                    article.doi = r.getElementText().trim();
                }
            } else if (article.Year == null && inPubDate && eltName.equals("Year")) {
                // a Year element only counts when nested in PubDate
                article.Year = r.getElementText();
            } else if (eltName.equals("Author")) {
                final Author author = parseAuthor(r, article.pmid, depth + 1);
                if (author != null && author.orcid != null) {
                    authors.add(author);
                }
            }
        } else if (evt.isEndElement()) {
            final EndElement end = evt.asEndElement();
            final String eltName = end.getName().getLocalPart();
            if (eltName.equals("PubDate")) {
                inPubDate = false;
            }
            if (eltName.equals(rootName)) {
                break;
            }
        }
    }
    if (authors.isEmpty()) {
        // no author with an ORCID: nothing to store
        return;
    }
    final DatabaseEntry key = new DatabaseEntry();
    final DatabaseEntry data = new DatabaseEntry();
    // sorting first gives each pair a stable (orcid1, orcid2) order
    Collections.sort(authors);
    for (int x = 0; x + 1 < authors.size(); ++x) {
        for (int y = x + 1; y < authors.size(); ++y) {
            Link link = null;
            final String orcid1 = authors.get(x).orcid;
            final String orcid2 = authors.get(y).orcid;
            if (this.all_links_between_authors) {
                // one record per (article, pair): always a fresh key
                LongBinding.longToEntry(++ID_GENERATOR, key);
            } else {
                // one record per pair: reuse the stored link if any
                StringBinding.stringToEntry(orcid1 + "~" + orcid2, key);
                if (this.linkDatabase.get(txn, key, data, LockMode.DEFAULT) != OperationStatus.NOTFOUND) {
                    link = this.linkBinding.entryToObject(data);
                }
            }
            if (link == null) {
                link = new Link();
                link.orcid1 = orcid1;
                link.orcid2 = orcid2;
            }
            link.pmids.add(article.pmid);
            this.linkBinding.objectToEntry(link, data);
            if (this.linkDatabase.put(txn, key, data) != OperationStatus.SUCCESS) {
                throw new JvarkitException.BerkeleyDbError("Cannot put in link db");
            }
        }
    }
    StringBinding.stringToEntry(article.pmid, key);
    if (this.articleDatabase.get(txn, key, data, LockMode.DEFAULT) != OperationStatus.NOTFOUND) {
        LOG.debug("Article already in database : " + article.pmid);
    } else {
        LOG.debug("inserting article " + article.pmid);
        this.articleBinding.objectToEntry(article, data);
        if (this.articleDatabase.put(txn, key, data) != OperationStatus.SUCCESS) {
            throw new JvarkitException.BerkeleyDbError("Cannot put in article db");
        }
        // for comparing names
        final Collator collator = Collator.getInstance(Locale.US);
        collator.setStrength(Collator.PRIMARY);
        for (final Author au : authors) {
            StringBinding.stringToEntry(au.orcid, key);
            if (this.authorDatabase.get(txn, key, data, LockMode.DEFAULT) != OperationStatus.NOTFOUND) {
                LOG.debug("Author already in database : " + au.orcid);
                final Author other = this.authorBinding.entryToObject(data);
                // NOTE(review): last names are compared but fore names are printed - confirm this is intended
                if (!StringUtil.isBlank(other.lastName) && !StringUtil.isBlank(au.lastName) && collator.compare(au.lastName, other.lastName) != 0) {
                    this.errPrintWriter.println("Conflict\t" + au.orcid + "\t" + au.foreName + "\t" + other.foreName);
                }
            } else {
                this.authorBinding.objectToEntry(au, data);
                if (this.authorDatabase.put(txn, key, data) != OperationStatus.SUCCESS) {
                    throw new JvarkitException.BerkeleyDbError("Cannot put in author db");
                }
            }
        }
    }
}
Use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
Class PubmedOrcidGraph, method scanOrcid.
/**
 * Looks up the PubMed articles of one ORCID and records the author as reviewed.
 *
 * Nothing is done for a null/blank ORCID, when {@code depth} has reached
 * {@code maxdepth}, or when the author is already stored and flagged as
 * reviewed (a stored author's depth is first lowered to {@code depth} if
 * needed). Otherwise an eSearch request retrieves the {@code WebEnv} /
 * {@code QueryKey} pair, which is then used in an eFetch request whose stream
 * is handed to {@code scanArticles}. Network and parsing errors of both
 * requests are logged and not rethrown. Finally the author (a placeholder
 * named {@code NAME_NOT_FOUND} if still unknown) is stored with
 * {@code reviewed = true}.
 *
 * @param orcid the ORCID identifier to search for
 * @param depth the depth at which this ORCID is reached
 * @throws IOException declared by the signature (e.g. by URL encoding)
 */
private void scanOrcid(final String orcid, int depth) throws IOException {
    final boolean blankOrcid = (orcid == null) || orcid.trim().isEmpty();
    if (blankOrcid) {
        LOG.debug("empty orcid");
        return;
    }
    if (depth >= this.maxdepth) {
        return;
    }
    // author was already scanned
    Author author = getAuthorByOrcid(orcid);
    if (author != null) {
        if (depth < author.depth) {
            author.depth = depth;
            insertAuthor(author);
        }
        if (author.reviewed) {
            return;
        }
    }
    final StringBuilder searchUrl = new StringBuilder(NcbiConstants.esearch());
    searchUrl.append("?db=pubmed&usehistory=1&retmax=100000&term=");
    searchUrl.append(URLEncoder.encode("\"" + orcid + "\"[auid]", "UTF-8"));
    searchUrl.append(this.ncbiApiKey.getAmpParamValue());
    String urlstr = searchUrl.toString();

    /* first get NCBI WebEnv */
    String webEnv = null;
    String queryKey = null;
    InputStream searchStream = null;
    XMLEventReader searchReader = null;
    try {
        LOG.debug(urlstr);
        searchStream = new URL(urlstr).openStream();
        searchReader = createXMLInputFactory().createXMLEventReader(searchStream);
        // stop reading as soon as both values are known
        while ((queryKey == null || webEnv == null) && searchReader.hasNext()) {
            final XMLEvent event = searchReader.nextEvent();
            if (!event.isStartElement()) {
                continue;
            }
            final String tag = event.asStartElement().getName().getLocalPart();
            if (tag.equals("WebEnv")) {
                webEnv = searchReader.getElementText();
            } else if (tag.equals("QueryKey")) {
                queryKey = searchReader.getElementText();
            }
        }
    } catch (final Exception err) {
        LOG.error(err);
    } finally {
        CloserUtil.close(searchReader);
        CloserUtil.close(searchStream);
    }

    if (queryKey != null && webEnv != null) {
        final StringBuilder fetchUrl = new StringBuilder(NcbiConstants.efetch());
        fetchUrl.append("?db=pubmed&usehistory=1&retmode=xml");
        fetchUrl.append(this.ncbiApiKey.getAmpParamValue());
        fetchUrl.append("&query_key=").append(queryKey);
        fetchUrl.append("&webenv=").append(webEnv);
        urlstr = fetchUrl.toString();
        LOG.debug(urlstr);
        InputStream fetchStream = null;
        try {
            fetchStream = new URL(urlstr).openStream();
            scanArticles(fetchStream, depth);
        } catch (Exception err) {
            LOG.error(err);
        } finally {
            CloserUtil.close(fetchStream);
        }
    } else {
        LOG.debug("Cannot get QueryKey/WebEnv " + urlstr);
    }

    /* we check we found an author with this orcid */
    author = getAuthorByOrcid(orcid);
    if (author == null) {
        // still unknown after the scan: store a placeholder
        author = new Author();
        author.orcid = orcid;
        author.reviewed = true;
        author.depth = depth;
        author.foreName = NAME_NOT_FOUND;
        author.lastName = NAME_NOT_FOUND;
    }
    author.reviewed = true;
    if (depth < author.depth) {
        author.depth = depth;
    }
    insertAuthor(author);
}
Aggregations