use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class TreePackApp method parseNode.
/**
 * Recursively builds a {@code Node} from a {@code node} element and the
 * {@code node} elements nested inside it.
 *
 * <p>The {@code label} attribute of {@code E} becomes the node's label (empty
 * string when absent). Events are read until the first end-element seen at
 * this level; nested {@code node} elements are parsed recursively and any
 * other nested element is handed to {@code skip(r)} (presumably consuming its
 * whole subtree, so that the end-element seen here is this node's own; confirm
 * against {@code skip}). A node that ends up without children takes its weight
 * from the {@code weight} attribute, defaulting to {@code 1.0}.
 *
 * @param r      event reader positioned just after the start tag {@code E}
 * @param E      start element of the node being parsed
 * @param parent value passed to the {@code Node} constructor
 * @return the populated node
 * @throws XMLStreamException    if reading fails or a leaf declares a weight &lt;= 0
 * @throws NumberFormatException if a leaf's {@code weight} attribute is not a number
 * @throws IllegalStateException if the stream ends before an end-element is seen
 */
private Node parseNode(XMLEventReader r, StartElement E, Node parent) throws XMLStreamException {
    Node node = new Node(parent);
    Attribute att = E.getAttributeByName(new QName("label"));
    node.label = (att != null ? att.getValue() : "");
    while (r.hasNext()) {
        XMLEvent evt = r.nextEvent();
        if (evt.isStartElement()) {
            QName qName = evt.asStartElement().getName();
            if (qName.getLocalPart().equals("node")) {
                Node child = parseNode(r, evt.asStartElement(), node);
                if (node.children == null) {
                    node.children = new HashMap<String, Node>();
                }
                // Key by the CHILD's label. Keying by the parent's label (as before)
                // made every child overwrite the previous one, so a parent could
                // never hold more than a single child.
                // NOTE(review): siblings sharing a label (including unlabeled ones,
                // which all get "") still replace each other in this map.
                node.children.put(child.label, child);
            } else {
                skip(r);
            }
        } else if (evt.isEndElement()) {
            // Only leaves carry a weight of their own.
            if (node.children == null || node.children.isEmpty()) {
                att = E.getAttributeByName(new QName("weight"));
                if (att == null) {
                    node.weight = 1.0;
                } else {
                    node.weight = Double.parseDouble(att.getValue());
                    if (node.weight <= 0) {
                        throw new XMLStreamException("bad @weight:" + node.weight, E.getLocation());
                    }
                }
            }
            return node;
        }
    }
    throw new IllegalStateException("unexpected end of XML stream while reading a <node> element");
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class PubmedCodingLanguages method scanArticle.
/**
 * Reads events up to the end-element named {@code rootName}, collecting the
 * first PMID, the first non-empty ArticleTitle, the first Year found inside a
 * PubDate element and the text of the last Abstract element. It then prints
 * one tab-separated line (pmid, language name, title, year, match context) to
 * {@code out} for every language in {@code this.languages} whose
 * {@code match} returns a non-null context on the lower-cased title + abstract.
 *
 * <p>When nothing matched but the text contains "programming language" or
 * "written in", a warning is logged instead.
 *
 * @param out      destination of the tab-separated report lines
 * @param rootName local name of the element whose end tag stops the scan
 * @param r        event reader to consume
 * @throws XMLStreamException if reading the XML fails
 * @throws IOException        declared by the signature
 */
private void scanArticle(PrintStream out, final String rootName, final XMLEventReader r) throws XMLStreamException, IOException {
    String pmid = null;
    String title = "";
    String year = null;
    String abstractBody = "";
    boolean insidePubDate = false;

    boolean articleOpen = true;
    while (articleOpen && r.hasNext()) {
        final XMLEvent event = r.nextEvent();

        if (event.isEndElement()) {
            final String closed = event.asEndElement().getName().getLocalPart();
            if ("PubDate".equals(closed)) {
                insidePubDate = false;
            } else if (closed.equals(rootName)) {
                // closing tag of the article: stop reading
                articleOpen = false;
            }
            continue;
        }
        if (!event.isStartElement()) {
            continue;
        }

        final String opened = event.asStartElement().getName().getLocalPart();
        if ("PMID".equals(opened)) {
            // only the first PMID is kept
            if (pmid == null) {
                pmid = r.getElementText();
            }
        } else if ("ArticleTitle".equals(opened)) {
            if (title.isEmpty()) {
                title = textContent(r);
            }
        } else if ("PubDate".equals(opened)) {
            insidePubDate = true;
        } else if ("Year".equals(opened)) {
            // a Year only counts when nested in a PubDate
            if (year == null && insidePubDate) {
                year = r.getElementText();
            }
        } else if ("Abstract".equals(opened)) {
            abstractBody = textContent(r);
        }
    }

    // end of xml read
    final String haystack = (title + " " + abstractBody).replace('\n', ' ').toLowerCase();
    boolean matched = false;
    for (final ProgLanguage language : this.languages) {
        if (!this.common_only_false || language.isCommon()) {
            final String hit = language.match(haystack);
            if (hit != null) {
                out.print(pmid + "\t" + language.getName() + "\t" + title + "\t" + year + "\t" + hit);
                out.println();
                matched = true;
            }
        }
    }

    if (!matched) {
        final boolean mentionsCode = haystack.contains("programming language") || haystack.contains("written in");
        if (mentionsCode) {
            LOG.warning("Cannot get information in:\t" + haystack);
        }
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class PubmedFilterJS method doWork.
/**
 * Copies a PubMed XML stream to the output, keeping a {@code PubmedArticle}
 * element only when the user's JavaScript expression evaluates to true for it.
 *
 * <p>Each {@code PubmedArticle} is unmarshalled with JAXB and exposed to the
 * script as {@code article}, together with its running {@code index}.
 * Accepted articles are marshalled back to the output followed by a newline;
 * every other event is copied through unchanged.
 *
 * @param args command-line arguments, reduced to one input name (or none for
 *             stdin) by {@code oneFileOrNull}
 * @return 0 on success, -1 if any exception was raised (it is logged)
 */
@Override
public int doWork(List<String> args) {
    final String inputName = oneFileOrNull(args);
    XMLEventReader r = null;
    XMLEventWriter w = null;
    PrintWriter pw = null;
    try {
        final CompiledScript compiledScript = this.compileJavascript(this.javascriptExpr, this.javascriptFile);
        final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.pubmed");
        final Unmarshaller unmarshaller = jc.createUnmarshaller();
        final Marshaller marshaller = jc.createMarshaller();

        final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        // Any entity the parser still asks for is answered with an empty stream.
        xmlInputFactory.setXMLResolver(new XMLResolver() {
            @Override
            public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws XMLStreamException {
                LOG.info("ignoring " + publicID + " " + baseURI + " " + namespace);
                return new ByteArrayInputStream(new byte[0]);
            }
        });

        marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);

        pw = openFileOrStdoutAsPrintWriter(this.outFile);
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        w = xof.createXMLEventWriter(pw);

        final StreamSource src;
        if (inputName == null) {
            LOG.info("Reading stdin");
            src = new StreamSource(System.in);
        } else {
            LOG.info("Reading file");
            src = new StreamSource(new File(inputName));
        }
        r = xmlInputFactory.createXMLEventReader(src);

        final XMLEventFactory eventFactory = XMLEventFactory.newFactory();
        final SimpleBindings bindings = new SimpleBindings();
        long nArticles = 0L;
        while (r.hasNext()) {
            // Peek so that JAXB can consume the whole PubmedArticle element itself.
            final XMLEvent evt = r.peek();
            final boolean isArticle = evt.isStartElement()
                    && evt.asStartElement().getName().getLocalPart().equals("PubmedArticle");
            if (!isArticle) {
                // Anything else is copied through as-is. PubmedBookArticle is no longer
                // handled here (original author's note: "no more in the latest dtd").
                w.add(r.nextEvent());
                continue;
            }

            final JAXBElement<?> jaxbElement = unmarshaller.unmarshal(r, PubmedArticle.class);
            final Object article = jaxbElement.getValue();
            if (article == null) {
                continue;
            }
            bindings.put("article", article);
            bindings.put("index", nArticles++);
            if (!this.evalJavaScriptBoolean(compiledScript, bindings)) {
                // rejected by the script: already consumed from the input, never written
                continue;
            }
            marshaller.marshal(jaxbElement, w);
            w.add(eventFactory.createCharacters("\n"));
        }

        // The reader is closed once, after the last event. It used to be closed
        // inside the loop, i.e. after every single event.
        r.close();
        r = null;
        w.flush();
        w.close();
        w = null;
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Failure path only: release whatever was opened before the error.
        if (r != null) {
            try {
                r.close();
            } catch (XMLStreamException ignored) {
                // the primary failure has already been logged
            }
        }
        if (w != null) {
            try {
                w.close();
            } catch (XMLStreamException ignored) {
                // the primary failure has already been logged
            }
        }
        if (pw != null) {
            pw.close();
        }
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class SkipXmlElements method doWork.
/**
 * Copies an XML stream to the output while letting a user script decide, for
 * every start element, whether it is kept.
 *
 * <p>The script sees the current element as {@code element} (a {@code TagImpl}
 * linked to its parent) and must return a Boolean (true maps to 1, false to 0)
 * or a Number. A result equal to {@code KEEP_ELEMENT} writes the start tag and
 * carries on with its content. A result equal to
 * {@code KEEP_ELEMENT_AND_DESCENDANTS} writes the whole subtree without
 * evaluating the script on the descendants. Any other value drops the element
 * together with its subtree. Comments met at the top level of the loop are
 * dropped.
 *
 * @param args zero arguments (read stdin) or one argument (input file)
 * @return 0 on success, -1 on a bad argument count or any exception (logged)
 */
@Override
public int doWork(List<String> args) {
    // NOTE(review): both script sources are always null in the code visible here;
    // confirm where the expression/file is meant to come from.
    String scriptExpr = null;
    File scriptFile = null;
    XMLEventReader r = null;
    XMLEventWriter w = null;
    PrintWriter pw = null;
    try {
        final CompiledScript compiledScript = super.compileJavascript(scriptExpr, scriptFile);
        final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        // Validate the arguments BEFORE opening the output, so that a usage error
        // neither creates/truncates the output file nor leaks its writer.
        final StreamSource src;
        if (args.isEmpty()) {
            src = new StreamSource(stdin());
        } else if (args.size() == 1) {
            src = new StreamSource(new File(args.get(0)));
        } else {
            LOG.error("illegal.number.of.arguments");
            return -1;
        }

        pw = super.openFileOrStdoutAsPrintWriter(outputFile);
        final XMLOutputFactory xof = XMLOutputFactory.newFactory();
        w = xof.createXMLEventWriter(pw);
        r = xmlInputFactory.createXMLEventReader(src);

        final SimpleBindings bindings = new SimpleBindings();
        TagImpl curr = null;
        while (r.hasNext()) {
            // Peek: the event is only consumed once we know what to do with it.
            final XMLEvent evt = r.peek();
            switch (evt.getEventType()) {
                case XMLEvent.START_ELEMENT: {
                    // Push the element on the ancestor chain.
                    final TagImpl node = new TagImpl(evt.asStartElement());
                    if (curr != null) {
                        node.parent = curr;
                    }
                    curr = node;

                    int keep = 1;
                    bindings.put("element", curr);
                    final Object result = compiledScript.eval(bindings);
                    if (result == null) {
                        throw new RuntimeException("User's Script returned null");
                    } else if (result instanceof Boolean) {
                        keep = (Boolean.class.cast(result).booleanValue() ? 1 : 0);
                    } else if (!(result instanceof Number)) {
                        throw new RuntimeException("User's Script didn't return a number.");
                    } else {
                        keep = ((Number) result).intValue();
                    }

                    if (keep == KEEP_ELEMENT) {
                        w.add(r.nextEvent());
                    } else {
                        // Skip this element, or keep it with all its descendants: either
                        // way the subtree is consumed here, so pop it right away.
                        curr = curr.parent;
                        int depth = 0;
                        while (r.hasNext()) {
                            final XMLEvent inner = r.nextEvent();
                            switch (inner.getEventType()) {
                                case XMLEvent.START_ELEMENT:
                                    depth++;
                                    break;
                                case XMLEvent.END_ELEMENT:
                                    depth--;
                                    break;
                                default:
                                    break;
                            }
                            if (keep == KEEP_ELEMENT_AND_DESCENDANTS) {
                                w.add(inner);
                            }
                            // depth is back to 0 on the end tag matching the first start tag
                            if (depth == 0) {
                                break;
                            }
                        }
                    }
                    break;
                }
                case XMLEvent.COMMENT: {
                    // just consume
                    r.nextEvent();
                    break;
                }
                case XMLEvent.END_ELEMENT: {
                    curr = curr.parent;
                    w.add(r.nextEvent());
                    break;
                }
                default: {
                    w.add(r.nextEvent());
                    break;
                }
            }
        }

        // The reader is closed once, after the last event. It used to be closed
        // inside the loop, i.e. after every single event.
        r.close();
        r = null;
        w.flush();
        w.close();
        w = null;
        pw.flush();
        pw.close();
        pw = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // Failure path only: release whatever was opened before the error.
        if (r != null) {
            try {
                r.close();
            } catch (XMLStreamException ignored) {
                // the primary failure has already been logged
            }
        }
        if (w != null) {
            try {
                w.close();
            } catch (XMLStreamException ignored) {
                // the primary failure has already been logged
            }
        }
        if (pw != null) {
            pw.close();
        }
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class XsltStream method parseDocument.
/**
 * Builds a DOM element (with its subtree) from a StAX start element and the
 * events that follow it, up to the matching end element.
 *
 * <p>Namespace declarations and attributes of {@code startE} are copied onto
 * the new element. Character events become text nodes, processing
 * instructions are ignored, nested elements are parsed recursively unless
 * their name is in {@code this.skipQNames} (then they are passed to
 * {@code skip}, presumably consuming the subtree; confirm against
 * {@code skip}), and any other event type is logged as unhandled.
 *
 * @param owner  document used to create the DOM nodes
 * @param startE start element to convert
 * @param r      event reader positioned just after {@code startE}
 * @return the element created for {@code startE}, not yet attached to any parent
 * @throws XMLStreamException if reading the XML fails
 */
private Node parseDocument(final Document owner, final StartElement startE, final XMLEventReader r) throws XMLStreamException {
    final QName qname = startE.getName();
    final Element root;
    if (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty()) {
        root = owner.createElement(toQNAME(qname));
    } else {
        root = owner.createElementNS(qname.getNamespaceURI(), toQNAME(qname));
    }

    // Namespace declarations carried by the start tag.
    Iterator<?> it = startE.getNamespaces();
    while (it.hasNext()) {
        final Namespace ns = Namespace.class.cast(it.next());
        final String prefix = ns.getPrefix();
        // A default namespace declaration has an empty prefix and must be written
        // as "xmlns"; concatenating blindly produced the bogus attribute "xmlns:".
        final String declName = (prefix == null || prefix.isEmpty()) ? "xmlns" : "xmlns:" + prefix;
        root.setAttribute(declName, ns.getNamespaceURI());
    }

    // Regular attributes, namespace-qualified when they have a namespace URI.
    it = startE.getAttributes();
    while (it.hasNext()) {
        final Attribute att = Attribute.class.cast(it.next());
        final QName attName = att.getName();
        if (attName.getNamespaceURI() == null || attName.getNamespaceURI().isEmpty()) {
            root.setAttribute(toQNAME(attName), att.getValue());
        } else {
            root.setAttributeNS(attName.getNamespaceURI(), toQNAME(attName), att.getValue());
        }
    }

    // Content, up to the end element of this node.
    while (r.hasNext()) {
        final XMLEvent evt = r.nextEvent();
        if (evt.isCharacters()) {
            root.appendChild(owner.createTextNode(evt.asCharacters().getData()));
        } else if (evt.isProcessingInstruction()) {
            // processing instructions are deliberately dropped
        } else if (evt.isEndElement()) {
            break;
        } else if (evt.isStartElement()) {
            if (this.skipQNames.contains(evt.asStartElement().getName())) {
                skip(evt.asStartElement(), r);
            } else {
                root.appendChild(parseDocument(owner, evt.asStartElement(), r));
            }
        } else {
            LOG.warn("Cannot handle " + evt);
        }
    }
    return root;
}
Aggregations