use of nu.xom.Builder in project CoreNLP by stanfordnlp.
the class CoreNLPServlet method init.
/**
 * Initializes the servlet: creates the CoreNLP pipeline and loads the
 * {@code CoreNLP-to-HTML.xsl} stylesheet from {@code /WEB-INF/data} into
 * {@code corenlpTransformer}.
 *
 * @throws ServletException if the stylesheet location cannot be resolved to a
 *     file system path, or if reading, parsing, or compiling the stylesheet fails
 */
public void init() throws ServletException {
  pipeline = new StanfordCoreNLP();

  String xslResource = "/WEB-INF/data/CoreNLP-to-HTML.xsl";
  String xslPath = getServletContext().getRealPath(xslResource);
  // getRealPath returns null when the container cannot map the resource to a
  // file; fail with a readable message instead of an NPE from new File(null).
  if (xslPath == null) {
    throw new ServletException(
        "Cannot resolve " + xslResource + " to a file system path");
  }

  try {
    Builder builder = new Builder();
    Document stylesheet = builder.build(new File(xslPath));
    corenlpTransformer = new XSLTransform(stylesheet);
  } catch (Exception e) {
    // Boundary catch: any failure while loading the stylesheet aborts servlet
    // initialization, with the original exception kept as the cause.
    throw new ServletException(e);
  }
}
use of nu.xom.Builder in project CoreNLP by stanfordnlp.
the class ParsedGigawordReader method toAnnotation.
/*
* Old implementation based on JDOM.
* No longer maintained due to JDOM licensing issues.
private static Annotation toAnnotation(String xml) throws IOException {
Element docElem;
try {
docElem = new SAXBuilder().build(new StringReader(xml)).getRootElement();
} catch (JDOMException e) {
throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, xml));
}
Element textElem = docElem.getChild("TEXT");
StringBuilder text = new StringBuilder();
int offset = 0;
List<CoreMap> sentences = new ArrayList<CoreMap>();
for (Object sentObj: textElem.getChildren("SENT")) {
CoreMap sentence = new ArrayCoreMap();
sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
Element sentElem = (Element)sentObj;
Tree tree = Tree.valueOf(sentElem.getText());
List<CoreLabel> tokens = new ArrayList<CoreLabel>();
List<Tree> preTerminals = preTerminals(tree);
for (Tree preTerminal: preTerminals) {
String posTag = preTerminal.value();
for (Tree wordTree: preTerminal.children()) {
String word = wordTree.value();
CoreLabel token = new CoreLabel();
token.set(CoreAnnotations.TextAnnotation.class, word);
token.set(CoreAnnotations.TextAnnotation.class, word);
token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
offset += word.length();
token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
text.append(word);
text.append(' ');
offset += 1;
tokens.add(token);
}
}
if (preTerminals.size() > 0) {
text.setCharAt(text.length() - 1, '\n');
}
sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
sentences.add(sentence);
}
String docID = docElem.getAttributeValue("id");
Matcher matcher = datePattern.matcher(docID);
matcher.find();
Calendar docDate = new Timex(matcher.group(1)).getDate();
Annotation document = new Annotation(text.toString());
document.set(CoreAnnotations.DocIDAnnotation.class, docID);
document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
return document;
}
*/
/**
 * Converts one document, given as an XML string, into an {@link Annotation}.
 *
 * <p>The root element must carry an {@code id} attribute and a {@code TEXT}
 * child whose {@code SENT} children each hold a tree string readable by
 * {@code Tree.valueOf}. For every sentence the words under the tree's
 * pre-terminals become tokens with text, part-of-speech tag and character
 * offsets. The document text is rebuilt by joining the words with single
 * spaces and ending each non-empty sentence with a newline.
 *
 * @param xml the XML source of a single document
 * @return an annotation carrying the rebuilt text, the document id, the
 *     document date taken from the id, and the list of sentences
 * @throws IOException declared by the signature; I/O failures raised while
 *     parsing {@code xml} are rethrown as {@link RuntimeException}
 * @throws RuntimeException if {@code xml} cannot be parsed
 * @throws IllegalStateException if {@code datePattern} finds no match in the
 *     document id
 */
private static Annotation toAnnotation(String xml) throws IOException {
  Element docElem;
  try {
    Builder parser = new Builder();
    StringReader in = new StringReader(xml);
    docElem = parser.build(in).getRootElement();
  } catch (ParsingException | IOException e) {
    // Same message as before, but the cause is chained so the parser's
    // stack trace is not lost.
    throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, xml), e);
  }

  Element textElem = docElem.getFirstChildElement("TEXT");
  StringBuilder text = new StringBuilder();
  int offset = 0;
  List<CoreMap> sentences = new ArrayList<>();
  Elements sentenceElements = textElem.getChildElements("SENT");
  for (int crtsent = 0; crtsent < sentenceElements.size(); crtsent++) {
    Element sentElem = sentenceElements.get(crtsent);
    CoreMap sentence = new ArrayCoreMap();
    sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
    // XXX ms: is this the same as sentElem.getText() in JDOM?
    Tree tree = Tree.valueOf(sentElem.getChild(0).getValue());
    List<CoreLabel> tokens = new ArrayList<>();
    List<Tree> preTerminals = preTerminals(tree);
    for (Tree preTerminal : preTerminals) {
      String posTag = preTerminal.value();
      for (Tree wordTree : preTerminal.children()) {
        String word = wordTree.value();
        CoreLabel token = new CoreLabel();
        token.set(CoreAnnotations.TextAnnotation.class, word);
        token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
        token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
        offset += word.length();
        token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
        text.append(word);
        // Every word is followed by one separator character, counted in offset.
        text.append(' ');
        offset += 1;
        tokens.add(token);
      }
    }
    if (preTerminals.size() > 0) {
      // Turn the separator after the sentence's last word into a line break.
      text.setCharAt(text.length() - 1, '\n');
    }
    // The sentence ends before the trailing separator, hence offset - 1.
    sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
    sentences.add(sentence);
  }

  String docID = docElem.getAttributeValue("id");
  Matcher matcher = datePattern.matcher(docID);
  // Check the match explicitly so a malformed id is reported by name rather
  // than as a bare "No match found" from group(1).
  if (!matcher.find()) {
    throw new IllegalStateException("No date found in document id: " + docID);
  }
  Calendar docDate = new Timex("DATE", matcher.group(1)).getDate();

  Annotation document = new Annotation(text.toString());
  document.set(CoreAnnotations.DocIDAnnotation.class, docID);
  document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
  document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
  return document;
}
use of nu.xom.Builder in project apn-proxy by apn-proxy.
the class ApnProxyAbstractXmlConfigReader method read.
/**
 * Parses the given XML configuration stream and passes its root element to
 * {@code realReadProcess}.
 *
 * <p>If parsing fails with a {@code ParsingException} or an
 * {@code IOException}, the error is logged and the method returns without
 * calling {@code realReadProcess}. The stream is not closed here.
 *
 * @param xmlConfigFileInputStream stream holding the XML configuration
 */
public final void read(InputStream xmlConfigFileInputStream) {
    Document parsedConfig;
    try {
        parsedConfig = new Builder().build(xmlConfigFileInputStream);
    } catch (ParsingException parseFailure) {
        // Malformed XML: report it and leave without processing anything.
        logger.error(parseFailure.getMessage(), parseFailure);
        return;
    } catch (IOException ioFailure) {
        // Stream could not be read: report it and leave as well.
        logger.error(ioFailure.getMessage(), ioFailure);
        return;
    }

    if (parsedConfig != null) {
        realReadProcess(parsedConfig.getRootElement());
    }
}
Aggregations