Search in sources :

Example 1 with Builder

use of nu.xom.Builder in project CoreNLP by stanfordnlp.

the class CoreNLPServlet method init.

/**
 * Initializes the servlet: creates the CoreNLP pipeline and loads the XSL
 * stylesheet used to build {@code corenlpTransformer}.
 *
 * <p>The stylesheet is looked up under the web application at
 * {@code /WEB-INF/data/CoreNLP-to-HTML.xsl}, parsed into a XOM document and
 * handed to a new {@code XSLTransform}.
 *
 * @throws ServletException wrapping any exception raised while parsing the
 *         stylesheet or constructing the transform
 */
public void init() throws ServletException {
    pipeline = new StanfordCoreNLP();
    final String stylesheetLocation =
            getServletContext().getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl");
    try {
        final Builder xmlParser = new Builder();
        final File stylesheetFile = new File(stylesheetLocation);
        corenlpTransformer = new XSLTransform(xmlParser.build(stylesheetFile));
    } catch (Exception cause) {
        // Any failure here leaves the servlet unusable, so surface it to the container.
        throw new ServletException(cause);
    }
}
Also used : ServletException(javax.servlet.ServletException) Builder(nu.xom.Builder) Document(nu.xom.Document) File(java.io.File) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) ServletException(javax.servlet.ServletException) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) XSLTransform(nu.xom.xslt.XSLTransform)

Example 2 with Builder

use of nu.xom.Builder in project CoreNLP by stanfordnlp.

the class ParsedGigawordReader method toAnnotation.

/*
   * Old implementation based on JDOM.
   * No longer maintained due to JDOM licensing issues.
  private static Annotation toAnnotation(String xml) throws IOException {
    Element docElem;
    try {
      docElem = new SAXBuilder().build(new StringReader(xml)).getRootElement();
    } catch (JDOMException e) {
      throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, xml));
    }
    Element textElem = docElem.getChild("TEXT");
    StringBuilder text = new StringBuilder();
    int offset = 0;
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    for (Object sentObj: textElem.getChildren("SENT")) {
      CoreMap sentence = new ArrayCoreMap();
      sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      Element sentElem = (Element)sentObj;
      Tree tree = Tree.valueOf(sentElem.getText());
      List<CoreLabel> tokens = new ArrayList<CoreLabel>();
      List<Tree> preTerminals = preTerminals(tree);
      for (Tree preTerminal: preTerminals) {
        String posTag = preTerminal.value();
        for (Tree wordTree: preTerminal.children()) {
          String word = wordTree.value();
          CoreLabel token = new CoreLabel();
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
          token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
          offset += word.length();
          token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
          text.append(word);
          text.append(' ');
          offset += 1;
          tokens.add(token);
        }
      }
      if (preTerminals.size() > 0) {
        text.setCharAt(text.length() - 1, '\n');
      }
      sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
      sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
      sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
      sentences.add(sentence);
    }

    String docID = docElem.getAttributeValue("id");
    Matcher matcher = datePattern.matcher(docID);
    matcher.find();
    Calendar docDate = new Timex(matcher.group(1)).getDate();

    Annotation document = new Annotation(text.toString());
    document.set(CoreAnnotations.DocIDAnnotation.class, docID);
    document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
    document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return document;
  }
  */
/**
 * Builds an {@link Annotation} from the XML text of a single document.
 *
 * <p>The XML is parsed with XOM. Every {@code SENT} child of the root's
 * {@code TEXT} element is read as a bracketed tree via {@code Tree.valueOf};
 * for each word below the trees returned by {@code preTerminals(tree)} a
 * {@link CoreLabel} is created carrying the word text, the pre-terminal's
 * value as part-of-speech tag, and begin/end character offsets. The document
 * text is rebuilt by appending each word followed by a single space; for a
 * sentence with at least one pre-terminal the final separator is replaced by
 * a newline.
 *
 * <p>The document ID is taken from the root element's {@code id} attribute and
 * the document date from group 1 of {@code datePattern} applied to that ID.
 *
 * @param xml the XML source of one document
 * @return the annotation holding text, document ID, date and sentences
 * @throws IOException declared for callers; this body itself rethrows parse and
 *         read failures as {@link RuntimeException}
 * @throws RuntimeException if the XML cannot be parsed or read (the original
 *         exception is attached as the cause)
 * @throws IllegalStateException if the document ID does not match {@code datePattern}
 */
private static Annotation toAnnotation(String xml) throws IOException {
    Element docElem;
    try {
        Builder parser = new Builder();
        StringReader in = new StringReader(xml);
        docElem = parser.build(in).getRootElement();
    } catch (ParsingException | IOException e) {
        // Keep the underlying exception as the cause so its stack trace is not lost.
        throw new RuntimeException(String.format("error:\n%s\ninput:\n%s", e, xml), e);
    }
    Element textElem = docElem.getFirstChildElement("TEXT");
    StringBuilder text = new StringBuilder();
    // Running character position within the reconstructed document text.
    int offset = 0;
    List<CoreMap> sentences = new ArrayList<>();
    Elements sentenceElements = textElem.getChildElements("SENT");
    for (int crtsent = 0; crtsent < sentenceElements.size(); crtsent++) {
        Element sentElem = sentenceElements.get(crtsent);
        CoreMap sentence = new ArrayCoreMap();
        sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
        // XXX ms: is this the same as sentElem.getText() in JDOM?
        Tree tree = Tree.valueOf(sentElem.getChild(0).getValue());
        List<CoreLabel> tokens = new ArrayList<>();
        List<Tree> preTerminals = preTerminals(tree);
        for (Tree preTerminal : preTerminals) {
            String posTag = preTerminal.value();
            for (Tree wordTree : preTerminal.children()) {
                String word = wordTree.value();
                CoreLabel token = new CoreLabel();
                token.set(CoreAnnotations.TextAnnotation.class, word);
                token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
                token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
                offset += word.length();
                token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
                text.append(word);
                // One separator character after every word; counted in the offset.
                text.append(' ');
                offset += 1;
                tokens.add(token);
            }
        }
        if (preTerminals.size() > 0) {
            // Turn the trailing word separator into the sentence terminator.
            text.setCharAt(text.length() - 1, '\n');
        }
        // offset - 1 is the position of the separator appended after the last word.
        sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        sentences.add(sentence);
    }
    String docID = docElem.getAttributeValue("id");
    Matcher matcher = datePattern.matcher(docID);
    if (!matcher.find()) {
        // Same exception type matcher.group(1) would raise, but naming the offending ID.
        throw new IllegalStateException("No date found in document id: " + docID);
    }
    Calendar docDate = new Timex("DATE", matcher.group(1)).getDate();
    Annotation document = new Annotation(text.toString());
    document.set(CoreAnnotations.DocIDAnnotation.class, docID);
    document.set(CoreAnnotations.CalendarAnnotation.class, docDate);
    document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return document;
}
Also used : ArrayCoreMap(edu.stanford.nlp.util.ArrayCoreMap) Matcher(java.util.regex.Matcher) Element(nu.xom.Element) Builder(nu.xom.Builder) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) Elements(nu.xom.Elements) Annotation(edu.stanford.nlp.pipeline.Annotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) ParsingException(nu.xom.ParsingException) StringReader(java.io.StringReader) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) ArrayCoreMap(edu.stanford.nlp.util.ArrayCoreMap)

Example 3 with Builder

use of nu.xom.Builder in project apn-proxy by apn-proxy.

the class ApnProxyAbstractXmlConfigReader method read.

/**
 * Parses an XML configuration stream and passes its root element to
 * {@code realReadProcess}.
 *
 * <p>If parsing or reading fails, the error is logged and the method returns
 * without calling {@code realReadProcess}.
 *
 * @param xmlConfigFileInputStream stream containing the XML configuration
 */
public final void read(InputStream xmlConfigFileInputStream) {
    final Document parsedConfig;
    try {
        Builder xmlBuilder = new Builder();
        parsedConfig = xmlBuilder.build(xmlConfigFileInputStream);
    } catch (ParsingException parseFailure) {
        logger.error(parseFailure.getMessage(), parseFailure);
        return;
    } catch (IOException ioFailure) {
        logger.error(ioFailure.getMessage(), ioFailure);
        return;
    }
    // Nothing to process without a document.
    if (parsedConfig != null) {
        realReadProcess(parsedConfig.getRootElement());
    }
}
Also used : Builder(nu.xom.Builder) ParsingException(nu.xom.ParsingException) Element(nu.xom.Element) Document(nu.xom.Document)

Aggregations

Builder (nu.xom.Builder)3 RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)2 IOException (java.io.IOException)2 Document (nu.xom.Document)2 Element (nu.xom.Element)2 ParsingException (nu.xom.ParsingException)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)1 CoreLabel (edu.stanford.nlp.ling.CoreLabel)1 Annotation (edu.stanford.nlp.pipeline.Annotation)1 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)1 Tree (edu.stanford.nlp.trees.Tree)1 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)1 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)1 CoreMap (edu.stanford.nlp.util.CoreMap)1 File (java.io.File)1 StringReader (java.io.StringReader)1 Matcher (java.util.regex.Matcher)1 ServletException (javax.servlet.ServletException)1 Elements (nu.xom.Elements)1 XSLTransform (nu.xom.xslt.XSLTransform)1