Search in sources :

Example 1 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class DepAnnotator method addView.

/**
 * Adds a dependency tree view named {@code ViewNames.DEPENDENCY} to the given text annotation.
 *
 * <p>Every view listed in {@code requiredViews} must already be present. The best structure
 * returned by the model's inference solver is converted into a {@code Tree} of
 * (token form, token position) pairs and stored as the dependency tree of sentence 0.
 *
 * @param ta the text annotation to parse and to attach the new view to
 * @throws AnnotatorException if a required view is missing, or if inference fails; in the
 *         latter case the original failure is attached as the cause
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    for (String reqView : requiredViews) {
        if (!ta.hasView(reqView)) {
            throw new AnnotatorException("TextAnnotation must have view: " + reqView);
        }
    }
    DepInst sent = new DepInst(ta);
    DepStruct deptree;
    try {
        deptree = (DepStruct) model.infSolver.getBestStructure(model.wv, sent);
    } catch (Exception e) {
        // Keep the public message and exception type unchanged, but preserve the underlying
        // failure so that the real reason for the parse error is not lost.
        AnnotatorException parseFailure = new AnnotatorException("Sentence cannot be parsed");
        parseFailure.initCause(e);
        throw parseFailure;
    }
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY, ta);
    int rootPos = findRoot(deptree);
    // All the node positions are -1 to account for the extra <root> node added
    Pair<String, Integer> nodePair = new Pair<>(sent.forms[rootPos], rootPos - 1);
    Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
    populateChildren(tree, deptree, sent, rootPos);
    treeView.setDependencyTree(0, tree);
    ta.addView(ViewNames.DEPENDENCY, treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) DepInst(edu.illinois.cs.cogcomp.depparse.core.DepInst) DepStruct(edu.illinois.cs.cogcomp.depparse.core.DepStruct) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 2 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class OntonotesTreebankReader method parseLines.

/**
 * Parses the lines of a Penn Treebank parse file into one annotation covering the entire file.
 *
 * <p>Non-empty lines are accumulated until the running parenthesis balance returns to zero;
 * the accumulated text is then parsed as one tree. Each tree contributes one tokenized
 * sentence to the resulting annotation and one parse tree to its tree view.
 *
 * @param lines the data from the file, one entry per line.
 * @return the text annotation, or {@code null} if a tree yields no terminal tokens.
 * @throws AnnotatorException not thrown directly here; declared for the annotation utilities
 *         this method calls.
 */
protected TextAnnotation parseLines(ArrayList<String> lines) throws AnnotatorException {
    ArrayList<String[]> sentences = new ArrayList<>();
    ArrayList<Tree<String>> trees = new ArrayList<>();
    StringBuilder pending = new StringBuilder();
    int openParens = 0;
    for (String line : lines) {
        if (line.isEmpty()) {
            continue;
        }
        openParens += countUnclosedParens(line);
        pending.append(line);
        if (openParens != 0) {
            // The current tree is still open; keep accumulating lines.
            continue;
        }
        // A complete bracketing has been read: unescape "\/" and parse it.
        String bracketing = pending.toString().replaceAll("\\\\/", "/");
        Tree<String> tree = TreeParserFactory.getStringTreeParser().parse(bracketing);
        String[] tokens = ParseUtils.getTerminalStringSentence(tree);
        if (tokens.length == 0) {
            // A tree without tokens aborts the whole file.
            System.err.println("This tree produced no sentence text:\n" + tree);
            System.err.println("from file:\n" + this.currentfile);
            System.err.flush();
            return null;
        }
        sentences.add(tokens);
        trees.add(tree);
        treesProduced++;
        pending = new StringBuilder();
    }

    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(VIEW_NAME, currentfile, sentences);
    TreeView parse = new TreeView(VIEW_NAME, this.getClass().getCanonicalName(), ta, 1.0);
    // Register the parse trees in sentence order.
    for (int sentenceIndex = 0; sentenceIndex < trees.size(); sentenceIndex++) {
        parse.setParseTree(sentenceIndex, trees.get(sentenceIndex));
    }
    ta.addView(VIEW_NAME, parse);
    // Derive a part-of-speech view from the parse view that was just added.
    POSFromParse pos = new POSFromParse(VIEW_NAME);
    ta.addView(pos);
    return ta;
}
Also used : ArrayList(java.util.ArrayList) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) POSFromParse(edu.illinois.cs.cogcomp.nlp.utilities.POSFromParse)

Example 3 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class StanfordDepHandler method addView.

/**
 * Runs the POS and parse annotators over the text annotation and adds a dependency tree view
 * built from each sentence's basic dependency graph.
 *
 * <p>Sentences longer than {@code maxParseSentenceLength} (when that limit is positive) are
 * logged and skipped. A sentence whose dependency graph root cannot be obtained is either
 * skipped or causes the exception to be rethrown, depending on
 * {@code throwExceptionOnSentenceLengthCheck}.
 *
 * @param textAnnotation the annotation to parse and to attach the new view to
 * @throws AnnotatorException if the first sentence's parse tree is the placeholder node "X"
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);
    // Hand the (tokenized) sentences to the annotators, then read the annotated sentences back
    List<CoreMap> inputSentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(inputSentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);
        boolean exceedsLimit = maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength;
        if (exceedsLimit) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(), maxParseSentenceLength));
            continue;
        }
        SemanticGraph depGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        IndexedWord root;
        try {
            root = depGraph.getFirstRoot();
        } catch (RuntimeException e) {
            String msg = "ERROR in getting root of dep graph for sentence.  Sentence is:\n" + sentence.toString() + "'\nDependency graph is:\n" + depGraph.toCompactString() + "\nText is:\n" + textAnnotation.getText();
            logger.error(msg);
            System.err.println(msg);
            e.printStackTrace();
            if (throwExceptionOnSentenceLengthCheck) {
                throw e;
            }
            // Otherwise leave this sentence without a dependency tree and move on.
            continue;
        }
        int rootPosition = getNodePosition(textAnnotation, root, sentenceId);
        Pair<String, Integer> rootLabel = new Pair<>(root.originalText(), rootPosition);
        Tree<Pair<String, Integer>> dependencyTree = new Tree<>(rootLabel);
        populateChildren(depGraph, root, dependencyTree, textAnnotation, sentenceId);
        treeView.setDependencyTree(sentenceId, dependencyTree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 4 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class StanfordDepHandler method populateChildren.

/**
 * Recursively copies the descendants of {@code root} in the dependency graph into
 * {@code tree}.
 *
 * <p>Each child becomes a subtree labelled with (original text, node position); the edge to
 * it is labelled with (string form of the graph edge, node position).
 *
 * @param depGraph the dependency graph being converted
 * @param root the graph node whose children are added
 * @param tree the tree node corresponding to {@code root}
 * @param ta the text annotation used to resolve node positions
 * @param sentId the id of the sentence the graph belongs to
 */
private void populateChildren(SemanticGraph depGraph, IndexedWord root, Tree<Pair<String, Integer>> tree, TextAnnotation ta, int sentId) {
    // Leaf node: nothing to attach.
    if (depGraph.getChildren(root).isEmpty()) {
        return;
    }
    for (IndexedWord dependent : depGraph.getChildren(root)) {
        int position = getNodePosition(ta, dependent, sentId);
        Pair<String, Integer> nodeLabel = new Pair<>(dependent.originalText(), position);
        Tree<Pair<String, Integer>> subtree = new Tree<>(nodeLabel);
        String edgeName = depGraph.getEdge(root, dependent).toString();
        tree.addSubtree(subtree, new Pair<>(edgeName, position));
        // Descend so that the dependent's own children hang off its subtree.
        populateChildren(depGraph, dependent, subtree, ta, sentId);
    }
}
Also used : Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 5 with Tree

use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.

the class StanfordParseHandler method addView.

/**
 * Runs the POS and parse annotators over the text annotation and adds a view holding one
 * converted parse tree per sentence.
 *
 * <p>Sentences longer than {@code maxParseSentenceLength} (when that limit is positive) are
 * logged and left without a parse tree.
 *
 * @param textAnnotation the annotation to parse and to attach the new view to
 * @throws AnnotatorException if the first sentence's parse tree is the placeholder node "X"
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.PARSE_STANFORD, "StanfordParseHandler", textAnnotation, 1d);
    // Hand the (tokenized) sentences to the annotators, then read the annotated sentences back
    List<CoreMap> inputSentences = buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(inputSentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);
        boolean exceedsLimit = maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength;
        if (exceedsLimit) {
            logger.warn("Unable to parse TextAnnotation " + textAnnotation.getId() + " since it is larger than the maximum sentence length of the parser (" + maxParseSentenceLength + ").");
            continue;
        }
        // Rebuild the Stanford tree as a cogcomp Tree: copy the root label, then convert
        // each child subtree.
        edu.stanford.nlp.trees.Tree stanfordRoot = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        Tree<String> converted = new Tree<>(stanfordRoot.value());
        for (edu.stanford.nlp.trees.Tree stanfordChild : stanfordRoot.getChildrenAsList()) {
            converted.addSubtree(generateNode(stanfordChild));
        }
        treeView.setParseTree(sentenceId, converted);
    }
    textAnnotation.addView(getViewName(), treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) ArrayCoreMap(edu.stanford.nlp.util.ArrayCoreMap)

Aggregations

Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)15 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)10 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)7 ArrayList (java.util.ArrayList)7 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)6 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)5 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)4 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)3 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)2 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)2 ITransformer (edu.illinois.cs.cogcomp.core.transformers.ITransformer)2 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)2 IndexedWord (edu.stanford.nlp.ling.IndexedWord)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)1 DepInst (edu.illinois.cs.cogcomp.depparse.core.DepInst)1 DepStruct (edu.illinois.cs.cogcomp.depparse.core.DepStruct)1 SRLNode (edu.illinois.cs.cogcomp.nlp.corpusreaders.ontonotes.utils.SRLNode)1