use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class DepAnnotator method addView.
/**
 * Parses the given text annotation and attaches the result as the
 * {@code ViewNames.DEPENDENCY} view.
 * <p>
 * Every view listed in {@code requiredViews} must already be present on {@code ta}. The
 * best structure returned by the model's inference solver is converted into a
 * {@code Tree} of (token form, token position) pairs, which is registered on the view
 * under sentence index 0.
 *
 * @param ta the text annotation to parse; receives the new dependency view.
 * @throws AnnotatorException if a required view is missing, or if the inference solver
 *         throws while parsing (the original exception is attached as the cause).
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    for (String reqView : requiredViews) {
        if (!ta.hasView(reqView)) {
            throw new AnnotatorException("TextAnnotation must have view: " + reqView);
        }
    }
    DepInst sent = new DepInst(ta);
    DepStruct deptree;
    try {
        deptree = (DepStruct) model.infSolver.getBestStructure(model.wv, sent);
    } catch (Exception e) {
        // Keep the underlying failure reachable via getCause() instead of dropping it.
        // NOTE(review): assumes AnnotatorException(String) leaves the cause unset, which
        // initCause requires -- confirm against the AnnotatorException definition.
        AnnotatorException failure = new AnnotatorException("Sentence cannot be parsed");
        failure.initCause(e);
        throw failure;
    }
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY, ta);
    int rootPos = findRoot(deptree);
    // All the node positions are -1 to account for the extra <root> node added
    Pair<String, Integer> nodePair = new Pair<>(sent.forms[rootPos], rootPos - 1);
    Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
    populateChildren(tree, deptree, sent, rootPos);
    treeView.setDependencyTree(0, tree);
    ta.addView(ViewNames.DEPENDENCY, treeView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class OntonotesTreebankReader method parseLines.
/**
 * Parses the contents of a Penn Treebank parse file into one annotation covering the
 * entire file.
 * <p>
 * Non-empty lines are buffered until the running count from {@code countUnclosedParens}
 * returns to zero; the buffered text (with {@code \/} unescaped to {@code /}) is then
 * parsed as a single tree. Each tree contributes one sentence of terminal tokens. Any text
 * still buffered when the input ends is not parsed.
 *
 * @param lines the data from the file, one entry per line.
 * @return the text annotation carrying the parse view and a POS view derived from it, or
 *         {@code null} if some tree yields no terminal tokens.
 * @throws AnnotatorException declared by the signature; presumably raised while adding
 *         the POS view -- verify against {@code POSFromParse}.
 */
protected TextAnnotation parseLines(ArrayList<String> lines) throws AnnotatorException {
    ArrayList<String[]> sentences = new ArrayList<>();
    ArrayList<Tree<String>> trees = new ArrayList<>();
    StringBuilder pending = new StringBuilder();
    int openParens = 0;

    for (String line : lines) {
        if (line.isEmpty()) {
            continue;
        }
        openParens += countUnclosedParens(line);
        pending.append(line);
        if (openParens != 0) {
            // The current tree is not closed yet; keep accumulating lines.
            continue;
        }

        // A complete tree has been buffered: parse it and collect its tokens.
        Tree<String> tree = TreeParserFactory.getStringTreeParser()
                .parse(pending.toString().replaceAll("\\\\/", "/"));
        String[] tokens = ParseUtils.getTerminalStringSentence(tree);
        if (tokens.length == 0) {
            System.err.println("This tree produced no sentence text:\n" + tree);
            System.err.println("from file:\n" + this.currentfile);
            System.err.flush();
            return null;
        }
        sentences.add(tokens);
        trees.add(tree);
        treesProduced++;
        pending.setLength(0);
    }

    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(VIEW_NAME, currentfile, sentences);
    TreeView parse = new TreeView(VIEW_NAME, this.getClass().getCanonicalName(), ta, 1.0);
    // Register the trees in the order they were read, one per sentence index.
    for (int sentenceId = 0; sentenceId < trees.size(); sentenceId++) {
        parse.setParseTree(sentenceId, trees.get(sentenceId));
    }
    ta.addView(VIEW_NAME, parse);

    POSFromParse pos = new POSFromParse(VIEW_NAME);
    ta.addView(pos);
    return ta;
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class StanfordDepHandler method addView.
/**
 * Runs the Stanford POS and parse annotators over the text annotation and adds the
 * resulting dependency trees as a {@code TreeView} under {@link #getViewName()}.
 * <p>
 * For each sentence the basic-dependencies graph is read, its first root becomes the root
 * of a {@code Tree} of (original text, token position) pairs, and the tree is registered
 * on the view under the sentence's index. Sentences longer than
 * {@code maxParseSentenceLength} tokens (when that limit is positive) are skipped with a
 * warning. A sentence whose root cannot be obtained is logged and then either skipped or,
 * when {@code throwExceptionOnSentenceLengthCheck} is set, the exception is rethrown.
 *
 * @param textAnnotation the text annotation to parse; receives the new view.
 * @throws AnnotatorException if the first sentence's parse tree is the placeholder node
 *         {@code "X"}, which this code treats as a parser timeout.
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);

        // CoreMap.size() is the number of annotation keys, not the number of tokens, so
        // the length limit has to be checked against the sentence's token list.
        List<?> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int sentenceLength = (tokens == null) ? 0 : tokens.size();
        if (maxParseSentenceLength > 0 && sentenceLength > maxParseSentenceLength) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(), maxParseSentenceLength));
            continue;
        }

        SemanticGraph depGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        IndexedWord root;
        try {
            root = depGraph.getFirstRoot();
        } catch (RuntimeException e) {
            // depGraph may itself be null (the NPE lands here), so do not dereference it
            // unguarded while building the diagnostic message.
            String graphText = (depGraph == null) ? "null" : depGraph.toCompactString();
            String msg = "ERROR in getting root of dep graph for sentence. Sentence is:\n" + sentence.toString() + "'\nDependency graph is:\n" + graphText + "\nText is:\n" + textAnnotation.getText();
            logger.error(msg, e);
            if (throwExceptionOnSentenceLengthCheck) {
                throw e;
            }
            // Best effort: leave this sentence without a tree and move on.
            continue;
        }

        int tokenStart = getNodePosition(textAnnotation, root, sentenceId);
        Pair<String, Integer> nodePair = new Pair<>(root.originalText(), tokenStart);
        Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
        populateChildren(depGraph, root, tree, textAnnotation, sentenceId);
        treeView.setDependencyTree(sentenceId, tree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class StanfordDepHandler method populateChildren.
/**
 * Recursively copies the dependents of {@code root} in {@code depGraph} into {@code tree}.
 * <p>
 * Each child becomes a subtree labelled with (original text, token position); the edge to
 * it is labelled with (string form of the graph edge, token position). A node with no
 * children adds nothing, which ends the recursion.
 *
 * @param depGraph the dependency graph being converted.
 * @param root the graph node whose children are attached.
 * @param tree the tree node corresponding to {@code root}; receives the subtrees.
 * @param ta the text annotation passed to {@code getNodePosition}.
 * @param sentId the sentence index passed to {@code getNodePosition}.
 */
private void populateChildren(SemanticGraph depGraph, IndexedWord root, Tree<Pair<String, Integer>> tree, TextAnnotation ta, int sentId) {
    for (IndexedWord dependent : depGraph.getChildren(root)) {
        int position = getNodePosition(ta, dependent, sentId);
        Pair<String, Integer> nodeLabel = new Pair<>(dependent.originalText(), position);
        Tree<Pair<String, Integer>> subtree = new Tree<>(nodeLabel);
        String relation = depGraph.getEdge(root, dependent).toString();
        tree.addSubtree(subtree, new Pair<>(relation, position));
        populateChildren(depGraph, dependent, subtree, ta, sentId);
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.trees.Tree in project cogcomp-nlp by CogComp.
the class StanfordParseHandler method addView.
/**
 * Runs the Stanford POS and parse annotators over the text annotation and adds the
 * resulting constituency trees as a {@code TreeView} under {@link #getViewName()}.
 * <p>
 * Each sentence's Stanford tree is converted by copying the root value and attaching one
 * subtree per child via {@code generateNode}; the result is registered on the view under
 * the sentence's index. Sentences longer than {@code maxParseSentenceLength} tokens (when
 * that limit is positive) are skipped with a warning.
 *
 * @param textAnnotation the text annotation to parse; receives the new view.
 * @throws AnnotatorException if the first sentence's parse tree is the placeholder node
 *         {@code "X"}, which this code treats as a parser timeout.
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.PARSE_STANFORD, "StanfordParseHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);

        // CoreMap.size() is the number of annotation keys, not the number of tokens, so
        // the length limit has to be checked against the sentence's token list.
        List<?> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int sentenceLength = (tokens == null) ? 0 : tokens.size();
        if (maxParseSentenceLength > 0 && sentenceLength > maxParseSentenceLength) {
            logger.warn("Unable to parse TextAnnotation " + textAnnotation.getId() + " since it is larger than the maximum sentence length of the parser (" + maxParseSentenceLength + ").");
            continue;
        }

        edu.stanford.nlp.trees.Tree stanfordTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        Tree<String> tree = new Tree<>(stanfordTree.value());
        for (edu.stanford.nlp.trees.Tree pt : stanfordTree.getChildrenAsList()) {
            tree.addSubtree(generateNode(pt));
        }
        treeView.setParseTree(sentenceId, tree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
Aggregations