use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView in project cogcomp-nlp by CogComp.
the class StanfordParseHandler method addView.
/**
 * Runs the Stanford POS and parse annotators over the given TextAnnotation and attaches the
 * resulting constituency trees as a {@link TreeView} under {@code getViewName()}.
 *
 * <p>Sentences whose token count exceeds {@code maxParseSentenceLength} (when that limit is
 * positive) are skipped with a warning; all other sentences get their parse tree stored at the
 * matching sentence index.
 *
 * @param textAnnotation the annotation to parse; the new view is added to it in place
 * @throws AnnotatorException if the length check rejects the input, if the annotators return no
 *         sentences, or if the first sentence comes back with the placeholder root "X"
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.PARSE_STANFORD, "StanfordParseHandler", textAnnotation, 1d);
    // Stanford-side sentence objects for the record (there may be more than one sentence)
    List<CoreMap> sentences = buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    // Guard the get(0) below: report a missing result through the declared exception type
    // instead of an unchecked NullPointerException / IndexOutOfBoundsException.
    if (sentences == null || sentences.isEmpty()) {
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "The annotators produced no sentences.");
    }
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);
        // CoreMap.size() reports the number of annotation keys, not the sentence length, so the
        // limit has to be compared against the token list instead.
        List<?> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int tokenCount = (tokens == null) ? 0 : tokens.size();
        if (maxParseSentenceLength > 0 && tokenCount > maxParseSentenceLength) {
            logger.warn("Unable to parse TextAnnotation " + textAnnotation.getId() + " since it is larger than the maximum sentence length of the parser (" + maxParseSentenceLength + ").");
        } else {
            edu.stanford.nlp.trees.Tree stanfordTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            // Copy the Stanford tree into the project's Tree<String>: root label first, then
            // each child converted by generateNode.
            Tree<String> tree = new Tree<>(stanfordTree.value());
            for (edu.stanford.nlp.trees.Tree pt : stanfordTree.getChildrenAsList()) {
                tree.addSubtree(generateNode(pt));
            }
            treeView.setParseTree(sentenceId, tree);
        }
    }
    textAnnotation.addView(getViewName(), treeView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView in project cogcomp-nlp by CogComp.
the class SubcategorizationFrame method getFeatures.
/**
 * Builds a single subcategorization-frame feature for the parse phrase of {@code c}.
 *
 * <p>The feature string is the parent's label followed by {@code >} and the labels of all of the
 * parent's children in order, with the phrase itself wrapped in parentheses. A phrase that has
 * no parent yields the feature {@code "root"}.
 *
 * @param c the constituent whose parse phrase is looked up in the view named by
 *        {@code parseViewName}
 * @return an insertion-ordered set containing exactly one feature
 * @throws EdisonException if looking up the parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<>();
    TreeView view = (TreeView) c.getTextAnnotation().getView(parseViewName);
    Constituent phrase;
    try {
        phrase = view.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }
    List<Relation> incomingRelations = phrase.getIncomingRelations();
    // A parentless phrase may be reported as an empty list rather than null; treat both as the
    // root so that get(0) below cannot throw IndexOutOfBoundsException.
    if (incomingRelations == null || incomingRelations.isEmpty()) {
        features.add(DiscreteFeature.create("root"));
        return features;
    }
    Constituent parent = incomingRelations.get(0).getSource();
    StringBuilder subcat = new StringBuilder();
    subcat.append(parent.getLabel()).append(">");
    for (Relation r : parent.getOutgoingRelations()) {
        // Identity comparison: mark the child that is this very phrase object.
        if (r.getTarget() == phrase) {
            subcat.append("(").append(r.getTarget().getLabel()).append(")");
        } else {
            subcat.append(r.getTarget().getLabel());
        }
    }
    features.add(DiscreteFeature.create(subcat.toString()));
    return features;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView in project cogcomp-nlp by CogComp.
the class ParsePhraseTypeOnly method getFeatures.
/**
 * Produces a feature holding the label of the parse phrase that corresponds to {@code c} in the
 * view named by {@code parseViewname}.
 *
 * @param c the constituent to look up
 * @return a set with the phrase-label feature, or an empty set when no phrase is returned
 * @throws EdisonException if the parse-phrase lookup throws
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation annotation = c.getTextAnnotation();
    TreeView parseView = (TreeView) annotation.getView(parseViewname);

    Constituent parsePhrase;
    try {
        parsePhrase = parseView.getParsePhrase(c);
    } catch (Exception cause) {
        throw new EdisonException(cause);
    }

    Set<Feature> result = new LinkedHashSet<>();
    if (parsePhrase == null) {
        // Nothing to describe: hand back the empty set.
        return result;
    }
    result.add(DiscreteFeature.create(parsePhrase.getLabel()));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView in project cogcomp-nlp by CogComp.
the class DependencyPathNgrams method getFeatures.
/**
 * Extracts n-gram features over the dependency path between the parent of {@code c} and
 * {@code c} itself, once as relation labels and once prefixed with part-of-speech tags.
 *
 * <p>If the node for {@code c} in the view named by {@code dependencyViewName} has no incoming
 * relation, no features are produced.
 *
 * @param c the constituent whose start token selects the node in the dependency view
 * @return the n-gram features for both path encodings, in insertion order
 * @throws EdisonException declared by the extractor contract
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    Set<Feature> result = new LinkedHashSet<>();
    TreeView depView = (TreeView) ta.getView(dependencyViewName);

    // Node of the dependency view that covers the first token of c.
    Constituent child = depView.getConstituentsCoveringToken(c.getStartSpan()).get(0);
    List<Relation> incoming = child.getIncomingRelations();
    if (incoming.isEmpty()) {
        return result;
    }

    int parentToken = incoming.get(0).getSource().getStartSpan();
    Constituent parentNode = depView.getConstituentsCoveringToken(parentToken).get(0);
    Pair<List<Constituent>, List<Constituent>> paths = PathFeatureHelper.getPathsToCommonAncestor(parentNode, child, 400);

    List<String> labelPath = new ArrayList<>();
    List<String> posPath = new ArrayList<>();

    // Upward leg: every node except the last one, which is handled separately as the top.
    List<Constituent> upward = paths.getFirst();
    int topIndex = upward.size() - 1;
    for (int k = 0; k < topIndex; k++) {
        Constituent node = upward.get(k);
        String step = node.getIncomingRelations().get(0).getRelationName() + PathFeatureHelper.PATH_UP_STRING;
        labelPath.add(step);
        posPath.add(WordHelpers.getPOS(ta, node.getStartSpan()) + ":" + step);
    }

    Constituent apex = upward.get(topIndex);
    posPath.add(WordHelpers.getPOS(ta, apex.getStartSpan()) + ":*");
    labelPath.add("*");

    // Downward leg: walk the second path backwards, skipping its last element.
    List<Constituent> downward = paths.getSecond();
    for (int k = downward.size() - 2; k >= 0; k--) {
        Constituent node = downward.get(k);
        posPath.add(WordHelpers.getPOS(ta, node.getStartSpan()) + ":" + PathFeatureHelper.PATH_DOWN_STRING);
        labelPath.add(PathFeatureHelper.PATH_DOWN_STRING);
    }

    result.addAll(getNgrams(labelPath, ""));
    result.addAll(getNgrams(posPath, "pos"));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView in project cogcomp-nlp by CogComp.
the class ParseHeadWordPOS method getFeatures.
/**
 * Produces two features for the head word of the parse phrase corresponding to {@code c}: the
 * lower-cased, trimmed token ({@code hw:}) and its part-of-speech tag ({@code h-pos:}).
 *
 * <p>The head position is obtained from {@code CollinsHeadFinder} on the phrase found in the
 * view named by {@code parseViewName}.
 *
 * @param c the constituent to look up
 * @return the two head-word features, in insertion order
 * @throws EdisonException if the parse-phrase lookup throws
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parseView = (TreeView) ta.getView(parseViewName);

    Constituent parsePhrase;
    try {
        parsePhrase = parseView.getParsePhrase(c);
    } catch (Exception cause) {
        throw new EdisonException(cause);
    }

    Set<Feature> result = new LinkedHashSet<>();
    int headIndex = CollinsHeadFinder.getInstance().getHeadWordPosition(parsePhrase);

    String headWord = ta.getToken(headIndex).toLowerCase().trim();
    result.add(DiscreteFeature.create("hw:" + headWord));

    String headPos = WordHelpers.getPOS(ta, headIndex);
    result.add(DiscreteFeature.create("h-pos:" + headPos));

    return result;
}
Aggregations