use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ParsePhraseType method getFeatures.
/**
 * Builds phrase-type features for the parse phrase that the parse view reports for
 * {@code c}.
 *
 * <p>When a phrase is found, the result holds the phrase's own label, followed (if the
 * phrase has an incoming relation) by the lower-cased head word and the POS tag of the
 * source of its first incoming relation, and finally {@code "pt:"} plus that source's
 * label ({@code "ROOT"} when there is no incoming relation). When no phrase is found the
 * result is empty.
 *
 * @param c the constituent to extract features for
 * @return the features, in insertion order
 * @throws EdisonException if looking up the parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parseTree = (TreeView) ta.getView(parseViewname);

    Constituent phrase;
    try {
        phrase = parseTree.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Set<Feature> result = new LinkedHashSet<>();
    if (phrase == null) {
        return result;
    }

    result.add(DiscreteFeature.create(phrase.getLabel()));

    String parentLabel;
    if (phrase.getIncomingRelations().isEmpty()) {
        // No incoming edge: the phrase has no parent node to describe.
        parentLabel = "ROOT";
    } else {
        Constituent parentNode = phrase.getIncomingRelations().get(0).getSource();
        parentLabel = parentNode.getLabel();

        int headIndex = CollinsHeadFinder.getInstance().getHeadWordPosition(parentNode);
        String headWord = ta.getToken(headIndex).toLowerCase().trim();
        String headPos = WordHelpers.getPOS(ta, headIndex);

        result.add(DiscreteFeature.create("pt:h:" + headWord));
        result.add(DiscreteFeature.create("pt:h-pos:" + headPos));
    }

    // The parent-label feature is always emitted last.
    result.add(DiscreteFeature.create("pt:" + parentLabel));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ParseSiblings method getFeatures.
/**
 * Extracts features describing where the parse phrase covering {@code c} sits among the
 * children of its parent, and what its immediate left and right siblings look like.
 *
 * <p>Emitted features:
 * <ul>
 * <li>{@code ONLY_CHILD} when the phrase has no incoming relation;</li>
 * <li>otherwise {@code FIRST_CHILD} or {@code LAST_CHILD} when the phrase is the first or
 * last outgoing relation of its parent (first wins if it is both);</li>
 * <li>{@code lsis.*} features (label, head word, head POS) for the sibling immediately
 * before it, if any;</li>
 * <li>{@code rsis.*} features for the sibling immediately after it, if any.</li>
 * </ul>
 *
 * @param c the constituent to extract features for
 * @return the features, in insertion order; empty if the parse view yields no phrase for
 *         {@code c}
 * @throws EdisonException if looking up the parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);

    Constituent phrase;
    try {
        phrase = parse.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Set<Feature> features = new LinkedHashSet<>();

    // Defensive guard: without a phrase there is nothing to describe, so return no
    // features rather than failing with a NullPointerException below.
    if (phrase == null) {
        return features;
    }

    if (phrase.getIncomingRelations().isEmpty()) {
        features.add(DiscreteFeature.create("ONLY_CHILD"));
        return features;
    }

    Relation incomingEdge = phrase.getIncomingRelations().get(0);
    Constituent parent = incomingEdge.getSource();
    int childCount = parent.getOutgoingRelations().size();

    // Locate this phrase among the parent's children. Identity comparison is intentional:
    // we want the very edge object that leads to this phrase.
    int position = -1;
    for (int i = 0; i < childCount; i++) {
        if (parent.getOutgoingRelations().get(i) == incomingEdge) {
            position = i;
            break;
        }
    }
    assert position >= 0;

    int lastPosition = childCount - 1;
    if (position == 0) {
        features.add(DiscreteFeature.create("FIRST_CHILD"));
    } else if (position == lastPosition) {
        features.add(DiscreteFeature.create("LAST_CHILD"));
    }

    if (position != 0) {
        Constituent leftSibling = parent.getOutgoingRelations().get(position - 1).getTarget();
        addSiblingFeatures(features, ta, leftSibling, "lsis");
    }
    if (position != lastPosition) {
        Constituent rightSibling = parent.getOutgoingRelations().get(position + 1).getTarget();
        addSiblingFeatures(features, ta, rightSibling, "rsis");
    }
    return features;
}

/**
 * Adds the phrase-type, head-word and head-POS features of {@code sibling} to
 * {@code features}, each prefixed with {@code prefix} (e.g. {@code "lsis"} produces
 * {@code "lsis.pt:"}, {@code "lsis.hw:"} and {@code "lsis.hw.pos:"}).
 */
private static void addSiblingFeatures(Set<Feature> features, TextAnnotation ta, Constituent sibling,
        String prefix) throws EdisonException {
    String phraseType = sibling.getLabel();
    int headWord = CollinsHeadFinder.getInstance().getHeadWordPosition(sibling);
    String token = ta.getToken(headWord).toLowerCase().trim();
    String pos = WordHelpers.getPOS(ta, headWord);
    features.add(DiscreteFeature.create(prefix + ".pt:" + phraseType));
    features.add(DiscreteFeature.create(prefix + ".hw:" + token));
    features.add(DiscreteFeature.create(prefix + ".hw.pos:" + pos));
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class SyntacticFrame method getFeatures.
/**
 * Builds three frame-string features for {@code c} relative to the source of its first
 * incoming relation (treated here as the predicate).
 *
 * <p>The strings are assembled by walking, in order, the children of the predicate
 * phrase's parent node's parent that precede that parent node (skipped when the parent
 * node is the root), and then all children of the parent node itself. A child whose span
 * equals the predicate phrase's span contributes {@code "v-"} (and the lemma at its start
 * token for the lemma variant); every other child is delegated to {@code addToFeature}.
 *
 * @param c the argument constituent
 * @return three features (plain, {@code "general:"}-prefixed, {@code "lemma:"}-prefixed),
 *         or an empty set if {@code c} has no incoming relations
 * @throws EdisonException if looking up either parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> result = new LinkedHashSet<>();

    List<Relation> incoming = c.getIncomingRelations();
    if (incoming.isEmpty()) {
        return result;
    }

    Constituent edgeSource = incoming.get(0).getSource();
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);

    Constituent predicate;
    Constituent arg;
    try {
        predicate = parse.getParsePhrase(edgeSource);
        arg = parse.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Constituent vp = TreeView.getParent(predicate);

    StringBuffer frame = new StringBuffer();
    StringBuffer generalFrame = new StringBuffer();
    StringBuffer lemmaFrame = new StringBuffer();

    // First the siblings that come before the VP under its own parent...
    if (!TreeView.isRoot(vp)) {
        Constituent vpParent = TreeView.getParent(vp);
        for (Relation edge : vpParent.getOutgoingRelations()) {
            Constituent child = edge.getTarget();
            if (child == vp) {
                break;
            }
            addToFeature(child, arg, frame, generalFrame, lemmaFrame);
        }
    }

    // ...then every child of the VP, marking the predicate itself specially.
    for (Relation edge : vp.getOutgoingRelations()) {
        Constituent child = edge.getTarget();
        if (!child.getSpan().equals(predicate.getSpan())) {
            addToFeature(child, arg, frame, generalFrame, lemmaFrame);
            continue;
        }
        frame.append("v-");
        generalFrame.append("v-");
        lemmaFrame.append(WordHelpers.getLemma(ta, child.getStartSpan())).append("-");
    }

    result.add(DiscreteFeature.create(frame.toString()));
    result.add(DiscreteFeature.create("general:" + generalFrame.toString()));
    result.add(DiscreteFeature.create("lemma:" + lemmaFrame.toString()));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ParseHelper method getPhraseFromHead.
/**
 * Primarily a fix for prepSRL objects; converts them from single head words to constituents.
 * E.g. for the sentence "the man with the telescope", the object of the preposition will be
 * "the telescope" instead of just "telescope".
 *
 * @param predicate The predicate of the construction (e.g. "with")
 * @param argHead The head-word of the argument of the construction (e.g. "telescope")
 * @param parseViewName The name of the parse view used to extract the phrase-structure tree
 * @return The full constituent phrase containing the argument head, or {@code null} when the
 *         tree node covering the predicate has no parent, when the argument head is not in
 *         the yield of that parent, or when climbing from the argument reaches the root
 *         without finding the predicate
 */
public static Constituent getPhraseFromHead(Constituent predicate, Constituent argHead, String parseViewName) {
    // Get the path from the argument to the preposition
    // but only if the predicate node "m-commands" the arg
    TextAnnotation ta = argHead.getTextAnnotation();
    int sentenceOffset = ta.getSentence(ta.getSentenceId(argHead)).getStartSpan();
    int argStart = argHead.getStartSpan() - sentenceOffset;

    Tree<Pair<String, IntPair>> predParentTree =
            getTokenIndexedTreeCovering(predicate, parseViewName).getParent();
    // The node covering the predicate may be the root, in which case it has no parent
    // whose yield could contain the argument.
    if (predParentTree == null)
        return null;

    boolean found = false;
    for (Tree<Pair<String, IntPair>> s : predParentTree.getYield()) {
        if (s.getLabel().getSecond().getFirst() == argStart) {
            found = true;
            break;
        }
    }
    if (!found)
        return null;

    // Now follow the path from the argument node to get to the preposition.
    // The null check comes first so checkForPredicate is never handed a missing parent.
    int predStart = predicate.getStartSpan() - sentenceOffset;
    Tree<Pair<String, IntPair>> argPhrase = getTokenIndexedTreeCovering(argHead, parseViewName);
    while (argPhrase.getParent() != null && !checkForPredicate(argPhrase.getParent(), predStart)) {
        argPhrase = argPhrase.getParent();
    }

    // If the phrase covering the constituent is the whole sentence then the annotation is wrong
    if (argPhrase.getParent() == null)
        return null;

    // The resulting span starts at the token right after the predicate's first token and
    // is as long as the yield of the argument phrase.
    int start = predicate.getStartSpan() + 1;
    int end = start + argPhrase.getYield().size();
    return new Constituent(argHead.getLabel(), argHead.getViewName(), ta, start, end);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ParseHelper method getParseTreeCovering.
/**
 * Returns the part of a sentence's parse tree that covers a given constituent.
 *
 * <p>The parse tree of the sentence containing {@code c} is fetched from the named view, and
 * the constituent's span is shifted by the sentence's start span before the covering subtree
 * is looked up.
 *
 * @param parseViewName The name of the parse view
 * @param c The constituent that we care about
 * @return The portion of the parse tree of the {@link TextAnnotation} to which the constituent
 *         belongs which covers the constituent.
 */
public static Tree<String> getParseTreeCovering(String parseViewName, Constituent c) {
    TextAnnotation annotation = c.getTextAnnotation();
    int sentenceIndex = annotation.getSentenceId(c);
    Tree<String> sentenceTree = getParseTree(parseViewName, annotation, sentenceIndex);

    // Shift the constituent's span by the start of its sentence.
    int offset = annotation.getSentence(sentenceIndex).getStartSpan();
    return getTreeCovering(sentenceTree, c.getStartSpan() - offset, c.getEndSpan() - offset);
}
Aggregations