Use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
The class DependencyCorefMentionFinder, method extractNPorPRPFromDependency:
private void extractNPorPRPFromDependency(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
SemanticGraph basic = s.get(BasicDependenciesAnnotation.class);
// DT is for "this", "these", etc.
List<IndexedWord> nounsOrPrp = basic.getAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");
Tree tree = s.get(TreeAnnotation.class);
for (IndexedWord w : nounsOrPrp) {
SemanticGraphEdge edge = basic.getEdge(basic.getParent(w), w);
GrammaticalRelation rel = null;
// if edge is null, it's root
String shortname = "root";
if (edge != null) {
rel = edge.getRelation();
shortname = rel.getShortName();
}
// TODO: what to remove? remove more?
if (shortname.matches("det|compound")) {
continue;
} else {
extractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
}
}
}
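For context, here is a minimal driver sketch (not part of CoreNLP) showing how the same candidate selection can be reproduced from a standard pipeline; the annotator list, class name, and example sentence are assumptions:

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class NounPrpCandidateDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    // Assumption: depparse fills in BasicDependenciesAnnotation.
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation("She gave these students her old laptop.");
    pipeline.annotate(doc);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      SemanticGraph basic =
          sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
      // The same POS pattern as above: nouns, pronouns, and determiners.
      for (IndexedWord w : basic.getAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT")) {
        System.out.println(w.word() + "/" + w.tag());
      }
    }
  }
}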
Use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
The class Mention, method getCoordination:
public int getCoordination() {
if (headIndexedWord == null)
return 0;
Set<GrammaticalRelation> relations = enhancedDependency.childRelns(headIndexedWord);
for (GrammaticalRelation rel : relations) {
if (rel.toString().startsWith("conj:")) {
return 1;
}
}
Set<GrammaticalRelation> parent_relations = enhancedDependency.relns(headIndexedWord);
for (GrammaticalRelation rel : parent_relations) {
if (rel.toString().startsWith("conj:")) {
return 1;
}
}
return 0;
}
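Below is a minimal standalone sketch of the same test, with hypothetical names; graph and head are assumed to come from an annotated sentence. Note that the original matches "conj:", which only fires on enhanced relation names such as conj:and; matching the bare "conj" prefix also covers basic dependencies:

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.GrammaticalRelation;

final class CoordinationCheck {
  static boolean isCoordinated(SemanticGraph graph, IndexedWord head) {
    // childRelns: relations on edges where head is the governor.
    for (GrammaticalRelation rel : graph.childRelns(head)) {
      if (rel.toString().startsWith("conj")) return true;
    }
    // relns: relations on edges where head is the dependent.
    for (GrammaticalRelation rel : graph.relns(head)) {
      if (rel.toString().startsWith("conj")) return true;
    }
    return false;
  }
}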
Use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
The class BasicRelationFeatureFactory, method addDependencyPathFeatures:
protected void addDependencyPathFeatures(Counter<String> features, RelationMention rel, EntityMention arg0, EntityMention arg1, List<String> types, List<String> checklist, Logger logger) {
SemanticGraph graph = null;
// Needed for backwards compatibility: old serialized models don't have it.
if (dependencyType == null)
dependencyType = DEPENDENCY_TYPE.COLLAPSED_CCPROCESSED;
if (dependencyType == DEPENDENCY_TYPE.COLLAPSED_CCPROCESSED)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
else if (dependencyType == DEPENDENCY_TYPE.COLLAPSED)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
else if (dependencyType == DEPENDENCY_TYPE.BASIC)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
else
throw new RuntimeException("ERROR: unknown dependency type: " + dependencyType);
if (graph == null) {
Tree tree = rel.getSentence().get(TreeAnnotation.class);
if (tree == null) {
log.info("WARNING: found sentence without TreeAnnotation. Skipped dependency-path features.");
return;
}
try {
graph = SemanticGraphFactory.makeFromTree(tree, Mode.COLLAPSED, GrammaticalStructure.Extras.NONE, null, true);
} catch (Exception e) {
log.info("WARNING: failed to generate dependencies from tree " + tree.toString());
e.printStackTrace();
log.info("Skipped dependency-path features.");
return;
}
}
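// Token positions are 0-based but IndexedWord indices are 1-based, hence the +1 below.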
IndexedWord node0 = graph.getNodeByIndexSafe(arg0.getSyntacticHeadTokenPosition() + 1);
IndexedWord node1 = graph.getNodeByIndexSafe(arg1.getSyntacticHeadTokenPosition() + 1);
if (node0 == null) {
checklist.removeAll(dependencyFeatures);
return;
}
if (node1 == null) {
checklist.removeAll(dependencyFeatures);
return;
}
List<SemanticGraphEdge> edgePath = graph.getShortestUndirectedPathEdges(node0, node1);
List<IndexedWord> pathNodes = graph.getShortestUndirectedPathNodes(node0, node1);
if (edgePath == null) {
checklist.removeAll(dependencyFeatures);
return;
}
if (pathNodes == null || pathNodes.size() <= 1) {
// arguments have the same head.
checklist.removeAll(dependencyFeatures);
return;
}
// e.g. "nsubj-> <-prep_in <-nn"
if (usingFeature(types, checklist, "dependency_path")) {
features.setCount("dependency_path:" + generalizedDependencyPath(edgePath, node0), 1.0);
}
if (usingFeature(types, checklist, "dependency_path_lowlevel")) {
String depLowLevel = dependencyPath(edgePath, node0);
if (logger != null && !rel.getType().equals(RelationMention.UNRELATED))
logger.info("dependency_path_lowlevel: " + depLowLevel);
features.setCount("dependency_path_lowlevel:" + depLowLevel, 1.0);
}
List<String> pathLemmas = new ArrayList<>();
List<String> noArgPathLemmas = new ArrayList<>();
// build noArgPathLemmas without the words that belong to one of the two args
Set<Integer> indecesToSkip = new HashSet<>();
for (int i = arg0.getExtentTokenStart(); i < arg0.getExtentTokenEnd(); i++) indecesToSkip.add(i + 1);
for (int i = arg1.getExtentTokenStart(); i < arg1.getExtentTokenEnd(); i++) indecesToSkip.add(i + 1);
for (IndexedWord node : pathNodes) {
pathLemmas.add(Morphology.lemmaStatic(node.value(), node.tag(), true));
if (!indecesToSkip.contains(node.index()))
noArgPathLemmas.add(Morphology.lemmaStatic(node.value(), node.tag(), true));
}
// (Higher-level generalized relations are used here, as opposed to the finer-grained "lowlevel" relations)
if (usingFeature(types, checklist, "dependency_paths_to_verb")) {
for (IndexedWord node : pathNodes) {
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
String lemma = Morphology.lemmaStatic(node.value(), node.tag(), true);
String node1Path = generalizedDependencyPath(graph.getShortestUndirectedPathEdges(node, node1), node);
String node0Path = generalizedDependencyPath(graph.getShortestUndirectedPathEdges(node0, node), node0);
features.setCount("dependency_paths_to_verb:" + node0Path + " " + lemma, 1.0);
features.setCount("dependency_paths_to_verb:" + lemma + " " + node1Path, 1.0);
features.setCount("dependency_paths_to_verb:" + node0Path + " " + lemma + " " + node1Path, 1.0);
}
}
}
// "stub: subj-> be <-mod"
if (usingFeature(types, checklist, "dependency_path_stubs_to_verb")) {
for (IndexedWord node : pathNodes) {
SemanticGraphEdge edge0 = edgePath.get(0);
SemanticGraphEdge edge1 = edgePath.get(edgePath.size() - 1);
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
String lemma = Morphology.lemmaStatic(node.value(), node.tag(), true);
String edge0str, edge1str;
if (node0.equals(edge0.getGovernor())) {
edge0str = "<-" + generalizeRelation(edge0.getRelation());
} else {
edge0str = generalizeRelation(edge0.getRelation()) + "->";
}
if (node1.equals(edge1.getGovernor())) {
edge1str = generalizeRelation(edge1.getRelation()) + "->";
} else {
edge1str = "<-" + generalizeRelation(edge1.getRelation());
}
features.setCount("stub: " + edge0str + " " + lemma, 1.0);
features.setCount("stub: " + lemma + edge1str, 1.0);
features.setCount("stub: " + edge0str + " " + lemma + " " + edge1str, 1.0);
}
}
}
if (usingFeature(types, checklist, "verb_in_dependency_path")) {
for (IndexedWord node : pathNodes) {
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
SemanticGraphEdge rightEdge = graph.getShortestUndirectedPathEdges(node, node1).get(0);
SemanticGraphEdge leftEdge = graph.getShortestUndirectedPathEdges(node, node0).get(0);
String rightRelation, leftRelation;
boolean governsLeft = false, governsRight = false;
if (node.equals(rightEdge.getGovernor())) {
rightRelation = " <-" + generalizeRelation(rightEdge.getRelation());
governsRight = true;
} else {
rightRelation = generalizeRelation(rightEdge.getRelation()) + "-> ";
}
if (node.equals(leftEdge.getGovernor())) {
leftRelation = generalizeRelation(leftEdge.getRelation()) + "-> ";
governsLeft = true;
} else {
leftRelation = " <-" + generalizeRelation(leftEdge.getRelation());
}
String lemma = Morphology.lemmaStatic(node.value(), node.tag(), true);
if (governsLeft) {
features.setCount("verb: " + leftRelation + lemma, 1.0);
}
if (governsRight) {
features.setCount("verb: " + lemma + rightRelation, 1.0);
}
if (governsLeft && governsRight) {
features.setCount("verb: " + leftRelation + lemma + rightRelation, 1.0);
}
}
}
}
// dependency_path_words: generates a feature for the lemma of each word on the dependency path, excluding the arguments
if (usingFeature(types, checklist, "dependency_path_words")) {
for (String lemma : noArgPathLemmas) features.setCount("word_in_dependency_path:" + lemma, 1.0);
}
// dependency_path_POS_unigrams: generates a feature for the POS tag of each word in the dependency path
if (usingFeature(types, checklist, "dependency_path_POS_unigrams")) {
for (IndexedWord node : pathNodes) if (!node.equals(node0) && !node.equals(node1))
features.setCount("POS_in_dependency_path: " + node.tag(), 1.0);
}
// dependency_path_POS_n_grams: n-grams of POS tags of words in the dependency path, n=2,3,4
for (int node = 0; node < pathNodes.size(); node++) {
for (int n = 2; n <= 4; n++) {
if (node + n > pathNodes.size())
break;
StringBuilder sb = new StringBuilder();
StringBuilder sbPOS = new StringBuilder();
for (int elt = node; elt < node + n; elt++) {
sb.append(pathLemmas.get(elt));
sb.append("_");
sbPOS.append(pathNodes.get(elt).tag());
sbPOS.append("_");
}
if (usingFeature(types, checklist, "dependency_path_word_n_grams"))
features.setCount("dependency_path_" + n + "-gram: " + sb, 1.0);
if (usingFeature(types, checklist, "dependency_path_POS_n_grams"))
features.setCount("dependency_path_POS_" + n + "-gram: " + sbPOS, 1.0);
}
}
// e.g. "prep_at -> - leftmost"
for (int edge = 0; edge < edgePath.size(); edge++) {
if (usingFeature(types, checklist, "dependency_path_edge_n_grams") || usingFeature(types, checklist, "dependency_path_edge_lowlevel_n_grams")) {
for (int n = 2; n <= 4; n++) {
if (edge + n > edgePath.size())
break;
StringBuilder sbRelsHi = new StringBuilder();
StringBuilder sbRelsLo = new StringBuilder();
for (int elt = edge; elt < edge + n; elt++) {
GrammaticalRelation gr = edgePath.get(elt).getRelation();
sbRelsHi.append(generalizeRelation(gr));
sbRelsHi.append("_");
sbRelsLo.append(gr);
sbRelsLo.append("_");
}
if (usingFeature(types, checklist, "dependency_path_edge_n_grams"))
features.setCount("dependency_path_edge_" + n + "-gram: " + sbRelsHi, 1.0);
if (usingFeature(types, checklist, "dependency_path_edge_lowlevel_n_grams"))
features.setCount("dependency_path_edge_lowlevel_" + n + "-gram: " + sbRelsLo, 1.0);
}
}
if (usingFeature(types, checklist, "dependency_path_node-edge-node-grams"))
features.setCount("dependency_path_node-edge-node-gram: " + pathLemmas.get(edge) + " -- " + generalizeRelation(edgePath.get(edge).getRelation()) + " -- " + pathLemmas.get(edge + 1), 1.0);
if (usingFeature(types, checklist, "dependency_path_node-edge-node-grams_lowlevel"))
features.setCount("dependency_path_node-edge-node-gram_lowlevel: " + pathLemmas.get(edge) + " -- " + edgePath.get(edge).getRelation() + " -- " + pathLemmas.get(edge + 1), 1.0);
if (usingFeature(types, checklist, "dependency_path_edge-node-edge-grams") && edge > 0)
features.setCount("dependency_path_edge-node-edge-gram: " + generalizeRelation(edgePath.get(edge - 1).getRelation()) + " -- " + pathLemmas.get(edge) + " -- " + generalizeRelation(edgePath.get(edge).getRelation()), 1.0);
if (usingFeature(types, checklist, "dependency_path_edge-node-edge-grams_lowlevel") && edge > 0)
features.setCount("dependency_path_edge-node-edge-gram_lowlevel: " + edgePath.get(edge - 1).getRelation() + " -- " + pathLemmas.get(edge) + " -- " + edgePath.get(edge).getRelation(), 1.0);
String dir = pathNodes.get(edge).equals(edgePath.get(edge).getDependent()) ? " -> " : " <- ";
if (usingFeature(types, checklist, "dependency_path_directed_bigrams"))
features.setCount("dependency_path_directed_bigram: " + pathLemmas.get(edge) + dir + pathLemmas.get(edge + 1), 1.0);
if (usingFeature(types, checklist, "dependency_path_edge_unigrams"))
features.setCount("dependency_path_edge_unigram: " + edgePath.get(edge).getRelation() + dir + (edge == 0 ? " - leftmost" : edge == edgePath.size() - 1 ? " - rightmost" : " - interior"), 1.0);
}
// dependency_path_length: the raw path length; dependency_path_length_binary: the same, binned as binary features
if (usingFeature(types, checklist, "dependency_path_length")) {
features.setCount("dependency_path_length", edgePath.size());
}
if (usingFeature(types, checklist, "dependency_path_length_binary")) {
features.setCount("dependency_path_length_" + new DecimalFormat("00").format(edgePath.size()), 1.0);
}
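// dependency_path_trigger: fires when a path token outside the arguments carries a TriggerAnnotation beginning with "B-".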
if (usingFeature(types, checklist, "dependency_path_trigger")) {
List<CoreLabel> tokens = rel.getSentence().get(TokensAnnotation.class);
for (IndexedWord node : pathNodes) {
int index = node.index();
if (indecesToSkip.contains(index))
continue;
String trigger = tokens.get(index - 1).get(TriggerAnnotation.class);
if (trigger != null && trigger.startsWith("B-"))
features.incrementCount("dependency_path_trigger=" + trigger.substring(2));
}
}
}
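A minimal sketch (hypothetical class and method names) of the path machinery this method is built on: look up the two head nodes, then walk the shortest undirected dependency path between them, printing each step in the same arrow notation the features use:

import java.util.List;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

final class DependencyPathDemo {
  /** Prints the shortest undirected path between two 1-based token indices. */
  static void printPath(SemanticGraph graph, int index0, int index1) {
    // getNodeByIndexSafe returns null instead of throwing for a missing index.
    IndexedWord node0 = graph.getNodeByIndexSafe(index0);
    IndexedWord node1 = graph.getNodeByIndexSafe(index1);
    if (node0 == null || node1 == null) return;
    List<SemanticGraphEdge> edges = graph.getShortestUndirectedPathEdges(node0, node1);
    List<IndexedWord> nodes = graph.getShortestUndirectedPathNodes(node0, node1);
    if (edges == null || nodes == null) return;  // disconnected: no path
    for (int i = 0; i < edges.size(); i++) {
      SemanticGraphEdge e = edges.get(i);
      // Same arrow convention as dependencyPathAsList below: "rel->" climbs from
      // dependent to governor, "<-rel" descends from governor to dependent.
      String step = nodes.get(i).equals(e.getDependent())
          ? e.getRelation() + "->"
          : "<-" + e.getRelation();
      System.out.println(nodes.get(i).word() + " " + step + " " + nodes.get(i + 1).word());
    }
  }
}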
Use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
The class BasicRelationFeatureFactory, method dependencyPathAsList:
/*
* Under construction
*/
public static List<String> dependencyPathAsList(List<SemanticGraphEdge> edgePath, IndexedWord node, boolean generalize) {
if (edgePath == null)
return null;
List<String> path = new ArrayList<>();
for (SemanticGraphEdge edge : edgePath) {
IndexedWord nextNode;
GrammaticalRelation relation;
if (generalize) {
relation = generalizeRelation(edge.getRelation());
} else {
relation = edge.getRelation();
}
if (node.equals(edge.getDependent())) {
String v = (relation + "->").intern();
path.add(v);
nextNode = edge.getGovernor();
} else {
String v = ("<-" + relation).intern();
path.add(v);
nextNode = edge.getDependent();
}
node = nextNode;
}
return path;
}
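Since dependencyPathAsList is public and static, it can be called directly. A hypothetical usage sketch, assuming graph, node0, and node1 come from an annotated sentence and that edu.stanford.nlp.ie.machinereading is on the classpath:

import java.util.List;
import edu.stanford.nlp.ie.machinereading.BasicRelationFeatureFactory;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

final class PathAsListDemo {
  static String renderPath(SemanticGraph graph, IndexedWord node0, IndexedWord node1) {
    List<SemanticGraphEdge> edgePath = graph.getShortestUndirectedPathEdges(node0, node1);
    // generalize=true maps each relation through generalizeRelation first.
    List<String> steps = BasicRelationFeatureFactory.dependencyPathAsList(edgePath, node0, true);
    // e.g. "nsubj-> <-nmod <-compound"
    return steps == null ? null : String.join(" ", steps);
  }
}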
Use of edu.stanford.nlp.trees.GrammaticalRelation in project CoreNLP by stanfordnlp.
The class DependencyParser, method predict:
/**
* Determine the dependency parse of the given sentence using the loaded model.
* You must first load a parser before calling this method.
*
* @throws java.lang.IllegalStateException If the parser has not yet been loaded and initialized
* (see {@link #initialize(boolean)})
*/
public GrammaticalStructure predict(CoreMap sentence) {
if (system == null)
throw new IllegalStateException("Parser has not been loaded and initialized; first load a model.");
DependencyTree result = predictInner(sentence);
// The rest of this method is just busy-work to convert the
// package-local representation into a CoreNLP-standard
// GrammaticalStructure.
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
List<TypedDependency> dependencies = new ArrayList<>();
IndexedWord root = new IndexedWord(new Word("ROOT"));
root.set(CoreAnnotations.IndexAnnotation.class, 0);
for (int i = 1; i <= result.n; i++) {
int head = result.getHead(i);
String label = result.getLabel(i);
IndexedWord thisWord = new IndexedWord(tokens.get(i - 1));
IndexedWord headWord = head == 0 ? root : new IndexedWord(tokens.get(head - 1));
GrammaticalRelation relation = head == 0 ? GrammaticalRelation.ROOT : makeGrammaticalRelation(label);
dependencies.add(new TypedDependency(relation, headWord, thisWord));
}
// Build GrammaticalStructure
// TODO ideally submodule should just return GrammaticalStructure
TreeGraphNode rootNode = new TreeGraphNode(root);
return makeGrammaticalStructure(dependencies, rootNode);
}
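A minimal end-to-end sketch of calling predict: POS-tag a sentence with a pipeline (the parser requires tags), load the bundled English model, and read off the typed dependencies. The annotator list and example sentence are assumptions; any POS-tagged CoreMap works:

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.util.CoreMap;

public class PredictDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");  // predict needs POS tags
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation doc = new Annotation("The parser predicts a dependency tree.");
    pipeline.annotate(doc);
    // DEFAULT_MODEL is the bundled English neural dependency model.
    DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);
    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      GrammaticalStructure gs = parser.predict(sentence);
      for (TypedDependency td : gs.typedDependencies()) {
        System.out.println(td);
      }
    }
  }
}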