Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
The class RemoveEdge, method evaluate:
@Override
public void evaluate(SemanticGraph sg, SemgrexMatcher sm) {
  boolean govWild = govName.equals(WILDCARD_NODE);
  boolean depWild = depName.equals(WILDCARD_NODE);
  IndexedWord govNode = getNamedNode(govName, sm);
  IndexedWord depNode = getNamedNode(depName, sm);
  if (govNode != null && depNode != null) {
    SemanticGraphEdge edge = sg.getEdge(govNode, depNode, relation);
    if (edge != null) {
      @SuppressWarnings("unused")
      boolean successFlag = sg.removeEdge(edge);
    }
  } else if (depNode != null && govWild) {
    // dep known, wildcard gov
    for (SemanticGraphEdge edge : sg.incomingEdgeIterable(depNode)) {
      if (edge.getRelation().equals(relation) && sg.containsEdge(edge)) {
        sg.removeEdge(edge);
      }
    }
  } else if (govNode != null && depWild) {
    // gov known, wildcard dep
    for (SemanticGraphEdge edge : sg.outgoingEdgeIterable(govNode)) {
      if (edge.getRelation().equals(relation) && sg.containsEdge(edge)) {
        sg.removeEdge(edge);
      }
    }
  }
}
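For orientation, the graph calls this evaluator relies on can be exercised on their own. The following is a minimal standalone sketch, not CoreNLP source: the class name and toy sentence are invented for illustration. It builds a small graph with SemanticGraph.valueOf and removes one edge through the same edge-scan-plus-removeEdge pattern used in the wildcard branches above.

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class RemoveEdgeSketch {
  public static void main(String[] args) {
    // Toy graph in valueOf notation: "ate" governs "Bill" via nsubj and "muffins" via obj.
    SemanticGraph sg = SemanticGraph.valueOf("[ate nsubj>Bill obj>muffins]");
    IndexedWord gov = sg.getFirstRoot(); // "ate"
    // outgoingEdgeList returns a fresh list, so removing edges while looping is safe.
    for (SemanticGraphEdge edge : sg.outgoingEdgeList(gov)) {
      if (edge.getRelation().toString().equals("nsubj")) {
        boolean removed = sg.removeEdge(edge); // same call the evaluator makes
        System.out.println("removed nsubj edge: " + removed);
      }
    }
    System.out.println(sg.toCompactString()); // e.g. [ate obj>muffins]
  }
}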
Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
The class RemoveNamedEdge, method evaluate:
@Override
public void evaluate(SemanticGraph sg, SemgrexMatcher sm) {
  String relation = sm.getRelnString(edgeName);
  IndexedWord govNode = getNamedNode(govName, sm);
  IndexedWord depNode = getNamedNode(depName, sm);
  SemanticGraphEdge edge = sg.getEdge(govNode, depNode, GrammaticalRelation.valueOf(relation));
  if (edge != null) {
    sg.removeEdge(edge);
  }
}
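Here the relation string comes from a relation named in the Semgrex pattern (retrieved with sm.getRelnString). A hedged end-to-end sketch, not project code: it assumes the Semgrex syntax in which "=edge" names the matched relation, and the class name and toy sentence are invented for illustration.

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.trees.GrammaticalRelation;

public class RemoveNamedEdgeSketch {
  public static void main(String[] args) {
    SemanticGraph sg = SemanticGraph.valueOf("[ate nsubj>Bill obj>muffins]");
    // "=edge" binds the matched nsubj relation, playing the role of edgeName above.
    SemgrexPattern pattern = SemgrexPattern.compile("{}=gov >nsubj=edge {}=dep");
    SemgrexMatcher sm = pattern.matcher(sg);
    if (sm.find()) {
      String relation = sm.getRelnString("edge"); // "nsubj"
      IndexedWord gov = sm.getNode("gov");
      IndexedWord dep = sm.getNode("dep");
      SemanticGraphEdge edge = sg.getEdge(gov, dep, GrammaticalRelation.valueOf(relation));
      if (edge != null) {
        sg.removeEdge(edge); // drop the matched edge, as evaluate does
      }
    }
    System.out.println(sg.toCompactString());
  }
}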
Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
The class DocumentPreprocessor, method findSyntacticRelationsFromDependency:
private static void findSyntacticRelationsFromDependency(List<Mention> orderedMentions) {
  if (orderedMentions.size() == 0)
    return;
  markListMemberRelation(orderedMentions);
  SemanticGraph dependency = orderedMentions.get(0).enhancedDependency;

  // apposition
  Set<Pair<Integer, Integer>> appos = Generics.newHashSet();
  List<SemanticGraphEdge> appositions = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.APPOSITIONAL_MODIFIER);
  for (SemanticGraphEdge edge : appositions) {
    int sIdx = edge.getSource().index() - 1;
    int tIdx = edge.getTarget().index() - 1;
    appos.add(Pair.makePair(sIdx, tIdx));
  }
  markMentionRelation(orderedMentions, appos, "APPOSITION");

  // predicate nominatives
  Set<Pair<Integer, Integer>> preNomi = Generics.newHashSet();
  List<SemanticGraphEdge> copula = dependency.findAllRelns(UniversalEnglishGrammaticalRelations.COPULA);
  for (SemanticGraphEdge edge : copula) {
    IndexedWord source = edge.getSource();
    IndexedWord target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT);
    if (target == null)
      target = dependency.getChildWithReln(source, UniversalEnglishGrammaticalRelations.CLAUSAL_SUBJECT);
    // TODO
    if (target == null)
      continue;
    // to handle relative clauses, e.g., "Tim, who is a student"
    if (target.tag().startsWith("W")) {
      IndexedWord parent = dependency.getParent(source);
      if (parent != null && dependency.reln(parent, source).equals(UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER)) {
        target = parent;
      }
    }
    int sIdx = source.index() - 1;
    int tIdx = target.index() - 1;
    preNomi.add(Pair.makePair(tIdx, sIdx));
  }
  markMentionRelation(orderedMentions, preNomi, "PREDICATE_NOMINATIVE");

  // relative pronouns TODO
  Set<Pair<Integer, Integer>> relativePronounPairs = Generics.newHashSet();
  markMentionRelation(orderedMentions, relativePronounPairs, "RELATIVE_PRONOUN");
}
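The apposition pass above boils down to findAllRelns plus a shift from 1-based token indices to 0-based indices. A standalone sketch of just that step on a toy graph, not project code; the class name and sentence are invented, and it assumes SemanticGraph.valueOf resolves "appos" to the same UniversalEnglish relation object queried here.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.trees.UniversalEnglishGrammaticalRelations;
import edu.stanford.nlp.util.Pair;

public class ApposPairsSketch {
  public static void main(String[] args) {
    // Toy parse of "Obama, the president, spoke": appos edge Obama -> president.
    SemanticGraph dependency = SemanticGraph.valueOf("[spoke nsubj>[Obama appos>president]]");
    List<SemanticGraphEdge> appositions =
        dependency.findAllRelns(UniversalEnglishGrammaticalRelations.APPOSITIONAL_MODIFIER);
    Set<Pair<Integer, Integer>> appos = new HashSet<>();
    for (SemanticGraphEdge edge : appositions) {
      // Same 1-based to 0-based index conversion as in the method above.
      appos.add(Pair.makePair(edge.getSource().index() - 1, edge.getTarget().index() - 1));
    }
    System.out.println(appos);
  }
}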
Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
The class EmbeddingExtractor, method getMentionEmbeddings:
public SimpleMatrix getMentionEmbeddings(Mention m, SimpleMatrix docEmbedding) {
  Iterator<SemanticGraphEdge> depIterator = m.enhancedDependency.incomingEdgeIterator(m.headIndexedWord);
  SemanticGraphEdge depRelation = depIterator.hasNext() ? depIterator.next() : null;
  return NeuralUtils.concatenate(
      getAverageEmbedding(m.sentenceWords, m.startIndex, m.endIndex),
      getAverageEmbedding(m.sentenceWords, m.startIndex - 5, m.startIndex),
      getAverageEmbedding(m.sentenceWords, m.endIndex, m.endIndex + 5),
      getAverageEmbedding(m.sentenceWords.subList(0, m.sentenceWords.size() - 1)),
      docEmbedding,
      getWordEmbedding(m.sentenceWords, m.headIndex),
      getWordEmbedding(m.sentenceWords, m.startIndex),
      getWordEmbedding(m.sentenceWords, m.endIndex - 1),
      getWordEmbedding(m.sentenceWords, m.startIndex - 1),
      getWordEmbedding(m.sentenceWords, m.endIndex),
      getWordEmbedding(m.sentenceWords, m.startIndex - 2),
      getWordEmbedding(m.sentenceWords, m.endIndex + 1),
      getWordEmbedding(depRelation == null ? null : depRelation.getSource().word()));
}
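NeuralUtils.concatenate stacks its column-vector arguments into one long column vector, so the mention representation is a fixed-order concatenation of span averages, word vectors, and the document embedding. A toy sketch, not project code, with two stand-in vectors (names invented for illustration):

import org.ejml.simple.SimpleMatrix;

import edu.stanford.nlp.neural.NeuralUtils;

public class ConcatenateSketch {
  public static void main(String[] args) {
    // Two 2-dimensional column vectors standing in for word/span embeddings.
    SimpleMatrix head = new SimpleMatrix(new double[][] {{0.1}, {0.2}});
    SimpleMatrix context = new SimpleMatrix(new double[][] {{0.3}, {0.4}});
    SimpleMatrix mentionRep = NeuralUtils.concatenate(head, context);
    System.out.println(mentionRep.getNumElements()); // 4: the two vectors are stacked
  }
}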
Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.
The class FeatureExtractor, method getFeatures:
private Counter<String> getFeatures(Document doc, Mention m1, Mention m2) {
  assert (m1.appearEarlierThan(m2));
  Counter<String> features = new ClassicCounter<>();

  // global features
  features.incrementCount("bias");
  if (useDocSource) {
    features.incrementCount("doc-type=" + doc.docType);
    if (doc.docInfo != null && doc.docInfo.containsKey("DOC_ID")) {
      features.incrementCount("doc-source=" + doc.docInfo.get("DOC_ID").split("/")[1]);
    }
  }

  // singleton feature conjunctions
  List<String> singletonFeatures1 = m1.getSingletonFeatures(dictionaries);
  List<String> singletonFeatures2 = m2.getSingletonFeatures(dictionaries);
  for (Map.Entry<Integer, String> e : SINGLETON_FEATURES.entrySet()) {
    if (e.getKey() < singletonFeatures1.size() && e.getKey() < singletonFeatures2.size()) {
      features.incrementCount(e.getValue() + "=" + singletonFeatures1.get(e.getKey()) + "_" + singletonFeatures2.get(e.getKey()));
    }
  }
  SemanticGraphEdge p1 = getDependencyParent(m1);
  SemanticGraphEdge p2 = getDependencyParent(m2);
  features.incrementCount("dep-relations=" + (p1 == null ? "null" : p1.getRelation()) + "_" + (p2 == null ? "null" : p2.getRelation()));
  features.incrementCount("roles=" + getRole(m1) + "_" + getRole(m2));
  CoreLabel headCL1 = headWord(m1);
  CoreLabel headCL2 = headWord(m2);
  String headPOS1 = getPOS(headCL1);
  String headPOS2 = getPOS(headCL2);
  features.incrementCount("head-pos-s=" + headPOS1 + "_" + headPOS2);
  features.incrementCount("head-words=" + wordIndicator("h_" + headCL1.word().toLowerCase() + "_" + headCL2.word().toLowerCase(), headPOS1 + "_" + headPOS2));

  // agreement features
  addFeature(features, "animacies-agree", m2.animaciesAgree(m1));
  addFeature(features, "attributes-agree", m2.attributesAgree(m1, dictionaries));
  addFeature(features, "entity-types-agree", m2.entityTypesAgree(m1, dictionaries));
  addFeature(features, "numbers-agree", m2.numbersAgree(m1));
  addFeature(features, "genders-agree", m2.gendersAgree(m1));
  addFeature(features, "ner-strings-equal", m1.nerString.equals(m2.nerString));

  // string matching features
  addFeature(features, "antecedent-head-in-anaphor", headContainedIn(m1, m2));
  addFeature(features, "anaphor-head-in-antecedent", headContainedIn(m2, m1));
  if (m1.mentionType != MentionType.PRONOMINAL && m2.mentionType != MentionType.PRONOMINAL) {
    addFeature(features, "antecedent-in-anaphor", m2.spanToString().toLowerCase().contains(m1.spanToString().toLowerCase()));
    addFeature(features, "anaphor-in-antecedent", m1.spanToString().toLowerCase().contains(m2.spanToString().toLowerCase()));
    addFeature(features, "heads-equal", m1.headString.equalsIgnoreCase(m2.headString));
    addFeature(features, "heads-agree", m2.headsAgree(m1));
    addFeature(features, "exact-match", m1.toString().trim().toLowerCase().equals(m2.toString().trim().toLowerCase()));
    addFeature(features, "partial-match", relaxedStringMatch(m1, m2));
    double editDistance = StringUtils.editDistance(m1.spanToString(), m2.spanToString()) / (double) (m1.spanToString().length() + m2.spanToString().length());
    features.incrementCount("edit-distance", editDistance);
    features.incrementCount("edit-distance=" + ((int) (editDistance * 10) / 10.0));
    double headEditDistance = StringUtils.editDistance(m1.headString, m2.headString) / (double) (m1.headString.length() + m2.headString.length());
    features.incrementCount("head-edit-distance", headEditDistance);
    features.incrementCount("head-edit-distance=" + ((int) (headEditDistance * 10) / 10.0));
  }

  // distance features
  addNumeric(features, "mention-distance", m2.mentionNum - m1.mentionNum);
  addNumeric(features, "sentence-distance", m2.sentNum - m1.sentNum);
  if (m2.sentNum == m1.sentNum) {
    addNumeric(features, "word-distance", m2.startIndex - m1.endIndex);
    if (m1.endIndex > m2.startIndex) {
      features.incrementCount("spans-intersect");
    }
  }

  // setup for dcoref features
  Set<Mention> ms1 = new HashSet<>();
  ms1.add(m1);
  Set<Mention> ms2 = new HashSet<>();
  ms2.add(m2);
  Random r = new Random();
  CorefCluster c1 = new CorefCluster(20000 + r.nextInt(10000), ms1);
  CorefCluster c2 = new CorefCluster(10000 + r.nextInt(10000), ms2);
  String s2 = m2.lowercaseNormalizedSpanString();
  String s1 = m1.lowercaseNormalizedSpanString();

  // discourse dcoref features
  addFeature(features, "mention-speaker-PER0", m2.headWord.get(SpeakerAnnotation.class).equalsIgnoreCase("PER0"));
  addFeature(features, "antecedent-is-anaphor-speaker", CorefRules.antecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
  addFeature(features, "same-speaker", CorefRules.entitySameSpeaker(doc, m2, m1));
  addFeature(features, "person-disagree-same-speaker", CorefRules.entityPersonDisagree(doc, m2, m1, dictionaries) && CorefRules.entitySameSpeaker(doc, m2, m1));
  addFeature(features, "antecedent-matches-anaphor-speaker", CorefRules.antecedentMatchesMentionSpeakerAnnotation(m2, m1, doc));
  addFeature(features, "discourse-you-PER0", m2.person == Person.YOU && doc.docType == DocType.ARTICLE && m2.headWord.get(CoreAnnotations.SpeakerAnnotation.class).equals("PER0"));
  addFeature(features, "speaker-match-i-i", m2.number == Number.SINGULAR && dictionaries.firstPersonPronouns.contains(s1) && m1.number == Number.SINGULAR && dictionaries.firstPersonPronouns.contains(s2) && CorefRules.entitySameSpeaker(doc, m2, m1));
  addFeature(features, "speaker-match-speaker-i", m2.number == Number.SINGULAR && dictionaries.firstPersonPronouns.contains(s2) && CorefRules.antecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
  addFeature(features, "speaker-match-i-speaker", m1.number == Number.SINGULAR && dictionaries.firstPersonPronouns.contains(s1) && CorefRules.antecedentIsMentionSpeaker(doc, m1, m2, dictionaries));
  addFeature(features, "speaker-match-you-you", dictionaries.secondPersonPronouns.contains(s1) && dictionaries.secondPersonPronouns.contains(s2) && CorefRules.entitySameSpeaker(doc, m2, m1));
  addFeature(features, "discourse-between-two-person", ((m2.person == Person.I && m1.person == Person.YOU || (m2.person == Person.YOU && m1.person == Person.I)) && (m2.headWord.get(CoreAnnotations.UtteranceAnnotation.class) - m1.headWord.get(CoreAnnotations.UtteranceAnnotation.class) == 1) && doc.docType == DocType.CONVERSATION));
  addFeature(features, "incompatible-not-match", m1.person != Person.I && m2.person != Person.I && (CorefRules.antecedentIsMentionSpeaker(doc, m1, m2, dictionaries) || CorefRules.antecedentIsMentionSpeaker(doc, m2, m1, dictionaries)));
  int utteranceDist = Math.abs(m1.headWord.get(CoreAnnotations.UtteranceAnnotation.class) - m2.headWord.get(CoreAnnotations.UtteranceAnnotation.class));
  if (doc.docType != DocType.ARTICLE && utteranceDist == 1 && !CorefRules.entitySameSpeaker(doc, m2, m1)) {
    addFeature(features, "speaker-mismatch-i-i", m1.person == Person.I && m2.person == Person.I);
    addFeature(features, "speaker-mismatch-you-you", m1.person == Person.YOU && m2.person == Person.YOU);
    addFeature(features, "speaker-mismatch-we-we", m1.person == Person.WE && m2.person == Person.WE);
  }

  // other dcoref features
  String firstWord1 = firstWord(m1).word().toLowerCase();
  addFeature(features, "indefinite-article-np", (m1.appositions == null && m1.predicateNominatives == null && (firstWord1.equals("a") || firstWord1.equals("an"))));
  addFeature(features, "far-this", m2.lowercaseNormalizedSpanString().equals("this") && Math.abs(m2.sentNum - m1.sentNum) > 3);
  addFeature(features, "per0-you-in-article", m2.person == Person.YOU && doc.docType == DocType.ARTICLE && m2.headWord.get(CoreAnnotations.SpeakerAnnotation.class).equals("PER0"));
  addFeature(features, "inside-in", m2.insideIn(m1) || m1.insideIn(m2));
  addFeature(features, "indefinite-determiners", dictionaries.indefinitePronouns.contains(m1.originalSpan.get(0).lemma()) || dictionaries.indefinitePronouns.contains(m2.originalSpan.get(0).lemma()));
  addFeature(features, "entity-attributes-agree", CorefRules.entityAttributesAgree(c2, c1));
  addFeature(features, "entity-token-distance", CorefRules.entityTokenDistance(m2, m1));
  addFeature(features, "i-within-i", CorefRules.entityIWithinI(m2, m1, dictionaries));
  addFeature(features, "exact-string-match", CorefRules.entityExactStringMatch(c2, c1, dictionaries, doc.roleSet));
  addFeature(features, "entity-relaxed-heads-agree", CorefRules.entityRelaxedHeadsAgreeBetweenMentions(c2, c1, m2, m1));
  addFeature(features, "is-acronym", CorefRules.entityIsAcronym(doc, c2, c1));
  addFeature(features, "demonym", m2.isDemonym(m1, dictionaries));
  addFeature(features, "incompatible-modifier", CorefRules.entityHaveIncompatibleModifier(m2, m1));
  addFeature(features, "head-lemma-match", m1.headWord.lemma().equals(m2.headWord.lemma()));
  addFeature(features, "words-included", CorefRules.entityWordsIncluded(c2, c1, m2, m1));
  addFeature(features, "extra-proper-noun", CorefRules.entityHaveExtraProperNoun(m2, m1, new HashSet<>()));
  addFeature(features, "number-in-later-mentions", CorefRules.entityNumberInLaterMention(m2, m1));
  addFeature(features, "sentence-context-incompatible", CorefRules.sentenceContextIncompatible(m2, m1, dictionaries));

  // syntax features
  if (useConstituencyParse) {
    if (m1.sentNum == m2.sentNum) {
      int clauseCount = 0;
      Tree tree = m2.contextParseTree;
      Tree current = m2.mentionSubTree;
      while (true) {
        current = current.ancestor(1, tree);
        if (current.label().value().startsWith("S")) {
          clauseCount++;
        }
        if (current.dominates(m1.mentionSubTree)) {
          break;
        }
        if (current.label().value().equals("ROOT") || current.ancestor(1, tree) == null) {
          break;
        }
      }
      features.incrementCount("clause-count", clauseCount);
      features.incrementCount("clause-count=" + bin(clauseCount));
    }
    if (RuleBasedCorefMentionFinder.isPleonastic(m2, m2.contextParseTree) || RuleBasedCorefMentionFinder.isPleonastic(m1, m1.contextParseTree)) {
      features.incrementCount("pleonastic-it");
    }
    if (maximalNp(m1.mentionSubTree) == maximalNp(m2.mentionSubTree)) {
      features.incrementCount("same-maximal-np");
    }
    boolean m1Embedded = headEmbeddingLevel(m1.mentionSubTree, m1.headIndex - m1.startIndex) > 1;
    boolean m2Embedded = headEmbeddingLevel(m2.mentionSubTree, m2.headIndex - m2.startIndex) > 1;
    features.incrementCount("embedding=" + m1Embedded + "_" + m2Embedded);
  }
  return features;
}
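As a self-contained illustration of the feature bookkeeping (toy strings, not project code, class name invented), the normalized and bucketed edit-distance features from the string-matching block can be reproduced with just ClassicCounter and StringUtils:

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.StringUtils;

public class EditDistanceFeatureSketch {
  public static void main(String[] args) {
    String span1 = "the president";
    String span2 = "the US president";
    Counter<String> features = new ClassicCounter<>();
    // Edit distance normalized by the combined span length, as in getFeatures.
    double editDistance = StringUtils.editDistance(span1, span2)
        / (double) (span1.length() + span2.length());
    features.incrementCount("edit-distance", editDistance);
    // Truncate to one decimal so the second counter acts as a discrete bucket.
    features.incrementCount("edit-distance=" + ((int) (editDistance * 10) / 10.0));
    System.out.println(features);
  }
}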