use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method demoteQuantificationalModifiers.
/**
 * Rewrites quantificational modifier constructions in {@code sg}.
 *
 * Two passes are made: first the single three-word pattern
 * ({@code QUANT_MOD_3W_PATTERN}), then each of the two-word patterns
 * ({@code QUANT_MOD_2W_PATTERNS}). For every match the work is delegated to
 * {@code demoteQmodParentHelper} and {@code demoteQmodMWEHelper}, which
 * operate on the original graph.
 *
 * Matching always runs against a soft copy of the graph while the edits go
 * to {@code sg} itself (presumably so that the edits do not disturb the
 * matcher that is still iterating -- confirm against SemgrexMatcher).
 *
 * @param sg The graph to modify in place.
 */
private static void demoteQuantificationalModifiers(SemanticGraph sg) {
  /* Three-word expressions: the middle word (w2) is handed to the helpers as the anchor. */
  SemgrexMatcher threeWordMatcher = QUANT_MOD_3W_PATTERN.matcher(sg.makeSoftCopy());
  while (threeWordMatcher.findNextMatchingNode()) {
    IndexedWord first = threeWordMatcher.getNode("w1");
    IndexedWord second = threeWordMatcher.getNode("w2");
    IndexedWord third = threeWordMatcher.getNode("w3");
    IndexedWord governor = threeWordMatcher.getNode("gov");

    demoteQmodParentHelper(sg, governor, second);

    List<IndexedWord> expressionWords = Generics.newLinkedList();
    expressionWords.add(first);
    expressionWords.add(second);
    expressionWords.add(third);
    demoteQmodMWEHelper(sg, expressionWords, governor, second);
  }

  /* Two-word expressions: the first word (w1) is handed to the helpers as the anchor. */
  for (SemgrexPattern twoWordPattern : QUANT_MOD_2W_PATTERNS) {
    /* Take a fresh copy per pattern so that earlier edits to sg are visible. */
    SemgrexMatcher twoWordMatcher = twoWordPattern.matcher(sg.makeSoftCopy());
    while (twoWordMatcher.findNextMatchingNode()) {
      IndexedWord first = twoWordMatcher.getNode("w1");
      IndexedWord second = twoWordMatcher.getNode("w2");
      IndexedWord governor = twoWordMatcher.getNode("gov");

      demoteQmodParentHelper(sg, governor, first);

      List<IndexedWord> expressionWords = Generics.newLinkedList();
      expressionWords.add(first);
      expressionWords.add(second);
      demoteQmodMWEHelper(sg, expressionWords, governor, first);
    }
  }
}
use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method addCaseMarkerInformation.
/**
 * Adds the case marker(s) to all nmod, acl and advcl relations that are
 * modified by one or more case marker(s).
 *
 * The passive agent pattern is handled first, followed by three-word,
 * two-word and finally single-word prepositions. Matching is always done on
 * a soft copy of the graph while the relations are rewritten on {@code sg}.
 *
 * @param sg The graph whose relations are rewritten in place. Nothing is
 *           done if it has no root.
 * @param enhanceOnlyNmods If this is set to true, then prepositions will only be appended to nmod
 *                         relations (and not to acl or advcl relations).
 *
 * @see UniversalEnglishGrammaticalStructure#addCaseMarkersToReln
 */
private static void addCaseMarkerInformation(SemanticGraph sg, boolean enhanceOnlyNmods) {
  /* Semgrexes require a graph with a root. */
  if (sg.getRoots().isEmpty()) {
    return;
  }

  /* passive agent */
  SemanticGraph sgCopy = sg.makeSoftCopy();
  SemgrexMatcher matcher = PASSIVE_AGENT_PATTERN.matcher(sgCopy);
  while (matcher.find()) {
    IndexedWord caseMarker = matcher.getNode("c1");
    IndexedWord gov = matcher.getNode("gov");
    IndexedWord mod = matcher.getNode("mod");
    addPassiveAgentToReln(sg, gov, mod, caseMarker);
  }

  /*
   * The most recently processed case markers are carried across all
   * patterns and all three passes, exactly as the single shared variable
   * did before the loops were factored out.
   */
  List<IndexedWord> oldCaseMarkers = Generics.newArrayList();

  /* 3-word prepositions */
  for (SemgrexPattern p : PREP_MW3_PATTERNS) {
    oldCaseMarkers = addCaseMarkersForPattern(sg, p, enhanceOnlyNmods, oldCaseMarkers, "c1", "c2", "c3");
  }

  /* 2-word prepositions */
  for (SemgrexPattern p : PREP_MW2_PATTERNS) {
    oldCaseMarkers = addCaseMarkersForPattern(sg, p, enhanceOnlyNmods, oldCaseMarkers, "c1", "c2");
  }

  /* Single-word prepositions */
  for (SemgrexPattern p : PREP_PATTERNS) {
    oldCaseMarkers = addCaseMarkersForPattern(sg, p, enhanceOnlyNmods, oldCaseMarkers, "c1");
  }
}

/**
 * Runs one preposition pattern over a soft copy of {@code sg} and calls
 * {@code addCaseMarkersToReln} on {@code sg} for every accepted match.
 *
 * @param sg The graph to modify.
 * @param pattern The pattern to match; it must bind "gov", "mod", the
 *                relation "reln" and every node named in {@code caseMarkerNames}.
 * @param enhanceOnlyNmods If true, matches whose "reln" is not "nmod" are skipped.
 * @param lastCaseMarkers The case markers of the most recently processed match;
 *                        a match with an equal list is skipped.
 * @param caseMarkerNames The names of the case marker nodes in the pattern.
 * @return The case markers of the last match processed, or {@code lastCaseMarkers}
 *         unchanged if no match was processed.
 */
private static List<IndexedWord> addCaseMarkersForPattern(SemanticGraph sg, SemgrexPattern pattern,
    boolean enhanceOnlyNmods, List<IndexedWord> lastCaseMarkers, String... caseMarkerNames) {
  SemgrexMatcher matcher = pattern.matcher(sg.makeSoftCopy());
  while (matcher.find()) {
    if (enhanceOnlyNmods && !matcher.getRelnString("reln").equals("nmod")) {
      continue;
    }

    List<IndexedWord> caseMarkers = Generics.newArrayList(caseMarkerNames.length);
    for (String name : caseMarkerNames) {
      caseMarkers.add(matcher.getNode(name));
    }
    /* A single marker needs no ordering; multi-word markers are put in sorted order. */
    if (caseMarkers.size() > 1) {
      Collections.sort(caseMarkers);
    }

    /* We only want to match every case marker once. */
    if (caseMarkers.equals(lastCaseMarkers)) {
      continue;
    }

    IndexedWord gov = matcher.getNode("gov");
    IndexedWord mod = matcher.getNode("mod");
    addCaseMarkersToReln(sg, gov, mod, caseMarkers);
    lastCaseMarkers = caseMarkers;
  }
  return lastCaseMarkers;
}
use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method addEnhancements.
/**
 * Applies the enhancement steps selected in {@code options} to the given
 * dependencies.
 *
 * The list is turned into a {@link SemanticGraph}, the steps are run on the
 * graph in a fixed order, and finally the list is cleared, refilled from
 * the graph and sorted. {@code correctDependencies} and
 * {@code correctSubjPass} always run; every other step is guarded by its
 * option flag.
 *
 * @param list The dependencies to enhance; its contents are replaced.
 * @param options Flags selecting which enhancement steps to run.
 */
@Override
protected void addEnhancements(List<TypedDependency> list, EnhancementOptions options) {
  SemanticGraph sg = new SemanticGraph(list);

  traceEnhancementStage("addEnhancements: before correctDependencies()", sg);
  correctDependencies(sg);
  traceEnhancementStage("addEnhancements: after correctDependencies()", sg);

  /* Turn multi-word prepositions into flat mwe. */
  if (options.processMultiWordPrepositions) {
    processMultiwordPreps(sg);
    traceEnhancementStage("addEnhancements: after processMultiwordPreps()", sg);
  }

  /* Turn quantificational modifiers into flat mwe. */
  if (options.demoteQuantMod) {
    demoteQuantificationalModifiers(sg);
    traceEnhancementStage("addEnhancements: after demoteQuantificationalModifiers()", sg);
  }

  /* Add copy nodes for conjoined Ps and PPs. */
  if (options.addCopyNodes) {
    expandPPConjunctions(sg);
    traceEnhancementStage("addEnhancements: after expandPPConjunctions()", sg);
    expandPrepConjunctions(sg);
    traceEnhancementStage("addEnhancements: after expandPrepConjunctions()", sg);
  }

  /* Add prepositions to relation names. */
  if (options.enhancePrepositionalModifiers) {
    addCaseMarkerInformation(sg, options.enhanceOnlyNmods);
    traceEnhancementStage("addEnhancements: after addCaseMarkerInformation()", sg);
  }

  /* Add coordinating conjunctions to relation names. */
  if (options.enhanceConjuncts) {
    addConjInformation(sg);
    traceEnhancementStage("addEnhancements: after addConjInformation()", sg);
  }

  /* Add "referent" relations. */
  if (options.addReferent) {
    addRef(sg);
    traceEnhancementStage("addEnhancements: after addRef()", sg);
    collapseReferent(sg);
    traceEnhancementStage("addEnhancements: after collapseReferent()", sg);
  }

  /* Propagate dependents. */
  if (options.propagateDependents) {
    treatCC(sg);
    traceEnhancementStage("addEnhancements: after treatCC()", sg);
  }

  /* Add relations between controlling subjects and controlled verbs. */
  if (options.addXSubj) {
    addExtraNSubj(sg);
    traceEnhancementStage("addEnhancements: after addExtraNSubj()", sg);
  }

  correctSubjPass(sg);

  /* Hand the result back through the caller's list. */
  list.clear();
  list.addAll(sg.typedDependencies());
  Collections.sort(list);
}

/**
 * Prints the current dependencies of {@code sg} under the given title when
 * {@code DEBUG} is set; does nothing otherwise.
 */
private void traceEnhancementStage(String title, SemanticGraph sg) {
  if (DEBUG) {
    printListSorted(title, sg.typedDependencies());
  }
}
use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method postProcessDependencies.
/**
 * List-based entry point for dependency post-processing.
 *
 * Builds a {@link SemanticGraph} from {@code list}, delegates to the
 * graph-based {@code postProcessDependencies} overload, and then replaces
 * the contents of {@code list} with the graph's typed dependencies.
 *
 * @param list The dependencies to post-process; its contents are replaced.
 */
@Override
protected void postProcessDependencies(List<TypedDependency> list) {
  SemanticGraph graph = new SemanticGraph(list);
  postProcessDependencies(graph);

  /* Hand the result back through the caller's list. */
  list.clear();
  list.addAll(graph.typedDependencies());
}
use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.
the class TSVUtils method parseSentence.
/**
 * Create an Annotation object (with a single sentence) from the given specification.
 *
 * Missing optional values fall back to "???" for the document id and -1 for
 * the sentence index. Character offsets of the tokens are computed from the
 * word lengths, leaving one character between consecutive tokens.
 *
 * @param docid The document id, if known.
 * @param sentenceIndex The index of the sentence, if known.
 * @param gloss The sentence text; the two-character sequences backslash-n
 *              and backslash-t are turned into a newline and a tab.
 * @param tree Builds the dependency graph that is stored under the basic,
 *             collapsed and CC-processed dependency annotations.
 * @param maltTree Builds the graph that is stored under the alternative
 *                 dependencies annotation.
 * @param words The words of the sentence.
 * @param lemmas The lemmas, one per word.
 * @param pos The part-of-speech tags, one per word.
 * @param ner The named entity tags, one per word.
 * @param sentenceid An id for the sentence, used only in error messages.
 * @return A document annotation holding the tokens and the single sentence.
 * @throws IllegalArgumentException If lemmas, pos or ner differ in length from words.
 */
private static Annotation parseSentence(Optional<String> docid, Optional<Integer> sentenceIndex, String gloss, Function<List<CoreLabel>, SemanticGraph> tree, Function<List<CoreLabel>, SemanticGraph> maltTree, List<String> words, List<String> lemmas, List<String> pos, List<String> ner, Optional<String> sentenceid) {
  // Error checks: every per-token column must be as long as the word list
  final int tokenCount = words.size();
  checkColumnLength(tokenCount, lemmas, sentenceid);
  checkColumnLength(tokenCount, pos, sentenceid);
  checkColumnLength(tokenCount, ner, sentenceid);

  final String documentId = docid.orElse("???");
  final Integer sentenceNumber = sentenceIndex.orElse(-1);

  // Create the tokens
  List<CoreLabel> tokens = new ArrayList<>(tokenCount);
  int charOffset = 0;
  for (int position = 0; position < tokenCount; ++position) {
    final String word = words.get(position);
    final int charEnd = charOffset + word.length();

    CoreLabel token = new CoreLabel(12);
    token.setWord(word);
    token.setValue(word);
    token.setBeginPosition(charOffset);
    token.setEndPosition(charEnd);
    token.setLemma(lemmas.get(position));
    token.setTag(pos.get(position));
    token.setNER(ner.get(position));
    token.set(CoreAnnotations.DocIDAnnotation.class, documentId);
    token.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceNumber);
    token.set(CoreAnnotations.IndexAnnotation.class, position + 1);  // token indices are 1-based
    token.set(CoreAnnotations.TokenBeginAnnotation.class, position);
    token.set(CoreAnnotations.TokenEndAnnotation.class, position + 1);
    tokens.add(token);

    // Skip one character between this token and the next
    charOffset = charEnd + 1;
  }

  // Unescape the gloss
  final String text = gloss.replace("\\n", "\n").replace("\\t", "\t");

  // Create the sentence
  CoreMap sentence = new ArrayCoreMap(16);
  sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
  // The same graph instance is registered under all three dependency flavors
  SemanticGraph dependencies = tree.apply(tokens);
  sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, dependencies);
  sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, dependencies);
  sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, dependencies);
  SemanticGraph alternativeDependencies = maltTree.apply(tokens);
  sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, alternativeDependencies);
  sentence.set(CoreAnnotations.DocIDAnnotation.class, documentId);
  sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceNumber);
  sentence.set(CoreAnnotations.TextAnnotation.class, text);
  sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
  sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size());

  // Create the document
  Annotation document = new Annotation(text);
  document.set(CoreAnnotations.TokensAnnotation.class, tokens);
  document.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
  document.set(CoreAnnotations.DocIDAnnotation.class, documentId);
  document.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceNumber);
  return document;
}

/**
 * Throws an IllegalArgumentException if {@code column} does not have
 * {@code expected} entries; {@code sentenceid} is only used in the message.
 */
private static void checkColumnLength(int expected, List<String> column, Optional<String> sentenceid) {
  if (column.size() != expected) {
    throw new IllegalArgumentException("Array lengths don't match: " + expected + " vs " + column.size() + " (sentence " + sentenceid.orElse("???") + ")");
  }
}
Aggregations