use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.
the class ExtractPhraseFromPattern method getSemGrexPatternNodes.
/*
* Given a SemanticGraph g and a SemgrexPattern pattern
* And a bunch of other parameters,
* run the pattern matcher (get SemgrexMatcher m)
* Iterate through to get matching words/phrases
*
* Next, gets matchedGraphsForPattern.get(pattern),
* a list of matched (String, semgraph) pairs
* and adds the new graph and tokens if matched.
*
* I need to clarify what's going on with tokens.
*/
public Set<IndexedWord> getSemGrexPatternNodes(SemanticGraph g, List<String> tokens, Collection<String> outputNodes, Collection<IntPair> outputIndices, SemgrexPattern pattern, boolean findSubTrees, Collection<ExtractedPhrase> extractedPhrases, boolean lowercase, Function<CoreLabel, Boolean> acceptWord) {
  Set<IndexedWord> foundWordsParents = new HashSet<>();
  SemgrexMatcher m = pattern.matcher(g, lowercase);
  while (m.find()) {
    IndexedWord w = m.getNode("node");
    //System.out.println("found a match for " + pattern.pattern());
    IndexedWord parent = m.getNode("parent");
    boolean ifSatisfiedMaxDepth = checkIfSatisfiedMaxDepth(g, parent, w, new IntPair(maxDepth, 0));
    if (!ifSatisfiedMaxDepth)
      continue;
    if (DEBUG > 3) {
      List<Pair<String, SemanticGraph>> matchedGraphs = matchedGraphsForPattern.get(pattern);
      if (matchedGraphs == null)
        matchedGraphs = new ArrayList<>();
      matchedGraphs.add(new Pair<>(StringUtils.join(tokens, " "), g));
      //if (DEBUG >= 3)
      //  System.out.println("matched pattern is " + pattern);
      matchedGraphsForPattern.put(pattern, matchedGraphs);
    }
    foundWordsParents.add(parent);
    // String relationName = m.getRelnString("reln");
    // System.out.println("word is " + w.lemma() + " and " + w.tag());
    ArrayList<IndexedWord> seenNodes = new ArrayList<>();
    List<String> cutoffrelations = new ArrayList<>();
    // if (elementStr.equalsIgnoreCase("technique"))
    //   cutoffrelations = cutoffRelationsForTech;
    // if (elementStr.equalsIgnoreCase("app"))
    //   cutoffrelations = this.cuttoffRelationsForApp;
    //System.out.println("g is ");
    //g.prettyPrint();
    printSubGraph(g, w, cutoffrelations, tokens, outputNodes, outputIndices, seenNodes, new ArrayList<>(), findSubTrees, extractedPhrases, pattern, acceptWord);
  }
  return foundWordsParents;
}
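The method above assumes the caller compiles a pattern that binds the node names "parent" and "node" (and, in the commented-out line, a relation named "reln"). Below is a minimal, self-contained sketch of that matcher idiom with a hypothetical pattern that captures every edge and its relation name; it is not the pattern ExtractPhraseFromPattern actually uses.

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;

public class NamedNodeSketch {
  public static void demo(SemanticGraph g) {
    // Hypothetical pattern: any governor ("parent") and any dependent ("node"),
    // with the governing relation captured under the name "reln".
    SemgrexPattern pattern = SemgrexPattern.compile("{}=parent >=reln {}=node");
    SemgrexMatcher m = pattern.matcher(g);
    while (m.find()) {
      IndexedWord parent = m.getNode("parent");   // same named-node lookup as above
      IndexedWord node = m.getNode("node");
      String reln = m.getRelnString("reln");      // what the commented-out call above would retrieve
      System.out.println(parent.word() + " -" + reln + "-> " + node.word());
    }
  }
}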
use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.
the class SemgrexDemo method main.
public static void main(String[] args) {
  String treeString = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
  // Typically the tree is constructed by parsing or reading a
  // treebank. This is just for example purposes.
  Tree tree = Tree.valueOf(treeString);
  // This creates English uncollapsed dependencies as a
  // SemanticGraph. If you are creating many SemanticGraphs, you
  // should use a GrammaticalStructureFactory and use it to generate
  // the intermediate GrammaticalStructure instead.
  SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);
  // Alternatively, this could have been the Chinese params or those of any
  // other supported language. As of 2014, only English and Chinese are supported.
  TreebankLangParserParams params = new EnglishTreebankParserParams();
  GrammaticalStructureFactory gsf = params.treebankLanguagePack().grammaticalStructureFactory(params.treebankLanguagePack().punctuationWordRejectFilter(), params.typedDependencyHeadFinder());
  GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
  log.info(graph);
  SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<nsubj {}=B");
  SemgrexMatcher matcher = semgrex.matcher(graph);
  // ancestor of both "dog" and "my" via the nsubj relation
  while (matcher.find()) {
    log.info(matcher.getNode("A") + " <<nsubj " + matcher.getNode("B"));
  }
}
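The comment in the demo recommends going through a GrammaticalStructureFactory when many SemanticGraphs are needed. The sketch below shows that batch path, reusing the same types the demo imports; it assumes SemanticGraphFactory also has an overload that accepts a GrammaticalStructure, so verify the available overloads in your CoreNLP version.

// Sketch only: convert many parsed Trees via the factory, as the demo's comment suggests.
// Assumes SemanticGraphFactory.generateUncollapsedDependencies(GrammaticalStructure) exists
// in your CoreNLP version; otherwise adapt to the overload that is available.
static List<SemanticGraph> toGraphs(GrammaticalStructureFactory gsf, List<Tree> trees) {
  List<SemanticGraph> graphs = new ArrayList<>();
  for (Tree tree : trees) {
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    graphs.add(SemanticGraphFactory.generateUncollapsedDependencies(gs));
  }
  return graphs;
}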
use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.
the class Sentence method semgrex.
/**
* Apply a semgrex pattern to the sentence
* @param pattern The Semgrex pattern to match against.
* @param fn The action to do on each match.
* @return the list of matches, after run through the function.
*/
public <T> List<T> semgrex(SemgrexPattern pattern, Function<SemgrexMatcher, T> fn) {
  SemgrexMatcher matcher = pattern.matcher(dependencyGraph());
  List<T> lst = new ArrayList<>();
  while (matcher.findNextMatchingNode()) {
    lst.add(fn.apply(matcher));
  }
  return lst;
}
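A usage sketch for this method from the simple-API Sentence class is shown below. The pattern and the lambda are illustrative only, and running it requires the standard CoreNLP models on the classpath, since the simple API parses the sentence lazily when dependencyGraph() is first needed.

import java.util.List;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.simple.Sentence;

public class SemgrexSentenceSketch {
  public static void main(String[] args) {
    Sentence sentence = new Sentence("My dog also likes eating sausage.");
    // Illustrative pattern: capture verb/subject pairs; the callback turns each match into a String.
    SemgrexPattern pattern = SemgrexPattern.compile("{}=verb >nsubj {}=subject");
    List<String> pairs = sentence.semgrex(pattern,
        m -> m.getNode("subject").word() + " -> " + m.getNode("verb").word());
    pairs.forEach(System.out::println);
  }
}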
use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.
the class EnglishGrammaticalStructure method correctWHAttachment.
/**
* Tries to correct complicated cases of WH-movement in
* sentences such as "What does Mary seem to have?" in
* which "What" should attach to "have" instead of the
* control verb.
*
* @param sg The Semantic graph to operate on.
*/
private static void correctWHAttachment(SemanticGraph sg) {
  /* Semgrexes require a graph with a root. */
  if (sg.getRoots().isEmpty())
    return;
  SemanticGraph sgCopy = sg.makeSoftCopy();
  SemgrexMatcher matcher = XCOMP_PATTERN.matcher(sgCopy);
  while (matcher.findNextMatchingNode()) {
    IndexedWord root = matcher.getNode("root");
    IndexedWord embeddedVerb = matcher.getNode("embedded");
    IndexedWord wh = matcher.getNode("wh");
    IndexedWord dobj = matcher.getNode("obj");
    /* Check if the object is a WH-word. */
    if (wh.tag().startsWith("W")) {
      boolean reattach = false;
      /* If the control verb already has an object, then
         we have to reattach the WH-word to the verb in the embedded clause. */
      if (dobj != null) {
        reattach = true;
      } else {
        /* If the control verb can't have an object, we also have to reattach. */
        String lemma = Morphology.lemmaStatic(root.value(), root.tag());
        if (lemma.matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX)) {
          reattach = true;
        }
      }
      if (reattach) {
        SemanticGraphEdge edge = sg.getEdge(root, wh);
        if (edge != null) {
          sg.removeEdge(edge);
          sg.addEdge(embeddedVerb, wh, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
        }
      }
    }
  }
}
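correctWHAttachment illustrates a common graph-editing idiom: run the Semgrex matcher over a soft copy (matching while mutating the same graph is unsafe) and rewire edges on the original with getEdge, removeEdge, and addEdge. Below is a minimal sketch of that idiom with a hypothetical pattern; it is not XCOMP_PATTERN, and the chained relations are both read as attaching to the first node, per Semgrex's default grouping.

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;

public class EdgeRewireSketch {
  // Hypothetical pattern: a governor with both an xcomp dependent and a dobj dependent.
  private static final SemgrexPattern PATTERN =
      SemgrexPattern.compile("{}=gov >xcomp {}=emb >dobj {}=obj");

  /** Sketch only: moves each matched dobj from the matrix verb to the embedded verb. */
  public static void rewire(SemanticGraph sg) {
    if (sg.getRoots().isEmpty()) {
      return;  // Semgrex needs a rooted graph, as in the method above
    }
    SemanticGraph sgCopy = sg.makeSoftCopy();         // match on a soft copy ...
    SemgrexMatcher matcher = PATTERN.matcher(sgCopy);
    while (matcher.findNextMatchingNode()) {
      IndexedWord gov = matcher.getNode("gov");
      IndexedWord emb = matcher.getNode("emb");
      IndexedWord obj = matcher.getNode("obj");
      SemanticGraphEdge edge = sg.getEdge(gov, obj);  // ... but edit the original graph
      if (edge != null) {
        sg.removeEdge(edge);
        sg.addEdge(emb, obj, EnglishGrammaticalRelations.DIRECT_OBJECT,
            Double.NEGATIVE_INFINITY, false);
      }
    }
  }
}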
use of edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher in project CoreNLP by stanfordnlp.
the class UniversalEnglishGrammaticalStructure method processSimple2WP.
/**
* Processes all the two-word prepositions in TWO_WORD_PREPS_REGULAR.
*/
private static void processSimple2WP(SemanticGraph sg, HashMap<String, HashSet<Integer>> bigrams) {
  for (String bigram : TWO_WORD_PREPS_REGULAR) {
    if (bigrams.get(bigram) == null) {
      continue;
    }
    for (Integer i : bigrams.get(bigram)) {
      IndexedWord w1 = sg.getNodeByIndexSafe(i);
      IndexedWord w2 = sg.getNodeByIndexSafe(i + 1);
      if (w1 == null || w2 == null) {
        continue;
      }
      SemgrexMatcher matcher = TWO_WORD_PREPS_REGULAR_PATTERN.matcher(sg);
      IndexedWord gov = null;
      while (matcher.find()) {
        if (w1.equals(matcher.getNode("w1")) && w2.equals(matcher.getNode("w2"))) {
          gov = matcher.getNode("gov");
          break;
        }
      }
      if (gov == null) {
        continue;
      }
      createMultiWordExpression(sg, gov, CASE_MARKER, w1, w2);
    }
  }
}
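The method expects bigrams to map each two-word preposition to the sentence indices at which its first word occurs. Below is a hedged sketch of how such an index could be built from a SemanticGraph; the underscore-joined, lowercased key format is an assumption for illustration, not necessarily what CoreNLP uses internally.

import java.util.HashMap;
import java.util.HashSet;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;

public class BigramIndexSketch {
  /**
   * Builds a map from lowercased "word1_word2" bigrams to the indices of word1.
   * Sketch only: the key format assumed here is illustrative.
   */
  public static HashMap<String, HashSet<Integer>> indexBigrams(SemanticGraph sg) {
    HashMap<String, HashSet<Integer>> bigrams = new HashMap<>();
    for (IndexedWord word : sg.vertexSet()) {
      IndexedWord next = sg.getNodeByIndexSafe(word.index() + 1);
      if (next == null) {
        continue;
      }
      String key = (word.word() + '_' + next.word()).toLowerCase();
      bigrams.computeIfAbsent(key, k -> new HashSet<>()).add(word.index());
    }
    return bigrams;
  }
}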