Use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.
Class RuleBasedParser, method parse.
/**
 * Builds a {@link SceneGraph} from a dependency graph by running a fixed
 * sequence of Semgrex patterns over it. Each match becomes either a labelled
 * edge between two freshly created nodes or an attribute on a node obtained
 * through {@code scene.getOrAddNode}.
 *
 * <p>The graph is first handed to {@code SemanticGraphEnhancer.enhance} and is
 * stored on the result as {@code scene.sg}.
 *
 * @param sg the dependency graph to convert
 * @return the scene graph populated from all pattern matches
 */
@Override
public SceneGraph parse(SemanticGraph sg) {
  SemanticGraphEnhancer.enhance(sg);
  SceneGraph scene = new SceneGraph();
  scene.sg = sg;

  // Both patterns expose the same named groups (subj, obj, pred, objreln)
  // and are turned into subject --predicate--> object edges the same way.
  SemgrexPattern[] triplePatterns = { SUBJ_PRED_OBJ_TRIPLET_PATTERN, ACL_PATTERN };
  for (SemgrexPattern triplePattern : triplePatterns) {
    SemgrexMatcher tripleMatcher = triplePattern.matcher(sg);
    while (tripleMatcher.find()) {
      IndexedWord subject = tripleMatcher.getNode("subj");
      IndexedWord object = tripleMatcher.getNode("obj");
      IndexedWord predWord = tripleMatcher.getNode("pred");
      String objReln = tripleMatcher.getRelnString("objreln");
      String edgeLabel = getPredicate(sg, predWord);
      // For nmod:* relations other than nmod:poss and nmod:agent, append the
      // relation suffix (underscores turned into spaces) to the predicate.
      boolean possOrAgent = objReln.equals("nmod:poss") || objReln.equals("nmod:agent");
      if (objReln.startsWith("nmod:") && !possOrAgent) {
        edgeLabel = edgeLabel + objReln.replace("nmod:", " ").replace("_", " ");
      }
      scene.addEdge(new SceneGraphNode(subject), new SceneGraphNode(object), edgeLabel);
    }
  }

  SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN };
  for (SemgrexPattern pairPattern : subjPredPatterns) {
    SemgrexMatcher pairMatcher = pairPattern.matcher(sg);
    while (pairMatcher.find()) {
      IndexedWord subject = pairMatcher.getNode("subj");
      IndexedWord predWord = pairMatcher.getNode("pred");

      if (!sg.hasChildWithReln(predWord, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
        // No case marker: the predicate word becomes an attribute of the
        // subject, unless its lemma is "be".
        if (!predWord.lemma().equals("be")) {
          scene.getOrAddNode(subject).addAttribute(predWord);
        }
        continue;
      }

      // With a case marker: the marker (plus any multi-word-expression
      // children, space-separated) labels an edge from subject to predicate.
      IndexedWord marker = sg.getChildWithReln(predWord, UniversalEnglishGrammaticalRelations.CASE_MARKER);
      String preposition = marker.value();
      if (sg.hasChildWithReln(marker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
        for (IndexedWord extraMarker : sg.getChildrenWithReln(marker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
          preposition = preposition + " " + extraMarker.value();
        }
      }
      scene.addEdge(new SceneGraphNode(subject), new SceneGraphNode(predWord), preposition);
    }
  }

  // Both adjective patterns bind "obj" and "adj"; the adjective is attached
  // as an attribute of the object's node.
  SemgrexPattern[] adjectivePatterns = { ADJ_MOD_PATTERN, ADJ_PRED_PATTERN };
  for (SemgrexPattern adjectivePattern : adjectivePatterns) {
    SemgrexMatcher adjectiveMatcher = adjectivePattern.matcher(sg);
    while (adjectiveMatcher.find()) {
      IndexedWord object = adjectiveMatcher.getNode("obj");
      IndexedWord adjective = adjectiveMatcher.getNode("adj");
      scene.getOrAddNode(object).addAttribute(adjective);
    }
  }

  // Modifier edges labelled with the relation name minus its "nmod:" prefix;
  // "poss" and "agent" are skipped here and handled by the loops below.
  SemgrexMatcher ppMatcher = PP_MOD_PATTERN.matcher(sg);
  while (ppMatcher.find()) {
    IndexedWord governor = ppMatcher.getNode("gov");
    IndexedWord modifier = ppMatcher.getNode("mod");
    String edgeLabel = ppMatcher.getRelnString("reln").replace("nmod:", "").replace("_", " ");
    if (!edgeLabel.equals("poss") && !edgeLabel.equals("agent")) {
      scene.addEdge(new SceneGraphNode(governor), new SceneGraphNode(modifier), edgeLabel);
    }
  }

  // Possessives: the edge runs from the modifier to the governor, labelled "have".
  SemgrexMatcher possMatcher = POSS_PATTERN.matcher(sg);
  while (possMatcher.find()) {
    IndexedWord governor = possMatcher.getNode("gov");
    IndexedWord modifier = possMatcher.getNode("mod");
    scene.addEdge(new SceneGraphNode(modifier), new SceneGraphNode(governor), "have");
  }

  // Agent constructions: subject --predicate--> object, with no relation suffix.
  SemgrexMatcher agentMatcher = AGENT_PATTERN.matcher(sg);
  while (agentMatcher.find()) {
    IndexedWord subject = agentMatcher.getNode("subj");
    IndexedWord object = agentMatcher.getNode("obj");
    IndexedWord predWord = agentMatcher.getNode("pred");
    scene.addEdge(new SceneGraphNode(subject), new SceneGraphNode(object), getPredicate(sg, predWord));
  }

  return scene;
}
Use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.
Class ProcessSemgrexRequest, method processRequest.
/**
 * Runs every Semgrex pattern contained in a request against every
 * dependency graph contained in that same request.
 *
 * <p>All pattern strings are compiled before any sentence is processed. Each
 * query sentence is then rebuilt as a {@link SemanticGraph} from its tokens
 * and graph, and one result per compiled pattern is added to that sentence's
 * graph result.
 *
 * @param request the request carrying the Semgrex pattern strings and the query sentences
 * @return a response with one graph result added per query sentence
 */
public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.SemgrexRequest request) {
  ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();

  List<SemgrexPattern> compiledPatterns = request.getSemgrexList()
      .stream()
      .map(SemgrexPattern::compile)
      .collect(Collectors.toList());

  CoreNLPProtos.SemgrexResponse.Builder response = CoreNLPProtos.SemgrexResponse.newBuilder();
  for (CoreNLPProtos.SemgrexRequest.Dependencies query : request.getQueryList()) {
    // Rebuild the sentence's tokens, then the dependency graph over them.
    List<CoreLabel> sentenceTokens = query.getTokenList()
        .stream()
        .map(serializer::fromProto)
        .collect(Collectors.toList());
    SemanticGraph dependencies = ProtobufAnnotationSerializer.fromProto(query.getGraph(), sentenceTokens, "semgrex");

    CoreNLPProtos.SemgrexResponse.GraphResult.Builder perGraph = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
    for (SemgrexPattern compiled : compiledPatterns) {
      perGraph.addResult(matchSentence(compiled, dependencies));
    }
    response.addResult(perGraph.build());
  }
  return response.build();
}
Use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.
Class SemgrexDemo, method main.
/**
 * Demo entry point: builds a dependency graph from a hard-coded bracketed
 * parse tree, logs the graph, and then logs every match of a sample Semgrex
 * pattern against it.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  // Normally the tree would come from a parser or a treebank; a literal is
  // used here purely for demonstration.
  Tree tree = Tree.valueOf("(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))");

  // One-shot route to English uncollapsed dependencies as a SemanticGraph.
  // When creating many SemanticGraphs, build a GrammaticalStructureFactory
  // once and generate the intermediate GrammaticalStructure from it instead.
  SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);

  // Illustration of that factory route. The params object could equally be
  // the Chinese one or that of any other supported language (the original
  // note, dated 2014, names only English and Chinese). The resulting
  // structure is not used further in this demo.
  TreebankLangParserParams langParams = new EnglishTreebankParserParams();
  GrammaticalStructureFactory structureFactory = langParams.treebankLanguagePack().grammaticalStructureFactory(
      langParams.treebankLanguagePack().punctuationWordRejectFilter(),
      langParams.typedDependencyHeadFinder());
  GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);

  log.info(graph);

  // Log every (A, B) binding of the pattern. Per the original note, on this
  // tree the matched node is an ancestor of both "dog" and "my" via the
  // nsubj relation.
  SemgrexPattern pattern = SemgrexPattern.compile("{}=A <<nsubj {}=B");
  for (SemgrexMatcher found = pattern.matcher(graph); found.find(); ) {
    log.info(found.getNode("A") + " <<nsubj " + found.getNode("B"));
  }
}
Use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.
Class UniversalGrammaticalStructure, method addCaseMarkerInformation.
/**
 * Applies every pattern in {@code PREP_PATTERNS} to the graph and, for each
 * match, passes the matched governor, modifier and case marker to
 * {@code addCaseMarkersToReln}.
 *
 * <p>A match whose {@code c1} node equals the case marker of the most
 * recently processed match for the same pattern is skipped.
 *
 * @param sg the graph passed to {@code addCaseMarkersToReln}; matching itself
 *           runs on a soft copy made once per pattern
 */
public static final void addCaseMarkerInformation(SemanticGraph sg) {
  for (SemgrexPattern prepPattern : PREP_PATTERNS) {
    // The matcher walks a copy while sg is what addCaseMarkersToReln receives
    // (presumably so that edits to sg do not disturb matching; confirm).
    SemgrexMatcher found = prepPattern.matcher(sg.makeSoftCopy());
    IndexedWord lastHandledMarker = null;
    while (found.find()) {
      IndexedWord marker = found.getNode("c1");
      boolean sameAsLast = lastHandledMarker != null && marker.equals(lastHandledMarker);
      if (!sameAsLast) {
        IndexedWord governor = found.getNode("gov");
        IndexedWord modifier = found.getNode("mod");
        addCaseMarkersToReln(sg, governor, modifier, marker);
        lastHandledMarker = marker;
      }
    }
  }
}
Use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.
Class ExtractPhraseFromPattern, method printMatchedGraphsForPattern.
/**
 * Writes, for every pattern in {@code matchedGraphsForPattern}, a header line
 * with the pattern text followed by up to {@code maxGraphsPerPattern} of its
 * matched graphs (each preceded by the string paired with it).
 *
 * <p>The writer is opened with try-with-resources so the file handle is
 * released even when a write or graph formatting call throws.
 *
 * @param filename path of the file to create or overwrite
 * @param maxGraphsPerPattern maximum number of graphs written per pattern;
 *        zero or a negative value writes only the pattern headers
 * @throws Exception if the file cannot be opened or written
 */
public void printMatchedGraphsForPattern(String filename, int maxGraphsPerPattern) throws Exception {
  try (BufferedWriter w = new BufferedWriter(new FileWriter(filename))) {
    for (Entry<SemgrexPattern, List<Pair<String, SemanticGraph>>> en : matchedGraphsForPattern.entrySet()) {
      w.write("\n\nFor Pattern: " + en.getKey().pattern() + "\n");
      int written = 0;
      for (Pair<String, SemanticGraph> gEn : en.getValue()) {
        // Stop once the per-pattern cap has been reached.
        if (written >= maxGraphsPerPattern) {
          break;
        }
        w.write(gEn.first() + "\n" + gEn.second().toFormattedString() + "\n\n");
        written++;
      }
    }
  }
}
Aggregations