Search in sources :

Example 16 with SemgrexPattern

use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.

the class RuleBasedParser method parse.

/**
 * Builds a {@link SceneGraph} from a dependency graph by running a fixed
 * sequence of Semgrex patterns over it and turning each match into either an
 * edge between two nodes or an attribute on a node.
 *
 * <p>The graph is first passed to {@code SemanticGraphEnhancer.enhance} and is
 * then stored on the returned scene as {@code scene.sg}. The extraction passes
 * run in this order: subject/predicate/object triplets, ACL triplets,
 * subject/predicate pairs (including copular ones), adjectival modifiers,
 * adjectival predicates, prepositional modifiers, possessives, and agents.
 *
 * <p>NOTE(review): edges are built from freshly constructed
 * {@code SceneGraphNode} instances while attributes use
 * {@code scene.getOrAddNode}; whether {@code addEdge} de-duplicates nodes is
 * not visible here.
 *
 * @param sg the dependency graph to convert; it is handed to the enhancer before matching
 * @return the scene graph holding every edge and attribute that was extracted
 */
@Override
public SceneGraph parse(SemanticGraph sg) {
    SemanticGraphEnhancer.enhance(sg);
    SceneGraph scene = new SceneGraph();
    scene.sg = sg;

    addTripletEdges(scene, sg, SUBJ_PRED_OBJ_TRIPLET_PATTERN);
    addTripletEdges(scene, sg, ACL_PATTERN);
    addSubjectPredicatePairs(scene, sg);
    addAdjectiveAttributes(scene, sg, ADJ_MOD_PATTERN);
    addAdjectiveAttributes(scene, sg, ADJ_PRED_PATTERN);
    addPrepositionalModifierEdges(scene, sg);
    addPossessiveEdges(scene, sg);
    addAgentEdges(scene, sg);
    return scene;
}

/**
 * Adds one subj -> obj edge per match of a pattern that binds the nodes
 * "subj", "obj", "pred" and the relation "objreln".
 */
private void addTripletEdges(SceneGraph scene, SemanticGraph sg, SemgrexPattern pattern) {
    SemgrexMatcher matcher = pattern.matcher(sg);
    while (matcher.find()) {
        IndexedWord subj = matcher.getNode("subj");
        IndexedWord obj = matcher.getNode("obj");
        IndexedWord pred = matcher.getNode("pred");
        String reln = matcher.getRelnString("objreln");
        String predicate = getPredicate(sg, pred);
        // For nmod relations other than poss/agent, the part after "nmod:"
        // is appended to the predicate, with underscores turned into spaces.
        if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
            predicate += reln.replace("nmod:", " ").replace("_", " ");
        }
        SceneGraphNode node1 = new SceneGraphNode(subj);
        SceneGraphNode node2 = new SceneGraphNode(obj);
        scene.addEdge(node1, node2, predicate);
    }
}

/**
 * Handles SUBJ_PRED_PAIR_PATTERN and COPULAR_PATTERN matches: a predicate
 * with a case-marker child yields a subj -> pred edge labelled with the case
 * marker text; otherwise the predicate becomes an attribute of the subject.
 */
private void addSubjectPredicatePairs(SceneGraph scene, SemanticGraph sg) {
    SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN };
    for (SemgrexPattern p : subjPredPatterns) {
        SemgrexMatcher matcher = p.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord pred = matcher.getNode("pred");
            if (sg.hasChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
                IndexedWord caseMarker = sg.getChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER);
                String prep = caseMarker.value();
                // Append the value of every multi-word-expression child of the case marker.
                if (sg.hasChildWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                    for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                        prep = prep + " " + additionalCaseMarker.value();
                    }
                }
                SceneGraphNode node1 = new SceneGraphNode(subj);
                SceneGraphNode node2 = new SceneGraphNode(pred);
                scene.addEdge(node1, node2, prep);
            } else if (!pred.lemma().equals("be")) {
                // A predicate whose lemma is "be" is skipped rather than added as an attribute.
                SceneGraphNode node = scene.getOrAddNode(subj);
                node.addAttribute(pred);
            }
        }
    }
}

/**
 * Adds the node bound to "adj" as an attribute of the node bound to "obj"
 * for every match of the given pattern.
 */
private void addAdjectiveAttributes(SceneGraph scene, SemanticGraph sg, SemgrexPattern pattern) {
    SemgrexMatcher matcher = pattern.matcher(sg);
    while (matcher.find()) {
        IndexedWord obj = matcher.getNode("obj");
        IndexedWord adj = matcher.getNode("adj");
        SceneGraphNode node = scene.getOrAddNode(obj);
        node.addAttribute(adj);
    }
}

/**
 * Adds a gov -> mod edge per PP_MOD_PATTERN match, labelled with the relation
 * name minus "nmod:"; matches whose label is "poss" or "agent" are skipped.
 */
private void addPrepositionalModifierEdges(SceneGraph scene, SemanticGraph sg) {
    SemgrexMatcher matcher = PP_MOD_PATTERN.matcher(sg);
    while (matcher.find()) {
        IndexedWord gov = matcher.getNode("gov");
        IndexedWord mod = matcher.getNode("mod");
        String reln = matcher.getRelnString("reln");
        String predicate = reln.replace("nmod:", "").replace("_", " ");
        if (predicate.equals("poss") || predicate.equals("agent")) {
            continue;
        }
        SceneGraphNode node1 = new SceneGraphNode(gov);
        SceneGraphNode node2 = new SceneGraphNode(mod);
        scene.addEdge(node1, node2, predicate);
    }
}

/**
 * Adds a "have" edge per POSS_PATTERN match, directed from the node bound to
 * "mod" to the node bound to "gov".
 */
private void addPossessiveEdges(SceneGraph scene, SemanticGraph sg) {
    SemgrexMatcher matcher = POSS_PATTERN.matcher(sg);
    while (matcher.find()) {
        IndexedWord gov = matcher.getNode("gov");
        IndexedWord mod = matcher.getNode("mod");
        SceneGraphNode node1 = new SceneGraphNode(mod);
        SceneGraphNode node2 = new SceneGraphNode(gov);
        scene.addEdge(node1, node2, "have");
    }
}

/**
 * Adds a subj -> obj edge per AGENT_PATTERN match, labelled with the
 * predicate text computed by {@code getPredicate}.
 */
private void addAgentEdges(SceneGraph scene, SemanticGraph sg) {
    SemgrexMatcher matcher = AGENT_PATTERN.matcher(sg);
    while (matcher.find()) {
        IndexedWord subj = matcher.getNode("subj");
        IndexedWord obj = matcher.getNode("obj");
        IndexedWord pred = matcher.getNode("pred");
        SceneGraphNode node1 = new SceneGraphNode(subj);
        SceneGraphNode node2 = new SceneGraphNode(obj);
        scene.addEdge(node1, node2, getPredicate(sg, pred));
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 17 with SemgrexPattern

use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.

the class ProcessSemgrexRequest method processRequest.

/**
 * Answers one Semgrex request.
 *
 * <p>Every pattern string in the request is compiled before any graph is
 * deserialized. Then, for each dependency query in the request, the tokens and
 * graph are rebuilt from their protobuf form and each compiled pattern is run
 * against that graph through {@code matchSentence}. The response holds one
 * graph result per query, in request order, and each graph result holds one
 * entry per pattern, in pattern order.
 *
 * @param request the request carrying the pattern strings and the dependency queries
 * @return the assembled response
 */
public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.SemgrexRequest request) {
    List<SemgrexPattern> compiledPatterns = request.getSemgrexList()
            .stream()
            .map(source -> SemgrexPattern.compile(source))
            .collect(Collectors.toList());

    ProtobufAnnotationSerializer protoSerializer = new ProtobufAnnotationSerializer();
    CoreNLPProtos.SemgrexResponse.Builder response = CoreNLPProtos.SemgrexResponse.newBuilder();

    for (CoreNLPProtos.SemgrexRequest.Dependencies query : request.getQueryList()) {
        // Rebuild the sentence's tokens first; the graph is reconstructed on top of them.
        List<CoreLabel> sentenceTokens = query.getTokenList()
                .stream()
                .map(token -> protoSerializer.fromProto(token))
                .collect(Collectors.toList());
        SemanticGraph dependencies = ProtobufAnnotationSerializer.fromProto(query.getGraph(), sentenceTokens, "semgrex");

        CoreNLPProtos.SemgrexResponse.GraphResult.Builder perGraph = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
        for (SemgrexPattern compiled : compiledPatterns) {
            perGraph.addResult(matchSentence(compiled, dependencies));
        }
        response.addResult(perGraph.build());
    }

    return response.build();
}
Also used : SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) CoreLabel(edu.stanford.nlp.ling.CoreLabel) ProtobufAnnotationSerializer(edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer) CoreNLPProtos(edu.stanford.nlp.pipeline.CoreNLPProtos) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 18 with SemgrexPattern

use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.

the class SemgrexDemo method main.

/**
 * Small demonstration of Semgrex: builds a dependency graph from a
 * hard-coded bracketed tree, logs the graph, and logs every match of the
 * pattern <code>{}=A &lt;&lt;nsubj {}=B</code>.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    // Normally the tree would come from a parser or a treebank reader;
    // a literal is used here purely for illustration.
    String bracketedTree = "(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
    Tree tree = Tree.valueOf(bracketedTree);

    // Produces English uncollapsed dependencies as a SemanticGraph.
    // When many graphs are needed, the advice is to go through a
    // GrammaticalStructureFactory and build the intermediate
    // GrammaticalStructure instead, as shown below.
    SemanticGraph dependencies = SemanticGraphFactory.generateUncollapsedDependencies(tree);

    // The English params are used here; the params of another supported
    // language (e.g. Chinese) could be substituted.
    TreebankLangParserParams params = new EnglishTreebankParserParams();
    GrammaticalStructureFactory gsf = params.treebankLanguagePack().grammaticalStructureFactory(
            params.treebankLanguagePack().punctuationWordRejectFilter(),
            params.typedDependencyHeadFinder());
    // Built only to illustrate the factory route; gs is not used further in this demo.
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);

    log.info(dependencies);

    SemgrexPattern nsubjAncestor = SemgrexPattern.compile("{}=A <<nsubj {}=B");
    SemgrexMatcher hits = nsubjAncestor.matcher(dependencies);
    // Each hit is logged as "<A> <<nsubj <B>", using the nodes bound to A and B.
    while (hits.find()) {
        String line = hits.getNode("A") + " <<nsubj " + hits.getNode("B");
        log.info(line);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) GrammaticalStructureFactory(edu.stanford.nlp.trees.GrammaticalStructureFactory) GrammaticalStructure(edu.stanford.nlp.trees.GrammaticalStructure) Tree(edu.stanford.nlp.trees.Tree) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams)

Example 19 with SemgrexPattern

use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.

the class UniversalGrammaticalStructure method addCaseMarkerInformation.

/**
 * Runs each pattern in {@code PREP_PATTERNS} and, for every match, passes the
 * nodes bound to "gov", "mod" and "c1" to {@code addCaseMarkersToReln}
 * together with {@code sg}.
 *
 * <p>Matching is done on a soft copy of {@code sg}, while the original
 * {@code sg} is what gets handed to {@code addCaseMarkersToReln}. A match whose
 * "c1" node equals the "c1" node of the most recently processed match for the
 * same pattern is skipped.
 *
 * @param sg the graph passed on to {@code addCaseMarkersToReln}
 */
public static final void addCaseMarkerInformation(SemanticGraph sg) {
    for (SemgrexPattern prepPattern : PREP_PATTERNS) {
        // NOTE(review): presumably a copy is matched so that changes made to sg
        // cannot disturb the running matcher; confirm against addCaseMarkersToReln.
        SemgrexMatcher matches = prepPattern.matcher(sg.makeSoftCopy());
        IndexedWord previousMarker = null;
        while (matches.find()) {
            IndexedWord currentMarker = matches.getNode("c1");
            boolean repeatsPrevious = previousMarker != null && currentMarker.equals(previousMarker);
            if (!repeatsPrevious) {
                IndexedWord governor = matches.getNode("gov");
                IndexedWord modifier = matches.getNode("mod");
                addCaseMarkersToReln(sg, governor, modifier, currentMarker);
                previousMarker = currentMarker;
            }
        }
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 20 with SemgrexPattern

use of edu.stanford.nlp.semgraph.semgrex.SemgrexPattern in project CoreNLP by stanfordnlp.

the class ExtractPhraseFromPattern method printMatchedGraphsForPattern.

/**
 * Writes, for every pattern in {@code matchedGraphsForPattern}, a header line
 * with the pattern text followed by at most {@code maxGraphsPerPattern} of the
 * graphs recorded for it. Each graph is preceded by the string it is paired with.
 *
 * <p>The writer is opened in a try-with-resources statement, so the file is
 * closed even when a write fails part-way through.
 *
 * @param filename path of the file to create or overwrite
 * @param maxGraphsPerPattern upper bound on the number of graphs written per pattern
 * @throws Exception if the file cannot be opened or written
 */
public void printMatchedGraphsForPattern(String filename, int maxGraphsPerPattern) throws Exception {
    try (BufferedWriter w = new BufferedWriter(new FileWriter(filename))) {
        for (Entry<SemgrexPattern, List<Pair<String, SemanticGraph>>> en : matchedGraphsForPattern.entrySet()) {
            w.write("\n\nFor Pattern: " + en.getKey().pattern() + "\n");
            int num = 0;
            for (Pair<String, SemanticGraph> gEn : en.getValue()) {
                num++;
                // Stop once the per-pattern limit has been exceeded.
                if (num > maxGraphsPerPattern) {
                    break;
                }
                w.write(gEn.first() + "\n" + gEn.second().toFormattedString() + "\n\n");
            }
        }
    }
}
Also used : SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) FileWriter(java.io.FileWriter) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) ArrayList(java.util.ArrayList) List(java.util.List) BufferedWriter(java.io.BufferedWriter)

Aggregations

SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)21 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)12 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)12 IndexedWord (edu.stanford.nlp.ling.IndexedWord)11 CoreLabel (edu.stanford.nlp.ling.CoreLabel)6 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)5 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)3 TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter)3 Span (edu.stanford.nlp.ie.machinereading.structure.Span)2 TokenSequencePattern (edu.stanford.nlp.ling.tokensregex.TokenSequencePattern)2 CandidatePhrase (edu.stanford.nlp.patterns.CandidatePhrase)2 DataInstance (edu.stanford.nlp.patterns.DataInstance)2 Pattern (edu.stanford.nlp.patterns.Pattern)2 PatternsAnnotations (edu.stanford.nlp.patterns.PatternsAnnotations)2 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)2 CollectionValuedMap (edu.stanford.nlp.util.CollectionValuedMap)2 IntPair (edu.stanford.nlp.util.IntPair)2 Pair (edu.stanford.nlp.util.Pair)2 Triple (edu.stanford.nlp.util.Triple)2 ArrayList (java.util.ArrayList)2