Search in sources :

Example 36 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project Info-Evaluation by TechnionYP5777.

the class AnalyzeParagragh method Analyze.

/**
 * Annotates the input text with Stanford CoreNLP and builds a {@link TableTuple}
 * from the dependency edges that leave each sentence root.
 *
 * <p>Edges whose governor is the word "arrested" are inspected for:
 * <ul>
 *   <li>an {@code nsubjpass} dependent, which (with its PERSON / compound / det
 *       children) forms the "accurate" name;</li>
 *   <li>an {@code advcl}, {@code advcl:for} or {@code nmod:for} dependent, which
 *       (with selected children) forms the reason and extra details.</li>
 * </ul>
 * Edges with any other governor only contribute "in/during/at ..." detail fragments.
 *
 * @return a tuple holding the accurate name (or {@code getName()} when none was
 *         found), the result of {@code getDate(year)}, and the trimmed
 *         "reason details" text
 */
public TableTuple Analyze() {
    // The "annotators" property selects which annotators the pipeline loads; see
    // http://nlp.stanford.edu/software/corenlp.shtml for the accepted values.
    final Properties props = new Properties();
    // setProperty is the type-safe way to populate a Properties object.
    props.setProperty("annotators", "tokenize,ssplit, pos, regexner, parse,lemma,natlog,openie");
    final StanfordCoreNLP pipeLine = new StanfordCoreNLP(props);
    // Concatenating with "" converts the input to a String without failing on null.
    final String inputText = input + "";
    final Annotation document = new Annotation(inputText);
    pipeLine.annotate(document);
    final String fallbackName = getName();
    final String inputDate = getDate(year);
    final StringBuilder reason = new StringBuilder();
    // Extra details about the reason, e.g. where it happened.
    final StringBuilder details = new StringBuilder();
    // NOTE(review): aux is never reset, so xcomp words collected earlier are appended
    // again for every later reason edge - behaviour preserved, confirm it is intended.
    final StringBuilder aux = new StringBuilder();
    final StringBuilder accurateName = new StringBuilder();
    for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
        final SemanticGraph dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        if (dependencies == null) {
            // No dependency graph for this sentence: nothing to extract from it.
            continue;
        }
        for (final IndexedWord root : dependencies.getRoots()) {
            for (final SemanticGraphEdge edge : dependencies.getOutEdgesSorted(root)) {
                final IndexedWord dep = edge.getDependent();
                final String rel = edge.getRelation() + "";
                if (!"arrested".equals(edge.getGovernor().word())) {
                    switch (rel) {
                        case "nmod:in":
                            details.append("in ").append(dep.word()).append(' ');
                            break;
                        case "nmod:during":
                            details.append("during ").append(dep.word()).append(' ');
                            break;
                        case "nmod:at":
                            details.append("at ").append(dep.word()).append(' ');
                            break;
                        default:
                            break;
                    }
                    continue;
                }
                // Find the name in a more accurate manner: passive subject plus its modifiers.
                if ("nsubjpass".equals(rel)) {
                    for (final SemanticGraphEdge keshet : dependencies.getOutEdgesSorted(dep)) {
                        final IndexedWord dep2 = keshet.getDependent();
                        final String rel2 = keshet.getRelation() + "";
                        // "PERSON".equals(null) is false, so no separate null check is needed.
                        if ("PERSON".equals(dep2.ner()) || "compound".equals(rel2) || "det".equals(rel2)) {
                            accurateName.append(dep2.word()).append(' ');
                        }
                    }
                    accurateName.append(dep.word());
                }
                // Find the reason in the paragraph.
                if ("advcl".equals(rel) || "advcl:for".equals(rel) || "nmod:for".equals(rel)) {
                    for (final SemanticGraphEdge keshet : dependencies.getOutEdgesSorted(dep)) {
                        final String rel2 = keshet.getRelation() + "";
                        final IndexedWord dep2 = keshet.getDependent();
                        if ("amod".equals(rel2) || "dobj".equals(rel2)) {
                            reason.append(dep2.word()).append(' ');
                        }
                        if ("xcomp".equals(rel2)) {
                            aux.append(' ').append(dep2.word());
                        }
                        switch (rel2) {
                            case "nmod:in":
                                // Location: "in", then its compound modifiers, then the head word.
                                details.append("in ");
                                for (final SemanticGraphEdge keshet2 : dependencies.getOutEdgesSorted(dep2)) {
                                    if ("compound".equals(keshet2.getRelation() + "")) {
                                        details.append(keshet2.getDependent().word()).append(' ');
                                    }
                                }
                                // NOTE(review): no trailing space here (unlike the other cases), so a
                                // following fragment is glued on directly - preserved from the original.
                                details.append(dep2.word());
                                break;
                            case "nmod:during":
                                details.append("during ").append(dep2.word()).append(' ');
                                break;
                            case "nmod:under":
                                details.append("under ").append(dep2.word()).append(' ');
                                break;
                            case "nmod:of":
                                // NOTE(review): also appended without a trailing space - preserved.
                                details.append("of ").append(dep2.word());
                                break;
                            case "nmod:at":
                                details.append("at ").append(dep2.word()).append(' ');
                                break;
                            default:
                                break;
                        }
                        if ("suspicion".equals(keshet.getSource().word()) && "acl:of".equals(rel2)) {
                            details.append(dep2.word());
                        }
                    }
                    reason.append(dep.word());
                    reason.append(aux);
                }
            }
        }
    }
    final String name = accurateName.length() == 0 ? fallbackName : accurateName.toString();
    return new TableTuple(name, inputDate, (reason + " " + details).trim());
}
Also used : TableTuple(main.database.TableTuple) InteractiveTableTuple(main.database.InteractiveTableTuple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Properties(java.util.Properties) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) CollapsedDependenciesAnnotation(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 37 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

the class EnglishGrammaticalStructure method correctWHAttachment.

/**
 * Tries to repair WH-movement attachments in sentences such as
 * "What does Mary seem to have?", where "What" should depend on "have"
 * rather than on the control verb.
 *
 * <p>For every match of {@code XCOMP_PATTERN} whose "wh" node carries a tag
 * starting with "W", the edge from the "root" node to the WH-word is replaced
 * by a {@code DIRECT_OBJECT} edge from the "embedded" node, provided that
 * either an "obj" node was matched or the lemma of the "root" node matches
 * {@code EnglishPatterns.NP_V_S_INF_VERBS_REGEX}.
 *
 * @param sg The semantic graph to operate on; it is modified in place.
 */
private static void correctWHAttachment(SemanticGraph sg) {
    // Semgrexes require a graph with a root.
    if (sg.getRoots().isEmpty()) {
        return;
    }
    // The pattern is matched on a soft copy while the edits are applied to sg itself.
    SemanticGraph matchGraph = sg.makeSoftCopy();
    SemgrexMatcher whMatcher = XCOMP_PATTERN.matcher(matchGraph);
    while (whMatcher.findNextMatchingNode()) {
        IndexedWord controlVerb = whMatcher.getNode("root");
        IndexedWord embeddedVerb = whMatcher.getNode("embedded");
        IndexedWord whWord = whMatcher.getNode("wh");
        IndexedWord controlObject = whMatcher.getNode("obj");
        // Only WH-words are candidates for reattachment.
        if (!whWord.tag().startsWith("W")) {
            continue;
        }
        // Reattach when the control verb already has an object, or when it is a
        // verb that cannot take one (the lemma is only computed in the latter case).
        boolean mustReattach = controlObject != null
                || Morphology.lemmaStatic(controlVerb.value(), controlVerb.tag()).matches(EnglishPatterns.NP_V_S_INF_VERBS_REGEX);
        if (!mustReattach) {
            continue;
        }
        SemanticGraphEdge whEdge = sg.getEdge(controlVerb, whWord);
        if (whEdge == null) {
            continue;
        }
        sg.removeEdge(whEdge);
        sg.addEdge(embeddedVerb, whWord, DIRECT_OBJECT, Double.NEGATIVE_INFINITY, false);
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 38 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method processNamesHelper.

/**
 * Restructures the dependencies of a name so that its leftmost part becomes the head.
 *
 * <p>When {@code nameParts} is empty, only the {@code COMPOUND_MODIFIER} edges
 * leaving {@code oldHead} are turned into {@code NOMINAL_MODIFIER} edges.
 * Otherwise {@code nameParts} is sorted in place, and the method gives up
 * silently if the tokens between the first name part and {@code oldHead} do
 * not form a contiguous NP, or if {@code oldHead} has neither a parent nor
 * root status.
 *
 * @param sg        the graph to modify in place
 * @param oldHead   the current head of the name
 * @param nameParts the other tokens of the name; sorted as a side effect
 */
private static void processNamesHelper(SemanticGraph sg, IndexedWord oldHead, List<IndexedWord> nameParts) {
    if (nameParts.isEmpty()) {
        // The named entity spans a single token: turn compound relations into nmod
        // relations to get the right structure for NPs with additional modifiers
        // such as "Mrs. Clinton".
        Set<IndexedWord> dependents = new HashSet<>(sg.getChildren(oldHead));
        for (IndexedWord dependent : dependents) {
            SemanticGraphEdge current = sg.getEdge(oldHead, dependent);
            if (current.getRelation() != UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
                continue;
            }
            sg.addEdge(oldHead, dependent, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER, current.getWeight(), current.isExtra());
            sg.removeEdge(current);
        }
        return;
    }

    Collections.sort(nameParts);

    // Check whether {nameParts[0], ..., nameParts[n], oldHead} form a contiguous NP.
    int headIndex = oldHead.index();
    for (int position = nameParts.get(0).index(); position < headIndex; position++) {
        IndexedWord token = sg.getNodeByIndexSafe(position);
        // A missing node, or a token that is outside nameParts and accepted by
        // PUNCT_TAG_FILTER (i.e. not a punctuation mark), breaks contiguity.
        if (token == null || (!nameParts.contains(token) && PUNCT_TAG_FILTER.test(token.tag()))) {
            return;
        }
    }

    IndexedWord governor = sg.getParent(oldHead);
    if (governor == null && !sg.getRoots().contains(oldHead)) {
        return;
    }

    IndexedWord newHead = nameParts.get(0);
    // Iterate over a snapshot because the edges of oldHead are rewritten below.
    Set<IndexedWord> dependents = new HashSet<>(sg.getChildren(oldHead));
    for (IndexedWord dependent : dependents) {
        if (dependent == newHead) {
            // Make the leftmost word the new head.
            if (governor != null) {
                SemanticGraphEdge incoming = sg.getEdge(governor, oldHead);
                sg.addEdge(governor, newHead, incoming.getRelation(), incoming.getWeight(), incoming.isExtra());
                sg.removeEdge(incoming);
            } else {
                sg.getRoots().add(newHead);
                sg.getRoots().remove(oldHead);
            }
            // Swap the direction of the relation between old and new head and
            // change it to a name relation.
            SemanticGraphEdge headLink = sg.getEdge(oldHead, newHead);
            sg.addEdge(newHead, oldHead, UniversalEnglishGrammaticalRelations.NAME_MODIFIER, headLink.getWeight(), headLink.isExtra());
            sg.removeEdge(headLink);
            continue;
        }

        // Every other child moves from the old head to the new head.
        GrammaticalRelation reln;
        boolean partOfName = nameParts.contains(dependent);
        SemanticGraphEdge current = sg.getEdge(oldHead, dependent);
        if (partOfName) {
            // Another part of the name: attach it with a name relation.
            reln = UniversalEnglishGrammaticalRelations.NAME_MODIFIER;
        } else if (current.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
            // If not the entire compound is part of a named entity, attach the
            // other tokens via an nmod relation.
            reln = UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER;
        } else {
            reln = current.getRelation();
        }
        sg.addEdge(newHead, dependent, reln, current.getWeight(), current.isExtra());
        sg.removeEdge(current);
    }
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 39 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method expandPPConjunction.

/**
 * Expands a conjunction below a nominal modifier by giving every conjunct its
 * own soft copy of the modifier's governor.
 *
 * <p>For each conjunct in {@code conjDeps}, a soft copy of the parent of
 * {@code gov} is created; an existing edge {@code gov -> conjunct} is replaced
 * by an edge from that copy carrying the relation between the parent and
 * {@code gov}, and the copy is attached to the conjunction governor with
 * {@code CONJUNCT}. An existing edge {@code gov -> ccDep} is moved to the
 * conjunction governor as {@code COORDINATION}. Does nothing when {@code gov}
 * has no parent.
 *
 * @param sg       the graph to modify in place
 * @param gov      the first conjunct, i.e. the governor of the conj edges
 * @param conjDeps the remaining conjuncts
 * @param ccDep    the coordinating conjunction node
 */
private static void expandPPConjunction(SemanticGraph sg, IndexedWord gov, List<IndexedWord> conjDeps, IndexedWord ccDep) {
    IndexedWord parent = sg.getParent(gov);
    if (parent == null) {
        return;
    }

    // Use the original node as conjunction governor when the parent has one.
    IndexedWord conjunctionHead = parent;
    if (parent.getOriginal() != null) {
        conjunctionHead = parent.getOriginal();
    }
    GrammaticalRelation parentRelation = sg.reln(parent, gov);

    List<IndexedWord> parentCopies = Generics.newLinkedList();
    for (IndexedWord conjunct : conjDeps) {
        IndexedWord parentCopy = parent.makeSoftCopy();

        // Change conj(nmod-1, nmod-2) to nmod(nmod-1-gov, nmod-2).
        SemanticGraphEdge conjEdge = sg.getEdge(gov, conjunct);
        if (conjEdge != null) {
            sg.removeEdge(conjEdge);
            sg.addEdge(parentCopy, conjunct, parentRelation, Double.NEGATIVE_INFINITY, false);
        }

        // Link the copy node to the conjunction governor.
        sg.addEdge(conjunctionHead, parentCopy, CONJUNCT, Double.NEGATIVE_INFINITY, false);
        parentCopies.add(parentCopy);
    }

    // Attach the CC node to the conjunction governor.
    SemanticGraphEdge ccEdge = sg.getEdge(gov, ccDep);
    if (ccEdge != null) {
        sg.removeEdge(ccEdge);
        sg.addEdge(conjunctionHead, ccDep, COORDINATION, Double.NEGATIVE_INFINITY, false);
    }

    // Record the conjunction on these relations right away: several coordinating
    // conjunctions may be added while collapsing, and later on it might no longer
    // be known which conjunction belongs to which conjunct.
    addConjToReln(sg, conjunctionHead, parentCopies, ccDep);
}
Also used : GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 40 with SemanticGraphEdge

use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

the class UniversalEnglishGrammaticalStructure method treatCC.

/**
 * Propagates dependencies across conjuncts by adding extra edges to {@code sg}.
 *
 * <p>A first pass over the edges of {@code sg} collects lookup tables. A second
 * pass iterates over the edges of a soft copy of {@code sg} and, for every edge
 * whose relation is in {@code UniversalEnglishGrammaticalRelations.getConjs()}:
 * <ul>
 *   <li>copies the incoming relations of the conjunction governor onto the
 *       conjunct (except {@code ROOT} and {@code CASE_MARKER}, and with extra
 *       restrictions when both are governors of relative-clause edges);</li>
 *   <li>shares the governor's subject with the conjunct when the conjunct's tag
 *       starts with "VB" or "JJ" and it has no recorded subject of its own.</li>
 * </ul>
 * Every edge added here has weight {@code Double.NEGATIVE_INFINITY} and is
 * flagged as extra.
 *
 * @param sg the semantic graph to extend in place
 */
private static void treatCC(SemanticGraph sg) {
    // Construct a map from tree nodes to the set of typed
    // dependencies in which the node appears as dependent.
    Map<IndexedWord, Set<SemanticGraphEdge>> map = Generics.newHashMap();
    // Construct a map of tree nodes being governor of a subject grammatical
    // relation to that relation
    Map<IndexedWord, SemanticGraphEdge> subjectMap = Generics.newHashMap();
    // Construct a set of TreeGraphNodes with a passive auxiliary on them
    Set<IndexedWord> withPassiveAuxiliary = Generics.newHashSet();
    // Construct a map of tree nodes being governor of an object grammatical
    // relation to that relation
    // Map<TreeGraphNode, TypedDependency> objectMap = new
    // HashMap<TreeGraphNode, TypedDependency>();
    // Governors of RELATIVE_CLAUSE_MODIFIER edges.
    List<IndexedWord> rcmodHeads = Generics.newArrayList();
    // Dependents of "acl:" / "advcl:" edges. Within this method the list is only
    // filled; the only reader is the commented-out object propagation at the end.
    List<IndexedWord> prepcDep = Generics.newArrayList();
    // First pass: fill the lookup tables from the current edges of sg.
    for (SemanticGraphEdge edge : sg.edgeIterable()) {
        if (!map.containsKey(edge.getDependent())) {
            // NB: Here and in other places below, we use a TreeSet (which extends
            // SortedSet) to guarantee that results are deterministic)
            map.put(edge.getDependent(), new TreeSet<>());
        }
        map.get(edge.getDependent()).add(edge);
        if (edge.getRelation().equals(AUX_PASSIVE_MODIFIER)) {
            withPassiveAuxiliary.add(edge.getGovernor());
        }
        // look for subjects: the test is on the PARENT of the edge's relation, and
        // only the first such edge seen per governor is kept
        if (edge.getRelation().getParent() == NOMINAL_SUBJECT || edge.getRelation().getParent() == SUBJECT || edge.getRelation().getParent() == CLAUSAL_SUBJECT) {
            if (!subjectMap.containsKey(edge.getGovernor())) {
                subjectMap.put(edge.getGovernor(), edge);
            }
        }
        // look for rcmod relations
        if (edge.getRelation() == RELATIVE_CLAUSE_MODIFIER) {
            rcmodHeads.add(edge.getGovernor());
        }
        // to avoid wrong propagation of dobj
        if (edge.getRelation().toString().startsWith("acl:") || edge.getRelation().toString().startsWith("advcl:")) {
            prepcDep.add(edge.getDependent());
        }
    }
    // log.info(map);
    // if (DEBUG) log.info("Subject map: " + subjectMap);
    // if (DEBUG) log.info("Object map: " + objectMap);
    // log.info(rcmodHeads);
    // create a new list of typed dependencies
    //Collection<TypedDependency> newTypedDeps = new ArrayList<TypedDependency>(list);
    // Second pass: iterate over the edges of a soft copy while new edges are added to sg.
    SemanticGraph sgCopy = sg.makeSoftCopy();
    // find typed deps of form conj(gov,dep)
    for (SemanticGraphEdge edge : sgCopy.edgeIterable()) {
        if (UniversalEnglishGrammaticalRelations.getConjs().contains(edge.getRelation())) {
            IndexedWord gov = edge.getGovernor();
            IndexedWord dep = edge.getDependent();
            // look at the dep in the conjunct
            // (these are the edges in which the conjunction governor is the dependent)
            Set<SemanticGraphEdge> gov_relations = map.get(gov);
            // log.info("gov " + gov);
            if (gov_relations != null) {
                for (SemanticGraphEdge edge1 : gov_relations) {
                    // log.info("gov rel " + td1);
                    IndexedWord newGov = edge1.getGovernor();
                    // is possible to have overlapping newGov & dep
                    if (newGov.equals(dep)) {
                        continue;
                    }
                    GrammaticalRelation newRel = edge1.getRelation();
                    //TODO: Do we want to copy case markers here?
                    if (newRel != ROOT && newRel != CASE_MARKER) {
                        if (rcmodHeads.contains(gov) && rcmodHeads.contains(dep)) {
                            // to prevent wrong propagation in the case of long dependencies in relative clauses
                            if (newRel != DIRECT_OBJECT && newRel != NOMINAL_SUBJECT) {
                                if (DEBUG) {
                                    log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep + " (subj/obj case)");
                                }
                                sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
                            }
                        } else {
                            if (DEBUG) {
                                log.info("Adding new " + newRel + " dependency from " + newGov + " to " + dep);
                            }
                            sg.addEdge(newGov, dep, newRel, Double.NEGATIVE_INFINITY, true);
                        }
                    }
                }
            }
            // propagate subjects
            // look at the gov in the conjunct: if it has a subject relation,
            // the dep is a verb and the dep doesn't have a subject relation
            // then we want to add a subject relation for the dep.
            // (By testing for the dep to be a verb, we are going to miss subject of
            // copular verbs! but
            // is it safe to relax this assumption?? i.e., just test for the subject
            // part)
            // CDM 2008: I also added in JJ, since participial verbs are often
            // tagged JJ
            String tag = dep.tag();
            if (subjectMap.containsKey(gov) && (tag.startsWith("VB") || tag.startsWith("JJ")) && !subjectMap.containsKey(dep)) {
                SemanticGraphEdge tdsubj = subjectMap.get(gov);
                // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
                // add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
                // Conversely, an active subject relation becomes passive when the
                // conjunct is the governor of an AUX_PASSIVE_MODIFIER edge.
                GrammaticalRelation relation = tdsubj.getRelation();
                if (relation == NOMINAL_PASSIVE_SUBJECT) {
                    if (isDefinitelyActive(tag)) {
                        relation = NOMINAL_SUBJECT;
                    }
                } else if (relation == CLAUSAL_PASSIVE_SUBJECT) {
                    if (isDefinitelyActive(tag)) {
                        relation = CLAUSAL_SUBJECT;
                    }
                } else if (relation == NOMINAL_SUBJECT) {
                    if (withPassiveAuxiliary.contains(dep)) {
                        relation = NOMINAL_PASSIVE_SUBJECT;
                    }
                } else if (relation == CLAUSAL_SUBJECT) {
                    if (withPassiveAuxiliary.contains(dep)) {
                        relation = CLAUSAL_PASSIVE_SUBJECT;
                    }
                }
                if (DEBUG) {
                    log.info("Adding new " + relation + " dependency from " + dep + " to " + tdsubj.getDependent() + " (subj propagation case)");
                }
                sg.addEdge(dep, tdsubj.getDependent(), relation, Double.NEGATIVE_INFINITY, true);
            }
            // propagate objects
            // cdm july 2010: This bit of code would copy a dobj from the first
            // clause to a later conjoined clause if it didn't
            // contain its own dobj or prepc. But this is too aggressive and wrong
            // if the later clause is intransitive
            // (including passivized cases) and so I think we have to not have this
            // done always, and see no good "sometimes" heuristic.
            // IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
            // (SBAR).
            // if (objectMap.containsKey(gov) &&
            // dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
            // && ! prepcDep.contains(gov)) {
            // TypedDependency tdobj = objectMap.get(gov);
            // if (DEBUG) {
            // log.info("Adding new " + tdobj.reln() + " dependency from "
            // + dep + " to " + tdobj.dep() + " (obj propagation case)");
            // }
            // newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
            // tdobj.dep()));
            // }
        }
    }
}
Also used : SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Aggregations

SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)65 IndexedWord (edu.stanford.nlp.ling.IndexedWord)52 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 CoreLabel (edu.stanford.nlp.ling.CoreLabel)15 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)15 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)11 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)10 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)8 Pair (edu.stanford.nlp.util.Pair)6 Mention (edu.stanford.nlp.coref.data.Mention)5 Span (edu.stanford.nlp.ie.machinereading.structure.Span)5 Annotation (edu.stanford.nlp.pipeline.Annotation)5 Tree (edu.stanford.nlp.trees.Tree)5 CoreMap (edu.stanford.nlp.util.CoreMap)5 HashMap (java.util.HashMap)5 Collectors (java.util.stream.Collectors)5 RelationTriple (edu.stanford.nlp.ie.util.RelationTriple)4 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)4 IntPair (edu.stanford.nlp.util.IntPair)4 java.util (java.util)4