Example 21 with SemanticGraphEdge

Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class FeatureExtractor, the method getFeatures.

private Counter<String> getFeatures(Document doc, Mention m, Map<Integer, List<Mention>> mentionsByHeadIndex) {
    Counter<String> features = new ClassicCounter<>();
    // type features
    features.incrementCount("mention-type=" + m.mentionType);
    features.incrementCount("gender=" + m.gender);
    features.incrementCount("person-fine=" + m.person);
    features.incrementCount("head-ne-type=" + m.nerString);
    List<String> singletonFeatures = m.getSingletonFeatures(dictionaries);
    for (Map.Entry<Integer, String> e : SINGLETON_FEATURES.entrySet()) {
        if (e.getKey() < singletonFeatures.size()) {
            features.incrementCount(e.getValue() + "=" + singletonFeatures.get(e.getKey()));
        }
    }
    // length and location features
    addNumeric(features, "mention-length", m.spanToString().length());
    addNumeric(features, "mention-words", m.originalSpan.size());
    addNumeric(features, "sentence-words", m.sentenceWords.size());
    features.incrementCount("sentence-words=" + bin(m.sentenceWords.size()));
    features.incrementCount("mention-position", m.mentionNum / (double) doc.predictedMentions.size());
    features.incrementCount("sentence-position", m.sentNum / (double) doc.numSentences);
    // lexical features
    CoreLabel firstWord = firstWord(m);
    CoreLabel lastWord = lastWord(m);
    CoreLabel headWord = headWord(m);
    CoreLabel prevWord = prevWord(m);
    CoreLabel nextWord = nextWord(m);
    CoreLabel prevprevWord = prevprevWord(m);
    CoreLabel nextnextWord = nextnextWord(m);
    String headPOS = getPOS(headWord);
    String firstPOS = getPOS(firstWord);
    String lastPOS = getPOS(lastWord);
    String prevPOS = getPOS(prevWord);
    String nextPOS = getPOS(nextWord);
    String prevprevPOS = getPOS(prevprevWord);
    String nextnextPOS = getPOS(nextnextWord);
    features.incrementCount("first-word=" + wordIndicator(firstWord, firstPOS));
    features.incrementCount("last-word=" + wordIndicator(lastWord, lastPOS));
    features.incrementCount("head-word=" + wordIndicator(headWord, headPOS));
    features.incrementCount("next-word=" + wordIndicator(nextWord, nextPOS));
    features.incrementCount("prev-word=" + wordIndicator(prevWord, prevPOS));
    features.incrementCount("next-bigram=" + wordIndicator(nextWord, nextnextWord, nextPOS + "_" + nextnextPOS));
    features.incrementCount("prev-bigram=" + wordIndicator(prevprevWord, prevWord, prevprevPOS + "_" + prevPOS));
    features.incrementCount("next-pos=" + nextPOS);
    features.incrementCount("prev-pos=" + prevPOS);
    features.incrementCount("first-pos=" + firstPOS);
    features.incrementCount("last-pos=" + lastPOS);
    features.incrementCount("next-pos-bigram=" + nextPOS + "_" + nextnextPOS);
    features.incrementCount("prev-pos-bigram=" + prevprevPOS + "_" + prevPOS);
    addDependencyFeatures(features, "parent", getDependencyParent(m), true);
    addFeature(features, "ends-with-head", m.headIndex == m.endIndex - 1);
    addFeature(features, "is-generic", m.originalSpan.size() == 1 && firstPOS.equals("NNS"));
    // syntax features
    IndexedWord w = m.headIndexedWord;
    String depPath = "";
    int depth = 0;
    while (w != null) {
        SemanticGraphEdge e = getDependencyParent(m, w);
        depth++;
        if (depth <= 3 && e != null) {
            depPath += (depPath.isEmpty() ? "" : "_") + e.getRelation().toString();
            features.incrementCount("dep-path=" + depPath);
            w = e.getSource();
        } else {
            w = null;
        }
    }
    if (useConstituencyParse) {
        int fullEmbeddingLevel = headEmbeddingLevel(m.contextParseTree, m.headIndex);
        int mentionEmbeddingLevel = headEmbeddingLevel(m.mentionSubTree, m.headIndex - m.startIndex);
        if (fullEmbeddingLevel != -1 && mentionEmbeddingLevel != -1) {
            features.incrementCount("mention-embedding-level=" + bin(fullEmbeddingLevel - mentionEmbeddingLevel));
            features.incrementCount("head-embedding-level=" + bin(mentionEmbeddingLevel));
        } else {
            features.incrementCount("undetermined-embedding-level");
        }
        features.incrementCount("num-embedded-nps=" + bin(numEmbeddedNps(m.mentionSubTree)));
        String syntaxPath = "";
        Tree tree = m.contextParseTree;
        Tree head = tree.getLeaves().get(m.headIndex).ancestor(1, tree);
        depth = 0;
        for (Tree node : tree.pathNodeToNode(head, tree)) {
            syntaxPath += node.value() + "-";
            features.incrementCount("syntax-path=" + syntaxPath);
            depth++;
            if (depth >= 4 || node.value().equals("S")) {
                break;
            }
        }
    }
    // mention containment features
    addFeature(features, "contained-in-other-mention", mentionsByHeadIndex.get(m.headIndex).stream().anyMatch(m2 -> m != m2 && m.insideIn(m2)));
    addFeature(features, "contains-other-mention", mentionsByHeadIndex.get(m.headIndex).stream().anyMatch(m2 -> m != m2 && m2.insideIn(m)));
    // features from dcoref rules
    addFeature(features, "bare-plural", m.originalSpan.size() == 1 && headPOS.equals("NNS"));
    addFeature(features, "quantifier-start", dictionaries.quantifiers.contains(firstWord.word().toLowerCase()));
    addFeature(features, "negative-start", firstWord.word().toLowerCase().matches("none|no|nothing|not"));
    addFeature(features, "partitive", RuleBasedCorefMentionFinder.partitiveRule(m, m.sentenceWords, dictionaries));
    addFeature(features, "adjectival-demonym", dictionaries.isAdjectivalDemonym(m.spanToString()));
    if (doc.docType != DocType.ARTICLE && m.person == Person.YOU && nextWord != null && nextWord.word().equalsIgnoreCase("know")) {
        features.incrementCount("generic-you");
    }
    return features;
}
Also used : SpeakerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SpeakerAnnotation) Tree(edu.stanford.nlp.trees.Tree) HashMap(java.util.HashMap) Random(java.util.Random) Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Number(edu.stanford.nlp.coref.data.Dictionaries.Number) CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) RuleBasedCorefMentionFinder(edu.stanford.nlp.coref.md.RuleBasedCorefMentionFinder) Counter(edu.stanford.nlp.stats.Counter) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) CorefRules(edu.stanford.nlp.coref.CorefRules) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Properties(java.util.Properties) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) Iterator(java.util.Iterator) IOUtils(edu.stanford.nlp.io.IOUtils) DocType(edu.stanford.nlp.coref.data.Document.DocType) Set(java.util.Set) Person(edu.stanford.nlp.coref.data.Dictionaries.Person) List(java.util.List) MentionType(edu.stanford.nlp.coref.data.Dictionaries.MentionType) StringUtils(edu.stanford.nlp.util.StringUtils) CorefProperties(edu.stanford.nlp.coref.CorefProperties) Document(edu.stanford.nlp.coref.data.Document) CorefUtils(edu.stanford.nlp.coref.CorefUtils)
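
The dep-path features in getFeatures walk at most three governor hops up from the mention's head word. For readers who want to reproduce that traversal outside the private method, here is a minimal standalone sketch against the public SemanticGraph API; the sentence, the annotator list, and the start index 4 ("fox") are illustrative assumptions:

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.CoreMap;
import java.util.List;
import java.util.Properties;

public class DepPathSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation ann = new Annotation("The quick brown fox jumped over the lazy dog.");
        pipeline.annotate(ann);
        CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        // Start from "fox" (1-based token index 4) and walk up to three governor hops,
        // accumulating relation labels exactly as the dep-path feature does.
        IndexedWord w = graph.getNodeByIndexSafe(4);
        StringBuilder depPath = new StringBuilder();
        for (int depth = 0; w != null && depth < 3; depth++) {
            List<SemanticGraphEdge> incoming = graph.incomingEdgeList(w);
            if (incoming.isEmpty()) {
                break;                                  // reached the root
            }
            SemanticGraphEdge e = incoming.get(0);
            if (depPath.length() > 0) {
                depPath.append('_');
            }
            depPath.append(e.getRelation().toString());
            System.out.println("dep-path=" + depPath);  // one feature per path prefix
            w = e.getGovernor();                        // move one hop toward the root
        }
    }
}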

Example 22 with SemanticGraphEdge

Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class OpenIE, the method entailmentsFromClause.

/**
   * Returns all of the entailed shortened clauses (as per natural logic) from the given clause.
   * This runs the forward entailment component of the OpenIE system only.
   * It is usually chained together with the clause splitting component: {@link OpenIE#clausesInSentence(CoreMap)}.
   *
   * @param clause The premise clause, as a sentence fragment in itself.
   *
   * @return A list of entailed clauses.
   */
@SuppressWarnings("unchecked")
public List<SentenceFragment> entailmentsFromClause(SentenceFragment clause) {
    if (clause.parseTree.isEmpty()) {
        return Collections.emptyList();
    } else {
        // Get the forward entailments
        List<SentenceFragment> list = new ArrayList<>();
        if (entailmentsPerSentence > 0) {
            list.addAll(forwardEntailer.apply(clause.parseTree, true).search().stream().map(x -> x.changeScore(x.score * clause.score)).collect(Collectors.toList()));
        }
        list.add(clause);
        // A special case for adjective entailments
        List<SentenceFragment> adjFragments = new ArrayList<>();
        SemgrexMatcher matcher = adjectivePattern.matcher(clause.parseTree);
        OUTER: while (matcher.find()) {
            // (get nodes)
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord be = matcher.getNode("be");
            IndexedWord adj = matcher.getNode("adj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pobj = matcher.getNode("pobj");
            String prep = matcher.getRelnString("prep");
            // (if the adjective, or any earlier adjective, is privative, then all bets are off)
            for (SemanticGraphEdge edge : clause.parseTree.outgoingEdgeIterable(obj)) {
                if ("amod".equals(edge.getRelation().toString()) && edge.getDependent().index() <= adj.index() && Util.PRIVATIVE_ADJECTIVES.contains(edge.getDependent().word().toLowerCase())) {
                    continue OUTER;
                }
            }
            // (create the core tree)
            SemanticGraph tree = new SemanticGraph();
            tree.addRoot(adj);
            tree.addVertex(subj);
            tree.addVertex(be);
            tree.addEdge(adj, be, GrammaticalRelation.valueOf(Language.English, "cop"), Double.NEGATIVE_INFINITY, false);
            tree.addEdge(adj, subj, GrammaticalRelation.valueOf(Language.English, "nsubj"), Double.NEGATIVE_INFINITY, false);
            // (add pp attachment, if it existed)
            if (pobj != null) {
                assert prep != null;
                tree.addEdge(adj, pobj, GrammaticalRelation.valueOf(Language.English, prep), Double.NEGATIVE_INFINITY, false);
            }
            // (check for monotonicity)
            if (adj.get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards() && be.get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards()) {
                // (add tree)
                adjFragments.add(new SentenceFragment(tree, clause.assumedTruth, false));
            }
        }
        list.addAll(adjFragments);
        return list;
    }
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)
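
As the javadoc says, entailmentsFromClause is usually chained after clause splitting. A minimal sketch of that chain follows; it assumes a pipeline that includes the natlog annotator (the polarity annotations read above come from it), and the example sentence is arbitrary:

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.naturalli.OpenIE;
import edu.stanford.nlp.naturalli.SentenceFragment;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.util.Properties;

public class EntailmentSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        OpenIE openie = new OpenIE(props);

        Annotation ann = new Annotation("All young rabbits drink milk.");
        pipeline.annotate(ann);
        CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);

        // Split the sentence into clauses, then run the forward entailment
        // component on each clause to get the shortened fragments.
        for (SentenceFragment clause : openie.clausesInSentence(sentence)) {
            for (SentenceFragment entailed : openie.entailmentsFromClause(clause)) {
                System.out.println(entailed);
            }
        }
    }
}

Note that constructing OpenIE loads its clause-splitting and affinity models, so it is typically created once and reused across sentences.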

Example 23 with SemanticGraphEdge

Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class OpenIE, the method canonicalizeCoref.

/**
   * Create a copy of the passed parse tree, canonicalizing pronominal nodes with their canonical mention.
   * Canonical mentions are tied together with the <i>compound</i> dependency arc; otherwise, the structure of
   * the tree remains unchanged.
   *
   * @param parse The original dependency parse of the sentence.
   * @param canonicalMentionMap The map from tokens to their canonical mentions.
   *
   * @return A <b>copy</b> of the passed parse tree, with pronouns replaced by their canonical mentions.
   */
private static SemanticGraph canonicalizeCoref(SemanticGraph parse, Map<CoreLabel, List<CoreLabel>> canonicalMentionMap) {
    parse = new SemanticGraph(parse);
    for (IndexedWord node : new HashSet<>(parse.vertexSet())) {
        // copy the vertex set to prevent ConcurrentModificationExceptions
        if (node.tag() != null && node.tag().startsWith("PRP")) {
            List<CoreLabel> canonicalMention = canonicalMentionMap.get(node.backingLabel());
            if (canonicalMention != null) {
                // Case: this node is a pronoun with a valid antecedent.
                // 1. Save the attaching edges
                List<SemanticGraphEdge> incomingEdges = parse.incomingEdgeList(node);
                List<SemanticGraphEdge> outgoingEdges = parse.outgoingEdgeList(node);
                // 2. Remove the node
                parse.removeVertex(node);
                // 3. Add the new head word
                IndexedWord headWord = new IndexedWord(canonicalMention.get(canonicalMention.size() - 1));
                headWord.setPseudoPosition(node.pseudoPosition());
                parse.addVertex(headWord);
                for (SemanticGraphEdge edge : incomingEdges) {
                    parse.addEdge(edge.getGovernor(), headWord, edge.getRelation(), edge.getWeight(), edge.isExtra());
                }
                for (SemanticGraphEdge edge : outgoingEdges) {
                    parse.addEdge(headWord, edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra());
                }
                // 4. Add other words
                double pseudoPosition = headWord.pseudoPosition() - 1e-3;
                for (int i = canonicalMention.size() - 2; i >= 0; --i) {
                    // Create the node
                    IndexedWord dependent = new IndexedWord(canonicalMention.get(i));
                    // Set its pseudo position appropriately
                    dependent.setPseudoPosition(pseudoPosition);
                    pseudoPosition -= 1e-3;
                    // Add the node to the graph
                    parse.addVertex(dependent);
                    parse.addEdge(headWord, dependent, UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER, 1.0, false);
                }
            }
        }
    }
    return parse;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)
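
canonicalizeCoref is private, but steps 1-4 above are a reusable SemanticGraph recipe: save a node's edges, remove the node, then splice in a replacement with the saved edges. Below is a minimal sketch of just that recipe; the class and method names are made up for illustration, and the node is looked up by token index on the copy to sidestep vertex-identity questions:

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import java.util.List;

public final class GraphSpliceSketch {
    /**
     * Returns a copy of graph in which the vertex at the given 1-based token
     * index is replaced by replacement, re-attaching all of its edges.
     */
    public static SemanticGraph replaceNode(SemanticGraph graph, int index, IndexedWord replacement) {
        SemanticGraph copy = new SemanticGraph(graph);   // work on a copy, as above
        IndexedWord oldNode = copy.getNodeByIndexSafe(index);
        if (oldNode == null) {
            return copy;                                 // nothing to replace
        }
        // 1. Save the attaching edges
        List<SemanticGraphEdge> incoming = copy.incomingEdgeList(oldNode);
        List<SemanticGraphEdge> outgoing = copy.outgoingEdgeList(oldNode);
        // 2. Remove the node (this also drops its edges)
        copy.removeVertex(oldNode);
        // 3. Add the replacement and re-attach the saved edges
        copy.addVertex(replacement);
        for (SemanticGraphEdge e : incoming) {
            copy.addEdge(e.getGovernor(), replacement, e.getRelation(), e.getWeight(), e.isExtra());
        }
        for (SemanticGraphEdge e : outgoing) {
            copy.addEdge(replacement, e.getDependent(), e.getRelation(), e.getWeight(), e.isExtra());
        }
        return copy;
    }
}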

Example 24 with SemanticGraphEdge

Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class RelationTripleSegmenter, the method getValidChunk.

/**
   * @see RelationTripleSegmenter#getValidSubjectChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord, Optional)
   * @see RelationTripleSegmenter#getValidObjectChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord, Optional)
   * @see RelationTripleSegmenter#getValidAdverbChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord, Optional)
   */
@SuppressWarnings("StatementWithEmptyBody")
protected Optional<List<IndexedWord>> getValidChunk(SemanticGraph parse, IndexedWord originalRoot, Set<String> validArcs, Optional<String> ignoredArc, boolean allowExtraArcs) {
    PriorityQueue<IndexedWord> chunk = new FixedPrioritiesPriorityQueue<>();
    Set<Double> seenIndices = new HashSet<>();
    Queue<IndexedWord> fringe = new LinkedList<>();
    IndexedWord root = originalRoot;
    fringe.add(root);
    boolean isCopula = false;
    IndexedWord primaryCase = null;
    for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(originalRoot)) {
        String shortName = edge.getRelation().getShortName();
        if (shortName.equals("cop") || shortName.equals("auxpass")) {
            isCopula = true;
        }
        if (shortName.equals("case")) {
            primaryCase = edge.getDependent();
        }
    }
    while (!fringe.isEmpty()) {
        root = fringe.poll();
        chunk.add(root, -root.pseudoPosition());
        // Sanity check to prevent infinite loops
        if (seenIndices.contains(root.pseudoPosition())) {
            // TODO(gabor) Indicates a cycle in the tree!
            return Optional.empty();
        }
        seenIndices.add(root.pseudoPosition());
        // Check outgoing edges
        boolean hasConj = false;
        boolean hasCC = false;
        for (SemanticGraphEdge edge : parse.getOutEdgesSorted(root)) {
            String shortName = edge.getRelation().getShortName();
            String name = edge.getRelation().toString();
            if (shortName.startsWith("conj")) {
                hasConj = true;
            }
            if (shortName.equals("cc")) {
                hasCC = true;
            }
            //noinspection StatementWithEmptyBody
            if (isCopula && (shortName.equals("cop") || shortName.contains("subj") || shortName.equals("auxpass"))) {
            // noop; ignore nsubj, cop for extractions with copula
            } else if (edge.getDependent() == primaryCase) {
            // noop: ignore case edge
            } else if (ignoredArc.isPresent() && (ignoredArc.get().equals(name) || (ignoredArc.get().startsWith("conj") && name.equals("cc")))) {
            // noop; ignore explicitly requested noop arc, or "CC" if the noop arc is a conj:*
            } else if (!validArcs.contains(edge.getRelation().getShortName()) && !validArcs.contains(edge.getRelation().getShortName().replaceAll(":.*", ":*"))) {
                if (!allowExtraArcs) {
                    return Optional.empty();
                } else {
                // noop: just some dangling arc
                }
            } else {
                fringe.add(edge.getDependent());
            }
        }
        // Ensure that we don't have a conj without a cc, or vice versa
        if (Boolean.logicalXor(hasConj, hasCC)) {
            return Optional.empty();
        }
    }
    return Optional.of(chunk.toSortedList());
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)
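
Stripped of the copula/case bookkeeping and the conj-cc check, getValidChunk is a breadth-first walk that expands only whitelisted relations and returns the yield in sentence order. A simplified standalone sketch of that core loop follows; unlike the original, it silently skips non-whitelisted arcs instead of rejecting the whole chunk, and the whitelist you pass in is your own, not the class's VALID_*_ARCS constants:

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import java.util.*;

public final class ChunkSketch {
    /** Collects head plus all dependents reachable through whitelisted relations, in sentence order. */
    public static Optional<List<IndexedWord>> validChunk(SemanticGraph parse, IndexedWord head, Set<String> validArcs) {
        List<IndexedWord> chunk = new ArrayList<>();
        Set<Integer> seen = new HashSet<>();
        Queue<IndexedWord> fringe = new LinkedList<>();
        fringe.add(head);
        while (!fringe.isEmpty()) {
            IndexedWord word = fringe.poll();
            if (!seen.add(word.index())) {
                return Optional.empty();                 // cycle guard, as in the original
            }
            chunk.add(word);
            for (SemanticGraphEdge edge : parse.getOutEdgesSorted(word)) {
                if (validArcs.contains(edge.getRelation().getShortName())) {
                    fringe.add(edge.getDependent());     // keep expanding along allowed arcs
                }
            }
        }
        chunk.sort(Comparator.comparingInt(IndexedWord::index));
        return Optional.of(chunk);
    }
}

Calling validChunk(parse, head, new HashSet<>(Arrays.asList("det", "amod", "compound"))) would, for instance, gather a simple noun phrase around head.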

Example 25 with SemanticGraphEdge

Use of edu.stanford.nlp.semgraph.SemanticGraphEdge in project CoreNLP by stanfordnlp.

In the class RelationTripleSegmenter, the method extract.

/**
   * Extract the nominal patterns from this sentence.
   *
   * @see RelationTripleSegmenter#NOUN_TOKEN_PATTERNS
   * @see RelationTripleSegmenter#NOUN_DEPENDENCY_PATTERNS
   *
   * @param parse The parse tree of the sentence to annotate.
   * @param tokens The tokens of the sentence to annotate.
   * @return A list of {@link RelationTriple}s. Note that these do not have an associated tree.
   */
@SuppressWarnings("unchecked")
public List<RelationTriple> extract(SemanticGraph parse, List<CoreLabel> tokens) {
    List<RelationTriple> extractions = new ArrayList<>();
    Set<Triple<Span, String, Span>> alreadyExtracted = new HashSet<>();
    //
    for (TokenSequencePattern tokenPattern : NOUN_TOKEN_PATTERNS) {
        TokenSequenceMatcher tokenMatcher = tokenPattern.matcher(tokens);
        while (tokenMatcher.find()) {
            boolean missingPrefixBe;
            boolean missingSuffixOf = false;
            // Create subject
            List<? extends CoreMap> subject = tokenMatcher.groupNodes("$subject");
            Span subjectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) subject.get(0)).index() - 1, ((CoreLabel) subject.get(subject.size() - 1)).index()));
            List<CoreLabel> subjectTokens = new ArrayList<>();
            for (int i : subjectSpan) {
                subjectTokens.add(tokens.get(i));
            }
            // Create object
            List<? extends CoreMap> object = tokenMatcher.groupNodes("$object");
            Span objectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) object.get(0)).index() - 1, ((CoreLabel) object.get(object.size() - 1)).index()));
            if (Span.overlaps(subjectSpan, objectSpan)) {
                continue;
            }
            List<CoreLabel> objectTokens = new ArrayList<>();
            for (int i : objectSpan) {
                objectTokens.add(tokens.get(i));
            }
            // Create relation
            if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
                List<CoreLabel> relationTokens = new ArrayList<>();
                // (add the 'be')
                missingPrefixBe = true;
                // (add a complement to the 'be')
                List<? extends CoreMap> beofComp = tokenMatcher.groupNodes("$beof_comp");
                if (beofComp != null) {
                    // (add the complement
                    for (CoreMap token : beofComp) {
                        if (token instanceof CoreLabel) {
                            relationTokens.add((CoreLabel) token);
                        } else {
                            relationTokens.add(new CoreLabel(token));
                        }
                    }
                    // (add the 'of')
                    missingSuffixOf = true;
                }
                // Add extraction
                String relationGloss = StringUtils.join(relationTokens.stream().map(CoreLabel::word), " ");
                if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
                    RelationTriple extraction = new RelationTriple(subjectTokens, relationTokens, objectTokens);
                    //noinspection ConstantConditions
                    extraction.isPrefixBe(missingPrefixBe);
                    extraction.isSuffixOf(missingSuffixOf);
                    extractions.add(extraction);
                    alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
                }
            }
        }
        //
        for (SemgrexPattern semgrex : NOUN_DEPENDENCY_PATTERNS) {
            SemgrexMatcher matcher = semgrex.matcher(parse);
            while (matcher.find()) {
                boolean missingPrefixBe = false;
                boolean missingSuffixBe = false;
                boolean istmod = false;
                // Get relaux if applicable
                String relaux = matcher.getRelnString("relaux");
                String ignoredArc = relaux;
                if (ignoredArc == null) {
                    ignoredArc = matcher.getRelnString("arc");
                }
                // Create subject
                IndexedWord subject = matcher.getNode("subject");
                List<IndexedWord> subjectTokens = new ArrayList<>();
                Span subjectSpan;
                if (subject.ner() != null && !"O".equals(subject.ner())) {
                    subjectSpan = Util.extractNER(tokens, Span.fromValues(subject.index() - 1, subject.index()));
                    for (int i : subjectSpan) {
                        subjectTokens.add(new IndexedWord(tokens.get(i)));
                    }
                } else {
                    subjectTokens = getValidChunk(parse, subject, VALID_SUBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(subject));
                    subjectSpan = Util.tokensToSpan(subjectTokens);
                }
                // Create object
                IndexedWord object = matcher.getNode("object");
                List<IndexedWord> objectTokens = new ArrayList<>();
                Span objectSpan;
                if (object.ner() != null && !"O".equals(object.ner())) {
                    objectSpan = Util.extractNER(tokens, Span.fromValues(object.index() - 1, object.index()));
                    for (int i : objectSpan) {
                        objectTokens.add(new IndexedWord(tokens.get(i)));
                    }
                } else {
                    objectTokens = getValidChunk(parse, object, VALID_OBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(object));
                    objectSpan = Util.tokensToSpan(objectTokens);
                }
                // Check that the pair is valid
                if (Span.overlaps(subjectSpan, objectSpan)) {
                    // We extracted an identity
                    continue;
                }
                if (subjectSpan.end() == objectSpan.start() - 1 && (tokens.get(subjectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(subjectSpan.end()).tag()))) {
                    // We're straddling a clause
                    continue;
                }
                if (objectSpan.end() == subjectSpan.start() - 1 && (tokens.get(objectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(objectSpan.end()).tag()))) {
                    // We're straddling a clause
                    continue;
                }
                // Get any prepositional edges
                String expected = relaux == null ? "" : relaux.substring(relaux.indexOf(":") + 1).replace("_", " ");
                IndexedWord prepWord = null;
                // (these usually come from the object)
                boolean prepositionIsPrefix = false;
                for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(object)) {
                    if (edge.getRelation().toString().equals("case")) {
                        prepWord = edge.getDependent();
                    }
                }
                // (...but sometimes from the subject)
                if (prepWord == null) {
                    for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(subject)) {
                        if (edge.getRelation().toString().equals("case")) {
                            prepositionIsPrefix = true;
                            prepWord = edge.getDependent();
                        }
                    }
                }
                List<IndexedWord> prepChunk = Collections.EMPTY_LIST;
                if (prepWord != null && !expected.equals("tmod")) {
                    Optional<List<IndexedWord>> optionalPrepChunk = getValidChunk(parse, prepWord, Collections.singleton("mwe"), Optional.empty(), true);
                    if (!optionalPrepChunk.isPresent()) {
                        continue;
                    }
                    prepChunk = optionalPrepChunk.get();
                    Collections.sort(prepChunk, (a, b) -> {
                        double val = a.pseudoPosition() - b.pseudoPosition();
                        if (val < 0) {
                            return -1;
                        }
                        if (val > 0) {
                            return 1;
                        } else {
                            return 0;
                        }
                    });
                // ascending sort
                }
                // Get the relation
                if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
                    LinkedList<IndexedWord> relationTokens = new LinkedList<>();
                    IndexedWord relNode = matcher.getNode("relation");
                    if (relNode != null) {
                        // Case: we have a grounded relation span
                        // (add the relation)
                        relationTokens.add(relNode);
                        // (add any prepositional case markings)
                        if (prepositionIsPrefix) {
                            // We're almost certainly missing a suffix 'be'
                            missingSuffixBe = true;
                            for (int i = prepChunk.size() - 1; i >= 0; --i) {
                                relationTokens.addFirst(prepChunk.get(i));
                            }
                        } else {
                            relationTokens.addAll(prepChunk);
                        }
                        if (expected.equalsIgnoreCase("tmod")) {
                            istmod = true;
                        }
                    } else {
                        // (mark it as missing a preceding 'be'
                        if (!expected.equals("poss")) {
                            missingPrefixBe = true;
                        }
                        // (add any prepositional case markings)
                        if (prepositionIsPrefix) {
                            for (int i = prepChunk.size() - 1; i >= 0; --i) {
                                relationTokens.addFirst(prepChunk.get(i));
                            }
                        } else {
                            relationTokens.addAll(prepChunk);
                        }
                        if (expected.equalsIgnoreCase("tmod")) {
                            istmod = true;
                        }
                        // (some fine-tuning)
                        if (allowNominalsWithoutNER && "of".equals(expected)) {
                            // prohibit things like "conductor of electricity" -> "conductor; be of; electricity"
                            continue;
                        }
                    }
                    // Add extraction
                    String relationGloss = StringUtils.join(relationTokens.stream().map(IndexedWord::word), " ");
                    if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
                        RelationTriple extraction = new RelationTriple(subjectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), relationTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), objectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()));
                        extraction.istmod(istmod);
                        extraction.isPrefixBe(missingPrefixBe);
                        extraction.isSuffixBe(missingSuffixBe);
                        extractions.add(extraction);
                        alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
                    }
                }
            }
        }
    }
    //
    // Filter downward polarity extractions
    //
    Iterator<RelationTriple> iter = extractions.iterator();
    while (iter.hasNext()) {
        RelationTriple term = iter.next();
        boolean shouldRemove = true;
        for (CoreLabel token : term) {
            if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
                shouldRemove = false;
            }
        }
        if (shouldRemove) {
            // Don't extract things in downward polarity contexts.
            iter.remove();
        }
    }
    // Return
    return extractions;
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) Span(edu.stanford.nlp.ie.machinereading.structure.Span) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) TokenSequenceMatcher(edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CoreLabel(edu.stanford.nlp.ling.CoreLabel) IndexedWord(edu.stanford.nlp.ling.IndexedWord)
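
Since extract is public, the nominal patterns can be driven directly once a sentence carries POS, NER, dependency, and natural-logic annotations. A minimal sketch follows; the annotator list and sentence are assumptions, and the nominal patterns will not fire on every input:

import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.naturalli.RelationTripleSegmenter;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.util.List;
import java.util.Properties;

public class NominalTripleSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse,natlog");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation ann = new Annotation("Barack Obama, the former president, visited Paris.");
        pipeline.annotate(ann);
        CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);

        // The segmenter takes the dependency parse plus the token list.
        SemanticGraph parse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);

        RelationTripleSegmenter segmenter = new RelationTripleSegmenter();
        for (RelationTriple triple : segmenter.extract(parse, tokens)) {
            System.out.println(triple.subjectGloss() + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
        }
    }
}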

Aggregations

SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 65 uses
IndexedWord (edu.stanford.nlp.ling.IndexedWord): 52 uses
SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 21 uses
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 15 uses
GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation): 15 uses
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 11 uses
SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher): 10 uses
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 8 uses
Pair (edu.stanford.nlp.util.Pair): 6 uses
Mention (edu.stanford.nlp.coref.data.Mention): 5 uses
Span (edu.stanford.nlp.ie.machinereading.structure.Span): 5 uses
Annotation (edu.stanford.nlp.pipeline.Annotation): 5 uses
Tree (edu.stanford.nlp.trees.Tree): 5 uses
CoreMap (edu.stanford.nlp.util.CoreMap): 5 uses
HashMap (java.util.HashMap): 5 uses
Collectors (java.util.stream.Collectors): 5 uses
RelationTriple (edu.stanford.nlp.ie.util.RelationTriple): 4 uses
SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern): 4 uses
IntPair (edu.stanford.nlp.util.IntPair): 4 uses
java.util (java.util): 4 uses
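
All of the call sites aggregated above share one entry point: a SemanticGraph pulled off an annotated sentence, whose edges are SemanticGraphEdge objects. For reference, a minimal sketch of plain edge iteration (sentence and annotator list are assumptions):

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.CoreMap;
import java.util.Properties;

public class EdgeIterationSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation ann = new Annotation("Stanford is a university in California.");
        pipeline.annotate(ann);

        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            // Each edge carries a governor, a dependent, and a grammatical relation.
            for (SemanticGraphEdge edge : graph.edgeIterable()) {
                System.out.printf("%s --%s--> %s%n",
                        edge.getGovernor().word(), edge.getRelation(), edge.getDependent().word());
            }
        }
    }
}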