Search in sources :

Example 6 with RelationTriple

use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.

the class RelationTripleSegmenter method extract.

/**
   * Extract the nominal patterns from this sentence.
   *
   * <p>
   * Two families of patterns are run: the token-sequence patterns in NOUN_TOKEN_PATTERNS (matched against
   * {@code tokens}) and the dependency patterns in NOUN_DEPENDENCY_PATTERNS (matched against {@code parse}).
   * Every candidate is de-duplicated on the key (subject span, relation gloss, object span); the first
   * candidate seen for a key is the one that is kept. Finally, any triple whose tokens are <i>all</i>
   * annotated with a downward polarity is dropped.
   * </p>
   *
   * @see RelationTripleSegmenter#NOUN_TOKEN_PATTERNS
   * @see RelationTripleSegmenter#NOUN_DEPENDENCY_PATTERNS
   *
   * @param parse The parse tree of the sentence to annotate.
   * @param tokens The tokens of the sentence to annotate.
   * @return A list of {@link RelationTriple}s. Note that these do not have an associated tree with them.
   */
@SuppressWarnings("unchecked")
public List<RelationTriple> extract(SemanticGraph parse, List<CoreLabel> tokens) {
    List<RelationTriple> extractions = new ArrayList<>();
    // Keys of the triples emitted so far; consulted before every add so that no key is emitted twice
    Set<Triple<Span, String, Span>> alreadyExtracted = new HashSet<>();
    //
    // Run the token-sequence patterns
    //
    for (TokenSequencePattern tokenPattern : NOUN_TOKEN_PATTERNS) {
        TokenSequenceMatcher tokenMatcher = tokenPattern.matcher(tokens);
        while (tokenMatcher.find()) {
            // Only assigned (to true) once both spans are known to be non-empty, see below
            boolean missingPrefixBe;
            boolean missingSuffixOf = false;
            // Create subject
            List<? extends CoreMap> subject = tokenMatcher.groupNodes("$subject");
            // The span runs from (first token index - 1) to (last token index); index() is presumably 1-based,
            // giving a 0-based, end-exclusive span -- TODO confirm. Util.extractNER presumably widens the span
            // to the enclosing named entity -- verify against Util.
            Span subjectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) subject.get(0)).index() - 1, ((CoreLabel) subject.get(subject.size() - 1)).index()));
            List<CoreLabel> subjectTokens = new ArrayList<>();
            for (int i : subjectSpan) {
                subjectTokens.add(tokens.get(i));
            }
            // Create object (same span construction as for the subject)
            List<? extends CoreMap> object = tokenMatcher.groupNodes("$object");
            Span objectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) object.get(0)).index() - 1, ((CoreLabel) object.get(object.size() - 1)).index()));
            if (Span.overlaps(subjectSpan, objectSpan)) {
                // Subject and object share tokens; skip this match
                continue;
            }
            List<CoreLabel> objectTokens = new ArrayList<>();
            for (int i : objectSpan) {
                objectTokens.add(tokens.get(i));
            }
            // Create relation
            if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
                List<CoreLabel> relationTokens = new ArrayList<>();
                // (add the 'be')
                // Token patterns never contribute the 'be' themselves, so it is always flagged as missing
                missingPrefixBe = true;
                // (add a complement to the 'be')
                List<? extends CoreMap> beofComp = tokenMatcher.groupNodes("$beof_comp");
                if (beofComp != null) {
                    // (add the complement)
                    for (CoreMap token : beofComp) {
                        if (token instanceof CoreLabel) {
                            relationTokens.add((CoreLabel) token);
                        } else {
                            // Wrap any other CoreMap so that the relation is uniformly made of CoreLabels
                            relationTokens.add(new CoreLabel(token));
                        }
                    }
                    // (add the 'of')
                    // The 'of' is likewise not part of the matched group, so it is flagged as missing
                    missingSuffixOf = true;
                }
                // Add extraction (unless an identical key was already emitted)
                String relationGloss = StringUtils.join(relationTokens.stream().map(CoreLabel::word), " ");
                if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
                    RelationTriple extraction = new RelationTriple(subjectTokens, relationTokens, objectTokens);
                    //noinspection ConstantConditions
                    extraction.isPrefixBe(missingPrefixBe);
                    extraction.isSuffixOf(missingSuffixOf);
                    extractions.add(extraction);
                    alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
                }
            }
        }
        //
        // Run the dependency (semgrex) patterns
        //
        // NOTE(review): this loop sits inside the NOUN_TOKEN_PATTERNS loop, so the dependency patterns are
        // re-run once per token pattern (and never if that collection is empty). alreadyExtracted suppresses
        // the resulting duplicates. Confirm whether the nesting is intentional before hoisting it: moving
        // it would change the order of the returned list and which of two same-key candidates is kept.
        for (SemgrexPattern semgrex : NOUN_DEPENDENCY_PATTERNS) {
            SemgrexMatcher matcher = semgrex.matcher(parse);
            while (matcher.find()) {
                boolean missingPrefixBe = false;
                boolean missingSuffixBe = false;
                boolean istmod = false;
                // Get relaux if applicable
                String relaux = matcher.getRelnString("relaux");
                // The arc excluded when chunking subject/object: 'relaux' if matched, otherwise 'arc' (may be null)
                String ignoredArc = relaux;
                if (ignoredArc == null) {
                    ignoredArc = matcher.getRelnString("arc");
                }
                // Create subject
                IndexedWord subject = matcher.getNode("subject");
                List<IndexedWord> subjectTokens = new ArrayList<>();
                Span subjectSpan;
                if (subject.ner() != null && !"O".equals(subject.ner())) {
                    // The head carries an NER tag other than "O": take the span Util.extractNER returns for it
                    subjectSpan = Util.extractNER(tokens, Span.fromValues(subject.index() - 1, subject.index()));
                    for (int i : subjectSpan) {
                        subjectTokens.add(new IndexedWord(tokens.get(i)));
                    }
                } else {
                    // No usable NER tag: chunk along VALID_SUBJECT_ARCS, falling back to the head word alone
                    subjectTokens = getValidChunk(parse, subject, VALID_SUBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(subject));
                    subjectSpan = Util.tokensToSpan(subjectTokens);
                }
                // Create object (mirrors the subject logic, using VALID_OBJECT_ARCS)
                IndexedWord object = matcher.getNode("object");
                List<IndexedWord> objectTokens = new ArrayList<>();
                Span objectSpan;
                if (object.ner() != null && !"O".equals(object.ner())) {
                    objectSpan = Util.extractNER(tokens, Span.fromValues(object.index() - 1, object.index()));
                    for (int i : objectSpan) {
                        objectTokens.add(new IndexedWord(tokens.get(i)));
                    }
                } else {
                    objectTokens = getValidChunk(parse, object, VALID_OBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(object));
                    objectSpan = Util.tokensToSpan(objectTokens);
                }
                // Check that the pair is valid
                if (Span.overlaps(subjectSpan, objectSpan)) {
                    // We extracted an identity
                    continue;
                }
                // Reject the pair when exactly one token lies between the two spans and that token is a
                // punctuation mark from the character class below or is tagged "CC" (checked in both orders)
                if (subjectSpan.end() == objectSpan.start() - 1 && (tokens.get(subjectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(subjectSpan.end()).tag()))) {
                    // We're straddling a clause
                    continue;
                }
                if (objectSpan.end() == subjectSpan.start() - 1 && (tokens.get(objectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(objectSpan.end()).tag()))) {
                    // We're straddling a clause
                    continue;
                }
                // Get any prepositional edges
                // 'expected' is the part of relaux after its first ':' with underscores turned into spaces
                // (the whole string if there is no ':'), or "" when no relaux was matched
                String expected = relaux == null ? "" : relaux.substring(relaux.indexOf(":") + 1).replace("_", " ");
                IndexedWord prepWord = null;
                // (these usually come from the object)
                boolean prepositionIsPrefix = false;
                for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(object)) {
                    if (edge.getRelation().toString().equals("case")) {
                        // If several "case" edges exist, the last one iterated wins
                        prepWord = edge.getDependent();
                    }
                }
                // (...but sometimes from the subject)
                if (prepWord == null) {
                    for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(subject)) {
                        if (edge.getRelation().toString().equals("case")) {
                            prepositionIsPrefix = true;
                            prepWord = edge.getDependent();
                        }
                    }
                }
                // Raw EMPTY_LIST: an unchecked assignment (cf. the @SuppressWarnings on this method)
                List<IndexedWord> prepChunk = Collections.EMPTY_LIST;
                if (prepWord != null && !expected.equals("tmod")) {
                    Optional<List<IndexedWord>> optionalPrepChunk = getValidChunk(parse, prepWord, Collections.singleton("mwe"), Optional.empty(), true);
                    if (!optionalPrepChunk.isPresent()) {
                        // No valid chunk around the preposition; give up on this match
                        continue;
                    }
                    prepChunk = optionalPrepChunk.get();
                    // Sort ascending by pseudoPosition(); the difference is mapped to -1/0/1 explicitly
                    Collections.sort(prepChunk, (a, b) -> {
                        double val = a.pseudoPosition() - b.pseudoPosition();
                        if (val < 0) {
                            return -1;
                        }
                        if (val > 0) {
                            return 1;
                        } else {
                            return 0;
                        }
                    });
                }
                // Get the relation
                if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
                    LinkedList<IndexedWord> relationTokens = new LinkedList<>();
                    IndexedWord relNode = matcher.getNode("relation");
                    if (relNode != null) {
                        // Case: we have a grounded relation span
                        // (add the relation)
                        relationTokens.add(relNode);
                        // (add any prepositional case markings)
                        if (prepositionIsPrefix) {
                            // We're almost certainly missing a suffix 'be'
                            missingSuffixBe = true;
                            // Prepend back-to-front so the chunk keeps its sorted order ahead of the relation
                            for (int i = prepChunk.size() - 1; i >= 0; --i) {
                                relationTokens.addFirst(prepChunk.get(i));
                            }
                        } else {
                            relationTokens.addAll(prepChunk);
                        }
                        if (expected.equalsIgnoreCase("tmod")) {
                            istmod = true;
                        }
                    } else {
                        // Case: no relation node was matched, so the relation consists of the preposition chunk only
                        // (mark it as missing a preceding 'be')
                        if (!expected.equals("poss")) {
                            missingPrefixBe = true;
                        }
                        // (add any prepositional case markings)
                        if (prepositionIsPrefix) {
                            for (int i = prepChunk.size() - 1; i >= 0; --i) {
                                relationTokens.addFirst(prepChunk.get(i));
                            }
                        } else {
                            relationTokens.addAll(prepChunk);
                        }
                        if (expected.equalsIgnoreCase("tmod")) {
                            istmod = true;
                        }
                        // (some fine-tuning)
                        if (allowNominalsWithoutNER && "of".equals(expected)) {
                            // prohibit things like "conductor of electricity" -> "conductor; be of; electricity"
                            continue;
                        }
                    }
                    // Add extraction (unless an identical key was already emitted)
                    String relationGloss = StringUtils.join(relationTokens.stream().map(IndexedWord::word), " ");
                    if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
                        // RelationTriple is built from the labels backing each IndexedWord
                        RelationTriple extraction = new RelationTriple(subjectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), relationTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), objectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()));
                        extraction.istmod(istmod);
                        extraction.isPrefixBe(missingPrefixBe);
                        extraction.isSuffixBe(missingSuffixBe);
                        extractions.add(extraction);
                        alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
                    }
                }
            }
        }
    }
    //
    // Filter downward polarity extractions
    //
    // A triple is removed only if EVERY one of its tokens has a polarity annotation that is downward;
    // a single token without the annotation, or with a non-downward one, keeps the triple.
    // (A triple that iterates over no tokens at all is removed too, since nothing resets the flag.)
    Iterator<RelationTriple> iter = extractions.iterator();
    while (iter.hasNext()) {
        RelationTriple term = iter.next();
        boolean shouldRemove = true;
        for (CoreLabel token : term) {
            if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
                shouldRemove = false;
            }
        }
        if (shouldRemove) {
            // Don't extract things in downward polarity contexts.
            iter.remove();
        }
    }
    // Return
    return extractions;
}
Also used : SemgrexMatcher(edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher) Span(edu.stanford.nlp.ie.machinereading.structure.Span) TokenSequencePattern(edu.stanford.nlp.ling.tokensregex.TokenSequencePattern) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemgrexPattern(edu.stanford.nlp.semgraph.semgrex.SemgrexPattern) TokenSequenceMatcher(edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) CoreLabel(edu.stanford.nlp.ling.CoreLabel) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 7 with RelationTriple

use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.

the class RelationTripleSegmenter method segment.

/**
   * <p>
   * Try to segment this sentence as a relation triple.
   * This sentence must already match one of a few strict patterns for a valid OpenIE extraction.
   * If it does not, then no relation triple is created.
   * That is, this is <b>not</b> a relation extractor; it is just a utility to segment what is already a
   * (subject, relation, object) triple into these three parts.
   * </p>
   *
   * <p>
   *   This method will attempt to use both the verb-centric patterns and the ACL-centric patterns:
   *   the verb patterns are tried first, and the ACL patterns only if the verb patterns yield nothing.
   * </p>
   *
   * <p>
   *   An extraction whose tokens are <i>all</i> annotated with a downward polarity is discarded and
   *   an empty Optional is returned instead.
   * </p>
   *
   * @param parse The sentence to process, as a dependency tree. The method works on a copy of it.
   * @param confidence An optional confidence to pass on to the relation triple.
   * @param consumeAll if true, force the entire parse to be consumed by the pattern.
   * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple.
   */
public Optional<RelationTriple> segment(SemanticGraph parse, Optional<Double> confidence, boolean consumeAll) {
    // Copy the tree; the "there is" split below is applied to this copy
    parse = new SemanticGraph(parse);
    // Special case "there is <something>". Arguably this is a job for the clause splitter, but the <something> is
    // sometimes not _really_ its own clause
    IndexedWord root = parse.getFirstRoot();
    // Prefer the lemma when there is one; otherwise fall back to a fixed list of surface forms.
    // NOTE(review): the fallback list has "is", "are", "were", "be" but not e.g. "was" -- confirm this is intended.
    boolean rootIsBe;
    if (root.lemma() != null) {
        rootIsBe = root.lemma().equalsIgnoreCase("be");
    } else {
        rootIsBe = "is".equalsIgnoreCase(root.word()) || "are".equalsIgnoreCase(root.word()) || "were".equalsIgnoreCase(root.word()) || "be".equalsIgnoreCase(root.word());
    }
    if (rootIsBe) {
        // Check for the "there is" construction
        boolean foundThere = false;
        // an indicator for there being too much nonsense hanging off of the root
        boolean tooManyArcs = false;
        Optional<SemanticGraphEdge> newRoot = Optional.empty();
        for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(root)) {
            String relation = edge.getRelation().toString();
            // Constant-first comparison: a dependent without a word must not throw, it simply is not "there"
            if ("expl".equals(relation) && "there".equalsIgnoreCase(edge.getDependent().word())) {
                foundThere = true;
            } else if ("nsubj".equals(relation)) {
                newRoot = Optional.of(edge);
            } else {
                tooManyArcs = true;
            }
        }
        // Split off "there is": only when the root has nothing but the expletive and subject arcs
        if (foundThere && newRoot.isPresent() && !tooManyArcs) {
            ClauseSplitterSearchProblem.splitToChildOfEdge(parse, newRoot.get());
        }
    }
    // Run the patterns (verb-centric first, ACL-centric as the fallback)
    Optional<RelationTriple> extraction = segmentVerb(parse, confidence, consumeAll);
    if (!extraction.isPresent()) {
        extraction = segmentACL(parse, confidence, consumeAll);
    }
    // Don't extract things in downward polarity contexts: drop the triple if every token is downward
    if (extraction.isPresent()) {
        boolean shouldRemove = true;
        for (CoreLabel token : extraction.get()) {
            if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
                // One token that is not downward is enough to keep the triple; no need to look further
                shouldRemove = false;
                break;
            }
        }
        if (shouldRemove) {
            return Optional.empty();
        }
    }
    // Return
    return extraction;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 8 with RelationTriple

use of edu.stanford.nlp.ie.util.RelationTriple in project Info-Evaluation by TechnionYP5777.

the class AnalyzeParagragh method InteractiveReasonFinding.

/**
 * Makes the analysis process interactive: produces the candidate reasons the user chooses the most
 * fitting one from.
 *
 * <p>The stored input is run through a CoreNLP pipeline ending in the OpenIE annotator; every relation
 * triple of every sentence becomes one candidate.</p>
 *
 * @return one ReasonPair per extracted triple, built from the triple's confidence and the text
 *         "relation gloss + space + object gloss", in extraction order
 */
public LinkedList<ReasonPair> InteractiveReasonFinding() {
    final Properties pipelineProps = new Properties();
    pipelineProps.put("annotators", "tokenize,ssplit, pos, regexner, parse,lemma,natlog,openie");
    final StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineProps);
    // The text to evaluate; concatenating with "" turns the input field into a String
    final String textToAnalyze = input + "";
    final Annotation annotated = new Annotation(textToAnalyze);
    // Let the pipeline annotate the document we just created
    nlp.annotate(annotated);
    final LinkedList<ReasonPair> reasons = new LinkedList<>();
    for (final CoreMap sentence : annotated.get(SentencesAnnotation.class)) {
        for (final RelationTriple triple : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
            final String reasonText = triple.relationGloss() + " " + triple.objectGloss();
            reasons.add(new ReasonPair(triple.confidence, reasonText));
        }
    }
    return reasons;
}
Also used : RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) ReasonPair(main.database.ReasonPair) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap) LinkedList(java.util.LinkedList) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) CollapsedDependenciesAnnotation(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation)

Example 9 with RelationTriple

use of edu.stanford.nlp.ie.util.RelationTriple in project cogcomp-nlp by CogComp.

the class StanfordOpenIEHandler method addView.

/**
 * Annotates the text of {@code ta} with the pipeline and stores the resulting relation triples in a
 * SpanLabelView registered under {@code viewName}.
 *
 * <p>For every triple three constituents are created (subject, object, relation), each carrying its
 * glosses as attributes, plus a "subj" and an "obj" relation that run from the relation constituent
 * to the subject and object constituents, weighted with the triple's confidence.</p>
 *
 * @param ta the text annotation to read the text from and to add the view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation annotated = new Annotation(ta.text);
    pipeline.annotate(annotated);
    SpanLabelView openIeView = new SpanLabelView(viewName, ta);
    for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
            // Subject constituent and its attributes
            String subjectGloss = triple.subjectGloss();
            Constituent subjectConstituent = getConstituent(subjectGloss, triple.subjectTokenSpan(), sentence, ta);
            subjectConstituent.addAttribute("subjectGloss", subjectGloss);
            subjectConstituent.addAttribute("subjectLemmaGloss", triple.subjectLemmaGloss());
            subjectConstituent.addAttribute("subjectLink", triple.subjectLink());

            // Object constituent and its attributes
            String objectGloss = triple.objectGloss();
            Constituent objectConstituent = getConstituent(objectGloss, triple.objectTokenSpan(), sentence, ta);
            objectConstituent.addAttribute("objectGloss", objectGloss);
            objectConstituent.addAttribute("objectLemmaGloss", triple.objectLemmaGloss());
            objectConstituent.addAttribute("objectLink", triple.objectLink());

            // Relation constituent and its attributes
            String relationGloss = triple.relationGloss();
            Constituent relationConstituent = getConstituent(relationGloss, triple.relationTokenSpan(), sentence, ta);
            relationConstituent.addAttribute("relationGloss", relationGloss);
            relationConstituent.addAttribute("relationLemmaGloss", triple.relationLemmaGloss());

            // Link the relation to both arguments, then register everything with the view
            Relation toSubject = new Relation("subj", relationConstituent, subjectConstituent, triple.confidence);
            Relation toObject = new Relation("obj", relationConstituent, objectConstituent, triple.confidence);
            openIeView.addRelation(toSubject);
            openIeView.addRelation(toObject);
            openIeView.addConstituent(subjectConstituent);
            openIeView.addConstituent(objectConstituent);
            openIeView.addConstituent(relationConstituent);
        }
    }
    ta.addView(viewName, openIeView);
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 10 with RelationTriple

use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.

the class OpenIEITest method assertExtracted.

/**
 * Asserts that annotating {@code text} yields at least one relation triple whose string form is
 * the confidence "1.0", a tab, and then {@code expected}.
 *
 * @param expected the tab-separated triple that must be among the extractions
 * @param text the text to annotate
 */
public void assertExtracted(String expected, String text) {
    final String wanted = "1.0\t" + expected;
    Collection<RelationTriple> triples = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    boolean matched = triples.stream()
        .map(RelationTriple::toString)
        .anyMatch(rendered -> rendered.equals(wanted));
    assertTrue("The extraction (" + expected.replace("\t", "; ") + ") was not found in '" + text + "'", matched);
}
Also used : RelationTriple(edu.stanford.nlp.ie.util.RelationTriple)

Aggregations

RelationTriple (edu.stanford.nlp.ie.util.RelationTriple): 20; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 13; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 13; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 12; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 10; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 8; CorefChain (edu.stanford.nlp.coref.data.CorefChain): 8; Tree (edu.stanford.nlp.trees.Tree): 8; java.util (java.util): 7; Span (edu.stanford.nlp.ie.machinereading.structure.Span): 6; SentimentCoreAnnotations (edu.stanford.nlp.sentiment.SentimentCoreAnnotations): 6; edu.stanford.nlp.util (edu.stanford.nlp.util): 6; CoreMap (edu.stanford.nlp.util.CoreMap): 6; Collectors (java.util.stream.Collectors): 6; EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention): 5; RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention): 5; IndexedWord (edu.stanford.nlp.ling.IndexedWord): 5; Annotation (edu.stanford.nlp.pipeline.Annotation): 5; SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge): 5; RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations): 4