Use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
From the class RelationTripleSegmenter, method extract.
/**
* Extract the nominal patterns from this sentence.
*
* @see RelationTripleSegmenter#NOUN_TOKEN_PATTERNS
* @see RelationTripleSegmenter#NOUN_DEPENDENCY_PATTERNS
*
* @param parse The parse tree of the sentence to annotate.
* @param tokens The tokens of the sentence to annotate.
* @return A list of {@link RelationTriple}s. Note that these do not have an associated tree with them.
*/
@SuppressWarnings("unchecked")
public List<RelationTriple> extract(SemanticGraph parse, List<CoreLabel> tokens) {
List<RelationTriple> extractions = new ArrayList<>();
Set<Triple<Span, String, Span>> alreadyExtracted = new HashSet<>();
//
for (TokenSequencePattern tokenPattern : NOUN_TOKEN_PATTERNS) {
TokenSequenceMatcher tokenMatcher = tokenPattern.matcher(tokens);
while (tokenMatcher.find()) {
boolean missingPrefixBe;
boolean missingSuffixOf = false;
// Create subject
List<? extends CoreMap> subject = tokenMatcher.groupNodes("$subject");
Span subjectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) subject.get(0)).index() - 1, ((CoreLabel) subject.get(subject.size() - 1)).index()));
List<CoreLabel> subjectTokens = new ArrayList<>();
for (int i : subjectSpan) {
subjectTokens.add(tokens.get(i));
}
// Create object
List<? extends CoreMap> object = tokenMatcher.groupNodes("$object");
Span objectSpan = Util.extractNER(tokens, Span.fromValues(((CoreLabel) object.get(0)).index() - 1, ((CoreLabel) object.get(object.size() - 1)).index()));
if (Span.overlaps(subjectSpan, objectSpan)) {
continue;
}
List<CoreLabel> objectTokens = new ArrayList<>();
for (int i : objectSpan) {
objectTokens.add(tokens.get(i));
}
// Create relation
if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
List<CoreLabel> relationTokens = new ArrayList<>();
// (add the 'be')
missingPrefixBe = true;
// (add a complement to the 'be')
List<? extends CoreMap> beofComp = tokenMatcher.groupNodes("$beof_comp");
if (beofComp != null) {
// (add the complement
for (CoreMap token : beofComp) {
if (token instanceof CoreLabel) {
relationTokens.add((CoreLabel) token);
} else {
relationTokens.add(new CoreLabel(token));
}
}
// (add the 'of')
missingSuffixOf = true;
}
// Add extraction
String relationGloss = StringUtils.join(relationTokens.stream().map(CoreLabel::word), " ");
if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
RelationTriple extraction = new RelationTriple(subjectTokens, relationTokens, objectTokens);
//noinspection ConstantConditions
extraction.isPrefixBe(missingPrefixBe);
extraction.isSuffixOf(missingSuffixOf);
extractions.add(extraction);
alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
}
}
}
//
for (SemgrexPattern semgrex : NOUN_DEPENDENCY_PATTERNS) {
SemgrexMatcher matcher = semgrex.matcher(parse);
while (matcher.find()) {
boolean missingPrefixBe = false;
boolean missingSuffixBe = false;
boolean istmod = false;
// Get relaux if applicable
String relaux = matcher.getRelnString("relaux");
String ignoredArc = relaux;
if (ignoredArc == null) {
ignoredArc = matcher.getRelnString("arc");
}
// Create subject
IndexedWord subject = matcher.getNode("subject");
List<IndexedWord> subjectTokens = new ArrayList<>();
Span subjectSpan;
if (subject.ner() != null && !"O".equals(subject.ner())) {
subjectSpan = Util.extractNER(tokens, Span.fromValues(subject.index() - 1, subject.index()));
for (int i : subjectSpan) {
subjectTokens.add(new IndexedWord(tokens.get(i)));
}
} else {
subjectTokens = getValidChunk(parse, subject, VALID_SUBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(subject));
subjectSpan = Util.tokensToSpan(subjectTokens);
}
// Create object
IndexedWord object = matcher.getNode("object");
List<IndexedWord> objectTokens = new ArrayList<>();
Span objectSpan;
if (object.ner() != null && !"O".equals(object.ner())) {
objectSpan = Util.extractNER(tokens, Span.fromValues(object.index() - 1, object.index()));
for (int i : objectSpan) {
objectTokens.add(new IndexedWord(tokens.get(i)));
}
} else {
objectTokens = getValidChunk(parse, object, VALID_OBJECT_ARCS, Optional.ofNullable(ignoredArc), true).orElse(Collections.singletonList(object));
objectSpan = Util.tokensToSpan(objectTokens);
}
// Check that the pair is valid
if (Span.overlaps(subjectSpan, objectSpan)) {
// We extracted an identity
continue;
}
if (subjectSpan.end() == objectSpan.start() - 1 && (tokens.get(subjectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(subjectSpan.end()).tag()))) {
// We're straddling a clause
continue;
}
if (objectSpan.end() == subjectSpan.start() - 1 && (tokens.get(objectSpan.end()).word().matches("[\\.,:;\\('\"]") || "CC".equals(tokens.get(objectSpan.end()).tag()))) {
// We're straddling a clause
continue;
}
// Get any prepositional edges
String expected = relaux == null ? "" : relaux.substring(relaux.indexOf(":") + 1).replace("_", " ");
IndexedWord prepWord = null;
// (these usually come from the object)
boolean prepositionIsPrefix = false;
for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(object)) {
if (edge.getRelation().toString().equals("case")) {
prepWord = edge.getDependent();
}
}
// (...but sometimes from the subject)
if (prepWord == null) {
for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(subject)) {
if (edge.getRelation().toString().equals("case")) {
prepositionIsPrefix = true;
prepWord = edge.getDependent();
}
}
}
List<IndexedWord> prepChunk = Collections.EMPTY_LIST;
if (prepWord != null && !expected.equals("tmod")) {
Optional<List<IndexedWord>> optionalPrepChunk = getValidChunk(parse, prepWord, Collections.singleton("mwe"), Optional.empty(), true);
if (!optionalPrepChunk.isPresent()) {
continue;
}
prepChunk = optionalPrepChunk.get();
Collections.sort(prepChunk, (a, b) -> {
double val = a.pseudoPosition() - b.pseudoPosition();
if (val < 0) {
return -1;
}
if (val > 0) {
return 1;
} else {
return 0;
}
});
// ascending sort
}
// Get the relation
if (subjectTokens.size() > 0 && objectTokens.size() > 0) {
LinkedList<IndexedWord> relationTokens = new LinkedList<>();
IndexedWord relNode = matcher.getNode("relation");
if (relNode != null) {
// Case: we have a grounded relation span
// (add the relation)
relationTokens.add(relNode);
// (add any prepositional case markings)
if (prepositionIsPrefix) {
// We're almost certainly missing a suffix 'be'
missingSuffixBe = true;
for (int i = prepChunk.size() - 1; i >= 0; --i) {
relationTokens.addFirst(prepChunk.get(i));
}
} else {
relationTokens.addAll(prepChunk);
}
if (expected.equalsIgnoreCase("tmod")) {
istmod = true;
}
} else {
// (mark it as missing a preceding 'be'
if (!expected.equals("poss")) {
missingPrefixBe = true;
}
// (add any prepositional case markings)
if (prepositionIsPrefix) {
for (int i = prepChunk.size() - 1; i >= 0; --i) {
relationTokens.addFirst(prepChunk.get(i));
}
} else {
relationTokens.addAll(prepChunk);
}
if (expected.equalsIgnoreCase("tmod")) {
istmod = true;
}
// (some fine-tuning)
if (allowNominalsWithoutNER && "of".equals(expected)) {
// prohibit things like "conductor of electricity" -> "conductor; be of; electricity"
continue;
}
}
// Add extraction
String relationGloss = StringUtils.join(relationTokens.stream().map(IndexedWord::word), " ");
if (!alreadyExtracted.contains(Triple.makeTriple(subjectSpan, relationGloss, objectSpan))) {
RelationTriple extraction = new RelationTriple(subjectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), relationTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()), objectTokens.stream().map(IndexedWord::backingLabel).collect(Collectors.toList()));
extraction.istmod(istmod);
extraction.isPrefixBe(missingPrefixBe);
extraction.isSuffixBe(missingSuffixBe);
extractions.add(extraction);
alreadyExtracted.add(Triple.makeTriple(subjectSpan, relationGloss, objectSpan));
}
}
}
}
}
//
// Filter downward polarity extractions
//
Iterator<RelationTriple> iter = extractions.iterator();
while (iter.hasNext()) {
RelationTriple term = iter.next();
boolean shouldRemove = true;
for (CoreLabel token : term) {
if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
shouldRemove = false;
}
}
if (shouldRemove) {
// Don't extract things in downward polarity contexts.
iter.remove();
}
}
// Return
return extractions;
}
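
A minimal sketch of how this extractor might be driven end to end (the sentence text, annotator list, and choice of dependency annotation are illustrative assumptions, not from the original source): annotate a document, then hand each sentence's dependency graph and token list to extract.

// Hypothetical driver for RelationTripleSegmenter.extract; not part of CoreNLP itself.
Properties props = new Properties();
props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse,natlog");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
Annotation doc = new Annotation("Barack Obama, the 44th president, was born in Hawaii.");
pipeline.annotate(doc);
RelationTripleSegmenter segmenter = new RelationTripleSegmenter();
for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  SemanticGraph parse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
  // Print each nominal triple as tab-separated subject, relation, object glosses.
  for (RelationTriple triple : segmenter.extract(parse, tokens)) {
    System.out.println(triple.subjectGloss() + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
  }
}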
Use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
From the class RelationTripleSegmenter, method segment.
/**
* <p>
* Try to segment this sentence as a relation triple.
* This sentence must already match one of a few strict patterns for a valid OpenIE extraction.
* If it does not, then no relation triple is created.
* That is, this is <b>not</b> a relation extractor; it is just a utility to segment what is already a
* (subject, relation, object) triple into these three parts.
* </p>
*
* <p>
* This method will attempt to use both the verb-centric patterns and the ACL-centric patterns.
* </p>
*
* @param parse The sentence to process, as a dependency tree.
* @param confidence An optional confidence to pass on to the relation triple.
* @param consumeAll if true, force the entire parse to be consumed by the pattern.
* @return A relation triple, if this sentence matches one of the patterns of a valid relation triple.
*/
public Optional<RelationTriple> segment(SemanticGraph parse, Optional<Double> confidence, boolean consumeAll) {
  // Copy and clean the tree
  parse = new SemanticGraph(parse);
  // Special case "there is <something>". Arguably this is a job for the clause splitter, but the <something> is
  // sometimes not _really_ its own clause
  IndexedWord root = parse.getFirstRoot();
  if ((root.lemma() != null && root.lemma().equalsIgnoreCase("be")) || (root.lemma() == null && ("is".equalsIgnoreCase(root.word()) || "are".equalsIgnoreCase(root.word()) || "were".equalsIgnoreCase(root.word()) || "be".equalsIgnoreCase(root.word())))) {
    // Check for the "there is" construction
    boolean foundThere = false;
    // an indicator for there being too much nonsense hanging off of the root
    boolean tooManyArcs = false;
    Optional<SemanticGraphEdge> newRoot = Optional.empty();
    for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(root)) {
      if (edge.getRelation().toString().equals("expl") && edge.getDependent().word().equalsIgnoreCase("there")) {
        foundThere = true;
      } else if (edge.getRelation().toString().equals("nsubj")) {
        newRoot = Optional.of(edge);
      } else {
        tooManyArcs = true;
      }
    }
    // Split off the "there is"
    if (foundThere && newRoot.isPresent() && !tooManyArcs) {
      ClauseSplitterSearchProblem.splitToChildOfEdge(parse, newRoot.get());
    }
  }
  // Run the patterns
  Optional<RelationTriple> extraction = segmentVerb(parse, confidence, consumeAll);
  if (!extraction.isPresent()) {
    extraction = segmentACL(parse, confidence, consumeAll);
  }
  // Suppress extractions whose tokens all sit in downward-polarity contexts
  if (extraction.isPresent()) {
    boolean shouldRemove = true;
    for (CoreLabel token : extraction.get()) {
      if (token.get(NaturalLogicAnnotations.PolarityAnnotation.class) == null || !token.get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards()) {
        shouldRemove = false;
      }
    }
    if (shouldRemove) {
      return Optional.empty();
    }
  }
  // Return
  return extraction;
}
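
A hedged usage sketch for segment: it expects a graph that already looks like a single (subject, relation, object) clause, so clauseParse below is assumed to be the SemanticGraph of one clause, for example a fragment produced by the clause splitter.

// clauseParse: a SemanticGraph covering exactly one clause (assumption).
RelationTripleSegmenter segmenter = new RelationTripleSegmenter();
Optional<RelationTriple> maybeTriple = segmenter.segment(clauseParse, Optional.of(1.0), /* consumeAll = */ true);
maybeTriple.ifPresent(t ->
    System.out.println(t.subjectGloss() + " | " + t.relationGloss() + " | " + t.objectGloss()));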
Use of edu.stanford.nlp.ie.util.RelationTriple in project Info-Evaluation by TechnionYP5777.
From the class AnalyzeParagragh, method InteractiveReasonFinding.
// This method makes the analysis process interactive: the user is shown candidate reasons
// to choose from and picks the most fitting one.
public LinkedList<ReasonPair> InteractiveReasonFinding() {
  LinkedList<ReasonPair> $ = new LinkedList<>();
  final Properties props = new Properties();
  props.put("annotators", "tokenize,ssplit,pos,regexner,parse,lemma,natlog,openie");
  final StanfordCoreNLP pipeLine = new StanfordCoreNLP(props);
  // inputText will be the text to evaluate in this example
  final String inputText = input + "";
  final Annotation document = new Annotation(inputText);
  // Finally we use the pipeline to annotate the document we created
  pipeLine.annotate(document);
  for (final CoreMap sentence : document.get(SentencesAnnotation.class))
    for (RelationTriple ¢ : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class))
      $.add(new ReasonPair(¢.confidence, ¢.relationGloss() + " " + ¢.objectGloss()));
  return $;
}
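
A hypothetical caller, for context (the AnalyzeParagragh constructor shape is an assumption, not confirmed by the source; ReasonPair is printed via its own toString):

// Hypothetical usage sketch.
AnalyzeParagragh analyzer = new AnalyzeParagragh("He resigned because of the scandal.");
for (ReasonPair pair : analyzer.InteractiveReasonFinding()) {
  System.out.println(pair); // show each candidate reason, with its confidence, for the user to pick
}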
Use of edu.stanford.nlp.ie.util.RelationTriple in project cogcomp-nlp by CogComp.
From the class StanfordOpenIEHandler, method addView.
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
  Annotation document = new Annotation(ta.text);
  pipeline.annotate(document);
  SpanLabelView vu = new SpanLabelView(viewName, ta);
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    for (RelationTriple triple : triples) {
      Constituent subject = getConstituent(triple.subjectGloss(), triple.subjectTokenSpan(), sentence, ta);
      subject.addAttribute("subjectGloss", triple.subjectGloss());
      subject.addAttribute("subjectLemmaGloss", triple.subjectLemmaGloss());
      subject.addAttribute("subjectLink", triple.subjectLink());
      Constituent object = getConstituent(triple.objectGloss(), triple.objectTokenSpan(), sentence, ta);
      object.addAttribute("objectGloss", triple.objectGloss());
      object.addAttribute("objectLemmaGloss", triple.objectLemmaGloss());
      object.addAttribute("objectLink", triple.objectLink());
      Constituent relation = getConstituent(triple.relationGloss(), triple.relationTokenSpan(), sentence, ta);
      relation.addAttribute("relationGloss", triple.relationGloss());
      relation.addAttribute("relationLemmaGloss", triple.relationLemmaGloss());
      Relation subj = new Relation("subj", relation, subject, triple.confidence);
      Relation obj = new Relation("obj", relation, object, triple.confidence);
      vu.addRelation(subj);
      vu.addRelation(obj);
      vu.addConstituent(subject);
      vu.addConstituent(object);
      vu.addConstituent(relation);
    }
  }
  ta.addView(viewName, vu);
}
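
Note the design of the resulting view: each triple becomes three Constituents (subject, relation, object), and the relation constituent is linked to the other two by Relations labeled "subj" and "obj", each carrying the OpenIE confidence as its score. Downstream code can therefore recover a full triple, and its confidence, by following the relation constituent's outgoing edges.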
Use of edu.stanford.nlp.ie.util.RelationTriple in project CoreNLP by stanfordnlp.
From the class OpenIEITest, method assertExtracted.
public void assertExtracted(String expected, String text) {
  boolean found = false;
  Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
  for (RelationTriple extraction : extractions) {
    if (extraction.toString().equals("1.0\t" + expected)) {
      found = true;
    }
  }
  assertTrue("The extraction (" + expected.replace("\t", "; ") + ") was not found in '" + text + "'", found);
}
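
An illustrative invocation (the sentence and expected triple below are made up): because the helper prepends "1.0\t" and compares against RelationTriple.toString(), which renders the confidence followed by the tab-separated subject, relation, and object glosses, the expected argument is just those three glosses joined by tabs.

// Illustrative only; any sentence with a known extraction would do.
assertExtracted("cats\tplay with\tyarn", "Cats play with yarn.");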