Use of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject in the CoreNLP project by stanfordnlp.
From the class ProtobufAnnotationSerializer, method fromProto.
/**
* Read a relation mention from its serialized form. Requires the containing sentence to be
* passed in along with the protocol buffer.
* @param proto The serialized relation mention.
* @param sentence The sentence this mention is attached to.
* @return The relation mention corresponding to the serialized object.
*/
/**
 * Reconstructs a {@link RelationMention} from its protocol buffer form. The containing
 * sentence must be supplied alongside the proto, since a relation mention is always
 * anchored to a sentence.
 *
 * @param proto The serialized relation mention.
 * @param sentence The sentence this mention is attached to.
 * @return The relation mention corresponding to the serialized object.
 */
private RelationMention fromProto(CoreNLPProtos.Relation proto, CoreMap sentence) {
  // Deserialize the arguments one by one, preserving their order in the proto.
  List<ExtractionObject> args = new ArrayList<>();
  for (int i = 0; i < proto.getArgCount(); i++) {
    args.add(fromProto(proto.getArg(i), sentence));
  }
  // Optional proto fields default to null when absent.
  String objectId = proto.hasObjectID() ? proto.getObjectID() : null;
  Span extent = proto.hasExtentStart() ? new Span(proto.getExtentStart(), proto.getExtentEnd()) : null;
  String type = proto.hasType() ? proto.getType() : null;
  String subtype = proto.hasSubtype() ? proto.getSubtype() : null;
  RelationMention mention = new RelationMention(objectId, sentence, extent, type, subtype, args);
  if (proto.hasSignature()) {
    mention.setSignature(proto.getSignature());
  }
  // Only set argument names when they were serialized (or when there are no args at all).
  if (proto.getArgNameCount() > 0 || proto.getArgCount() == 0) {
    mention.setArgNames(proto.getArgNameList());
  }
  return mention;
}
Use of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject in the CoreNLP project by stanfordnlp.
From the class RothCONLL04Reader, method readSentence.
/**
 * Reads one sentence from the Roth CoNLL04 column format: a run of token lines,
 * a blank line, a run of relation lines, then a second blank line terminating
 * the sentence. Tokens and full text are accumulated and set on the returned
 * Annotation at the end; entity and relation mentions are attached as they are read.
 *
 * @param docId The document id to record on the sentence.
 * @param lineIterator Iterator over the raw corpus lines; consumed up to and
 *                     including the second blank line of this sentence.
 * @return The sentence as an Annotation with tokens, text, entity mentions,
 *         relation mentions, and sentence id set.
 */
private Annotation readSentence(String docId, Iterator<String> lineIterator) {
Annotation sentence = new Annotation("");
sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>());
// we'll need to set things like the tokens and textContent after we've
// fully read the sentence
// contains the full text that we've read so far
StringBuilder textContent = new StringBuilder();
// how many tokens we've seen so far
int tokenCount = 0;
List<CoreLabel> tokens = new ArrayList<>();
// when we've seen two blank lines in a row, this sentence is over (one
// blank line separates the sentence and the relations
int numBlankLinesSeen = 0;
String sentenceID = null;
// keeps tracks of entities we've seen so far for use by relations
Map<String, EntityMention> indexToEntityMention = new HashMap<>();
while (lineIterator.hasNext() && numBlankLinesSeen < 2) {
String currentLine = lineIterator.next();
// the corpus escapes literal commas as the token "COMMA"; restore them
currentLine = currentLine.replace("COMMA", ",");
List<String> pieces = StringUtils.split(currentLine);
String identifier;
// the number of whitespace-separated fields determines the line type
int size = pieces.size();
switch(size) {
case // blank line between sentences or relations
1:
numBlankLinesSeen++;
break;
case // relation
3:
// fields: arg1 entity index, arg2 entity index, relation type
String type = pieces.get(2);
List<ExtractionObject> args = new ArrayList<>();
// NOTE(review): these lookups return null if the relation references an entity
// index that was never added (e.g. its NER tag was "O") — the getExtentTokenStart
// call below would then NPE. Presumably the corpus guarantees valid indices; confirm.
EntityMention entity1 = indexToEntityMention.get(pieces.get(0));
EntityMention entity2 = indexToEntityMention.get(pieces.get(1));
args.add(entity1);
args.add(entity2);
// relation extent spans from the start of arg1 to the end of arg2
Span span = new Span(entity1.getExtentTokenStart(), entity2.getExtentTokenEnd());
// identifier = "relation" + sentenceID + "-" + sentence.getAllRelations().size();
identifier = RelationMention.makeUniqueId();
RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
AnnotationUtils.addRelationMention(sentence, relationMention);
break;
case // token
9:
/*
* Roth token lines look like this:
*
* 19 Peop 9 O NNP/NNP Jamal/Ghosheh O O O
*/
// Entities may be multiple words joined by '/'; we split these up
List<String> words = StringUtils.split(pieces.get(5), "/");
//List<String> postags = StringUtils.split(pieces.get(4),"/");
String text = StringUtils.join(words, " ");
// entity id combines the sentence id (field 0) and token index (field 2)
identifier = "entity" + pieces.get(0) + '-' + pieces.get(2);
// entity type of the word/expression
String nerTag = getNormalizedNERTag(pieces.get(1));
// field 0 of the first token line doubles as the sentence id
if (sentenceID == null)
sentenceID = pieces.get(0);
// only non-"O" tags denote an entity mention worth recording
if (!nerTag.equals("O")) {
Span extentSpan = new Span(tokenCount, tokenCount + words.size());
// Temporarily sets the head span to equal the extent span.
// This is so the entity has a head (in particular, getValue() works) even if preprocessSentences isn't called.
// The head span is later modified if preprocessSentences is called.
EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
AnnotationUtils.addEntityMention(sentence, entity);
// we can get by using these indices as strings since we only use them
// as a hash key
String index = pieces.get(2);
indexToEntityMention.put(index, entity);
}
// int i =0;
// one CoreLabel per sub-word of a possibly multi-word entity expression
for (String word : words) {
CoreLabel label = new CoreLabel();
label.setWord(word);
//label.setTag(postags.get(i));
label.set(CoreAnnotations.TextAnnotation.class, word);
label.set(CoreAnnotations.ValueAnnotation.class, word);
// we don't set TokenBeginAnnotation or TokenEndAnnotation since we're
// not keeping track of character offsets
tokens.add(label);
// i++;
}
textContent.append(text);
textContent.append(' ');
tokenCount += words.size();
break;
}
}
// now that the whole sentence has been read, finalize text, tokens and id
sentence.set(CoreAnnotations.TextAnnotation.class, textContent.toString());
sentence.set(CoreAnnotations.ValueAnnotation.class, textContent.toString());
sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
sentence.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceID);
return sentence;
}
Use of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject in the CoreNLP project by stanfordnlp.
From the class AceReader, method convertAceRelationMention.
/**
 * Converts an ACE relation mention into a machine-reading {@link RelationMention}.
 * The two arguments must already be present in {@code entityMap} (i.e. within the
 * same sentence) and must carry the roles "arg-1" and "arg-2" in that order.
 *
 * @param aceRelationMention The ACE relation mention to convert.
 * @param docId The id of the containing document (currently unused here).
 * @param sentence The sentence the converted mention is attached to.
 * @param entityMap Maps ACE entity mention ids to already-converted EntityMentions.
 * @return The converted relation mention, or null if any argument is missing or
 *         the argument roles are not the expected arg-1/arg-2 pair.
 */
private RelationMention convertAceRelationMention(AceRelationMention aceRelationMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap) {
  List<AceRelationMentionArgument> args = Arrays.asList(aceRelationMention.getArgs());
  List<ExtractionObject> convertedArgs = new ArrayList<>();
  List<String> argNames = new ArrayList<>();
  // Arguments arrive already in semantic order; that ordering is preserved below.
  // Track the token extent covering all arguments.
  int left = Integer.MAX_VALUE;
  int right = Integer.MIN_VALUE;
  for (AceRelationMentionArgument arg : args) {
    ExtractionObject converted = entityMap.get(arg.getContent().getId());
    if (converted == null) {
      logger.severe("READER ERROR: Failed to find relation argument with id " + arg.getContent().getId());
      logger.severe("This happens because a few relation mentions illegally span multiple sentences. Will ignore this mention.");
      return null;
    }
    convertedArgs.add(converted);
    argNames.add(arg.getRole());
    left = Math.min(left, converted.getExtentTokenStart());
    right = Math.max(right, converted.getExtentTokenEnd());
  }
  // ACE relations are strictly binary, with roles "arg-1" then "arg-2".
  boolean validArgs = argNames.size() == 2
      && argNames.get(0).equalsIgnoreCase("arg-1")
      && argNames.get(1).equalsIgnoreCase("arg-2");
  if (!validArgs) {
    logger.severe("READER ERROR: Invalid succession of arguments in relation mention: " + argNames);
    logger.severe("ACE relations must have two arguments. Will ignore this mention.");
    return null;
  }
  return new RelationMention(aceRelationMention.getId(), sentence, new Span(left, right), aceRelationMention.getParent().getType(), aceRelationMention.getParent().getSubtype(), convertedArgs, null);
}
Use of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject in the CoreNLP project by stanfordnlp.
From the class AceReader, method convertAceEventMention.
/**
 * Converts an ACE event mention into a machine-reading {@link EventMention}.
 * Every role argument must already be present in {@code entityMap} (i.e. within
 * the same sentence); the event anchor is wrapped in a fresh ExtractionObject
 * whose span is shifted by {@code tokenOffset} into sentence-local coordinates.
 *
 * @param aceEventMention The ACE event mention to convert.
 * @param docId The id of the containing document (currently unused here).
 * @param sentence The sentence the converted mention is attached to.
 * @param entityMap Maps ACE entity mention ids to already-converted EntityMentions.
 * @param tokenOffset Token index of the sentence start, subtracted from the
 *                    document-level anchor offsets.
 * @return The converted event mention, or null if any role argument is missing.
 */
private EventMention convertAceEventMention(AceEventMention aceEventMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap, int tokenOffset) {
  Set<String> roleSet = aceEventMention.getRoles();
  // Copy into a list so the role iteration order is fixed for args and roles alike.
  List<String> roles = new ArrayList<>(roleSet);
  List<ExtractionObject> convertedArgs = new ArrayList<>();
  // Track the token extent covering all arguments.
  // NOTE(review): if the event has no roles, left/right remain MAX_VALUE/MIN_VALUE
  // and an inverted Span is created — presumably ACE events always have at least
  // one argument; confirm against the corpus.
  int left = Integer.MAX_VALUE;
  int right = Integer.MIN_VALUE;
  for (String role : roles) {
    AceEntityMention arg = aceEventMention.getArg(role);
    ExtractionObject o = entityMap.get(arg.getId());
    if (o == null) {
      logger.severe("READER ERROR: Failed to find event argument with id " + arg.getId());
      logger.severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
      return null;
    }
    convertedArgs.add(o);
    left = Math.min(left, o.getExtentTokenStart());
    right = Math.max(right, o.getExtentTokenEnd());
  }
  AceCharSeq anchor = aceEventMention.getAnchor();
  // Anchor span is converted from document-level to sentence-local token indices;
  // the end index is exclusive, hence the +1.
  ExtractionObject anchorObject = new ExtractionObject(aceEventMention.getId() + "-anchor", sentence, new Span(anchor.getTokenStart() - tokenOffset, anchor.getTokenEnd() + 1 - tokenOffset), "ANCHOR", null);
  return new EventMention(aceEventMention.getId(), sentence, new Span(left, right), aceEventMention.getParent().getType(), aceEventMention.getParent().getSubtype(), anchorObject, convertedArgs, roles);
}
Aggregations