Use of edu.stanford.nlp.pipeline.Annotation in the stanfordnlp/CoreNLP project:
class NumberSequenceClassifier, method buildSentenceFromTokens.
/**
 * Builds a sentence-level CoreMap wrapping the given tokens, recovering the
 * sentence text and character offsets.
 *
 * The sentence text is recovered from the tokens themselves:
 * a) from the OriginalTextAnnotation of each token, if present;
 * b) otherwise from the TextAnnotation of each token — in this case the stored
 *    character offsets refer to the original text, so they are re-aligned to
 *    the text rebuilt from the normalized tokens.
 *
 * @param tokens the tokens forming the sentence; must be non-empty
 * @param characterOffsetStart sentence begin offset, or null to derive it from the first token
 * @param characterOffsetEnd sentence end offset, or null to derive it from the last token
 * @return an Annotation carrying the text, tokens, character offsets, and token span
 * @throws IllegalArgumentException if tokens is empty
 * @throws RuntimeException if neither OriginalTextAnnotation nor TextAnnotation is set on the tokens
 */
private static CoreMap buildSentenceFromTokens(List<CoreLabel> tokens, Integer characterOffsetStart, Integer characterOffsetEnd) {
  if (tokens.isEmpty()) {
    // fail fast with a clear message instead of an IndexOutOfBoundsException further down
    throw new IllegalArgumentException("ERROR: cannot build a sentence from an empty token list!");
  }
  boolean adjustCharacterOffsets = false;
  // try to recover the text from the original (pre-normalization) token spellings
  String text = buildText(tokens, CoreAnnotations.OriginalTextAnnotation.class);
  if (text == null) {
    text = buildText(tokens, CoreAnnotations.TextAnnotation.class);
    if (text == null) {
      throw new RuntimeException("ERROR: to use SUTime, sentences must have TextAnnotation set, or the individual tokens must have OriginalTextAnnotation or TextAnnotation set!");
    }
    // the stored character offsets point into the original text, so they must be
    // re-aligned to the text just built from the normalized tokens
    adjustCharacterOffsets = true;
  }
  // make sure token character offsets are aligned with the recovered text
  List<CoreLabel> tokenSequence = copyTokens(tokens, adjustCharacterOffsets, false);
  Annotation newSentence = new Annotation(text);
  newSentence.set(CoreAnnotations.TokensAnnotation.class, tokenSequence);
  if (!adjustCharacterOffsets && characterOffsetStart != null && characterOffsetEnd != null) {
    // caller-supplied offsets are trustworthy only when we did not re-align tokens
    newSentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, characterOffsetStart);
    newSentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, characterOffsetEnd);
  } else {
    // derive the sentence span from the first/last token offsets
    int tokenCharStart = tokenSequence.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int tokenCharEnd = tokenSequence.get(tokenSequence.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    newSentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, tokenCharStart);
    newSentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, tokenCharEnd);
  }
  // default token offsets: the sentence spans all of its tokens
  newSentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
  newSentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenSequence.size());
  return newSentence;
}
Use of edu.stanford.nlp.pipeline.Annotation in the stanfordnlp/CoreNLP project:
class MachineReading, method makeDataSets.
/**
 * Builds the train/test dataset pairs, either as a single train/test split or as
 * k-fold cross-validation partitions, and sizes the predictions array to match.
 * Sentences from the optional auxiliary dataset are always appended to the
 * training side only.
 */
@SuppressWarnings("unchecked")
private void makeDataSets(Annotation training, Annotation testing, Annotation auxDataset) {
  if (MachineReadingProperties.crossValidate) {
    assert (MachineReadingProperties.kfold > 1);
    datasets = new Pair[MachineReadingProperties.kfold];
    AnnotationUtils.shuffleSentences(training);
    int totalSentences = AnnotationUtils.sentenceCount(training);
    for (int fold = 0; fold < MachineReadingProperties.kfold; fold++) {
      // contiguous slice [begin, end) of the shuffled sentences becomes this fold's test set
      int begin = totalSentences * fold / MachineReadingProperties.kfold;
      int end = totalSentences * (fold + 1) / MachineReadingProperties.kfold;
      MachineReadingProperties.logger.info("Creating partition #" + fold + " using offsets [" + begin + ", " + end + ") out of " + totalSentences);
      Annotation foldTrain = new Annotation("");
      Annotation foldTest = new Annotation("");
      for (int i = 0; i < totalSentences; i++) {
        if (i >= begin && i < end) {
          AnnotationUtils.addSentence(foldTest, AnnotationUtils.getSentence(training, i));
        } else {
          AnnotationUtils.addSentence(foldTrain, AnnotationUtils.getSentence(training, i));
        }
      }
      // for learning curve experiments, optionally train on only a fraction of the data
      foldTrain = keepPercentage(foldTrain, MachineReadingProperties.percentageOfTrain);
      if (auxDataset != null) {
        for (int j = 0; j < AnnotationUtils.sentenceCount(auxDataset); j++) {
          AnnotationUtils.addSentence(foldTrain, AnnotationUtils.getSentence(auxDataset, j));
        }
      }
      datasets[fold] = new Pair<>(foldTrain, foldTest);
    }
    predictions = new Annotation[3][MachineReadingProperties.kfold];
  } else {
    datasets = new Pair[1];
    Annotation trainingEnhanced = training;
    if (auxDataset != null) {
      // merge training and auxiliary sentences into a fresh annotation so that
      // neither input dataset is mutated
      trainingEnhanced = new Annotation(training.get(TextAnnotation.class));
      for (int i = 0; i < AnnotationUtils.sentenceCount(training); i++) {
        AnnotationUtils.addSentence(trainingEnhanced, AnnotationUtils.getSentence(training, i));
      }
      for (int j = 0; j < AnnotationUtils.sentenceCount(auxDataset); j++) {
        AnnotationUtils.addSentence(trainingEnhanced, AnnotationUtils.getSentence(auxDataset, j));
      }
    }
    datasets[0] = new Pair<>(trainingEnhanced, testing);
    predictions = new Annotation[3][1];
  }
}
Use of edu.stanford.nlp.pipeline.Annotation in the stanfordnlp/CoreNLP project:
class AnnotationUtils, method deepMentionCopy.
/**
 * Creates a deep copy of the given dataset with new lists for all mentions
 * (entity, relation, event).
 *
 * @param dataset the dataset to copy; each of its sentences must be an Annotation
 * @return a new Annotation containing deep-mention copies of all sentences
 * @throws RuntimeException if any sentence is not an Annotation instance
 */
public static Annotation deepMentionCopy(CoreMap dataset) {
  Annotation copy = new Annotation("");
  List<CoreMap> copiedSentences = new ArrayList<>();
  List<CoreMap> originalSentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  if (originalSentences != null) {
    for (CoreMap sentence : originalSentences) {
      if (sentence instanceof Annotation) {
        copiedSentences.add(sentenceDeepMentionCopy((Annotation) sentence));
      } else {
        throw new RuntimeException("ERROR: Sentences must instantiate Annotation!");
      }
    }
  }
  addSentences(copy, copiedSentences);
  return copy;
}
Use of edu.stanford.nlp.pipeline.Annotation in the stanfordnlp/CoreNLP project:
class AnnotationUtils, method sentenceDeepMentionCopy.
/**
 * Deep copy of the sentence: we create new entity/relation/event lists here.
 * However, we do not deep copy the ExtractionObjects themselves!
 * Tokens, parse trees, dependency graphs, and the doc id are shared (shallow)
 * with the original sentence.
 *
 * @param sentence the sentence to copy
 * @return a new Annotation sharing the sentence's linguistic annotations but
 *         holding fresh mention lists
 */
public static Annotation sentenceDeepMentionCopy(Annotation sentence) {
  Annotation copy = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
  // shallow-share the linguistic annotations: they are never mutated downstream
  copy.set(CoreAnnotations.TokensAnnotation.class, sentence.get(CoreAnnotations.TokensAnnotation.class));
  copy.set(TreeCoreAnnotations.TreeAnnotation.class, sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  copy.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class));
  copy.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
  copy.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class));
  copy.set(CoreAnnotations.DocIDAnnotation.class, sentence.get(CoreAnnotations.DocIDAnnotation.class));
  // fresh list per mention type, so callers may add/remove mentions without
  // touching the original sentence (the mention objects themselves are shared)
  List<EntityMention> entityMentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  if (entityMentions != null) {
    copy.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<>(entityMentions));
  }
  List<RelationMention> relationMentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
  if (relationMentions != null) {
    copy.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, new ArrayList<>(relationMentions));
  }
  List<EventMention> eventMentions = sentence.get(MachineReadingAnnotations.EventMentionsAnnotation.class);
  if (eventMentions != null) {
    copy.set(MachineReadingAnnotations.EventMentionsAnnotation.class, new ArrayList<>(eventMentions));
  }
  return copy;
}
Use of edu.stanford.nlp.pipeline.Annotation in the stanfordnlp/CoreNLP project:
class OpenIEDemo, method main.
/**
 * Demo: runs the CoreNLP OpenIE pipeline over a document (from the file named by
 * args[0], or a built-in example) and prints, per sentence, the enhanced dependency
 * graph, the extracted relation triples, and the clause splits.
 *
 * @param args optional: args[0] is a path to a text file to annotate
 * @throws Exception on I/O or pipeline failures
 */
public static void main(String[] args) throws Exception {
  // Create the Stanford CoreNLP pipeline
  Properties props = PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // Build the standalone OpenIE instance ONCE, outside the sentence loop:
  // its constructor loads models, so constructing it per sentence (as the
  // original code did) repeats that expensive work for every sentence.
  OpenIE openIE = new OpenIE(props);
  // Annotate an example document.
  String text;
  if (args.length > 0) {
    text = IOUtils.slurpFile(args[0]);
  } else {
    text = "Obama was born in Hawaii. He is our president.";
  }
  Annotation doc = new Annotation(text);
  pipeline.annotate(doc);
  // Loop over sentences in the document
  int sentNo = 0;
  for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
    System.out.println("Sentence #" + ++sentNo + ": " + sentence.get(CoreAnnotations.TextAnnotation.class));
    // Print SemanticGraph
    System.out.println(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST));
    // Get the OpenIE triples for the sentence
    Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    // Print the triples
    for (RelationTriple triple : triples) {
      System.out.println(triple.confidence + "\t" + triple.subjectLemmaGloss() + "\t" + triple.relationLemmaGloss() + "\t" + triple.objectLemmaGloss());
    }
    // Alternately, to only run e.g., the clause splitter:
    List<SentenceFragment> clauses = openIE.clausesInSentence(sentence);
    for (SentenceFragment clause : clauses) {
      System.out.println(clause.parseTree.toString(SemanticGraph.OutputFormat.LIST));
    }
    System.out.println();
  }
}
Aggregations