Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
The class ResultsPrinter, method printResults.
/**
 * Given a set of sentences with annotations from an information extractor class, and the same sentences
 * with gold-standard annotations, print results on how the information extraction performed.
 */
public String printResults(CoreMap goldStandard, CoreMap extractorOutput) {
  StringWriter sw = new StringWriter();
  PrintWriter pw = new PrintWriter(sw, true);
  List<CoreMap> mutableGold = new ArrayList<>();
  mutableGold.addAll(goldStandard.get(CoreAnnotations.SentencesAnnotation.class));
  List<CoreMap> mutableOutput = new ArrayList<>();
  mutableOutput.addAll(extractorOutput.get(CoreAnnotations.SentencesAnnotation.class));
  printResults(pw, mutableGold, mutableOutput);
  return sw.getBuffer().toString();
}
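The two addAll calls make defensive, mutable copies of the SentencesAnnotation lists before delegating to the list-based printResults overload, so that overload can reorder or align sentences without mutating the original annotations. Below is a minimal sketch of the same copying pattern on a pipeline-produced document; the pipeline configuration and sample text are illustrative assumptions, not code from the repository.

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class MutableSentencesSketch {
  public static void main(String[] args) {
    // Illustrative pipeline: tokenize and sentence-split only.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("Stanford is in California. It was founded in 1885.");
    pipeline.annotate(doc);

    // Copy into a fresh ArrayList, as printResults does, so the copy can be
    // sorted or reordered without touching the document's own sentence list.
    List<CoreMap> mutableSentences = new ArrayList<>(doc.get(CoreAnnotations.SentencesAnnotation.class));
    System.out.println("Sentences copied: " + mutableSentences.size());
  }
}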
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
The class NumberSequenceClassifier, method classifyWithSUTime.
// todo [cdm, 2013]: Where does this call NumberNormalizer? Is it the call buried in SUTime's TimeExpressionExtractorImpl?
/**
 * Modular classification using NumberNormalizer for numbers, SUTime for date/time.
 * Note: this is slower than classifyOld because it runs multiple passes
 * over the tokens (one for numbers and dates, and others for money and ordinals).
 * However, the slowdown is not substantial since the passes are fast. Plus,
 * the code is much cleaner than before...
 * @param tokenSequence
 */
private List<CoreLabel> classifyWithSUTime(List<CoreLabel> tokenSequence, final CoreMap document, final CoreMap sentence) {
  //
  // default every unlabeled token to the background symbol
  //
  for (CoreLabel token : tokenSequence) {
    if (token.get(CoreAnnotations.AnswerAnnotation.class) == null)
      token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
  }

  //
  // run SUTime
  // note: SUTime requires TextAnnotation to be set at document/sent level and
  // that the Character*Offset annotations be aligned with the token words.
  // This is guaranteed because here we work on a copy generated by copyTokens()
  //
  CoreMap timeSentence = (sentence != null ? alignSentence(sentence) : buildSentenceFromTokens(tokenSequence));
  List<CoreMap> timeExpressions = runSUTime(timeSentence, document);
  List<CoreMap> numbers = timeSentence.get(CoreAnnotations.NumerizedTokensAnnotation.class);

  //
  // label tokens covered by SUTime date/time expressions
  //
  if (timeExpressions != null) {
    for (CoreMap timeExpression : timeExpressions) {
      // todo [cdm 2013]: We should also store these in the Sentence, but we've just got the list of tokens here
      int start = timeExpression.get(CoreAnnotations.TokenBeginAnnotation.class);
      int end = timeExpression.get(CoreAnnotations.TokenEndAnnotation.class);
      int offset = 0;
      if (sentence != null && sentence.containsKey(CoreAnnotations.TokenBeginAnnotation.class)) {
        offset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
      }
      Timex timex = timeExpression.get(TimeAnnotations.TimexAnnotation.class);
      if (timex != null) {
        if (DEBUG) {
          log.info("FOUND DATE/TIME \"" + timeExpression + "\" with offsets " + start + " " + end + " and value " + timex);
          log.info("The above CoreMap has the following fields:");
          // for(Class key: timeExpression.keySet()) log.info("\t" + key + ": " + timeExpression.get(key));
        }
        String label = timex.timexType();
        for (int i = start; i < end; i++) {
          CoreLabel token = tokenSequence.get(i - offset);
          if (token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
            token.set(CoreAnnotations.AnswerAnnotation.class, label);
            token.set(TimeAnnotations.TimexAnnotation.class, timex);
          }
        }
      }
    }
  }

  //
  // label tokens covered by NumberNormalizer numeric expressions
  //
  if (numbers != null) {
    for (CoreMap number : numbers) {
      if (number.containsKey(CoreAnnotations.NumericCompositeValueAnnotation.class)) {
        int start = number.get(CoreAnnotations.TokenBeginAnnotation.class);
        int end = number.get(CoreAnnotations.TokenEndAnnotation.class);
        int offset = 0;
        if (sentence != null && sentence.containsKey(CoreAnnotations.TokenBeginAnnotation.class)) {
          offset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
        }
        String type = number.get(CoreAnnotations.NumericCompositeTypeAnnotation.class);
        Number value = number.get(CoreAnnotations.NumericCompositeValueAnnotation.class);
        if (type != null) {
          if (DEBUG)
            log.info("FOUND NUMBER \"" + number + "\" with offsets " + start + " " + end + " and value " + value + " and type " + type);
          for (int i = start; i < end; i++) {
            CoreLabel token = tokenSequence.get(i - offset);
            if (token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
              token.set(CoreAnnotations.AnswerAnnotation.class, type);
              if (value != null) {
                token.set(CoreAnnotations.NumericCompositeValueAnnotation.class, value);
              }
            }
          }
        }
      }
    }
  }

  // use inverted "CD".equals() because tag could be null (if no POS info available)
  for (CoreLabel token : tokenSequence) {
    if ("CD".equals(token.tag()) && token.get(CoreAnnotations.AnswerAnnotation.class).equals(flags.backgroundSymbol)) {
      token.set(CoreAnnotations.AnswerAnnotation.class, "NUMBER");
    }
  }

  // extract money and percents
  moneyAndPercentRecognizer(tokenSequence);

  // ordinals
  // NumberNormalizer probably catches these but let's be safe
  ordinalRecognizer(tokenSequence);

  return tokenSequence;
}
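classifyWithSUTime is internal to the number-sequence NER machinery; in normal use its effect surfaces through the ner annotator, where date, number, money, and ordinal tokens come back carrying NER labels and normalized values. A minimal sketch of inspecting that output is below; the annotator list and sample sentence are illustrative assumptions, not taken from this class.

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class NumericNerSketch {
  public static void main(String[] args) {
    // Illustrative pipeline; the ner annotator pulls in the numeric classifiers and SUTime.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation doc = new Annotation("The third meeting costs $25 and starts on January 5, 2014.");
    pipeline.annotate(doc);

    for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        // Expect labels such as ORDINAL, MONEY, and DATE alongside normalized values.
        System.out.printf("%-10s %-8s %s%n",
            token.word(),
            token.ner(),
            token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
      }
    }
  }
}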
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
The class AnnotationUtils, method shuffleSentences.
/**
 * Randomized shuffle of all sentences in this dataset
 * @param dataset
 */
public static void shuffleSentences(CoreMap dataset) {
  List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  // we use a constant seed for replicability of experiments
  Collections.shuffle(sentences, new Random(0));
  dataset.set(CoreAnnotations.SentencesAnnotation.class, sentences);
}
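Because the seed is the constant 0, the permutation is the same on every run, which is the point of the "replicability of experiments" comment. The self-contained illustration below shows that property with plain strings standing in for the sentence CoreMaps.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;

public class FixedSeedShuffleSketch {
  public static void main(String[] args) {
    List<String> first = new ArrayList<>(Arrays.asList("s0", "s1", "s2", "s3", "s4"));
    List<String> second = new ArrayList<>(first);

    // Same fixed seed => same permutation every time, so shuffled splits are reproducible.
    Collections.shuffle(first, new Random(0));
    Collections.shuffle(second, new Random(0));

    System.out.println(first);                 // deterministic reordering
    System.out.println(first.equals(second));  // true
  }
}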
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
The class AnnotationUtils, method entityMentionsToCoreLabels.
/**
 * Converts the labels of all entity mentions in this dataset to sequences of CoreLabels
 * @param dataset
 * @param annotationsToSkip
 * @param useSubTypes
 */
public static List<List<CoreLabel>> entityMentionsToCoreLabels(CoreMap dataset, Set<String> annotationsToSkip, boolean useSubTypes, boolean useBIO) {
  List<List<CoreLabel>> retVal = new ArrayList<>();
  List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    List<CoreLabel> labeledSentence = sentenceEntityMentionsToCoreLabels(sentence, true, annotationsToSkip, null, useSubTypes, useBIO);
    assert (labeledSentence != null);
    retVal.add(labeledSentence);
  }
  return retVal;
}
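When useBIO is true, the per-token labels distinguish the first token of a mention from its continuation. The sketch below shows that BIO convention applied by hand to one sentence with a single two-token PERSON mention; the span indices, label strings, and use of AnswerAnnotation are illustrative and do not reproduce the sentenceEntityMentionsToCoreLabels code path.

import java.util.Arrays;
import java.util.List;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;

public class BioLabelSketch {
  public static void main(String[] args) {
    List<String> words = Arrays.asList("Barack", "Obama", "visited", "Paris");
    int mentionStart = 0, mentionEnd = 2;   // token span [0, 2) covers the PERSON mention
    String type = "PERSON";

    for (int i = 0; i < words.size(); i++) {
      CoreLabel token = new CoreLabel();
      token.setWord(words.get(i));
      String label;
      if (i == mentionStart) {
        label = "B-" + type;                // beginning of the mention
      } else if (i > mentionStart && i < mentionEnd) {
        label = "I-" + type;                // inside the mention
      } else {
        label = "O";                        // outside any mention
      }
      token.set(CoreAnnotations.AnswerAnnotation.class, label);
      System.out.println(token.word() + "\t" + token.get(CoreAnnotations.AnswerAnnotation.class));
    }
  }
}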
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
The class AnnotationUtils, method datasetToString.
public static String datasetToString(CoreMap dataset) {
  List<CoreMap> sents = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  StringBuffer b = new StringBuffer();
  if (sents != null) {
    for (CoreMap sent : sents) {
      b.append(sentenceToString(sent));
    }
  }
  return b.toString();
}