use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
the class DcorefExactOutputITest method testCoref.
/**
 * Processes the STILLALONEWOLF sample document with the pipeline and hands
 * the resulting coreference chains, together with the expected results
 * loaded from the matching {@code .expectedcoref} resource, to
 * {@code compareResults}.
 *
 * @throws IOException declared for the resource-loading calls made here
 */
@Test
public void testCoref() throws IOException {
  final String inputPath = "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm";
  final String expectedPath = "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref";

  // Same order as before: read + annotate the document first, then load the expectations.
  final Annotation processed = pipeline.process(IOUtils.slurpFile(inputPath));
  final Map<Integer, CorefChain> actualChains =
      processed.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  final Map<Integer, List<ExpectedMention>> expectedChains = loadExpectedResults(expectedPath);

  compareResults(expectedChains, actualChains);
}
use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
the class NERBenchmarkSlowITest method evalConll.
/**
 * The main engine that does the heavy lifting for evaluating a dataset. We are performing
 * 4-way classification on: ORG, PER, LOC, MISC.
 *
 * <p>Each gold document read from the input file is re-joined into a space-separated string,
 * annotated with {@code conllNERAnnotationPipeline}, and written token by token to a temporary
 * results file as {@code word \t _ \t gold \t predicted}. That file is then passed to
 * {@code runEvalScript} and the script output to {@code parseResults}.
 *
 * @param dataset Dataset prefix to evaluate. Should be one of "train", "dev", "test"
 * @return the scores parsed from the eval script output by {@code parseResults}
 *         (presumably F1 values keyed by label; confirm against {@code parseResults})
 * @throws IOException if the temporary results file cannot be created or written, or the
 *         input file cannot be read
 * @throws IllegalArgumentException if {@code dataset} is not one of the supported names
 */
// NOTE that CoNLL tests assume a 4-class classification scheme: ORG, PER, LOC, MISC
public HashMap<String, Double> evalConll(String dataset) throws IOException {
  SeqClassifierFlags flags = new SeqClassifierFlags();
  flags.entitySubclassification = "noprefix";
  CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();
  rw.init(flags);

  String inputFile;
  File resultsFile;
  switch (dataset) {
    case "train":
      // NOTE(review): this branch evaluates the DEV file (and uses the "conlldev" temp prefix),
      // exactly like the "dev" branch. Looks like a copy-paste slip; left unchanged because no
      // train-set constant is visible here. Confirm and point this at the train data.
      resultsFile = File.createTempFile("conlldev", null);
      inputFile = CONLL_DEV;
      break;
    case "dev":
      resultsFile = File.createTempFile("conlldev", null);
      inputFile = CONLL_DEV;
      break;
    case "test":
      resultsFile = File.createTempFile("conlltest", null);
      inputFile = CONLL_TEST;
      break;
    default:
      // IllegalArgumentException is a RuntimeException, so existing callers are unaffected.
      throw new IllegalArgumentException("Not a valid dataset name provided! Got: " + dataset);
  }
  resultsFile.deleteOnExit();

  // try-with-resources: a failing assertion or pipeline error inside the loop must not leak
  // the results writer or the input reader.
  try (PrintWriter writer = new PrintWriter(resultsFile);
       java.io.Reader reader = IOUtils.readerFromString(inputFile)) {
    for (Iterator<List<CoreLabel>> itr = rw.getIterator(reader); itr.hasNext(); ) {
      List<CoreLabel> goldLabels = itr.next();

      // Rebuild the raw document text: every token is preceded by a single space.
      StringBuilder docString = new StringBuilder();
      for (CoreLabel goldLabel : goldLabels) {
        docString.append(' ').append(goldLabel.word());
      }
      Annotation docAnnotation = new Annotation(docString.toString());
      conllNERAnnotationPipeline.annotate(docAnnotation);
      List<CoreLabel> predictLabels = docAnnotation.get(TokensAnnotation.class);

      assertEquals("# gold outputs not same as # predicted!\n", goldLabels.size(), predictLabels.size());
      int numLabels = goldLabels.size();
      // Write to output file
      for (int i = 0; i < numLabels; i++) {
        CoreLabel gold = goldLabels.get(i);
        // TODO(meric): What is difference between GoldAnswer and Answer annotation?
        String goldToken = gold.get(AnswerAnnotation.class);
        CoreLabel predict = predictLabels.get(i);
        String predictPrefix = convert(predict.get(NamedEntityTagAnnotation.class));
        assertEquals("Gold and Predict words don't match!\n", gold.get(TextAnnotation.class), predict.get(TextAnnotation.class));
        writer.println(gold.get(TextAnnotation.class) + "\t" + "_" + "\t" + goldToken + "\t" + predictPrefix);
      }
    }
    // PrintWriter swallows I/O errors; surface them instead of scoring a truncated file.
    if (writer.checkError()) {
      throw new IOException("Failed writing NER results to " + resultsFile);
    }
  }

  // Run CoNLL eval script and extract F1 score
  String result = runEvalScript(resultsFile);
  return parseResults(result);
}
use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
the class TrueCaseAnnotatorITest method processFile.
/**
 * Runs the pipeline over the text found between the first {@code START_TEXT} match and the
 * next {@code END_TEXT} match in the given file, and reports to {@code System.err} every
 * token whose {@code TrueCaseTextAnnotation} differs from its word form.
 *
 * @param arg path of the file to process (also echoed to {@code System.err})
 * @param nlp pipeline used to annotate the extracted text
 * @return the number of tokens whose true-cased form differs from the original word
 * @throws IllegalStateException if the start marker, or an end marker after it, is not found
 */
private static int processFile(String arg, StanfordCoreNLP nlp) {
  System.err.print("### ");
  System.err.println(arg);
  String doc = IOUtils.slurpFileNoExceptions(arg);

  // Check find() explicitly: calling end()/start() on a failed match only yields an opaque
  // "No match found" IllegalStateException with no hint about which marker or file.
  Matcher sm = START_TEXT.matcher(doc);
  if (!sm.find()) {
    throw new IllegalStateException("No START_TEXT match found in " + arg);
  }
  int start = sm.end();
  assert (start > 0);

  // Search for the end marker only after the start marker, so an earlier END_TEXT occurrence
  // cannot produce end < start and make substring() fail.
  Matcher em = END_TEXT.matcher(doc);
  if (!em.find(start)) {
    throw new IllegalStateException("No END_TEXT match found after START_TEXT in " + arg);
  }
  int end = em.start();
  assert (end > 0);

  String text = doc.substring(start, end);
  Annotation anno = nlp.process(text);
  int count = 0;
  for (CoreMap sent : anno.get(SentencesAnnotation.class)) {
    List<? extends CoreLabel> words = sent.get(TokensAnnotation.class);
    for (int i = 0; i < words.size(); i++) {
      String w = words.get(i).word();
      String tcw = words.get(i).get(TrueCaseTextAnnotation.class);
      if (!w.equals(tcw)) {
        System.err.print('"' + w + "\" true cased to \"" + tcw + "\" in context:");
        // Context window is tokens i-2 .. i+1, clipped to the sentence.
        // NOTE(review): the window is asymmetric (two before, one after); confirm whether
        // i + 3 was intended.
        int contextEnd = Math.min(words.size(), i + 2);
        for (int j = Math.max(0, i - 2); j < contextEnd; j++) {
          System.err.print(" " + words.get(j).word());
        }
        System.err.println();
        count++;
      }
    }
  }
  System.err.println("True case change count: " + count);
  return count;
}
use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
the class PolarityITest method annotate.
/**
 * Annotates {@code text} with the pipeline and collects the
 * {@code PolarityAnnotation} value of every token in the first sentence.
 *
 * @param text raw text to annotate
 * @return one entry per token of the first sentence, in token order
 */
private Polarity[] annotate(String text) {
  Annotation document = new Annotation(text);
  pipeline.annotate(document);

  // Only the first sentence of the annotated document is inspected.
  List<CoreLabel> firstSentenceTokens = document
      .get(CoreAnnotations.SentencesAnnotation.class)
      .get(0)
      .get(CoreAnnotations.TokensAnnotation.class);

  Polarity[] result = new Polarity[firstSentenceTokens.size()];
  int slot = 0;
  for (CoreLabel token : firstSentenceTokens) {
    result[slot++] = token.get(NaturalLogicAnnotations.PolarityAnnotation.class);
  }
  return result;
}
use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.
the class QuantifiableEntityExtractorITest method runAndCheck.
/**
 * Extracts quantifiable expressions from each sentence and checks them against the
 * expected quantities for that sentence.
 *
 * <p>When {@code expected} is {@code null}, the extracted expressions of the current sentence
 * are printed to {@code System.out} and the test is failed. Otherwise the text, normalized
 * value and unit type of each pairwise-matchable expression are asserted first, and the
 * number of expressions last.
 *
 * @param prefix    label prepended to every assertion message and printed line
 * @param sentences sentences to run the extractor on
 * @param expected  expected quantities, indexed by sentence and then by expression; may be null
 */
private static void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) {
  for (int sentenceIdx = 0; sentenceIdx < sentences.length; sentenceIdx++) {
    Annotation doc = createDocument(sentences[sentenceIdx]);
    List<MatchedExpression> found = extractor.extract(doc);

    if (expected != null) {
      ExpectedQuantity[] wanted = expected[sentenceIdx];
      // Compare only the overlapping part here; a count mismatch is reported afterwards.
      int comparable = Math.min(wanted.length, found.size());
      for (int exprIdx = 0; exprIdx < comparable; exprIdx++) {
        ExpectedQuantity want = wanted[exprIdx];
        MatchedExpression got = found.get(exprIdx);
        SimpleQuantifiableEntity gotQuantity = (SimpleQuantifiableEntity) got.getValue().get();
        String tag = prefix + ".matched." + sentenceIdx + "." + exprIdx;
        assertEquals(tag + ".text", want.text, got.getText());
        assertEquals(tag + ".normalizedValue", want.normalizedValue, gotQuantity.toString());
        assertEquals(tag + ".type", want.type, gotQuantity.getUnit().type);
      }
      assertEquals(prefix + ".length." + sentenceIdx, wanted.length, found.size());
    } else {
      // Print out matched text and value
      for (MatchedExpression got : found) {
        System.out.println(prefix + ": Got expression " + got.getText() + " with value " + got.getValue());
      }
      fail(prefix + ": No expected provided");
    }
  }
}
Aggregations