Search in sources :

Example 11 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class DcorefExactOutputITest method testCoref.

/**
 * Runs the pipeline over a fixed sample document and compares the resulting
 * coreference chains against the stored expected results.
 *
 * @throws IOException declared for the file-loading calls made by this test
 */
@Test
public void testCoref() throws IOException {
    final String documentPath = "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm";
    final String expectedPath = "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref";

    // Annotate the raw document text and pull out the coreference chains.
    Annotation processed = pipeline.process(IOUtils.slurpFile(documentPath));
    Map<Integer, CorefChain> actualChains = processed.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    // Load the expected mentions and check them against what the pipeline produced.
    Map<Integer, List<ExpectedMention>> expectedMentions = loadExpectedResults(expectedPath);
    compareResults(expectedMentions, actualChains);
}
Also used : CorefChain(edu.stanford.nlp.coref.data.CorefChain) ArrayList(java.util.ArrayList) List(java.util.List) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation) Test(org.junit.Test)

Example 12 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class NERBenchmarkSlowITest method evalConll.

/**
 * The main engine that does the heavy lifting for evaluating a dataset. We are performing
 * 4-way classification on: ORG, PER, LOC, MISC.
 *
 * <p>For every document read from the dataset file, the gold tokens are joined into a
 * space-separated string, annotated with {@code conllNERAnnotationPipeline}, and the gold
 * and predicted labels are written one token per line to a temporary results file. That
 * file is then passed to {@code runEvalScript} and the script output is parsed by
 * {@code parseResults}.
 *
 * @param dataset Dataset prefix to evaluate. Should be one of "train", "dev", "test"
 * @throws IOException if the temporary results file cannot be created or opened
 * @throws RuntimeException if {@code dataset} is not one of the accepted names
 * @return the scores parsed from the evaluation script output for the given dataset
 */
// NOTE that CoNLL tests assume a 4-class classification scheme: ORG, PER, LOC, MISC
public HashMap<String, Double> evalConll(String dataset) throws IOException {
    SeqClassifierFlags flags = new SeqClassifierFlags();
    flags.entitySubclassification = "noprefix";
    CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();
    rw.init(flags);
    String inputFile;
    File resultsFile;
    switch(dataset) {
        case "train":
            // NOTE(review): "train" currently evaluates the dev file, exactly like "dev".
            // This looks like a copy-paste leftover; confirm whether a train file should be used.
        case "dev":
            resultsFile = File.createTempFile("conlldev", null);
            inputFile = CONLL_DEV;
            break;
        case "test":
            resultsFile = File.createTempFile("conlltest", null);
            inputFile = CONLL_TEST;
            break;
        default:
            throw new RuntimeException("Not a valid dataset name provided!");
    }
    resultsFile.deleteOnExit();
    // try-with-resources guarantees the writer is closed even when an assertion
    // or the annotation pipeline throws part-way through the dataset.
    try (PrintWriter writer = new PrintWriter(resultsFile)) {
        for (Iterator<List<CoreLabel>> itr = rw.getIterator(IOUtils.readerFromString(inputFile)); itr.hasNext(); ) {
            List<CoreLabel> goldLabels = itr.next();
            // Rebuild the document text as " w1 w2 ..." (each word preceded by a space).
            StringBuilder docString = new StringBuilder();
            for (CoreLabel goldToken : goldLabels) {
                docString.append(' ').append(goldToken.word());
            }
            Annotation docAnnotation = new Annotation(docString.toString());
            conllNERAnnotationPipeline.annotate(docAnnotation);
            List<CoreLabel> predictLabels = new ArrayList<>(docAnnotation.get(TokensAnnotation.class));
            assertEquals("# gold outputs not same as # predicted!\n", goldLabels.size(), predictLabels.size());
            int numLabels = goldLabels.size();
            // Write to output file
            for (int i = 0; i < numLabels; i++) {
                CoreLabel gold = goldLabels.get(i);
                // TODO(meric): What is difference between GoldAnswer and Answer annotation?
                String goldAnswer = gold.get(AnswerAnnotation.class);
                CoreLabel predict = predictLabels.get(i);
                String predictStr = predict.get(NamedEntityTagAnnotation.class);
                String predictPrefix = convert(predictStr);
                assertEquals("Gold and Predict words don't match!\n", gold.get(TextAnnotation.class), predict.get(TextAnnotation.class));
                // Columns: token, placeholder, gold label, predicted label.
                writer.println(gold.get(TextAnnotation.class) + "\t" + "_" + "\t" + goldAnswer + "\t" + predictPrefix);
            }
        }
    }
    // Run CoNLL eval script and extract F1 score
    String result = runEvalScript(resultsFile);
    return parseResults(result);
}
Also used : AnswerAnnotation(edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation) ArrayList(java.util.ArrayList) SeqClassifierFlags(edu.stanford.nlp.sequences.SeqClassifierFlags) TextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) NamedEntityTagAnnotation(edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation) CoreLabel(edu.stanford.nlp.ling.CoreLabel) List(java.util.List) File(java.io.File) CoNLLDocumentReaderAndWriter(edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter) PrintWriter(java.io.PrintWriter)

Example 13 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class TrueCaseAnnotatorITest method processFile.

/**
 * Runs the pipeline over the text found between the first {@code START_TEXT} match and
 * the first {@code END_TEXT} match in a file, and reports every token whose word differs
 * from its {@code TrueCaseTextAnnotation} value.
 *
 * <p>Each difference is printed to {@code System.err} together with a small window of
 * surrounding words (from two tokens before up to one token after the changed token).
 *
 * @param arg path of the file to process
 * @param nlp pipeline used to annotate the extracted text
 * @return the number of tokens whose word differs from its true-cased form
 * @throws IllegalStateException if the file contains no start or end marker
 */
private static int processFile(String arg, StanfordCoreNLP nlp) {
    System.err.print("### ");
    System.err.println(arg);
    String doc = IOUtils.slurpFileNoExceptions(arg);
    // Check find() explicitly: calling end()/start() after a failed find() would only
    // produce an uninformative "No match available" IllegalStateException.
    Matcher sm = START_TEXT.matcher(doc);
    if (!sm.find()) {
        throw new IllegalStateException("No start-of-text marker found in " + arg);
    }
    Matcher em = END_TEXT.matcher(doc);
    if (!em.find()) {
        throw new IllegalStateException("No end-of-text marker found in " + arg);
    }
    int start = sm.end();
    assert (start > 0);
    int end = em.start();
    assert (end > 0);
    String text = doc.substring(start, end);
    Annotation anno = nlp.process(text);
    int count = 0;
    for (CoreMap sent : anno.get(SentencesAnnotation.class)) {
        List<? extends CoreLabel> words = sent.get(TokensAnnotation.class);
        for (int i = 0; i < words.size(); i++) {
            String w = words.get(i).word();
            String tcw = words.get(i).get(TrueCaseTextAnnotation.class);
            if (!w.equals(tcw)) {
                System.err.print('"' + w + "\" true cased to \"" + tcw + "\" in context:");
                // Context window: indices [i - 2, i + 2), clamped to the sentence bounds.
                for (int j = Math.max(0, i - 2); j < Math.min(words.size(), i + 2); j++) {
                    System.err.print(" " + words.get(j).word());
                }
                System.err.println();
                count++;
            }
        }
    }
    System.err.println("True case change count: " + count);
    return count;
}
Also used : Matcher(java.util.regex.Matcher) CoreMap(edu.stanford.nlp.util.CoreMap) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) TrueCaseTextAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TrueCaseTextAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 14 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class PolarityITest method annotate.

/**
 * Annotates the given text with the pipeline and collects the polarity attached to
 * each token of the first sentence.
 *
 * @param text the text to annotate
 * @return one polarity per token of the first sentence, in token order
 */
private Polarity[] annotate(String text) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    // Only the first sentence of the annotated document is inspected.
    List<CoreLabel> firstSentenceTokens = document.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
    Polarity[] result = new Polarity[firstSentenceTokens.size()];
    int cursor = 0;
    for (CoreLabel token : firstSentenceTokens) {
        result[cursor] = token.get(NaturalLogicAnnotations.PolarityAnnotation.class);
        cursor++;
    }
    return result;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 15 with Annotation

use of edu.stanford.nlp.pipeline.Annotation in project CoreNLP by stanfordnlp.

the class QuantifiableEntityExtractorITest method runAndCheck.

/**
 * Extracts quantifiable expressions from each sentence and checks them against the
 * expected quantities for that sentence.
 *
 * <p>When {@code expected} is {@code null}, the extracted expressions are printed to
 * {@code System.out} and the test is failed, since there is nothing to compare against.
 *
 * @param prefix    label prepended to every assertion message and printed line
 * @param sentences sentences to run the extractor on
 * @param expected  expected quantities, indexed by sentence and then by match position
 */
private static void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) {
    for (int si = 0; si < sentences.length; si++) {
        Annotation document = createDocument(sentences[si]);
        List<MatchedExpression> found = extractor.extract(document);
        if (expected != null) {
            ExpectedQuantity[] wanted = expected[si];
            // Compare pairwise over the common prefix first; the length check comes last.
            int comparable = Math.min(wanted.length, found.size());
            for (int i = 0; i < comparable; i++) {
                ExpectedQuantity want = wanted[i];
                MatchedExpression got = found.get(i);
                SimpleQuantifiableEntity quantity = (SimpleQuantifiableEntity) got.getValue().get();
                String label = prefix + ".matched." + si + "." + i;
                assertEquals(label + ".text", want.text, got.getText());
                assertEquals(label + ".normalizedValue", want.normalizedValue, quantity.toString());
                assertEquals(label + ".type", want.type, quantity.getUnit().type);
            }
            assertEquals(prefix + ".length." + si, wanted.length, found.size());
        } else {
            // Print out matched text and value
            for (MatchedExpression expression : found) {
                String matchedText = expression.getText();
                Object matchedValue = expression.getValue();
                System.out.println(prefix + ": Got expression " + matchedText + " with value " + matchedValue);
            }
            fail(prefix + ": No expected provided");
        }
    }
}
Also used : Annotation(edu.stanford.nlp.pipeline.Annotation) MatchedExpression(edu.stanford.nlp.ling.tokensregex.MatchedExpression)

Aggregations

Annotation (edu.stanford.nlp.pipeline.Annotation)138 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)84 CoreMap (edu.stanford.nlp.util.CoreMap)77 CoreLabel (edu.stanford.nlp.ling.CoreLabel)48 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)43 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)31 ArrayList (java.util.ArrayList)31 Properties (java.util.Properties)28 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)18 Test (org.junit.Test)18 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)15 Tree (edu.stanford.nlp.trees.Tree)14 TokensAnnotation (edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation)12 TreeAnnotation (edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation)12 List (java.util.List)12 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)11 IOException (java.io.IOException)11 CorefChain (edu.stanford.nlp.coref.data.CorefChain)10 RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations)10