Search in sources :

Example 16 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method testTokenization.

/**
 * Test that tokenization produces the correct number of constituents.
 *
 * Two inputs are annotated in turn: {@code TOKEN_TEST}, whose NER view is expected to hold
 * 2 constituents, and a longer news sentence, whose NER view is expected to hold 3.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // JUnit's assertEquals takes (expected, actual); keeping that order makes the failure
    // message report the right values.
    assertEquals(2, nerView.getConstituents().size());
    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy's mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Test(org.junit.Test)

Example 17 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method evaluatePerformance.

/**
 * Checks that NER annotation runs in reasonable time. The budget is not a fixed number: it is
 * obtained from {@code measureMachinePerformance()} on the machine running the test, and the
 * average per-document annotation time over {@code SIZE} runs must not exceed it.
 */
// @Test
public void evaluatePerformance() {
    final int SIZE = 100;

    // Warm-up call: any lazy loading happens here, outside the timed section.
    TextAnnotation warmUp = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUp);
    } catch (AnnotatorException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }

    long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // One untimed sanity run: the annotator must hand back a view.
    TextAnnotation sanityTa = tab.createTextAnnotation(TEST_INPUT);
    View sanityView = null;
    try {
        sanityView = getView(sanityTa);
    } catch (AnnotatorException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }
    assertTrue(sanityView != null);

    // Timed section: annotate the same input SIZE times, validating every run.
    final long begin = System.currentTimeMillis();
    int iteration = 0;
    while (iteration < SIZE) {
        TextAnnotation timedTa = tab.createTextAnnotation(TEST_INPUT);
        View timedView = null;
        try {
            timedView = getView(timedTa);
        } catch (AnnotatorException ex) {
            ex.printStackTrace();
            fail(ex.getMessage());
        }
        assertTrue(timedView != null);
        // Every constituent found must be one of the known entities.
        for (Constituent found : timedView.getConstituents()) {
            String label = found.toString();
            assertTrue("No entity named \"" + label + "\"", entities.contains(label));
        }
        iteration++;
    }

    // Average wall-clock milliseconds per annotated document.
    long averageMillis = (System.currentTimeMillis() - begin) / SIZE;
    double normalized = (double) averageMillis / (double) expectedPerformance;
    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, normalized);
    assertTrue(averageMillis <= expectedPerformance);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 18 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class NerInitTest method testInit.

/**
 * Builds a CoNLL NER annotator from a configuration in which the gazetteer-features and
 * Brown-cluster-paths properties are both set to "0", annotates {@code TESTSTR}, and checks
 * that the NER_CONLL view is added to the text annotation with 2 constituents.
 */
@Test
public void testInit() {
    Properties props = new Properties();
    props.setProperty(NerBaseConfigurator.GAZETTEER_FEATURES, "0");
    props.setProperty(NerBaseConfigurator.BROWN_CLUSTER_PATHS, "0");
    ResourceManager rm = (new NerBaseConfigurator()).getConfig(new ResourceManager(props));
    NERAnnotator ner = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
    assertNotNull(ner);
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = tab.createTextAnnotation(TESTSTR);
    try {
        ner.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // Use an explicit JUnit failure rather than the `assert` keyword: `assert` statements are
    // skipped entirely when the JVM runs without -ea, which would silently disable this check.
    if (!ta.hasView(ViewNames.NER_CONLL)) {
        fail("TextAnnotation is missing view " + ViewNames.NER_CONLL);
    }
    // JUnit's assertEquals takes (expected, actual).
    assertEquals(2, ta.getView(ViewNames.NER_CONLL).getConstituents().size());
}
Also used : NerBaseConfigurator(edu.illinois.cs.cogcomp.ner.config.NerBaseConfigurator) TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) Properties(java.util.Properties) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Test(org.junit.Test)

Example 19 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class StanfordDepHandler method addView.

/**
 * Runs the POS and parse annotators over the given text annotation and attaches a
 * {@code DEPENDENCY_STANFORD} tree view holding one dependency tree per processed sentence.
 *
 * Sentences over the configured length limit are logged and skipped. A sentence whose
 * dependency-graph root cannot be obtained is either skipped or causes the runtime exception
 * to be rethrown, depending on {@code throwExceptionOnSentenceLengthCheck}.
 *
 * @param textAnnotation the annotation to parse; the new view is added to it
 * @throws AnnotatorException if the up-front length check fails, or if the first sentence's
 *         parse tree is the placeholder node "X"
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // No point in trying to parse when a sentence exceeds the configured maximum length.
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);

    // Build the (tokenized) sentences and run them through the annotators.
    List<CoreMap> sentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    // Only the first sentence's tree is inspected here; an "X" node is most likely the
    // result of the parser running out of time.
    String firstNode = sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString();
    if (firstNode.equals("X")) {
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }

    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);

        // Over-long sentence: warn and move on without adding a tree for it.
        if (maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(), maxParseSentenceLength));
            continue;
        }

        SemanticGraph depGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        IndexedWord root;
        try {
            root = depGraph.getFirstRoot();
        } catch (RuntimeException e) {
            String msg = "ERROR in getting root of dep graph for sentence.  Sentence is:\n" + sentence.toString() + "'\nDependency graph is:\n" + depGraph.toCompactString() + "\nText is:\n" + textAnnotation.getText();
            logger.error(msg);
            System.err.println(msg);
            e.printStackTrace();
            if (throwExceptionOnSentenceLengthCheck) {
                throw e;
            }
            // Best-effort mode: leave this sentence without a tree.
            continue;
        }

        // Convert the dependency graph into a tree rooted at the graph's first root.
        int tokenStart = getNodePosition(textAnnotation, root, sentenceId);
        Tree<Pair<String, Integer>> tree = new Tree<>(new Pair<>(root.originalText(), tokenStart));
        populateChildren(depGraph, root, tree, textAnnotation, sentenceId);
        treeView.setDependencyTree(sentenceId, tree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 20 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class StanfordParseHandler method checkLength.

/**
 * Enforces the sentence-length limit on a text annotation when strict checking is requested.
 *
 * Does nothing when {@code throwExceptionOnSentenceLengthCheck} is false. Otherwise asks
 * {@code HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit} for a constituent and,
 * if one is returned, logs an error built from that constituent's surface form and throws.
 *
 * @param textAnnotation the annotation to check
 * @param throwExceptionOnSentenceLengthCheck whether the check is performed at all
 * @param maxParseSentenceLength the limit handed to the HandlerUtils check
 * @throws AnnotatorException if the check is enabled and a non-null constituent is returned
 */
static void checkLength(TextAnnotation textAnnotation, boolean throwExceptionOnSentenceLengthCheck, int maxParseSentenceLength) throws AnnotatorException {
    if (!throwExceptionOnSentenceLengthCheck) {
        return;
    }
    Constituent offending = HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit(textAnnotation, maxParseSentenceLength);
    if (offending == null) {
        return;
    }
    String msg = HandlerUtils.getSentenceLengthError(textAnnotation.getId(), offending.getSurfaceForm(), maxParseSentenceLength);
    logger.error(msg);
    throw new AnnotatorException(msg);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)39 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)28 Test (org.junit.Test)14 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)11 IOException (java.io.IOException)8 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)7 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)6 FileNotFoundException (java.io.FileNotFoundException)6 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)4 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)3 Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)3 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)3 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)3 Properties (java.util.Properties)3 AnnotatorService (edu.illinois.cs.cogcomp.annotation.AnnotatorService)2 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)2 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)2 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)2 Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)2