Search in sources :

Example 21 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class StanfordParseHandler method addView.

/**
 * Runs the Stanford POS tagger and parser over {@code textAnnotation} and attaches the
 * resulting parse trees to it as a {@link TreeView}.
 *
 * <p>One tree is stored per sentence index. A sentence for which the length guard inside the
 * loop fires is logged and left without a tree; the view is still added.
 *
 * @param textAnnotation the annotation to parse; the new view is added to it in place
 * @throws AnnotatorException if the parser returns no sentences, or if the root of the first
 *         sentence's tree is labelled "X" (treated here as a sign of a parser timeout)
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.PARSE_STANFORD, "StanfordParseHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    // Guard the get(0) below: without it an empty or missing sentence list surfaces as an
    // unchecked IndexOutOfBoundsException/NullPointerException instead of the declared
    // AnnotatorException that callers handle.
    if (sentences == null || sentences.isEmpty()) {
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "The parser produced no sentences.");
    }
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);
        // NOTE(review): sentence.size() is the CoreMap's own size, which may be the number of
        // annotation keys rather than the number of tokens -- confirm this guard measures
        // sentence length as intended.
        if (maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength) {
            logger.warn("Unable to parse TextAnnotation " + textAnnotation.getId() + " since it is larger than the maximum sentence length of the parser (" + maxParseSentenceLength + ").");
        } else {
            // Copy the Stanford tree into the project's Tree<String>: root label first, then
            // each child subtree converted by generateNode.
            edu.stanford.nlp.trees.Tree stanfordTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            Tree<String> tree = new Tree<>(stanfordTree.value());
            for (edu.stanford.nlp.trees.Tree pt : stanfordTree.getChildrenAsList()) {
                tree.addSubtree(generateNode(pt));
            }
            treeView.setParseTree(sentenceId, tree);
        }
    }
    textAnnotation.addView(getViewName(), treeView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) Tree(edu.illinois.cs.cogcomp.core.datastructures.trees.Tree) CoreMap(edu.stanford.nlp.util.CoreMap) ArrayCoreMap(edu.stanford.nlp.util.ArrayCoreMap)

Example 22 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class RunPipeline method main.

/**
 * Command-line entry point: runs the pipeline on a single input file or on every file in an
 * input directory.
 *
 * <p>Expects exactly three arguments: the config file, the input file or directory, and the
 * output file or directory. Both the input and the output path must already exist, and when
 * the input is a directory the output must be a directory too.
 *
 * <p>Every failure path terminates the JVM with exit status -1 so that calling scripts can
 * detect the error.
 *
 * @param args {@code config inputFile/inputDirectory outFile/outputDirectory}
 */
public static void main(String[] args) {
    if (args.length != 3) {
        System.err.println("Usage: " + NAME + " config inputFile/inputDirectory outFile/outputDirectory");
        System.exit(-1);
    }
    String config = args[0];
    String inFileName = args[1];
    String outFileName = args[2];
    File inFile = new File(inFileName);
    File outFile = new File(outFileName);
    if (!inFile.exists()) {
        System.err.println("input source '" + inFileName + "' does not exist.");
        System.exit(-1);
    }
    // NOTE(review): this also requires the output *file* to pre-exist in single-file mode;
    // confirm that is intended.
    if (!outFile.exists()) {
        System.err.println("output file/directory '" + outFileName + "' does not exist.");
        System.exit(-1);
    }
    RunPipeline rp = null;
    try {
        rp = new RunPipeline(config);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
    try {
        if (inFile.isDirectory()) {
            if (!outFile.isDirectory()) {
                System.err.println("output '" + outFileName + "' is not a directory, but input '" + inFileName + "' is a directory. Input and Output must both be either files or directories.");
                // Mismatched input/output kinds is a usage error: report failure to the
                // caller like the other validation checks above.
                System.exit(-1);
            } else {
                rp.RunPipelineOnDataset(Paths.get(inFileName), Paths.get(outFileName));
            }
        } else {
            TextAnnotation ta = rp.RunPipelineOnFile(inFileName);
            SerializationHelper.serializeTextAnnotationToFile(ta, outFileName, true, true);
            System.out.println("Processed file.  TextAnnotation.toString(): " + ta.toString());
        }
    } catch (AnnotatorException | IOException e) {
        e.printStackTrace();
        // Processing failed: do not let the process exit with a success status.
        System.exit(-1);
    }
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) File(java.io.File) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException)

Example 23 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class CachingPipelineTest method testCachingPipeline.

/**
 * Checks that views requested through {@code addViewsAndCache} are added to a fresh
 * TextAnnotation, that the cache file exists afterwards, and that a further view
 * (QUANTITIES) can then be added with {@code addView}.
 */
@Test
public void testCachingPipeline() {
    TextAnnotation ta = null;
    String newText = "This is some text that the USA hasn't seen from Bill Smith before...";
    try {
        ta = processor.createBasicTextAnnotation("test", "test", newText);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // A basic annotation must not yet carry the views requested below.
    assertFalse(ta.hasView(ViewNames.SHALLOW_PARSE));
    assertFalse(ta.hasView(ViewNames.NER_CONLL));
    String[] viewsToAdd = { ViewNames.SHALLOW_PARSE, ViewNames.NER_CONLL };
    Set<String> viewNames = new HashSet<>();
    Collections.addAll(viewNames, viewsToAdd);
    try {
        ta = processor.addViewsAndCache(ta, viewNames, false);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(ta.hasView(ViewNames.SHALLOW_PARSE));
    assertTrue(ta.hasView(ViewNames.NER_CONLL));
    assertTrue(IOUtils.exists(TEST_CACHE_FILE));
    try {
        processor.addView(ta, ViewNames.QUANTITIES);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        // Fail here with the real cause, consistent with the two blocks above, instead of
        // letting the assertTrue below fail with no explanation.
        fail(e.getMessage());
    }
    assertTrue(ta.hasView(ViewNames.QUANTITIES));
    System.out.println(ta.getView(ViewNames.QUANTITIES));
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 24 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class CachingPipelineTest method stanfordParseHandler.

/**
 * Adds the Stanford dependency and constituency parse views to a one-sentence annotation and
 * compares their string renderings against hard-coded gold trees.
 */
@Test
public void stanfordParseHandler() {
    String text = "In the United States, Cinco de Mayo has taken on a significance beyond that in Mexico. ";
    TextAnnotation basicTextAnnotation = null;
    try {
        basicTextAnnotation = processor.createBasicTextAnnotation("test", "test", text);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    try {
        processor.addView(basicTextAnnotation, ViewNames.DEPENDENCY_STANFORD);
        processor.addView(basicTextAnnotation, ViewNames.PARSE_STANFORD);
    } catch (RuntimeException | AnnotatorException e) {
        // Deliberately not failing here; the comparisons below still require both views.
        e.printStackTrace();
        System.out.println("Expected exception from stanford.");
    }
    String predictedDepTree = basicTextAnnotation.getView(ViewNames.DEPENDENCY_STANFORD).toString();
    String goldDepTree = "(taken (:LABEL:prep In (:LABEL:pobj States :LABEL:det the\n" + "                    :LABEL:nn United))\n" + "       (:LABEL:nsubj Cinco (:LABEL:prep de :LABEL:pobj Mayo))\n" + "       :LABEL:aux has\n" + "       (:LABEL:prep on (:LABEL:pobj significance :LABEL:det a))\n" + "       (:LABEL:prep beyond (:LABEL:pobj that (:LABEL:prep in :LABEL:pobj Mexico))))";
    // assertEquals takes (message, expected, actual): gold goes first so that a failure
    // report labels the two trees correctly.
    assertEquals("DEPENDENCY_STANFORD - Dependency parse tree should match gold parse.", goldDepTree, predictedDepTree.trim());
    String predictedParseTree = basicTextAnnotation.getView(ViewNames.PARSE_STANFORD).toString();
    String goldParseTree = "(ROOT (S (PP (IN In)\n" + "    (NP (DT the)\n" + "        (NNP United)\n" + "        (NNPS States)))\n" + "   (, ,)\n" + "   (NP (NP (NNP Cinco))\n" + "       (PP (IN de)\n" + "           (NP (NNP Mayo))))\n" + "   (VP (VBZ has)\n" + "       (VP (VBN taken)\n" + "           (PP (IN on)\n" + "               (NP (DT a)\n" + "                   (NN significance)))\n" + "           (PP (IN beyond)\n" + "               (NP (NP (DT that))\n" + "                   (PP (IN in)\n" + "                       (NP (NNP Mexico)))))))\n" + "   (. .)))";
    assertEquals("PARSE_STANFORD - Constituency parse tree  generated should match gold parse.", goldParseTree, predictedParseTree.trim());
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 25 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class CachingPipelineTest method testHyphenSplit.

/**
 * Verifies NER over text containing hyphenated names and ranges: the NER_CONLL view must
 * hold exactly three constituents and the first one must read "Jean-Pierre Thibault".
 */
@Test
public void testHyphenSplit() {
    final String text = "The man said that Jean-Pierre Thibault was only present from 2002-2003.  Jean-Pierre (" + "also known as John-Paul) saw fit to share this only last Tuesday- who knows why.";

    TextAnnotation ta = null;
    try {
        // Build the basic annotation, then layer the NER view on top of it.
        ta = processor.createBasicTextAnnotation("test", "test", text);
        processor.addView(ta, ViewNames.NER_CONLL);
    } catch (AnnotatorException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }

    assertTrue(ta.hasView(ViewNames.NER_CONLL));

    List<Constituent> entities = ta.getView(ViewNames.NER_CONLL).getConstituents();
    assertEquals(3, entities.size());

    // The first entity must keep the hyphenated first name together with the surname.
    Constituent firstEntity = entities.get(0);
    assertEquals("Jean-Pierre Thibault", firstEntity.getTokenizedSurfaceForm());
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)39 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)28 Test (org.junit.Test)14 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)11 IOException (java.io.IOException)8 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)7 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)6 FileNotFoundException (java.io.FileNotFoundException)6 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)4 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)3 Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)3 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)3 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)3 Properties (java.util.Properties)3 AnnotatorService (edu.illinois.cs.cogcomp.annotation.AnnotatorService)2 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)2 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)2 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)2 Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)2