use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class StanfordParseHandler method addView.
/**
 * Runs the Stanford POS tagger and parser over {@code textAnnotation} and stores the resulting
 * constituency trees in a {@link TreeView} that is added to the annotation under
 * {@link #getViewName()}.
 *
 * @param textAnnotation the annotation to parse; it receives the new view
 * @throws AnnotatorException if no sentences are available to parse, or if the first sentence
 *         comes back with the placeholder root "X" (most likely a parser timeout)
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If a sentence is longer than the configured maximum there is no point in trying to
    // parse it; the check is driven by the two handler settings passed here.
    checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.PARSE_STANFORD, "StanfordParseHandler", textAnnotation, 1d);

    // The (tokenized) sentences, in case there is more than one sentence in the record.
    List<CoreMap> sentences = buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    // Guard the get(0) below: report a missing sentence list through the checked exception
    // callers already handle instead of an unchecked index/null failure.
    if (sentences == null || sentences.isEmpty()) {
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "No sentences were available to parse.");
    }

    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }

    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);

        // CoreMap.size() counts annotation keys, not tokens, so the sentence length has to be
        // read from the token list. A missing token list is treated as length 0.
        List<?> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int tokenCount = (tokens == null) ? 0 : tokens.size();

        if (maxParseSentenceLength > 0 && tokenCount > maxParseSentenceLength) {
            logger.warn("Unable to parse TextAnnotation " + textAnnotation.getId() + " since it is larger than the maximum sentence length of the parser (" + maxParseSentenceLength + ").");
        } else {
            // Convert the Stanford tree into the project's Tree<String>: same root label,
            // children converted one by one through generateNode.
            edu.stanford.nlp.trees.Tree stanfordTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            Tree<String> tree = new Tree<>(stanfordTree.value());
            for (edu.stanford.nlp.trees.Tree pt : stanfordTree.getChildrenAsList()) {
                tree.addSubtree(generateNode(pt));
            }
            treeView.setParseTree(sentenceId, tree);
        }
    }
    textAnnotation.addView(getViewName(), treeView);
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class RunPipeline method main.
/**
 * Command-line entry point: runs the pipeline over a single file or over a directory.
 *
 * <p>Expects exactly three arguments: the config, the input file/directory and the output
 * file/directory. Both input and output must already exist, and they must both be files or
 * both be directories. Every failure path terminates the process with a non-zero exit status.
 *
 * @param args {@code config inputFile/inputDirectory outFile/outputDirectory}
 */
public static void main(String[] args) {
    if (args.length != 3) {
        System.err.println("Usage: " + NAME + " config inputFile/inputDirectory outFile/outputDirectory");
        System.exit(-1);
    }
    String config = args[0];
    String inFileName = args[1];
    String outFileName = args[2];
    File inFile = new File(inFileName);
    File outFile = new File(outFileName);
    if (!inFile.exists()) {
        System.err.println("input source '" + inFileName + "' does not exist.");
        System.exit(-1);
    }
    if (!outFile.exists()) {
        System.err.println("output file/directory '" + outFileName + "' does not exist.");
        System.exit(-1);
    }
    // Validate the file/directory pairing in both directions before the pipeline is built,
    // and signal the mismatch through the exit status rather than only printing it.
    if (inFile.isDirectory() && !outFile.isDirectory()) {
        System.err.println("output '" + outFileName + "' is not a directory, but input '" + inFileName + "' is a directory. Input and Output must both be either files or directories.");
        System.exit(-1);
    }
    if (!inFile.isDirectory() && outFile.isDirectory()) {
        System.err.println("output '" + outFileName + "' is a directory, but input '" + inFileName + "' is not a directory. Input and Output must both be either files or directories.");
        System.exit(-1);
    }
    RunPipeline rp = null;
    try {
        rp = new RunPipeline(config);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
    try {
        if (inFile.isDirectory()) {
            rp.RunPipelineOnDataset(Paths.get(inFileName), Paths.get(outFileName));
        } else {
            TextAnnotation ta = rp.RunPipelineOnFile(inFileName);
            SerializationHelper.serializeTextAnnotationToFile(ta, outFileName, true, true);
            System.out.println("Processed file. TextAnnotation.toString(): " + ta.toString());
        }
    } catch (AnnotatorException | IOException e) {
        e.printStackTrace();
        // A failed run must not look like success to the calling shell or script.
        System.exit(-1);
    }
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class CachingPipelineTest method testCachingPipeline.
/**
 * Checks that views requested through {@code addViewsAndCache} are added to the annotation
 * and that the cache file exists afterwards, then adds one further view directly.
 */
@Test
public void testCachingPipeline() {
    TextAnnotation ta = null;
    String newText = "This is some text that the USA hasn't seen from Bill Smith before...";
    try {
        ta = processor.createBasicTextAnnotation("test", "test", newText);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // A freshly created basic annotation must not carry the views requested below.
    assertFalse(ta.hasView(ViewNames.SHALLOW_PARSE));
    assertFalse(ta.hasView(ViewNames.NER_CONLL));
    String[] viewsToAdd = { ViewNames.SHALLOW_PARSE, ViewNames.NER_CONLL };
    Set<String> viewNames = new HashSet<>();
    Collections.addAll(viewNames, viewsToAdd);
    try {
        ta = processor.addViewsAndCache(ta, viewNames, false);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(ta.hasView(ViewNames.SHALLOW_PARSE));
    assertTrue(ta.hasView(ViewNames.NER_CONLL));
    assertTrue(IOUtils.exists(TEST_CACHE_FILE));
    try {
        processor.addView(ta, ViewNames.QUANTITIES);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        // Fail here with the annotator's own message, as the earlier catch blocks do,
        // instead of letting the assertion below fail without the cause.
        fail(e.getMessage());
    }
    assertTrue(ta.hasView(ViewNames.QUANTITIES));
    System.out.println(ta.getView(ViewNames.QUANTITIES));
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class CachingPipelineTest method stanfordParseHandler.
/**
 * Adds the Stanford dependency and constituency views to a one-sentence annotation and
 * compares their string forms against hand-written gold trees.
 */
@Test
public void stanfordParseHandler() {
    String text = "In the United States, Cinco de Mayo has taken on a significance beyond that in Mexico. ";
    TextAnnotation basicTextAnnotation = null;
    try {
        basicTextAnnotation = processor.createBasicTextAnnotation("test", "test", text);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    try {
        processor.addView(basicTextAnnotation, ViewNames.DEPENDENCY_STANFORD);
        processor.addView(basicTextAnnotation, ViewNames.PARSE_STANFORD);
    } catch (RuntimeException | AnnotatorException e) {
        e.printStackTrace();
        System.out.println("Expected exception from stanford.");
    }
    String predictedDepTree = basicTextAnnotation.getView(ViewNames.DEPENDENCY_STANFORD).toString();
    String goldDepTree = "(taken (:LABEL:prep In (:LABEL:pobj States :LABEL:det the\n" + " :LABEL:nn United))\n" + " (:LABEL:nsubj Cinco (:LABEL:prep de :LABEL:pobj Mayo))\n" + " :LABEL:aux has\n" + " (:LABEL:prep on (:LABEL:pobj significance :LABEL:det a))\n" + " (:LABEL:prep beyond (:LABEL:pobj that (:LABEL:prep in :LABEL:pobj Mexico))))";
    // assertEquals takes (message, expected, actual): the gold tree is the expected value,
    // so a failure reports the gold tree as "expected" and the prediction as "but was".
    assertEquals("DEPENDENCY_STANFORD - Dependency parse tree should match gold parse.", goldDepTree, predictedDepTree.trim());
    String predictedParseTree = basicTextAnnotation.getView(ViewNames.PARSE_STANFORD).toString();
    String goldParseTree = "(ROOT (S (PP (IN In)\n" + " (NP (DT the)\n" + " (NNP United)\n" + " (NNPS States)))\n" + " (, ,)\n" + " (NP (NP (NNP Cinco))\n" + " (PP (IN de)\n" + " (NP (NNP Mayo))))\n" + " (VP (VBZ has)\n" + " (VP (VBN taken)\n" + " (PP (IN on)\n" + " (NP (DT a)\n" + " (NN significance)))\n" + " (PP (IN beyond)\n" + " (NP (NP (DT that))\n" + " (PP (IN in)\n" + " (NP (NNP Mexico)))))))\n" + " (. .)))";
    assertEquals("PARSE_STANFORD - Constituency parse tree generated should match gold parse.", goldParseTree, predictedParseTree.trim());
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class CachingPipelineTest method testHyphenSplit.
/**
 * Runs NER over text containing hyphenated names and a hyphenated year range, then checks
 * the number of entities found and the tokenized surface form of the first one.
 */
@Test
public void testHyphenSplit() {
    final String hyphenatedText = "The man said that Jean-Pierre Thibault was only present from 2002-2003. Jean-Pierre (" + "also known as John-Paul) saw fit to share this only last Tuesday- who knows why.";

    TextAnnotation annotated = null;
    try {
        annotated = processor.createBasicTextAnnotation("test", "test", hyphenatedText);
        processor.addView(annotated, ViewNames.NER_CONLL);
    } catch (AnnotatorException annotatorFailure) {
        annotatorFailure.printStackTrace();
        fail(annotatorFailure.getMessage());
    }

    // The NER view must be present before its constituents are inspected.
    assertTrue(annotated.hasView(ViewNames.NER_CONLL));

    List<Constituent> entities = annotated.getView(ViewNames.NER_CONLL).getConstituents();
    assertEquals(3, entities.size());

    // The first entity is expected to keep the hyphenated first name as one unit.
    Constituent firstEntity = entities.get(0);
    assertEquals("Jean-Pierre Thibault", firstEntity.getTokenizedSurfaceForm());
}
Aggregations