use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method testTokenization.
/**
 * Tests that annotating tokenized text produces the correct number of constituents.
 *
 * <p>Two inputs are annotated in turn: {@code TOKEN_TEST}, whose view is expected to hold
 * 2 constituents, and a longer news sentence, whose view is expected to hold 3.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // assertEquals takes (expected, actual); keeping that order makes a failure report
    // "expected:<2> but was:<n>" instead of the reverse.
    assertEquals(2, nerView.getConstituents().size());

    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy's mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method evaluatePerformance.
/**
 * Make sure annotation runs in reasonable time. The budget is obtained from
 * {@code measureMachinePerformance()} on the machine the test runs on, so the comparison
 * adapts to the host rather than using a fixed threshold.
 *
 * <p>The input is annotated twice before timing starts so that any lazy loading happens
 * outside the measured loop. Inside the loop every produced constituent must be a member
 * of {@code entities}.
 */
// @Test
public void evaluatePerformance() {
    // number of timed annotation runs the average is taken over.
    final int SIZE = 100;

    // make sure any lazy loading is done outside the performance test.
    TextAnnotation tat = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(tat);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // one more untimed run, this time checking that a view is actually produced.
    {
        TextAnnotation ta = tab.createTextAnnotation(TEST_INPUT);
        View view = null;
        try {
            view = getView(ta);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(view != null);
    }

    // start the performance test.
    long startMillis = System.currentTimeMillis();
    for (int i = 0; i < SIZE; i++) {
        TextAnnotation ta = tab.createTextAnnotation(TEST_INPUT);
        View view = null;
        try {
            view = getView(ta);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(view != null);
        for (Constituent c : view.getConstituents()) {
            assertTrue("No entity named \"" + c.toString() + "\"", entities.contains(c.toString()));
        }
    }

    // average wall-clock time per annotated document (integer division, as before).
    long averageMillis = (System.currentTimeMillis() - startMillis) / SIZE;

    // %n terminates the line so subsequent console output does not run on behind it.
    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f%n", TEST_INPUT.length(), averageMillis, (double) averageMillis / (double) expectedPerformance);
    assertTrue("Average runtime per document (" + averageMillis + " ms) exceeds the expected performance budget (" + expectedPerformance + ")", averageMillis <= expectedPerformance);
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class NerInitTest method testInit.
/**
 * Builds a CoNLL NER annotator from a configuration in which the gazetteer-features and
 * Brown-cluster-paths properties are both set to "0", runs it on {@code TESTSTR}, and
 * checks that the {@code NER_CONLL} view was added with exactly 2 constituents.
 */
@Test
public void testInit() {
    Properties props = new Properties();
    props.setProperty(NerBaseConfigurator.GAZETTEER_FEATURES, "0");
    props.setProperty(NerBaseConfigurator.BROWN_CLUSTER_PATHS, "0");
    ResourceManager rm = (new NerBaseConfigurator()).getConfig(new ResourceManager(props));
    NERAnnotator ner = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
    assertNotNull(ner);

    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = tab.createTextAnnotation(TESTSTR);
    try {
        ner.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // The Java `assert` keyword is skipped unless the JVM runs with -ea, so the original
    // check could silently never execute. Fail through JUnit so it is always enforced.
    if (!ta.hasView(ViewNames.NER_CONLL)) {
        fail("TextAnnotation has no " + ViewNames.NER_CONLL + " view after annotation");
    }
    // assertEquals takes (expected, actual).
    assertEquals(2, ta.getView(ViewNames.NER_CONLL).getConstituents().size());
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class StanfordDepHandler method addView.
/**
 * Runs the POS and parse annotators over {@code textAnnotation}, converts each sentence's
 * basic dependency graph into a tree, and adds the resulting {@link TreeView} to the
 * annotation under {@code getViewName()}.
 *
 * <p>Sentences with more tokens than {@code maxParseSentenceLength} (when that limit is
 * positive) are logged and left without a dependency tree. Sentences whose dependency
 * root cannot be obtained are either skipped or cause the underlying runtime exception to
 * propagate, depending on {@code throwExceptionOnSentenceLengthCheck}.
 *
 * @param textAnnotation the annotation to parse; receives the new view
 * @throws AnnotatorException if {@code StanfordParseHandler.checkLength} rejects the input,
 *         or if the first sentence's parse tree is the placeholder node "X"
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck, maxParseSentenceLength);
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most likely because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". " + "This is most likely due to a timeout.");
    }
    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        CoreMap sentence = sentences.get(sentenceId);

        // CoreMap.size() is the number of annotation keys stored on the sentence, not its
        // length, so the limit has to be compared against the token list instead.
        // NOTE(review): assumes buildStanfordSentences sets TokensAnnotation on every
        // sentence; if it is absent the length limit is not applied to that sentence.
        List<?> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int tokenCount = (sentenceTokens == null) ? 0 : sentenceTokens.size();
        if (maxParseSentenceLength > 0 && tokenCount > maxParseSentenceLength) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(), maxParseSentenceLength));
            continue;
        }

        SemanticGraph depGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        IndexedWord root;
        try {
            root = depGraph.getFirstRoot();
        } catch (RuntimeException e) {
            // depGraph itself may be the null that got us here; do not dereference it
            // unguarded while building the diagnostic, or the handler throws a second NPE
            // that bypasses the throw/skip policy below.
            String graphText = (depGraph == null) ? "null" : depGraph.toCompactString();
            String msg = "ERROR in getting root of dep graph for sentence. Sentence is:\n" + sentence.toString() + "'\nDependency graph is:\n" + graphText + "\nText is:\n" + textAnnotation.getText();
            // Single report through the logger, stack trace included.
            logger.error(msg, e);
            if (throwExceptionOnSentenceLengthCheck) {
                throw e;
            }
            // Best-effort mode: leave this sentence without a tree and move on.
            continue;
        }

        int tokenStart = getNodePosition(textAnnotation, root, sentenceId);
        Pair<String, Integer> nodePair = new Pair<>(root.originalText(), tokenStart);
        Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
        populateChildren(depGraph, root, tree, textAnnotation, sentenceId);
        treeView.setDependencyTree(sentenceId, tree);
    }
    textAnnotation.addView(getViewName(), treeView);
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class StanfordParseHandler method checkLength.
/**
 * Enforces the sentence-length limit on a text annotation when strict checking is enabled.
 *
 * <p>When {@code throwExceptionOnSentenceLengthCheck} is {@code false} this method does
 * nothing. Otherwise it asks
 * {@code HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit} for an offending
 * constituent; if one is returned, the corresponding error message is logged and thrown.
 *
 * @param textAnnotation the annotation whose sentences are checked
 * @param throwExceptionOnSentenceLengthCheck whether the check is performed at all
 * @param maxParseSentenceLength the limit passed through to {@code HandlerUtils}
 * @throws AnnotatorException if checking is enabled and an offending constituent is found
 */
static void checkLength(TextAnnotation textAnnotation, boolean throwExceptionOnSentenceLengthCheck, int maxParseSentenceLength) throws AnnotatorException {
    if (!throwExceptionOnSentenceLengthCheck) {
        return;
    }

    final Constituent offender = HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit(textAnnotation, maxParseSentenceLength);
    if (offender == null) {
        return;
    }

    final String errorText = HandlerUtils.getSentenceLengthError(textAnnotation.getId(), offender.getSurfaceForm(), maxParseSentenceLength);
    logger.error(errorText);
    throw new AnnotatorException(errorText);
}
Aggregations