Use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.ERENerReader in project cogcomp-nlp by CogComp.
From the class EREReaderTest, method runTest.
/**
 * Builds an {@link EREMentionRelationReader} for the given corpus, pulls one document from it,
 * checks (via {@code assert}) that the expected mention view exists and is non-empty, and prints
 * every constituent of that view next to the original XML text its offsets map back to.
 *
 * <p>The checks use the {@code assert} keyword, so they only run when assertions are enabled
 * on the JVM ({@code -ea}).
 *
 * @param ereCorpus  the ERE release to read; passed straight to the reader constructor
 * @param corpusRoot passed straight to the reader constructor (presumably the corpus root
 *                   directory -- confirm against the reader's documentation)
 * @return the {@link XmlTextAnnotation} returned by the reader's {@code next()} call
 * @throws IllegalStateException if the reader cannot be instantiated; the original failure is
 *                               attached as the cause
 */
private static XmlTextAnnotation runTest(EreCorpus ereCorpus, String corpusRoot) {
    // NOTE(review): this flag is not handed to the reader; it only selects which view is checked below.
    boolean addNominalMentions = true;
    boolean throwExceptionOnXmlTagMismatch = true;

    ERENerReader nerReader;
    try {
        nerReader = new EREMentionRelationReader(ereCorpus, corpusRoot, throwExceptionOnXmlTagMismatch);
    } catch (Exception e) {
        System.err.println("ERROR: " + NAME + ": couldn't instantiate ERENerReader for ERE release " + ereCorpus.name() + ": " + e.getMessage());
        // Fail fast with the real cause attached. Previously execution continued with a null
        // reader and died on the next line with an uninformative NullPointerException.
        throw new IllegalStateException(
                NAME + ": couldn't instantiate ERENerReader for ERE release " + ereCorpus.name(), e);
    }

    XmlTextAnnotation outputXmlTa = nerReader.next();
    TextAnnotation output = outputXmlTa.getTextAnnotation();

    View nerEre = null;
    if (addNominalMentions) {
        assert (output.hasView(ViewNames.MENTION_ERE));
        nerEre = output.getView(ViewNames.MENTION_ERE);
    } else {
        assert (output.hasView(ViewNames.NER_ERE));
        nerEre = output.getView(ViewNames.NER_ERE);
    }
    assert (nerEre.getConstituents().size() > 0);

    // The string transformation maps offsets in the cleaned text back to the original XML source.
    StringTransformation xmlSt = outputXmlTa.getXmlSt();
    String origXmlStr = xmlSt.getOrigText();

    System.out.println("ERENerReader found " + nerEre.getConstituents().size() + " NER constituents: ");
    for (Constituent c : nerEre.getConstituents()) {
        System.out.println(TextAnnotationPrintHelper.printConstituent(c));

        // Translate the constituent's character span into offsets in the original XML string.
        int start = c.getStartCharOffset();
        int end = c.getEndCharOffset();
        IntPair origOffsets = xmlSt.getOriginalOffsets(start, end);
        String origStr = origXmlStr.substring(origOffsets.getFirst(), origOffsets.getSecond());

        System.out.println("Constituent (clean) text: '" + c.getSurfaceForm() + "'");
        System.out.println("Original text: '" + origStr + "'\n---------\n");
    }

    System.out.println("Report: " + nerReader.generateReport());
    return outputXmlTa;
}
Aggregations