Example use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer in the cogcomp-nlp project by CogComp.
From class BIOTester, method statistics().
/**
 * Counts mentions per mention type in the ACE, ERE and TAC corpora and prints the totals
 * to standard output.
 *
 * <p>For ACE ({@code data/all}) and ERE ({@code data/ere/data}) each constituent of the
 * corresponding mention view is classified by its {@code EntityMentionType} attribute
 * ({@code NAM}, {@code NOM} or {@code PRO}). For TAC every constituent of the
 * {@code MENTIONS} view is counted, using {@code data/tac/2016.nam} for the NAM total and
 * {@code data/tac/2016.nom} for the NOM total.
 *
 * <p>Constituents without an {@code EntityMentionType} attribute are skipped rather than
 * raising a {@link NullPointerException}, so one unlabelled mention no longer aborts the
 * counting of all remaining documents and corpora.
 *
 * <p>If a reader fails, the stack trace is printed and the counts gathered up to that
 * point are still reported.
 */
public static void statistics() {
    int ace_nam = 0;
    int ace_nom = 0;
    int ace_pro = 0;
    int ere_nam = 0;
    int ere_nom = 0;
    int ere_pro = 0;
    int tac_nam = 0;
    int tac_nom = 0;
    try {
        ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer("data/all", false);
        for (TextAnnotation ta : aceReader) {
            for (Constituent c : ta.getView(ViewNames.MENTION_ACE)) {
                // Look the attribute up once; constant-first equals tolerates a missing attribute.
                String mentionType = c.getAttribute("EntityMentionType");
                if ("NAM".equals(mentionType)) {
                    ace_nam++;
                } else if ("NOM".equals(mentionType)) {
                    ace_nom++;
                } else if ("PRO".equals(mentionType)) {
                    ace_pro++;
                }
            }
        }

        EREMentionRelationReader ereReader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        for (XmlTextAnnotation xta : ereReader) {
            TextAnnotation ta = xta.getTextAnnotation();
            for (Constituent c : ta.getView(ViewNames.MENTION_ERE)) {
                String mentionType = c.getAttribute("EntityMentionType");
                if ("NAM".equals(mentionType)) {
                    ere_nam++;
                } else if ("NOM".equals(mentionType)) {
                    ere_nom++;
                } else if ("PRO".equals(mentionType)) {
                    ere_pro++;
                }
            }
        }

        // The TAC files are already split by mention type, so every mention in a file counts.
        ColumnFormatReader columnFormatReader = new ColumnFormatReader("data/tac/2016.nam");
        for (TextAnnotation ta : columnFormatReader) {
            for (Constituent ignored : ta.getView("MENTIONS")) {
                tac_nam++;
            }
        }
        columnFormatReader = new ColumnFormatReader("data/tac/2016.nom");
        for (TextAnnotation ta : columnFormatReader) {
            for (Constituent ignored : ta.getView("MENTIONS")) {
                tac_nom++;
            }
        }
    } catch (Exception e) {
        // Best effort: report the failure and still print whatever was counted so far.
        e.printStackTrace();
    }
    System.out.println("ACE_NAM: " + ace_nam);
    System.out.println("ACE_NOM: " + ace_nom);
    System.out.println("ACE_PRO: " + ace_pro);
    System.out.println("ERE_NAM: " + ere_nam);
    System.out.println("ERE_NOM: " + ere_nom);
    System.out.println("ERE_PRO: " + ere_pro);
    System.out.println("TAC_NAM: " + tac_nam);
    System.out.println("TAC_NOM: " + tac_nom);
}
Example use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer in the cogcomp-nlp project by CogComp.
From class ExtentReader, method getTextAnnotations().
/**
 * Loads the text annotations of the corpus selected by the {@code _corpus} field.
 *
 * <ul>
 *   <li>{@code "ACE"}: reads {@code _path} with {@link ACEReaderWithTrueCaseFixer} and adds a
 *       POS view to every document.</li>
 *   <li>{@code "ERE"}: reads {@code _path} with {@link EREMentionRelationReader} (ENR3) and adds
 *       a POS view to every document.</li>
 *   <li>{@code "COMBINED-<corpus>-<mode>-<fold>"}: drains a {@link BIOCombinedReader} built from
 *       the three dash-separated components.</li>
 * </ul>
 *
 * <p>For ACE and ERE, any exception raised while reading or annotating is printed and the
 * documents collected so far are returned. Any other corpus name yields an empty list.
 *
 * @return the collected text annotations, never {@code null}
 */
public List<TextAnnotation> getTextAnnotations() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    final List<TextAnnotation> annotations = new ArrayList<>();

    if (_corpus.equals("ACE")) {
        // Built outside the try block so a tagger construction failure is not swallowed.
        final POSAnnotator tagger = new POSAnnotator();
        try {
            for (TextAnnotation doc : new ACEReaderWithTrueCaseFixer(_path, false)) {
                doc.addView(tagger);
                annotations.add(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else if (_corpus.equals("ERE")) {
        final POSAnnotator tagger = new POSAnnotator();
        try {
            for (XmlTextAnnotation xmlDoc : new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false)) {
                final TextAnnotation doc = xmlDoc.getTextAnnotation();
                doc.addView(tagger);
                annotations.add(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } else if (_corpus.startsWith("COMBINED")) {
        // Expected shape: COMBINED-<corpus>-<mode>-<fold>
        final String[] parts = _corpus.split("-");
        final String realCorpus = parts[1];
        final String mode = parts[2];
        final int fold = Integer.parseInt(parts[3]);
        final BIOCombinedReader combinedReader = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        // next() signals exhaustion by returning null.
        Object next;
        while ((next = combinedReader.next()) != null) {
            annotations.add((TextAnnotation) next);
        }
    }

    return annotations;
}
Example use of edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer in the cogcomp-nlp project by CogComp.
From class RelationExtractionTest, method testAnnotator().
/**
 * Runs the relation-extraction pipeline over the ACE test documents fetched from the
 * datastore and checks that every document ends up with at least one mention and at least
 * one relation.
 *
 * <p>Each document is annotated in order with POS, chunker, Stanford dependency, mention and
 * relation views before the assertions are made.
 *
 * <p>Failures while fetching the documents or running the pipeline are rethrown as
 * {@link AssertionError} (with the original exception as cause) so the test cannot pass
 * without having exercised the annotators.
 */
@Test
public void testAnnotator() {
    File modelDir;
    try {
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        modelDir = ds.getDirectory("org.cogcomp.re", "ACE_TEST_DOCS", 1.1, false);
    } catch (Exception e) {
        // Without the test documents nothing below can run; fail loudly instead of passing vacuously.
        throw new AssertionError("Could not fetch ACE_TEST_DOCS from the datastore", e);
    }
    try {
        ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer(modelDir.getAbsolutePath() + File.separator + "ACE_TEST_DOCS", false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

        // NOTE(review): non-String values stored with put() are not returned by
        // Properties.getProperty(); confirm the Stanford annotators actually see these settings.
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);

        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();

        for (TextAnnotation ta : aceReader) {
            ta.addView(pos_annotator);
            chunker.addView(ta);
            stanfordDepHandler.addView(ta);
            mentionAnnotator.addView(ta);
            relationAnnotator.addView(ta);

            View mentionView = ta.getView(ViewNames.MENTION);
            assertTrue(mentionView.getConstituents().size() > 0);
            View relationView = ta.getView(ViewNames.RELATION);
            assertTrue(relationView.getRelations().size() > 0);
        }
    } catch (Exception e) {
        // Assertion failures are Errors and already propagate; surface everything else too.
        throw new AssertionError("Relation extraction pipeline failed", e);
    }
}
Aggregations