Use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.
Class ExtentReader, method getTextAnnotations.
/**
 * Collects the documents of the corpus selected by {@code _corpus} into a list.
 *
 * <ul>
 *   <li>{@code "ACE"}: reads {@code _path} with an {@code ACEReaderWithTrueCaseFixer} and runs
 *       each document through a {@code POSAnnotator} before collecting it.</li>
 *   <li>{@code "ERE"}: reads {@code _path} with an {@code EREMentionRelationReader} (ENR3),
 *       unwraps each {@code XmlTextAnnotation} and runs it through a {@code POSAnnotator}.</li>
 *   <li>A value starting with {@code "COMBINED"}: the value is split on {@code '-'}; fields 1
 *       and 2 are rejoined as the reader's corpus/mode argument and field 3 is parsed as the
 *       fold number. Documents are pulled from a {@code BIOCombinedReader} until it yields
 *       {@code null}.</li>
 * </ul>
 *
 * <p>Exceptions raised while reading ACE or ERE data are printed and swallowed, so the result
 * may be partial or empty for those corpora.
 *
 * @return the collected annotations; never {@code null}
 */
public List<TextAnnotation> getTextAnnotations() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    List<TextAnnotation> annotations = new ArrayList<>();

    if (_corpus.equals("ACE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            ACEReaderWithTrueCaseFixer reader = new ACEReaderWithTrueCaseFixer(_path, false);
            for (TextAnnotation doc : reader) {
                doc.addView(tagger);
                annotations.add(doc);
            }
        } catch (Exception e) {
            // Best effort: keep whatever was read before the failure.
            e.printStackTrace();
        }
    }

    if (_corpus.equals("ERE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            EREMentionRelationReader reader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false);
            for (XmlTextAnnotation wrapped : reader) {
                TextAnnotation doc = wrapped.getTextAnnotation();
                doc.addView(tagger);
                annotations.add(doc);
            }
        } catch (Exception e) {
            // Best effort: keep whatever was read before the failure.
            e.printStackTrace();
        }
    }

    if (_corpus.startsWith("COMBINED")) {
        // Expected layout: COMBINED-<corpus>-<mode>-<fold>
        String[] fields = _corpus.split("-");
        String realCorpus = fields[1];
        String mode = fields[2];
        int fold = Integer.parseInt(fields[3]);
        BIOCombinedReader combined = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        Object next;
        while ((next = combined.next()) != null) {
            annotations.add((TextAnnotation) next);
        }
    }

    return annotations;
}
Use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.
Class AnnotatorTester, method test_custom_annotator.
/**
 * Scores a {@code MentionAnnotator} built with the {@code "models/TAC_NOM"} argument against
 * the gold {@code MENTION_ACE} view of every document read from
 * {@code data/partition_with_dev/dev}, then prints five totals to standard output.
 *
 * <p>For each predicted mention the gold mentions are scanned in order. The first gold mention
 * whose head span equals the predicted head span counts as a head match and ends the scan.
 * A head match also counts as a type match when both "EntityType" attributes are equal, and
 * as an extent match when the full spans are equal.
 *
 * <p>Any exception is printed and swallowed; the totals gathered up to that point are still
 * printed.
 */
public static void test_custom_annotator() {
    POSAnnotator tagger = new POSAnnotator();
    int labeled = 0;
    int predicted = 0;
    int headMatches = 0;
    int typeMatches = 0;
    int extentMatches = 0;
    try {
        ACEReader reader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator annotator = new MentionAnnotator("", "models/TAC_NOM", "", "", "");
        for (TextAnnotation doc : reader) {
            doc.addView(tagger);
            annotator.addView(doc);
            labeled += doc.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predicted += doc.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent guess : doc.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : doc.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // The gold label is stored as "EntityType" so it can be compared below.
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, doc, "B");
                    if (goldHead == null) {
                        continue;
                    }
                    boolean sameHead =
                            Integer.parseInt(guess.getAttribute("EntityHeadStartSpan")) == goldHead.getStartSpan()
                                    && Integer.parseInt(guess.getAttribute("EntityHeadEndSpan")) == goldHead.getEndSpan();
                    if (!sameHead) {
                        continue;
                    }
                    headMatches++;
                    if (guess.getAttribute("EntityType").equals(gold.getAttribute("EntityType"))) {
                        typeMatches++;
                    }
                    boolean sameExtent = guess.getStartSpan() == gold.getStartSpan()
                            && guess.getEndSpan() == gold.getEndSpan();
                    if (sameExtent) {
                        extentMatches++;
                    }
                    // Only the first gold mention with a matching head is counted.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeled);
    System.out.println("Predicted: " + predicted);
    System.out.println("Correct: " + headMatches);
    System.out.println("Type Correct: " + typeMatches);
    System.out.println("Extent Correct: " + extentMatches);
}
Use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.
Class AnnotatorTester, method test_basic_annotator.
/**
 * Scores a {@code MentionAnnotator} created with the single argument {@code "ACE_NONTYPE"}
 * against the gold {@code MENTION_ACE} view of every document read from
 * {@code data/partition_with_dev/dev}, then prints five totals to standard output.
 *
 * <p>NOTE(review): the previous comment described this as using the ACE model trained with
 * type and expected fairly high performance, yet the annotator is constructed with
 * {@code "ACE_NONTYPE"} - confirm which model is intended.
 *
 * <p>For each predicted mention the gold mentions are scanned in order. The first gold mention
 * whose head span equals the predicted head span counts as a head match and ends the scan.
 * A head match also counts as a type match when both "EntityType" attributes are equal, and
 * as an extent match when the full spans are equal.
 *
 * <p>Any exception is printed and swallowed; the totals gathered up to that point are still
 * printed.
 */
public static void test_basic_annotator() {
    POSAnnotator tagger = new POSAnnotator();
    int goldCount = 0;
    int predictedCount = 0;
    int headHits = 0;
    int typeHits = 0;
    int extentHits = 0;
    try {
        ACEReader corpus = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator annotator = new MentionAnnotator("ACE_NONTYPE");
        for (TextAnnotation document : corpus) {
            document.addView(tagger);
            annotator.addView(document);
            goldCount += document.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += document.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent candidate : document.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent reference : document.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // The gold label is stored as "EntityType" so it can be compared below.
                    reference.addAttribute("EntityType", reference.getLabel());
                    Constituent referenceHead = ACEReader.getEntityHeadForConstituent(reference, document, "B");
                    if (referenceHead == null) {
                        continue;
                    }
                    boolean headsAgree =
                            Integer.parseInt(candidate.getAttribute("EntityHeadStartSpan")) == referenceHead.getStartSpan()
                                    && Integer.parseInt(candidate.getAttribute("EntityHeadEndSpan")) == referenceHead.getEndSpan();
                    if (!headsAgree) {
                        continue;
                    }
                    headHits++;
                    if (candidate.getAttribute("EntityType").equals(reference.getAttribute("EntityType"))) {
                        typeHits++;
                    }
                    boolean extentsAgree = candidate.getStartSpan() == reference.getStartSpan()
                            && candidate.getEndSpan() == reference.getEndSpan();
                    if (extentsAgree) {
                        extentHits++;
                    }
                    // Only the first gold mention with a matching head is counted.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + goldCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headHits);
    System.out.println("Type Correct: " + typeHits);
    System.out.println("Extent Correct: " + extentHits);
}
Use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.
Class ExampleUsage, method SemEvalAnnotate.
/**
 * Example: classifies the relation between two hand-picked mentions in a fixed sentence with
 * the SemEval relation classifier and prints the predicted tag.
 *
 * <p>Steps: build a {@code TextAnnotation} from the sentence; add POS, chunker and Stanford
 * dependency views; fetch the SemEval model and gazetteer directories through a
 * {@code Datastore}; build an "RE_ANNOTATED" view whose constituents copy the token
 * constituents and carry WordNet attributes; then classify a relation between the constituents
 * covering tokens [0,1) and [6,7).
 *
 * <p>If any setup step fails, the exception is printed and the method returns without
 * classifying. Continuing would dereference a null gazetteer and use an empty model path.
 */
public static void SemEvalAnnotate() {
    String text = "People have been moving back into downtown.";
    String corpus = "semeval";
    String textId = "001";
    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    Properties stanfordProps = new Properties();
    stanfordProps.put("annotators", "pos, parse");
    stanfordProps.put("parse.originalDependencies", true);
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    String modelPath;
    FlatGazetteers gazetteers;
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
        modelPath = model.getPath();
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordnet = WordNetManager.getInstance();
        View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
        for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
            // Copy each token constituent with its attributes, then attach WordNet attributes.
            Constituent c = co.cloneForNewView("RE_ANNOTATED");
            for (String s : co.getAttributeKeys()) {
                c.addAttribute(s, co.getAttribute(s));
            }
            c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
            c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
            annotatedTokenView.addConstituent(c);
        }
        ta.addView("RE_ANNOTATED", annotatedTokenView);
    } catch (Exception e) {
        // Without the views, model path and gazetteers the classification below cannot run.
        e.printStackTrace();
        return;
    }
    Constituent source = new Constituent("first", "Mention", ta, 0, 1);
    Constituent target = new Constituent("second", "Mention", ta, 6, 7);
    source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
    target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
    Relation relation = new Relation("TEST", source, target, 1.0f);
    String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
    semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
    String tag = classifier.discreteValue(relation);
    System.out.println(tag);
}
Use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.
Class ExampleUsage, method AnnotatorExample.
/**
 * Example: runs the relation annotation pipeline on a fixed sentence and prints every relation
 * found in the resulting MENTION view.
 *
 * <p>The sentence is tokenized into a {@code TextAnnotation}, then POS, chunker, Stanford
 * dependency and relation annotators are applied in that order. If any annotator fails, the
 * exception is printed and the method returns without reading the MENTION view, because the
 * pipeline did not finish and the view may be missing or incomplete.
 */
public static void AnnotatorExample() {
    String text = "He went to Chicago after his Father moved there.";
    String corpus = "story";
    String textId = "001";
    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    Properties stanfordProps = new Properties();
    stanfordProps.put("annotators", "pos, parse");
    stanfordProps.put("parse.originalDependencies", true);
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    RelationAnnotator relationAnnotator = new RelationAnnotator();
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        relationAnnotator.addView(ta);
    } catch (Exception e) {
        // The pipeline did not complete, so there is nothing reliable to print.
        e.printStackTrace();
        return;
    }
    View mentionView = ta.getView(ViewNames.MENTION);
    List<Relation> predictedRelations = mentionView.getRelations();
    for (Relation r : predictedRelations) {
        IOHelper.printRelation(r);
    }
}
Aggregations