use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.
the class ExampleUsage method AnnotatorExample.
/**
 * Runs the relation-extraction annotators over one hard-coded sentence and prints every
 * relation found in the {@code ViewNames.MENTION} view.
 *
 * <p>Views are added in this order: POS, chunker, Stanford dependency handler, relation
 * annotator. An exception raised while adding views is printed to stderr; the method then
 * still attempts to read the mention view.
 */
public static void AnnotatorExample() {
    String text = "He went to Chicago after his Father moved there.";
    String corpus = "story";
    String textId = "001";
    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    // The value must be a String: Properties.getProperty() returns null for any non-String
    // value, so a Boolean stored via put() is invisible to string-based property lookups.
    stanfordProps.setProperty("parse.originalDependencies", "true");
    // NOTE(review): if these configurator constants are not Strings, getProperty() will not
    // see them either -- confirm their type and store the String value if needed.
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    RelationAnnotator relationAnnotator = new RelationAnnotator();
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        relationAnnotator.addView(ta);
    } catch (Exception e) {
        e.printStackTrace();
    }
    View mentionView = ta.getView(ViewNames.MENTION);
    List<Relation> predictedRelations = mentionView.getRelations();
    for (Relation r : predictedRelations) {
        IOHelper.printRelation(r);
    }
}
use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project CoreNLP by stanfordnlp.
the class SUTimeSimpleParser method makeNumericPipeline.
/**
 * Assembles the annotation pipeline returned to callers of this helper.
 *
 * <p>Four annotators are registered, in this order: a {@code TokenizerAnnotator}
 * (constructed with {@code false, "en"}), a {@code WordsToSentencesAnnotator}, a
 * {@code POSTaggerAnnotator} and a {@code TimeAnnotator}.
 *
 * @return a freshly built pipeline containing the four annotators above
 */
private static AnnotationPipeline makeNumericPipeline() {
    final AnnotationPipeline numericPipeline = new AnnotationPipeline();

    // Tokenize, then split into sentences.
    numericPipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
    numericPipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Tag parts of speech, then run the time annotator.
    numericPipeline.addAnnotator(new POSTaggerAnnotator(false));
    numericPipeline.addAnnotator(new TimeAnnotator(true));

    return numericPipeline;
}
use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.
the class ACERelationTester method testRandomText.
/**
 * Annotates arbitrary text and prints every relation in the {@code ViewNames.RELATION} view.
 *
 * <p>Views are added in this order: POS, Stanford dependency handler, chunker, mention
 * annotator, relation annotator. Any exception raised while building or running the
 * annotators is printed to stderr and the method returns normally.
 *
 * @param text raw text to annotate; corpus id and text id are both set to the empty string
 */
public static void testRandomText(String text) {
    String corpus = "";
    String textId = "";
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    try {
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // The value must be a String: Properties.getProperty() returns null for any non-String
        // value, so a Boolean stored via put() is invisible to string-based property lookups.
        stanfordProps.setProperty("parse.originalDependencies", "true");
        // NOTE(review): if these configurator constants are not Strings, getProperty() will
        // not see them either -- confirm their type and store the String value if needed.
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();
        ta.addView(pos_annotator);
        stanfordDepHandler.addView(ta);
        chunker.addView(ta);
        mentionAnnotator.addView(ta);
        relationAnnotator.addView(ta);
        for (Relation r : ta.getView(ViewNames.RELATION).getRelations()) {
            IOHelper.printRelation(r);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.
the class ACERelationTester method testAnnotator.
/**
 * Evaluates the relation annotator on the documents read from
 * {@code data/partition_with_dev/dev} and prints counts, precision, recall and F1.
 *
 * <p>For every document the POS, Stanford dependency, chunker, mention and relation views are
 * added. A predicted relation (from {@code ViewNames.RELATION}) is matched to a gold relation
 * (from {@code ViewNames.MENTION_ACE}) when the entity heads of both its source and its target
 * have identical start and end spans. A match counts as "coarse correct" when the
 * {@code RelationType} attributes are equal and as "correct" when the
 * {@code RelationSubtype} attributes are equal.
 *
 * <p>Any exception raised during annotation is printed to stderr; the statistics gathered up
 * to that point are still reported. All scores are percentages and are reported as
 * {@code 0.0} instead of {@code NaN} when their denominator is zero.
 */
public static void testAnnotator() {
    int total_correct = 0;
    int total_labeled = 0;
    int total_predicted = 0;
    int total_coarse_correct = 0;
    try {
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // The value must be a String: Properties.getProperty() returns null for any non-String
        // value, so a Boolean stored via put() is invisible to string-based property lookups.
        stanfordProps.setProperty("parse.originalDependencies", "true");
        // NOTE(review): if these configurator constants are not Strings, getProperty() will
        // not see them either -- confirm their type and store the String value if needed.
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        ACEReader aceReader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();
        for (TextAnnotation ta : aceReader) {
            ta.addView(pos_annotator);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);
            mentionAnnotator.addView(ta);
            relationAnnotator.addView(ta);
            total_labeled += ta.getView(ViewNames.MENTION_ACE).getRelations().size();
            total_predicted += ta.getView(ViewNames.RELATION).getRelations().size();
            for (Relation pr : ta.getView(ViewNames.RELATION).getRelations()) {
                // The predicted relation's heads do not depend on the gold relation, so they
                // are looked up once per predicted relation rather than once per pair.
                Constituent prSourceHead = RelationFeatureExtractor.getEntityHeadForConstituent(pr.getSource(), ta, "");
                Constituent prTargetHead = RelationFeatureExtractor.getEntityHeadForConstituent(pr.getTarget(), ta, "");
                for (Relation gr : ta.getView(ViewNames.MENTION_ACE).getRelations()) {
                    Constituent grSourceHead = RelationFeatureExtractor.getEntityHeadForConstituent(gr.getSource(), ta, "");
                    Constituent grTargetHead = RelationFeatureExtractor.getEntityHeadForConstituent(gr.getTarget(), ta, "");
                    boolean sameSource = prSourceHead.getStartSpan() == grSourceHead.getStartSpan()
                            && prSourceHead.getEndSpan() == grSourceHead.getEndSpan();
                    boolean sameTarget = prTargetHead.getStartSpan() == grTargetHead.getStartSpan()
                            && prTargetHead.getEndSpan() == grTargetHead.getEndSpan();
                    if (sameSource && sameTarget) {
                        if (pr.getAttribute("RelationType").equals(gr.getAttribute("RelationType"))) {
                            total_coarse_correct++;
                        }
                        if (pr.getAttribute("RelationSubtype").equals(gr.getAttribute("RelationSubtype"))) {
                            total_correct++;
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Total labeled: " + total_labeled);
    System.out.println("Total predicted: " + total_predicted);
    System.out.println("Total correct: " + total_correct);
    System.out.println("Total coarse correct: " + total_coarse_correct);
    // Guard every denominator so an empty corpus or zero matches yields 0.0 rather than NaN.
    double p = total_predicted == 0 ? 0.0 : total_correct * 100.0 / total_predicted;
    double r = total_labeled == 0 ? 0.0 : total_correct * 100.0 / total_labeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    // F1 as a percentage equals 200 * correct / (predicted + labeled). Computing the coarse
    // score directly (instead of scaling f by coarse/correct) keeps it defined when there are
    // coarse matches but no fine-grained ones.
    int predictedPlusLabeled = total_predicted + total_labeled;
    double coarseF = predictedPlusLabeled == 0 ? 0.0 : 200.0 * total_coarse_correct / predictedPlusLabeled;
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("Fine Type F1: " + f);
    System.out.println("Coarse Type F1: " + coarseF);
}
use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.
the class PipelineFactory method buildAnnotators.
/**
 * Instantiates the set of annotators for use in an AnnotatorService object. Which annotators
 * are built is decided by the {@code PipelineConfigurator.USE_*} flags of the merged
 * configuration. The SRL annotators are passed the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} flag, which defaults to
 * {@code PipelineConfigurator.TRUE} when the property is not set.
 *
 * <p>All {@code USE_*} flags are read from the same merged pipeline configuration;
 * annotator-specific configurations (SRL, dataless ESA/W2V) are held in their own local
 * variables so that they never replace the configuration the remaining flags are read from.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if either SRL annotator fails to construct; the original exception is
 *         attached as the cause
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();
    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // The value must be a String: Properties.getProperty() returns null for any non-String
        // value, so a Boolean stored via put() is invisible to string-based property lookups.
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        // Per sentence? Could be per document, but the Stanford javadoc does not say.
        stanfordProps.setProperty("parse.maxtime", timePerSentence);
        // The POS tagger and parser are shared by both Stanford handlers below.
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        // Merge into a dedicated config so the shared pipeline config is left untouched.
        ResourceManager verbSrlRm = Configurator.mergeProperties(rm, new ResourceManager(verbProps));
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(verbSrlRm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Chain the cause so the original stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        // Merge into a dedicated config so the shared pipeline config is left untouched.
        ResourceManager nomSrlRm = Configurator.mergeProperties(rm, new ResourceManager(nomProps));
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(nomSrlRm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            // Chain the cause so the original stack trace is not lost.
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        // One annotator per supported language, keyed by view name plus language code.
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        // Keep the ESA config local: overwriting rm here would make the flags checked below
        // be read from the ESA configuration instead of the pipeline configuration.
        ResourceManager esaRm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(esaRm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        // Same reasoning as for ESA: do not replace the shared pipeline config.
        ResourceManager w2vRm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(w2vRm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }
    return viewGenerators;
}
Aggregations