Use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method setUp.
/**
 * Builds the {@code pipeline} field the first time it is needed.
 *
 * <p>The check and the construction both happen while holding the lock on
 * {@code TokenSequenceMatcherITest.class}; once {@code pipeline} is non-null,
 * later invocations return without doing any work.
 */
@Before
public void setUp() throws Exception {
  synchronized (TokenSequenceMatcherITest.class) {
    // Already built by an earlier invocation: nothing left to do.
    if (pipeline != null) {
      return;
    }

    final String tokenizerOptions = "invertible,splitHyphenated=false";

    // Publish the (still empty) pipeline first, then append annotators in order:
    // tokenizer -> sentence splitter -> POS tagger -> number annotator.
    pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false, "en", tokenizerOptions));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    pipeline.addAnnotator(new POSTaggerAnnotator(false));
    pipeline.addAnnotator(new NumberAnnotator(false, false));
    // The quantifiable-entity normalizer is currently not part of this pipeline:
    // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false));
  }
}
Use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in the CoreNLP project by stanfordnlp.
From the class PatternsSimpleThreadedITest, method setUp.
/**
 * Creates the {@code nlpPipeline} used by the tests in this class.
 *
 * <p>Annotators are appended in this order: whitespace tokenizer, sentence
 * splitter, POS tagger, lemmatizer ({@code MorphaAnnotator}) and an NER
 * annotator configured with SUTime and fine-grained NER switched off.
 *
 * @throws RuntimeIOException if constructing the NER annotator raises an {@link IOException}
 */
@BeforeClass
public static void setUp() {
  // NER configuration: both optional sub-components are disabled.
  final Properties nerConfig = new Properties();
  nerConfig.setProperty("ner.useSUTime", "false");
  nerConfig.setProperty("ner.applyFineGrained", "false");
  // nerConfig.setProperty("ner.fine.regexner.mapping", spiedProperties.getProperty("fineGrainedRegexnerMapping"));

  nlpPipeline = new AnnotationPipeline();

  // Input is assumed to be pre-tokenized, hence the cheap whitespace tokenizer.
  // For reference, the original code configured the tokenizer with:
  // props.setProperty("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
  nlpPipeline.addAnnotator(new TokenizerAnnotator(false, TokenizerType.Whitespace));
  nlpPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
  nlpPipeline.addAnnotator(new POSTaggerAnnotator());
  nlpPipeline.addAnnotator(new MorphaAnnotator(false));

  try {
    nlpPipeline.addAnnotator(new NERCombinerAnnotator(nerConfig));
  } catch (IOException ioFailure) {
    // Surface the checked exception as an unchecked one so setUp() needs no throws clause.
    throw new RuntimeIOException(ioFailure);
  }
}
Use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in the cogcomp-nlp project by CogComp.
From the class SemEvalMentionReader, method initExternalTools.
/**
 * Initializes the external tools held by this reader: the POS annotator, the
 * gazetteers, the WordNet manager, the chunker, the Stanford dependency
 * handler and the mention annotator.
 *
 * <p>Any exception raised during initialization is printed and not rethrown,
 * so the assignments that follow the failing statement are not executed.
 */
public void initExternalTools() {
  try {
    _posAnnotator = new POSAnnotator();

    // Gazetteers are obtained through the Datastore and loaded from its "gazetteers" sub-directory.
    Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
    File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
    _gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);

    WordNetManager.loadConfigAsClasspathResource(true);
    _wordnet = WordNetManager.getInstance();

    __chunker = new ChunkerAnnotator(true);
    __chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    // The value must be a String: Properties.getProperty() returns null for any
    // non-String value, so storing a Boolean via put() hides the setting from
    // every reader that goes through getProperty().
    stanfordProps.setProperty("parse.originalDependencies", "true");
    // NOTE(review): these two values are still stored with put(); if the constants
    // are not Strings they are likewise invisible to getProperty() - confirm their type.
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    __stanfordDep = new StanfordDepHandler(posAnnotator, parseAnnotator);

    __mentionAnnotator = new MentionAnnotator("ACE_TYPE");
  } catch (Exception e) {
    // Best-effort initialization: report the failure and carry on.
    e.printStackTrace();
  }
}
Use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in the cogcomp-nlp project by CogComp.
From the class RelationExtractionTest, method testAnnotator.
/**
 * Runs the POS, chunker, Stanford dependency, mention and relation annotators
 * over every document returned by the ACE reader and checks that each document
 * ends up with at least one mention constituent and at least one relation.
 *
 * <p>Exceptions raised while fetching the test documents or while annotating
 * are converted into {@link AssertionError}s so that the test fails instead of
 * passing silently.
 */
@Test
public void testAnnotator() {
  File modelDir;
  try {
    Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
    modelDir = ds.getDirectory("org.cogcomp.re", "ACE_TEST_DOCS", 1.1, false);
  } catch (Exception e) {
    // Without the test documents nothing below can run; fail with the cause attached.
    throw new AssertionError("Could not obtain the ACE_TEST_DOCS resource from the Datastore", e);
  }
  try {
    ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer(modelDir.getAbsolutePath() + File.separator + "ACE_TEST_DOCS", false);
    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    // The value must be a String: Properties.getProperty() returns null for any
    // non-String value, so storing a Boolean via put() hides the setting from
    // every reader that goes through getProperty().
    stanfordProps.setProperty("parse.originalDependencies", "true");
    // NOTE(review): these two values are still stored with put(); if the constants
    // are not Strings they are likewise invisible to getProperty() - confirm their type.
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
    RelationAnnotator relationAnnotator = new RelationAnnotator();

    for (TextAnnotation ta : aceReader) {
      // Add the prerequisite views first, then the mention and relation views under test.
      ta.addView(pos_annotator);
      chunker.addView(ta);
      stanfordDepHandler.addView(ta);
      mentionAnnotator.addView(ta);
      relationAnnotator.addView(ta);

      View mentionView = ta.getView(ViewNames.MENTION);
      assertTrue(mentionView.getConstituents().size() > 0);
      View relationView = ta.getView(ViewNames.RELATION);
      assertTrue(relationView.getRelations().size() > 0);
    }
  } catch (Exception e) {
    // Previously only printed, which let a broken pipeline pass the test.
    throw new AssertionError("Relation extraction pipeline raised an exception", e);
  }
}
Use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in the cogcomp-nlp project by CogComp.
From the class ExampleUsage, method SemEvalAnnotate.
/**
 * Example: annotates a fixed sentence, builds a relation between two token
 * spans of it, classifies that relation with the SemEval relation classifier
 * and prints the predicted tag to standard output.
 *
 * <p>Exceptions raised while annotating or loading resources are printed and
 * not rethrown; execution then continues with whatever was loaded up to that point.
 */
public static void SemEvalAnnotate() {
  String text = "People have been moving back into downtown.";
  String corpus = "semeval";
  String textId = "001";

  // Create a TextAnnotation from the raw text.
  TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
  TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);

  POSAnnotator pos_annotator = new POSAnnotator();
  ChunkerAnnotator chunker = new ChunkerAnnotator(true);
  chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

  Properties stanfordProps = new Properties();
  stanfordProps.setProperty("annotators", "pos, parse");
  // The value must be a String: Properties.getProperty() returns null for any
  // non-String value, so storing a Boolean via put() hides the setting from
  // every reader that goes through getProperty().
  stanfordProps.setProperty("parse.originalDependencies", "true");
  // NOTE(review): these two values are still stored with put(); if the constants
  // are not Strings they are likewise invisible to getProperty() - confirm their type.
  stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
  stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

  POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
  ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
  StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);

  String modelPath = "";
  FlatGazetteers gazetteers = null;
  try {
    ta.addView(pos_annotator);
    chunker.addView(ta);
    stanfordDepHandler.addView(ta);

    // Fetch the SemEval model directory and the gazetteers through the Datastore.
    Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
    File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
    modelPath = model.getPath();
    File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
    gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);

    WordNetManager.loadConfigAsClasspathResource(true);
    WordNetManager wordnet = WordNetManager.getInstance();

    // Build the "RE_ANNOTATED" view: a copy of every token constituent, with its
    // existing attributes plus the two WordNet-derived attributes.
    View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
    for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
      Constituent c = co.cloneForNewView("RE_ANNOTATED");
      for (String s : co.getAttributeKeys()) {
        c.addAttribute(s, co.getAttribute(s));
      }
      c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
      c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
      annotatedTokenView.addConstituent(c);
    }
    ta.addView("RE_ANNOTATED", annotatedTokenView);
  } catch (Exception e) {
    e.printStackTrace();
  }

  // NOTE(review): if the try block failed before the gazetteers were assigned,
  // `gazetteers` is still null here and the calls below throw a NullPointerException.
  Constituent source = new Constituent("first", "Mention", ta, 0, 1);
  Constituent target = new Constituent("second", "Mention", ta, 6, 7);
  source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
  target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
  Relation relation = new Relation("TEST", source, target, 1.0f);

  // The classifier is given the ".lc" and ".lex" files under <modelPath>/SEMEVAL/.
  String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
  semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
  String tag = classifier.discreteValue(relation);
  System.out.println(tag);
}
Aggregations