Search in sources :

Example 1 with POSTaggerAnnotator

use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method setUp.

/**
 * Lazily creates the shared {@code pipeline} the first time a test runs; later invocations find
 * it already set and do nothing.
 *
 * <p>The pipeline is assembled in a local variable and published to {@code pipeline} only after
 * every annotator has been added. If one of the annotator constructors throws, the shared field
 * therefore stays {@code null} and the next {@code setUp} call retries, instead of silently
 * reusing a half-built pipeline.
 *
 * @throws Exception if building any of the annotators fails
 */
@Before
public void setUp() throws Exception {
    // Class-level lock: the null check and the assignment must happen atomically.
    synchronized (TokenSequenceMatcherITest.class) {
        if (pipeline == null) {
            AnnotationPipeline freshPipeline = new AnnotationPipeline();
            freshPipeline.addAnnotator(new TokenizerAnnotator(false, "en", "invertible,splitHyphenated=false"));
            freshPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
            freshPipeline.addAnnotator(new POSTaggerAnnotator(false));
            freshPipeline.addAnnotator(new NumberAnnotator(false, false));
            // freshPipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false));
            // Publish only once the pipeline is complete.
            pipeline = freshPipeline;
        }
    }
}
Also used : POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) AnnotationPipeline(edu.stanford.nlp.pipeline.AnnotationPipeline) WordsToSentencesAnnotator(edu.stanford.nlp.pipeline.WordsToSentencesAnnotator) NumberAnnotator(edu.stanford.nlp.pipeline.NumberAnnotator) TokenizerAnnotator(edu.stanford.nlp.pipeline.TokenizerAnnotator) Before(org.junit.Before)

Example 2 with POSTaggerAnnotator

use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project CoreNLP by stanfordnlp.

the class PatternsSimpleThreadedITest method setUp.

/**
 * Creates the {@code nlpPipeline} shared by the tests in this class and fills it, in order, with
 * a whitespace {@code TokenizerAnnotator}, a {@code WordsToSentencesAnnotator}, a
 * {@code POSTaggerAnnotator}, a {@code MorphaAnnotator} and a {@code NERCombinerAnnotator}
 * configured with SUTime and fine-grained NER switched off.
 *
 * @throws RuntimeIOException if constructing the NER annotator raises an {@code IOException}
 */
@BeforeClass
public static void setUp() {
    nlpPipeline = new AnnotationPipeline();

    // Input is assumed to be tokenized already, so a cheap whitespace tokenizer is enough.
    // The original code configured the tokenizer with this property instead:
    // props.setProperty("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
    nlpPipeline.addAnnotator(new TokenizerAnnotator(false, TokenizerType.Whitespace));
    nlpPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    nlpPipeline.addAnnotator(new POSTaggerAnnotator());
    nlpPipeline.addAnnotator(new MorphaAnnotator(false));

    final Properties nerProps = new Properties();
    nerProps.setProperty("ner.useSUTime", "false");
    nerProps.setProperty("ner.applyFineGrained", "false");
    // nerProps.setProperty("ner.fine.regexner.mapping", spiedProperties.getProperty("fineGrainedRegexnerMapping"));

    // Only the constructor is inside the try; the checked exception is rethrown unchecked.
    final NERCombinerAnnotator nerAnnotator;
    try {
        nerAnnotator = new NERCombinerAnnotator(nerProps);
    } catch (IOException ioFailure) {
        throw new RuntimeIOException(ioFailure);
    }
    nlpPipeline.addAnnotator(nerAnnotator);
}
Also used : POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) MorphaAnnotator(edu.stanford.nlp.pipeline.MorphaAnnotator) NERCombinerAnnotator(edu.stanford.nlp.pipeline.NERCombinerAnnotator) AnnotationPipeline(edu.stanford.nlp.pipeline.AnnotationPipeline) WordsToSentencesAnnotator(edu.stanford.nlp.pipeline.WordsToSentencesAnnotator) IOException(java.io.IOException) TokenizerAnnotator(edu.stanford.nlp.pipeline.TokenizerAnnotator) Properties(java.util.Properties) BeforeClass(org.junit.BeforeClass)

Example 3 with POSTaggerAnnotator

use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.

the class SemEvalMentionReader method initExternalTools.

/**
 * Creates the external tools used by this reader and stores them in the corresponding fields:
 * the POS annotator, the gazetteers, the WordNet manager, the chunker, the Stanford dependency
 * handler and the mention annotator.
 *
 * <p>Any {@code Exception} raised on the way is caught and only its stack trace is printed, so
 * the fields that would have been assigned after the failure point are left untouched.
 */
public void initExternalTools() {
    try {
        _posAnnotator = new POSAnnotator();

        // Gazetteers live in a datastore directory; the lists sit in its "gazetteers" subfolder.
        Datastore datastore = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersDir = datastore.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        String gazetteersPath = gazetteersDir.getPath() + File.separator + "gazetteers";
        _gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersPath, true, Language.English);

        WordNetManager.loadConfigAsClasspathResource(true);
        _wordnet = WordNetManager.getInstance();

        __chunker = new ChunkerAnnotator(true);
        __chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

        // Settings shared by the Stanford POS tagger and parser.
        Properties parserSettings = new Properties();
        parserSettings.setProperty("annotators", "pos, parse");
        // NOTE(review): this stores a Boolean, not a String. Properties.getProperty() returns
        // null for non-String values, so a getProperty-based reader would not see this flag.
        // Confirm how the parser reads it before changing anything.
        parserSettings.put("parse.originalDependencies", true);
        // NOTE(review): the two configurator constants are stored as-is via put(); if they are
        // not Strings the same caveat applies. TODO confirm their type.
        parserSettings.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        parserSettings.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

        POSTaggerAnnotator stanfordPos = new POSTaggerAnnotator("pos", parserSettings);
        ParserAnnotator stanfordParser = new ParserAnnotator("parse", parserSettings);
        __stanfordDep = new StanfordDepHandler(stanfordPos, stanfordParser);

        __mentionAnnotator = new MentionAnnotator("ACE_TYPE");
    } catch (Exception initFailure) {
        initFailure.printStackTrace();
    }
}
Also used : ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) Datastore(org.cogcomp.Datastore) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) File(java.io.File) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Example 4 with POSTaggerAnnotator

use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.

the class RelationExtractionTest method testAnnotator.

/**
 * Runs the full annotation chain (POS, chunker, Stanford dependencies, mentions, relations) over
 * every document of the {@code ACE_TEST_DOCS} resource and checks that each document ends up
 * with at least one mention constituent and at least one relation.
 *
 * <p>Exceptions are converted into {@link AssertionError}s that keep the original cause.
 * Previously they were only printed, which let the test pass even when the resources could not
 * be fetched or an annotator crashed.
 */
@Test
public void testAnnotator() {
    File modelDir;
    try {
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        modelDir = ds.getDirectory("org.cogcomp.re", "ACE_TEST_DOCS", 1.1, false);
    } catch (Exception e) {
        // Without the test documents nothing below can be verified, so fail right here.
        throw new AssertionError("Could not obtain ACE_TEST_DOCS from the datastore", e);
    }
    try {
        ACEReaderWithTrueCaseFixer aceReader = new ACEReaderWithTrueCaseFixer(modelDir.getAbsolutePath() + File.separator + "ACE_TEST_DOCS", false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        // Settings shared by the Stanford POS tagger and parser.
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();
        for (TextAnnotation ta : aceReader) {
            // The views are added in this order: POS, chunks, dependencies, mentions, relations.
            ta.addView(pos_annotator);
            chunker.addView(ta);
            stanfordDepHandler.addView(ta);
            mentionAnnotator.addView(ta);
            relationAnnotator.addView(ta);
            View mentionView = ta.getView(ViewNames.MENTION);
            assertTrue(mentionView.getConstituents().size() > 0);
            View relationView = ta.getView(ViewNames.RELATION);
            assertTrue(relationView.getRelations().size() > 0);
        }
    } catch (Exception e) {
        // Assertion failures are Errors and pass through; any other exception fails the test.
        throw new AssertionError("Relation extraction pipeline threw an exception", e);
    }
}
Also used : ACEReaderWithTrueCaseFixer(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer) ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) Properties(java.util.Properties) RelationAnnotator(org.cogcomp.re.RelationAnnotator) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) Datastore(org.cogcomp.Datastore) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) File(java.io.File) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler) Test(org.junit.Test)

Example 5 with POSTaggerAnnotator

use of edu.stanford.nlp.pipeline.POSTaggerAnnotator in project cogcomp-nlp by CogComp.

the class ExampleUsage method SemEvalAnnotate.

/**
 * Example: classifies the relation between two hand-picked mentions of a fixed sentence with the
 * SemEval relation classifier and prints the predicted tag to standard output.
 *
 * <p>The sentence is annotated with POS, chunk and Stanford dependency views, and a
 * {@code RE_ANNOTATED} view is built whose token constituents carry WordNet attributes. The
 * SemEval model directory and the gazetteers are fetched through the datastore.
 *
 * @throws IllegalStateException if the gazetteers could not be loaded; the exception that
 *         interrupted resource preparation, if any, is attached as the cause
 */
public static void SemEvalAnnotate() {
    String text = "People have been moving back into downtown.";
    String corpus = "semeval";
    String textId = "001";
    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // Settings shared by the Stanford POS tagger and parser.
    Properties stanfordProps = new Properties();
    stanfordProps.put("annotators", "pos, parse");
    stanfordProps.put("parse.originalDependencies", true);
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    String modelPath = "";
    FlatGazetteers gazetteers = null;
    // Remembered so the guard below can report why resource preparation stopped.
    Exception setupFailure = null;
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
        modelPath = model.getPath();
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordnet = WordNetManager.getInstance();
        // Copy every token into the RE_ANNOTATED view, keeping its attributes and adding
        // the WordNet-derived ones.
        View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
        for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
            Constituent c = co.cloneForNewView("RE_ANNOTATED");
            for (String s : co.getAttributeKeys()) {
                c.addAttribute(s, co.getAttribute(s));
            }
            c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
            c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
            annotatedTokenView.addConstituent(c);
        }
        ta.addView("RE_ANNOTATED", annotatedTokenView);
    } catch (Exception e) {
        e.printStackTrace();
        setupFailure = e;
    }
    // The mentions below dereference the gazetteers; fail with a descriptive error (and the
    // original cause) rather than an anonymous NullPointerException.
    if (gazetteers == null) {
        throw new IllegalStateException("Gazetteers could not be loaded; cannot annotate the example mentions", setupFailure);
    }
    // Mention spans are given as token offsets into the example sentence.
    Constituent source = new Constituent("first", "Mention", ta, 0, 1);
    Constituent target = new Constituent("second", "Mention", ta, 6, 7);
    source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
    target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
    Relation relation = new Relation("TEST", source, target, 1.0f);
    // The classifier is given "<modelPath>/SEMEVAL/SEMEVAL.lc" and "<modelPath>/SEMEVAL/SEMEVAL.lex".
    String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
    semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
    String tag = classifier.discreteValue(relation);
    System.out.println(tag);
}
Also used : ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) Properties(java.util.Properties) LbjGen.semeval_relation_classifier(org.cogcomp.re.LbjGen.semeval_relation_classifier) FlatGazetteers(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) WordNetManager(edu.illinois.cs.cogcomp.edison.utilities.WordNetManager) Datastore(org.cogcomp.Datastore) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) File(java.io.File) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Aggregations

POSTaggerAnnotator (edu.stanford.nlp.pipeline.POSTaggerAnnotator)11 ChunkerAnnotator (edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator)7 StanfordDepHandler (edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)7 POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)7 ParserAnnotator (edu.stanford.nlp.pipeline.ParserAnnotator)7 Properties (java.util.Properties)7 ChunkerConfigurator (edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator)6 MentionAnnotator (org.cogcomp.md.MentionAnnotator)5 AnnotationPipeline (edu.stanford.nlp.pipeline.AnnotationPipeline)4 TokenizerAnnotator (edu.stanford.nlp.pipeline.TokenizerAnnotator)4 WordsToSentencesAnnotator (edu.stanford.nlp.pipeline.WordsToSentencesAnnotator)4 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)3 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)3 ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator)3 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)3 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)3 File (java.io.File)3 Datastore (org.cogcomp.Datastore)3 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)2 IOException (java.io.IOException)2