Search in sources :

Example 1 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class ExtentReader method getTextAnnotations.

/**
 * Collects the documents of the configured corpus as {@link TextAnnotation}s.
 *
 * <p>The reader is chosen from the value of {@code _corpus}:
 * <ul>
 *   <li>{@code "ACE"}: iterates an {@code ACEReaderWithTrueCaseFixer} over {@code _path} and
 *       runs a {@code POSAnnotator} on every document;</li>
 *   <li>{@code "ERE"}: iterates an {@code EREMentionRelationReader} (ENR3) over {@code _path},
 *       unwraps each {@code XmlTextAnnotation} and runs a {@code POSAnnotator} on it;</li>
 *   <li>anything starting with {@code "COMBINED"}: expects the form
 *       {@code COMBINED-<corpus>-<mode>-<fold>} and drains a {@code BIOCombinedReader} until it
 *       returns {@code null}. No POS annotator is applied in this method for that case.</li>
 * </ul>
 *
 * <p>Exceptions raised while reading ACE or ERE are printed and swallowed, so whatever was read
 * before the failure is still returned. The COMBINED branch is not guarded: a malformed corpus
 * string surfaces as an {@code ArrayIndexOutOfBoundsException} or
 * {@code NumberFormatException}.
 *
 * @return the collected annotations; empty when {@code _corpus} matches none of the cases
 */
public List<TextAnnotation> getTextAnnotations() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    List<TextAnnotation> annotations = new ArrayList<>();

    if (_corpus.equals("ACE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            ACEReaderWithTrueCaseFixer reader = new ACEReaderWithTrueCaseFixer(_path, false);
            for (TextAnnotation document : reader) {
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            // Best effort: keep the documents read so far.
            e.printStackTrace();
        }
    }

    if (_corpus.equals("ERE")) {
        POSAnnotator tagger = new POSAnnotator();
        try {
            EREMentionRelationReader reader = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, _path, false);
            for (XmlTextAnnotation wrapped : reader) {
                TextAnnotation document = wrapped.getTextAnnotation();
                document.addView(tagger);
                annotations.add(document);
            }
        } catch (Exception e) {
            // Best effort: keep the documents read so far.
            e.printStackTrace();
        }
    }

    if (_corpus.startsWith("COMBINED")) {
        // Layout: COMBINED-<corpus>-<mode>-<fold>
        String[] parts = _corpus.split("-");
        String realCorpus = parts[1];
        String mode = parts[2];
        int fold = Integer.parseInt(parts[3]);
        BIOCombinedReader reader = new BIOCombinedReader(fold, realCorpus + "-" + mode, "ALL", true);
        Object next;
        while ((next = reader.next()) != null) {
            annotations.add((TextAnnotation) next);
        }
    }

    return annotations;
}
Also used : EREMentionRelationReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader) ACEReaderWithTrueCaseFixer(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReaderWithTrueCaseFixer) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ArrayList(java.util.ArrayList) DatastoreException(org.cogcomp.DatastoreException) JWNLException(net.didion.jwnl.JWNLException) IOException(java.io.IOException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) InvalidPortException(io.minio.errors.InvalidPortException)

Example 2 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class AnnotatorTester method test_custom_annotator.

/**
 * Scores a {@code MentionAnnotator} built from the {@code "models/TAC_NOM"} model path against
 * the gold mentions of the ACE documents under {@code data/partition_with_dev/dev}.
 *
 * <p>For every predicted mention the gold mentions are scanned in order, and the first one whose
 * head span equals the predicted head span counts as a match. Matches are further checked for
 * equal entity type and equal full extent. Five totals are printed to standard output. Any
 * exception stops the evaluation early; the totals gathered so far are still printed.
 */
public static void test_custom_annotator() {
    POSAnnotator posAnnotator = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatchCount = 0;
    int typeMatchCount = 0;
    int extentMatchCount = 0;
    try {
        ACEReader reader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("", "models/TAC_NOM", "", "", "");
        for (TextAnnotation doc : reader) {
            doc.addView(posAnnotator);
            mentionAnnotator.addView(doc);
            labeledCount += doc.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += doc.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : doc.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : doc.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // Copy the gold label into the attribute the predicted mention is compared on.
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, doc, "B");
                    if (goldHead == null) {
                        continue;
                    }
                    // Heads must coincide exactly; otherwise try the next gold mention.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()
                            || Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatchCount++;
                    if (predicted.getAttribute("EntityType").equals(gold.getAttribute("EntityType"))) {
                        typeMatchCount++;
                    }
                    if (predicted.getStartSpan() == gold.getStartSpan() && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatchCount++;
                    }
                    // Only the first matching gold mention is counted for each prediction.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatchCount);
    System.out.println("Type Correct: " + typeMatchCount);
    System.out.println("Extent Correct: " + extentMatchCount);
}
Also used : ACEReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 3 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class AnnotatorTester method test_basic_annotator.

/**
 * Scores a {@code MentionAnnotator} constructed with the mode string {@code "ACE_NONTYPE"}
 * against the gold mentions of the ACE documents under {@code data/partition_with_dev/dev}.
 *
 * <p>For every predicted mention the gold mentions are scanned in order, and the first one whose
 * head span equals the predicted head span counts as a match. Matches are further checked for
 * equal entity type and equal full extent. Five totals are printed to standard output. Any
 * exception stops the evaluation early; the totals gathered so far are still printed.
 *
 * <p>NOTE(review): the previous comment said this "uses the ACE model trained with Type on ACE
 * corpus, should have a fairly high performance", which does not obviously agree with the
 * {@code "ACE_NONTYPE"} argument below. Confirm which model is intended.
 */
public static void test_basic_annotator() {
    POSAnnotator posAnnotator = new POSAnnotator();
    int labeledCount = 0;
    int predictedCount = 0;
    int headMatchCount = 0;
    int typeMatchCount = 0;
    int extentMatchCount = 0;
    try {
        ACEReader reader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_NONTYPE");
        for (TextAnnotation doc : reader) {
            doc.addView(posAnnotator);
            mentionAnnotator.addView(doc);
            labeledCount += doc.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
            predictedCount += doc.getView(ViewNames.MENTION).getNumberOfConstituents();
            for (Constituent predicted : doc.getView(ViewNames.MENTION).getConstituents()) {
                for (Constituent gold : doc.getView(ViewNames.MENTION_ACE).getConstituents()) {
                    // Copy the gold label into the attribute the predicted mention is compared on.
                    gold.addAttribute("EntityType", gold.getLabel());
                    Constituent goldHead = ACEReader.getEntityHeadForConstituent(gold, doc, "B");
                    if (goldHead == null) {
                        continue;
                    }
                    // Heads must coincide exactly; otherwise try the next gold mention.
                    if (Integer.parseInt(predicted.getAttribute("EntityHeadStartSpan")) != goldHead.getStartSpan()
                            || Integer.parseInt(predicted.getAttribute("EntityHeadEndSpan")) != goldHead.getEndSpan()) {
                        continue;
                    }
                    headMatchCount++;
                    if (predicted.getAttribute("EntityType").equals(gold.getAttribute("EntityType"))) {
                        typeMatchCount++;
                    }
                    if (predicted.getStartSpan() == gold.getStartSpan() && predicted.getEndSpan() == gold.getEndSpan()) {
                        extentMatchCount++;
                    }
                    // Only the first matching gold mention is counted for each prediction.
                    break;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Labeled: " + labeledCount);
    System.out.println("Predicted: " + predictedCount);
    System.out.println("Correct: " + headMatchCount);
    System.out.println("Type Correct: " + typeMatchCount);
    System.out.println("Extent Correct: " + extentMatchCount);
}
Also used : ACEReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 4 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class ExampleUsage method SemEvalAnnotate.

/**
 * End-to-end example: annotates one hard-coded sentence and prints the label that the SemEval
 * relation classifier assigns to a hand-built relation between two of its mentions.
 *
 * <p>Steps, in order: build a {@code TextAnnotation} from raw text; run the POS annotator, the
 * chunker and the Stanford dependency handler; fetch the SEMEVAL model directory and the
 * gazetteers through the {@code Datastore}; add an {@code "RE_ANNOTATED"} view whose tokens
 * carry WordNet attributes; then classify a relation between two manually constructed
 * constituents.
 *
 * <p>If resource loading fails before the gazetteers are available, the failure is printed and
 * the method returns without classifying. Previously it continued and dereferenced a
 * {@code null} gazetteers reference.
 */
public static void SemEvalAnnotate() {
    String text = "People have been moving back into downtown.";
    String corpus = "semeval";
    String textId = "001";
    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    POSAnnotator cogcompPosAnnotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // NOTE(review): the three puts below after "annotators" store non-String values.
    // java.util.Properties#getProperty returns null for such entries, so confirm the Stanford
    // annotators actually see these settings.
    Properties stanfordProps = new Properties();
    stanfordProps.put("annotators", "pos, parse");
    stanfordProps.put("parse.originalDependencies", true);
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator stanfordPosAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(stanfordPosAnnotator, parseAnnotator);
    String modelPath = "";
    FlatGazetteers gazetteers = null;
    try {
        ta.addView(cogcompPosAnnotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
        modelPath = model.getPath();
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordnet = WordNetManager.getInstance();
        // Mirror the token view into RE_ANNOTATED, copying attributes and adding WordNet ones.
        View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
        for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
            Constituent c = co.cloneForNewView("RE_ANNOTATED");
            for (String s : co.getAttributeKeys()) {
                c.addAttribute(s, co.getAttribute(s));
            }
            c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
            c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
            annotatedTokenView.addConstituent(c);
        }
        ta.addView("RE_ANNOTATED", annotatedTokenView);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // gazetteers is assigned after modelPath inside the try, so a null here means neither the
    // model location nor the gazetteers can be relied on. Stop instead of hitting an NPE below.
    if (gazetteers == null) {
        System.err.println("SemEvalAnnotate: gazetteers could not be loaded; skipping relation classification.");
        return;
    }
    // presumably token spans [0,1) and [6,7), i.e. "People" and "downtown" — confirm tokenization
    Constituent source = new Constituent("first", "Mention", ta, 0, 1);
    Constituent target = new Constituent("second", "Mention", ta, 6, 7);
    source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
    target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
    Relation relation = new Relation("TEST", source, target, 1.0f);
    String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
    semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
    String tag = classifier.discreteValue(relation);
    System.out.println(tag);
}
Also used : ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) Properties(java.util.Properties) LbjGen.semeval_relation_classifier(org.cogcomp.re.LbjGen.semeval_relation_classifier) FlatGazetteers(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) WordNetManager(edu.illinois.cs.cogcomp.edison.utilities.WordNetManager) Datastore(org.cogcomp.Datastore) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) File(java.io.File) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Example 5 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class ExampleUsage method AnnotatorExample.

/**
 * End-to-end example: annotates one hard-coded sentence with the POS annotator, the chunker,
 * the Stanford dependency handler and the {@code RelationAnnotator}, then prints every relation
 * found in the {@code MENTION} view via {@code IOHelper.printRelation}.
 *
 * <p>An exception during annotation is printed and execution continues to the view lookup.
 */
public static void AnnotatorExample() {
    String sentence = "He went to Chicago after his Father moved there.";
    String corpusId = "story";
    String documentId = "001";

    // Create a TextAnnotation From Text
    TextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation annotation = builder.createTextAnnotation(corpusId, documentId, sentence);

    POSAnnotator cogcompPosAnnotator = new POSAnnotator();
    ChunkerAnnotator chunkerAnnotator = new ChunkerAnnotator(true);
    chunkerAnnotator.initialize(new ChunkerConfigurator().getDefaultConfig());

    // NOTE(review): the three puts below after "annotators" store non-String values.
    // java.util.Properties#getProperty returns null for such entries, so confirm the Stanford
    // annotators actually see these settings.
    Properties parserSettings = new Properties();
    parserSettings.put("annotators", "pos, parse");
    parserSettings.put("parse.originalDependencies", true);
    parserSettings.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    parserSettings.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    POSTaggerAnnotator stanfordPosAnnotator = new POSTaggerAnnotator("pos", parserSettings);
    ParserAnnotator stanfordParser = new ParserAnnotator("parse", parserSettings);
    StanfordDepHandler dependencyHandler = new StanfordDepHandler(stanfordPosAnnotator, stanfordParser);
    RelationAnnotator relationAnnotator = new RelationAnnotator();

    try {
        annotation.addView(cogcompPosAnnotator);
        chunkerAnnotator.addView(annotation);
        dependencyHandler.addView(annotation);
        relationAnnotator.addView(annotation);
    } catch (Exception e) {
        e.printStackTrace();
    }

    View mentions = annotation.getView(ViewNames.MENTION);
    // Fetched as in the original example; the mention list is not used below.
    List<Constituent> mentionConstituents = mentions.getConstituents();
    List<Relation> relations = mentions.getRelations();
    for (Relation relation : relations) {
        IOHelper.printRelation(relation);
    }
}
Also used : ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) Properties(java.util.Properties) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Aggregations

POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator): 20; TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 12; TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder): 7; ChunkerAnnotator (edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator): 7; StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer): 7; TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder): 7; StanfordDepHandler (edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler): 7; POSTaggerAnnotator (edu.stanford.nlp.pipeline.POSTaggerAnnotator): 7; ParserAnnotator (edu.stanford.nlp.pipeline.ParserAnnotator): 7; Properties (java.util.Properties): 7; ChunkerConfigurator (edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator): 6; Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 6; MentionAnnotator (org.cogcomp.md.MentionAnnotator): 6; AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException): 5; Test (org.junit.Test): 5; View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 4; ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator): 4; ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager): 4; ACEReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader): 3; InvalidEndpointException (io.minio.errors.InvalidEndpointException): 3