Search in sources :

Example 16 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class ACERelationTester method testAnnotator.

/**
 * Runs the relation annotator over the documents read from
 * {@code data/partition_with_dev/dev} and prints the labeled / predicted / correct
 * counts together with precision, recall and F1 to standard output.
 *
 * A predicted relation is matched against a gold relation when the head constituents of
 * both arguments cover exactly the same token spans. A matched pair counts as
 * "coarse correct" when the {@code RelationType} attributes agree and as "correct" when
 * the {@code RelationSubtype} attributes agree.
 *
 * Any exception raised while setting up or annotating is printed and ends the document
 * loop; the metrics are then reported on whatever was accumulated up to that point.
 */
public static void testAnnotator() {
    int total_correct = 0;
    int total_labeled = 0;
    int total_predicted = 0;
    int total_coarse_correct = 0;
    try {
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        // NOTE(review): java.util.Properties#getProperty returns null for non-String values,
        // so this Boolean is invisible to any consumer that reads it through getProperty --
        // confirm how the Stanford annotators read this flag.
        stanfordProps.put("parse.originalDependencies", true);
        // NOTE(review): the configurator constants themselves are stored as the values here;
        // elsewhere they are used as lookup keys for the configured values -- confirm intent.
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        ACEReader aceReader = new ACEReader("data/partition_with_dev/dev", false);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();
        for (TextAnnotation ta : aceReader) {
            // Views are added in this order: POS, dependencies, chunks, mentions, relations.
            ta.addView(pos_annotator);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);
            mentionAnnotator.addView(ta);
            relationAnnotator.addView(ta);
            total_labeled += ta.getView(ViewNames.MENTION_ACE).getRelations().size();
            total_predicted += ta.getView(ViewNames.RELATION).getRelations().size();
            for (Relation pr : ta.getView(ViewNames.RELATION).getRelations()) {
                // The predicted heads do not depend on the gold relation, so compute them
                // once per predicted relation instead of once per (predicted, gold) pair.
                Constituent prSourceHead = RelationFeatureExtractor.getEntityHeadForConstituent(pr.getSource(), ta, "");
                Constituent prTargetHead = RelationFeatureExtractor.getEntityHeadForConstituent(pr.getTarget(), ta, "");
                for (Relation gr : ta.getView(ViewNames.MENTION_ACE).getRelations()) {
                    Constituent grSourceHead = RelationFeatureExtractor.getEntityHeadForConstituent(gr.getSource(), ta, "");
                    Constituent grTargetHead = RelationFeatureExtractor.getEntityHeadForConstituent(gr.getTarget(), ta, "");
                    boolean sameSource = prSourceHead.getStartSpan() == grSourceHead.getStartSpan()
                            && prSourceHead.getEndSpan() == grSourceHead.getEndSpan();
                    boolean sameTarget = prTargetHead.getStartSpan() == grTargetHead.getStartSpan()
                            && prTargetHead.getEndSpan() == grTargetHead.getEndSpan();
                    if (sameSource && sameTarget) {
                        if (pr.getAttribute("RelationType").equals(gr.getAttribute("RelationType"))) {
                            total_coarse_correct++;
                        }
                        if (pr.getAttribute("RelationSubtype").equals(gr.getAttribute("RelationSubtype"))) {
                            total_correct++;
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Total labeled: " + total_labeled);
    System.out.println("Total predicted: " + total_predicted);
    System.out.println("Total correct: " + total_correct);
    System.out.println("Total coarse correct: " + total_coarse_correct);
    // Guard every denominator so an empty or failed run reports 0.0 instead of NaN.
    double p = total_predicted == 0 ? 0.0 : (double) total_correct * 100.0 / (double) total_predicted;
    double r = total_labeled == 0 ? 0.0 : (double) total_correct * 100.0 / (double) total_labeled;
    double f = (p + r) == 0.0 ? 0.0 : 2 * p * r / (p + r);
    // Coarse F1 is computed from its own precision/recall. Scaling the fine-grained F1 by
    // coarse/correct is algebraically the same, but yields NaN whenever total_correct is 0.
    double coarseP = total_predicted == 0 ? 0.0 : (double) total_coarse_correct * 100.0 / (double) total_predicted;
    double coarseR = total_labeled == 0 ? 0.0 : (double) total_coarse_correct * 100.0 / (double) total_labeled;
    double coarseF = (coarseP + coarseR) == 0.0 ? 0.0 : 2 * coarseP * coarseR / (coarseP + coarseR);
    System.out.println("Precision: " + p);
    System.out.println("Recall: " + r);
    System.out.println("Fine Type F1: " + f);
    System.out.println("Coarse Type F1: " + coarseF);
}
Also used : ACEReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader) ChunkerConfigurator(edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) Properties(java.util.Properties) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 17 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class PipelineFactory method buildAnnotators.

/**
 * Instantiates the set of annotators for use in an AnnotatorService object. By default,
 * lazy initialization is used where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if either SRL annotator (verb or nom) fails to initialize; the
 *         original exception is attached as the cause
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();
    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        // The Stanford POS tagger and parser are shared by the dependency and parse handlers.
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        // NOTE(review): java.util.Properties#getProperty returns null for non-String values,
        // so this Boolean is invisible to any consumer that reads it through getProperty --
        // confirm how the Stanford annotators read this flag.
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", maxParseSentenceLength);
        // Unclear from the Stanford javadoc whether this limit is per sentence or per document.
        stanfordProps.put("parse.maxtime", timePerSentence);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        ResourceManager verbRm = new ResourceManager(verbProps);
        rm = Configurator.mergeProperties(rm, verbRm);
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        ResourceManager nomRm = new ResourceManager(nomProps);
        rm = Configurator.mergeProperties(rm, nomRm);
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        // One annotator per supported language, keyed by view name plus language code.
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        // Use a dedicated ResourceManager: overwriting rm here would make the remaining
        // PipelineConfigurator checks below read the dataless config instead of the
        // pipeline config.
        ResourceManager esaRm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(esaRm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        // Same reasoning as for ESA: keep the pipeline config in rm untouched.
        ResourceManager w2vRm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(w2vRm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }
    return viewGenerators;
}
Also used : HashMap(java.util.HashMap) VerbSenseAnnotator(edu.illinois.cs.cogcomp.verbsense.VerbSenseAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) Properties(java.util.Properties) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) Language(edu.illinois.cs.cogcomp.core.constants.Language) SemanticRoleLabeler(edu.illinois.cs.cogcomp.srl.SemanticRoleLabeler) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) W2VDatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator) TemporalChunkerConfigurator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerConfigurator) Stanford331Configurator(edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) ESADatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) RelationAnnotator(org.cogcomp.re.RelationAnnotator) IOException(java.io.IOException) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) PipelineConfigurator(edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) W2VDatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator) RelationAnnotator(org.cogcomp.re.RelationAnnotator) ESADatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) 
ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) QuestionTypeAnnotator(edu.illinois.cs.cogcomp.question_typer.QuestionTypeAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) VerbSenseAnnotator(edu.illinois.cs.cogcomp.verbsense.VerbSenseAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) ESADatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.ESADatalessConfigurator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) W2VDatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator) IllinoisLemmatizer(edu.illinois.cs.cogcomp.nlp.lemmatizer.IllinoisLemmatizer) Quantifier(edu.illinois.cs.cogcomp.quant.driver.Quantifier) CommaLabeler(edu.illinois.cs.cogcomp.comma.CommaLabeler) StanfordParseHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler) QuestionTypeAnnotator(edu.illinois.cs.cogcomp.question_typer.QuestionTypeAnnotator) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Example 18 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class MentionDetectionTest method testHeadInference.

/**
 * Annotates every document read from {@code src/test/resources/ERE} with POS tags and
 * mentions, and fails if any document ends up with fewer than 60 constituents in its
 * "MENTION" view.
 *
 * NOTE(review): exceptions raised while reading or annotating are only printed, so the
 * test still passes in that case -- confirm this best-effort behavior is intended.
 */
@Test
public void testHeadInference() {
    try {
        EREMentionRelationReader corpusReader =
                new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "src/test/resources/ERE", false);
        POSAnnotator posTagger = new POSAnnotator();
        MentionAnnotator mentionDetector = new MentionAnnotator();
        for (XmlTextAnnotation document : corpusReader) {
            TextAnnotation annotated = document.getTextAnnotation();
            // The POS view is added before the mention annotator runs.
            annotated.addView(posTagger);
            mentionDetector.addView(annotated);
            boolean tooFewMentions = annotated.getView("MENTION").getNumberOfConstituents() < 60;
            if (!tooFewMentions) {
                continue;
            }
            fail("Mention Head predicted performance dropped");
        }
    } catch (Exception problem) {
        problem.printStackTrace();
    }
}
Also used : EREMentionRelationReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader) XmlTextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) XmlTextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation) Test(org.junit.Test)

Example 19 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class ExtentTester method testExtentOnGoldHead.

/**
 * Trains an extent classifier on fold 0 of {@code data/partition_with_dev/train/} and
 * measures how often the full mention predicted from a gold head has exactly the same
 * span as the gold mention. Mismatches, and the final counts and percentage, are printed
 * to standard output.
 *
 * Resource loading (WordNet, POS annotator, gazetteers, Brown clusters) is best-effort:
 * a failure is printed and the evaluation continues with whatever was loaded. If the
 * evaluation reader for a fold cannot be created, that fold is skipped.
 */
public static void testExtentOnGoldHead() throws InvalidPortException, InvalidEndpointException, IOException, JWNLException, DatastoreException {
    int labeled = 0;
    int correct = 0;
    POSAnnotator posAnnotator = null;
    WordNetManager wordNet = null;
    Gazetteers gazetteers = null;
    BrownClusters brownClusters = null;
    try {
        WordNetManager.loadConfigAsClasspathResource(true);
        wordNet = WordNetManager.getInstance();
        posAnnotator = new POSAnnotator();
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        // Three Brown-cluster resources, each paired with the value 5 and the flag false.
        Vector<String> bcs = new Vector<>();
        bcs.add("brown-clusters" + File.separator + "brown-english-wikitext.case-intact.txt-c1000-freq10-v3.txt");
        bcs.add("brown-clusters" + File.separator + "brownBllipClusters");
        bcs.add("brown-clusters" + File.separator + "brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt");
        Vector<Integer> bcst = new Vector<>();
        bcst.add(5);
        bcst.add(5);
        bcst.add(5);
        Vector<Boolean> bcsl = new Vector<>();
        bcsl.add(false);
        bcsl.add(false);
        bcsl.add(false);
        brownClusters = BrownClusters.get(bcs, bcst, bcsl);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Only fold 0 is evaluated.
    for (int i = 0; i < 1; i++) {
        ExtentReader train_parser = new ExtentReader("data/partition_with_dev/train/" + i, "COMBINED-ALL-TRAIN-" + i);
        extent_classifier classifier = train_extent_classifier(train_parser);
        BIOCombinedReader bioCombinedReader = null;
        try {
            bioCombinedReader = new BIOCombinedReader(i, "ALL-EVAL", "ALL", true);
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (bioCombinedReader == null) {
            // Without a reader there is nothing to evaluate; dereferencing it would throw
            // a NullPointerException right after the stack trace above.
            System.err.println("Skipping fold " + i + ": evaluation reader could not be created");
            continue;
        }
        for (Object ota = bioCombinedReader.next(); ota != null; ota = bioCombinedReader.next()) {
            TextAnnotation ta = (TextAnnotation) ota;
            try {
                ta.addView(posAnnotator);
            } catch (Exception e) {
                e.printStackTrace();
            }
            // Documents whose id starts with "bn" or "nw" use the ACE mention view;
            // every other document uses the ERE mention view.
            String mentionViewName = ViewNames.MENTION_ERE;
            if (ta.getId().startsWith("bn") || ta.getId().startsWith("nw")) {
                mentionViewName = ViewNames.MENTION_ACE;
            }
            View mentionView = ta.getView(mentionViewName);
            for (Constituent mention : mentionView.getConstituents()) {
                Constituent head = ACEReader.getEntityHeadForConstituent(mention, ta, "HEADS");
                if (head == null) {
                    // Mentions without a head are not counted.
                    continue;
                }
                labeled++;
                Constituent predictedFullMention = getFullMention(classifier, head, gazetteers, brownClusters, wordNet);
                if (predictedFullMention.getStartSpan() == mention.getStartSpan() && predictedFullMention.getEndSpan() == mention.getEndSpan()) {
                    correct++;
                } else {
                    System.out.println("Gold: " + mention.toString());
                    System.out.println("Predicted: " + predictedFullMention.toString());
                }
            }
        }
    }
    System.out.println("Labeled: " + labeled);
    System.out.println("Correct: " + correct);
    // Report 0.0 rather than NaN when no mention was evaluated.
    double correctness = labeled == 0 ? 0.0 : (double) correct * 100.0 / (double) labeled;
    System.out.println("Correctness: " + correctness);
}
Also used : POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceConfigurator(edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) DatastoreException(org.cogcomp.DatastoreException) JWNLException(net.didion.jwnl.JWNLException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) IOException(java.io.IOException) InvalidPortException(io.minio.errors.InvalidPortException) WordNetManager(edu.illinois.cs.cogcomp.edison.utilities.WordNetManager) Gazetteers(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.Gazetteers) FlatGazetteers(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers) Datastore(org.cogcomp.Datastore) BrownClusters(edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.BrownClusters) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) File(java.io.File) Vector(java.util.Vector) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 20 with POSAnnotator

use of edu.illinois.cs.cogcomp.pos.POSAnnotator in project cogcomp-nlp by CogComp.

the class BIOCombinedReader method readTasByFold.

/**
 * Returns the TextAnnotations listed in the split file for the given fold, each with a
 * POS view added.
 *
 * The corpora to load are chosen by the substrings "ACE", "ERE" and "ALL" in
 * {@code mode}; the split file is chosen by the first of those that matches, combined
 * with "TRAIN" or "EVAL". Each line of the split file is treated as a document id.
 *
 * @param fold index appended to the split file name
 * @param mode selector string, e.g. containing "ACE"/"ERE"/"ALL" and "TRAIN"/"EVAL"
 * @return the matching TextAnnotations in split-file order; empty if the mode names no
 *         known corpus or split, or if the split file cannot be read. Ids that are not
 *         present in the loaded corpora are skipped, so the list never contains null.
 */
public List<TextAnnotation> readTasByFold(int fold, String mode) {
    List<String> corpus = new ArrayList<>();
    if (mode.contains("ACE")) {
        corpus.add("ACE");
    }
    if (mode.contains("ERE")) {
        corpus.add("ERE");
    }
    if (mode.contains("ALL")) {
        corpus.add("ACE");
        corpus.add("ERE");
    }
    List<TextAnnotation> tas = getTAs(corpus);
    // Index the loaded documents by id for lookup against the split file.
    HashMap<String, TextAnnotation> taMap = new HashMap<>();
    for (TextAnnotation ta : tas) {
        taMap.put(ta.getId(), ta);
    }
    List<TextAnnotation> ret = new ArrayList<>();
    String file_name = "";
    if (mode.contains("ACE")) {
        if (mode.contains("TRAIN")) {
            file_name = "data/split/ace_train_fold_" + fold;
        } else if (mode.contains("EVAL")) {
            file_name = "data/split/ace_eval_fold_" + fold;
        } else {
            return ret;
        }
    } else if (mode.contains("ERE")) {
        if (mode.contains("TRAIN")) {
            file_name = "data/split/ere_train_fold_" + fold;
        } else if (mode.contains("EVAL")) {
            file_name = "data/split/ere_eval_fold_" + fold;
        } else {
            return ret;
        }
    } else if (mode.contains("ALL")) {
        if (mode.contains("TRAIN")) {
            file_name = "data/split/train_fold_" + fold;
        } else if (mode.contains("EVAL")) {
            file_name = "data/split/eval_fold_" + fold;
        } else {
            return ret;
        }
    }
    if (file_name.isEmpty()) {
        // No corpus selector matched; treat it like the unknown-split branches above
        // instead of trying to open an empty path.
        return ret;
    }
    POSAnnotator posAnnotator = new POSAnnotator();
    try (BufferedReader br = new BufferedReader(new FileReader(file_name))) {
        String line;
        while ((line = br.readLine()) != null) {
            TextAnnotation ta = taMap.get(line);
            if (ta == null) {
                // An unknown id used to trigger a swallowed NullPointerException and then
                // add null to the result; skip it explicitly instead.
                System.err.println("No TextAnnotation with id '" + line + "' listed in " + file_name + "; skipping");
                continue;
            }
            try {
                ta.addView(posAnnotator);
            } catch (Exception e) {
                // Best-effort: the document is still returned without a POS view.
                e.printStackTrace();
            }
            ret.add(ta);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
Also used : POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) DatastoreException(org.cogcomp.DatastoreException) JWNLException(net.didion.jwnl.JWNLException) InvalidEndpointException(io.minio.errors.InvalidEndpointException) InvalidPortException(io.minio.errors.InvalidPortException)

Aggregations

POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)20 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)12 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)7 ChunkerAnnotator (edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator)7 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)7 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)7 StanfordDepHandler (edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)7 POSTaggerAnnotator (edu.stanford.nlp.pipeline.POSTaggerAnnotator)7 ParserAnnotator (edu.stanford.nlp.pipeline.ParserAnnotator)7 Properties (java.util.Properties)7 ChunkerConfigurator (edu.illinois.cs.cogcomp.chunker.main.ChunkerConfigurator)6 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)6 MentionAnnotator (org.cogcomp.md.MentionAnnotator)6 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)5 Test (org.junit.Test)5 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)4 ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator)4 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)4 ACEReader (edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader)3 InvalidEndpointException (io.minio.errors.InvalidEndpointException)3