Search in sources :

Example 1 with W2VDatalessConfigurator

use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.

the class PipelineFactory method buildAnnotators.

/**
 * Instantiates the set of annotators for use in an AnnotatorService object. By default, lazy
 * initialization is used where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 *
 * <p>The pipeline configuration is resolved once and never reassigned, so every
 * {@code USE_*} switch is read from the same configuration. Annotators that need their own
 * configuration (SRL, dataless) receive a separate, locally scoped ResourceManager.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if annotator construction fails; SRL initialization failures are
 *         rethrown as IOException with the original exception attached as the cause
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    // Pipeline-level configuration; intentionally final so later sections cannot replace it.
    final ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();
    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        // The dependency and constituency handlers share one POS tagger and one parser.
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        // NOTE(review): this stores a Boolean rather than a String. java.util.Properties#getProperty
        // returns null for non-String values, so a consumer that reads the setting through
        // getProperty would not see it -- confirm how the Stanford annotators read it before
        // changing, since switching it may alter the dependency output.
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", maxParseSentenceLength);
        // per sentence? could be per document but no idea from stanford javadoc
        stanfordProps.put("parse.maxtime", timePerSentence);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        // Merge into a local configuration so the SRL type does not leak into the pipeline config.
        ResourceManager verbSrlRm = Configurator.mergeProperties(rm, new ResourceManager(verbProps));
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(verbSrlRm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        // Merged against the untouched pipeline config, independent of the verb SRL section.
        ResourceManager nomSrlRm = Configurator.mergeProperties(rm, new ResourceManager(nomProps));
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(nomSrlRm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        // One annotator per supported language, keyed by view name plus language code.
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        // The dataless annotators are configured from the caller's non-default values; the
        // result is kept local so the USE_* checks below still read the pipeline config.
        ResourceManager esaRm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(esaRm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        ResourceManager w2vRm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(w2vRm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }
    return viewGenerators;
}
Also used : HashMap(java.util.HashMap) VerbSenseAnnotator(edu.illinois.cs.cogcomp.verbsense.VerbSenseAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) Properties(java.util.Properties) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) Language(edu.illinois.cs.cogcomp.core.constants.Language) SemanticRoleLabeler(edu.illinois.cs.cogcomp.srl.SemanticRoleLabeler) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) W2VDatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator) TemporalChunkerConfigurator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerConfigurator) Stanford331Configurator(edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) ESADatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) RelationAnnotator(org.cogcomp.re.RelationAnnotator) IOException(java.io.IOException) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) PipelineConfigurator(edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator) TemporalChunkerAnnotator(edu.illinois.cs.cogcomp.temporal.normalizer.main.TemporalChunkerAnnotator) W2VDatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator) RelationAnnotator(org.cogcomp.re.RelationAnnotator) ESADatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) 
ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) MentionAnnotator(org.cogcomp.md.MentionAnnotator) QuestionTypeAnnotator(edu.illinois.cs.cogcomp.question_typer.QuestionTypeAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) VerbSenseAnnotator(edu.illinois.cs.cogcomp.verbsense.VerbSenseAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) ESADatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.ESADatalessConfigurator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) W2VDatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator) IllinoisLemmatizer(edu.illinois.cs.cogcomp.nlp.lemmatizer.IllinoisLemmatizer) Quantifier(edu.illinois.cs.cogcomp.quant.driver.Quantifier) CommaLabeler(edu.illinois.cs.cogcomp.comma.CommaLabeler) StanfordParseHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler) QuestionTypeAnnotator(edu.illinois.cs.cogcomp.question_typer.QuestionTypeAnnotator) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Example 2 with W2VDatalessConfigurator

use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.

the class W2VDatalessTest method testPredictions.

/**
 * Builds a W2V dataless annotator from {@code config/project.properties}, feeds it two
 * in-memory sample documents and asserts, per document, that {@code checkSetEquality}
 * holds between the gold label IDs and the predicted label IDs.
 */
@Test
public void testPredictions() {
    // Phase 1: create the annotator and the fixtures (documents plus gold labels).
    try {
        configFile = "config/project.properties";
        ResourceManager userRm = new ResourceManager(configFile);
        ResourceManager fullRm = new W2VDatalessConfigurator().getConfig(userRm);
        dataless = new W2VDatalessAnnotator(fullRm);

        documents = new ArrayList<>();
        String screenCaptureDoc = "i m looking for some recommendations for screen capture programs a couple"
                + " of issues ago pc mag listed as editor s choices both conversion artist"
                + " and hijaak for windows anyone have any experience with those or some others"
                + " i m trying to get an alpha manual in the next few days and i m not making much"
                + " progress with the screen shots i m currently using dodot and i m about to burn it"
                + " and the disks it rode it on it s got a lot of freaky bugs and oversights that are "
                + "driving me crazy tonight it decided that for any graphic it writes out as a tiff "
                + "file that s under a certain arbitrary size it will swap the left and right sides of"
                + " the picture usually it confines itself to not copying things to the clipboard so i "
                + "have to save and load pix for editing in paintbrush or crashing every hour or so the "
                + "one nice thing it has though is it s dither option you d think that this would turn "
                + "colors into dots which it does if you go from say colors to colors but if you go "
                + "from or colors to b w you can set a threshold level for which colors turn to black "
                + "and which turn to white for me this is useful because i can turn light grays on buttons"
                + " to white and the dark grays to black and thereby preserve the d effect on buttons and "
                + "other parts of the window if you understood my description can you tell me if another "
                + "less buggy program can do this as well much thanks for any help signature david delgreco "
                + "what lies behind us and what lies technically a writer before us are tiny matters compared "
                + "delgreco rahul net to what lies within us oliver wendell holmes david f delgreco delgreco rahul "
                + "net recommendation for screen capture program";
        documents.add(screenCaptureDoc);
        String xmasLightsDoc = "yes i know it s nowhere near christmas time but i m gonna loose net access in a few days maybe "
                + "a week or if i m lucky and wanted to post this for interested people to save till xmas "
                + "note bell labs is a good place if you have a phd and a good boss i have neither subject "
                + "xmas light set with levels of brightness another version of a variable brightness xmas "
                + "light set this set starts with a blinker bulb string diagram orginal way set 0v b b "
                + "0rtn modified set for level brightness string 0v 0k w string b 0v rtn note no mods to "
                + "wiring to the right of this point only one blinker is used note that the blinker "
                + "would not have as much current thru it as the string bulbs because of the second "
                + "string of bulbs in parallel with it that s why the use of the 0k w resistor here to "
                + "add extra current thru the blinker to make up for the current shunted thru the second "
                + "string while the blinker is glowing and the second string is not glowing when the "
                + "blinker goes open this resistor has only a slight effect on the brightness of the "
                + "strings s slightly dimmer s slightly brighter or use a w 0v bulb in place of the 0k "
                + "resistor if you can get one caution do not replace with a standard c bulb as these "
                + "draw too much current and burn out the blinker c approx w what you ll see when it s "
                + "working powerup string will light at full brightness and b will be lit bypassing most "
                + "of the current from the second string making them not light b will open placing both "
                + "strings in series making the string that was out to glow at a low brightness and the "
                + "other string that was on before to glow at reduced brightness be sure to wire and insulate"
                + " the splices resistor leads and cut wires in a safe manner level brightness xmas light "
                + "set for easter";
        documents.add(xmasLightsDoc);

        // Gold label IDs, in the same order as the documents above.
        docLabels = new ArrayList<>();
        Set<String> goldForFirstDoc = new HashSet<>(Arrays.asList("computer", "comp.os.ms.windows.misc"));
        docLabels.add(goldForFirstDoc);
        Set<String> goldForSecondDoc = new HashSet<>(Arrays.asList("computer", "comp.windows.x"));
        docLabels.add(goldForSecondDoc);
    } catch (IOException e) {
        String message = "IO Error while initializing the annotator .. " + e.getMessage();
        e.printStackTrace();
        System.out.println(message);
        fail(message);
    }

    // Phase 2: annotate every document and compare predictions against the gold labels.
    try {
        int docIndex = 0;
        for (String docText : documents) {
            Set<String> predicted = getPredictions(getTextAnnotation(docText), dataless);
            Set<String> gold = docLabels.get(docIndex);

            System.out.println("Doc" + docIndex + ": Gold LabelIDs:");
            for (String labelId : gold) {
                System.out.println(labelId);
            }
            System.out.println("Doc" + docIndex + ": Predicted LabelIDs:");
            for (String labelId : predicted) {
                System.out.println(labelId);
            }
            System.out.println();

            assertTrue(checkSetEquality(gold, predicted));
            docIndex++;
        }
    } catch (AnnotatorException e) {
        String message = "Error annotating the document .. " + e.getMessage();
        e.printStackTrace();
        System.out.println(message);
        fail(message);
    }
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) W2VDatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) W2VDatalessAnnotator(edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator) Test(org.junit.Test)

Example 3 with W2VDatalessConfigurator

use of edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator in project cogcomp-nlp by CogComp.

the class W2VDatalessAnnotator method main.

/**
 * Command-line entry point: reads a test document, annotates it with the W2V dataless view
 * and prints the predicted label IDs followed by the corresponding label names.
 *
 * <p>If the config file cannot be loaded, the failure is logged and the default W2V dataless
 * configuration is used instead. Failures while reading or annotating the test document are
 * logged and terminate the JVM with exit status -1.
 *
 * @param args config: config file path (defaults to {@code config/project.properties})
 *        testFile: Test File (defaults to {@code data/graphicsTestDocument.txt})
 */
public static void main(String[] args) {
    CommandLine cmd = ESADatalessAnnotator.getCMDOpts(args);
    // Resolved outside the try block so the fallback message can name the file.
    String configFile = cmd.getOptionValue("config", "config/project.properties");
    ResourceManager rm;
    try {
        ResourceManager nonDefaultRm = new ResourceManager(configFile);
        rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
    } catch (IOException e) {
        // Best-effort fallback, but no longer silent: report why the defaults are being used.
        logger.error("Could not load config file " + configFile + " (" + e.getMessage() + ") ... using the default config");
        rm = new W2VDatalessConfigurator().getDefaultConfig();
    }
    String testFile = cmd.getOptionValue("testFile", "data/graphicsTestDocument.txt");
    StringBuilder sb = new StringBuilder();
    String line;
    try (BufferedReader br = new BufferedReader(new FileReader(new File(testFile)))) {
        // Collapse the file into a single space-separated string.
        while ((line = br.readLine()) != null) {
            sb.append(line);
            sb.append(" ");
        }
        String text = sb.toString().trim();
        TokenizerTextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
        TextAnnotation ta = taBuilder.createTextAnnotation(text);
        W2VDatalessAnnotator datalessAnnotator = new W2VDatalessAnnotator(rm);
        datalessAnnotator.addView(ta);
        List<Constituent> annots = ta.getView(ViewNames.DATALESS_W2V).getConstituents();
        System.out.println("Predicted LabelIDs:");
        for (Constituent annot : annots) {
            System.out.println(annot.getLabel());
        }
        // Second pass: translate each label ID through the label-name map from the config.
        Map<String, String> labelNameMap = DatalessAnnotatorUtils.getLabelNameMap(rm.getString(DatalessConfigurator.LabelName_Path.key));
        System.out.println("Predicted Labels:");
        for (Constituent annot : annots) {
            System.out.println(labelNameMap.get(annot.getLabel()));
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        logger.error("Test File not found at " + testFile + " ... exiting");
        System.exit(-1);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        logger.error("Error Annotating the Test Document with the Dataless View ... exiting");
        System.exit(-1);
    } catch (IOException e) {
        e.printStackTrace();
        logger.error("IO Error while reading the test file ... exiting");
        System.exit(-1);
    }
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) CommandLine(org.apache.commons.cli.CommandLine) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) W2VDatalessConfigurator(edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)3 W2VDatalessConfigurator (edu.illinois.cs.cogcomp.datalessclassification.config.W2VDatalessConfigurator)3 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 W2VDatalessAnnotator (edu.illinois.cs.cogcomp.datalessclassification.ta.W2VDatalessAnnotator)2 IOException (java.io.IOException)2 ChunkerAnnotator (edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator)1 CommaLabeler (edu.illinois.cs.cogcomp.comma.CommaLabeler)1 Language (edu.illinois.cs.cogcomp.core.constants.Language)1 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)1 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)1 ESADatalessConfigurator (edu.illinois.cs.cogcomp.datalessclassification.config.ESADatalessConfigurator)1 ESADatalessAnnotator (edu.illinois.cs.cogcomp.datalessclassification.ta.ESADatalessAnnotator)1 DepAnnotator (edu.illinois.cs.cogcomp.depparse.DepAnnotator)1 NERAnnotator (edu.illinois.cs.cogcomp.ner.NERAnnotator)1 IllinoisLemmatizer (edu.illinois.cs.cogcomp.nlp.lemmatizer.IllinoisLemmatizer)1 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)1 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)1 PipelineConfigurator (edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator)1 Stanford331Configurator (edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator)1 StanfordDepHandler (edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)1