Search in sources :

Example 16 with ResourceManager

use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotatorTest method testSimpleGazetteerAnnotatorString.

/**
 * Test method for {@link SimpleGazetteerAnnotator#SimpleGazetteerAnnotator(ResourceManager)}.
 *
 * <p>Builds the annotator from a configuration that points at {@code /testgazetteers/} and sets
 * the lazy-initialization flag to {@code FALSE}, then checks that both {@code dictionaries} and
 * {@code dictionariesIgnoreCase} contain exactly one entry.
 *
 * @throws URISyntaxException declared by the test signature
 * @throws IOException declared by the test signature
 */
@Test
public void testSimpleGazetteerAnnotatorString() throws IOException, URISyntaxException {
    // Only these two settings are overridden; everything else comes from the configurator.
    Properties overrides = new Properties();
    overrides.setProperty(
            SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    overrides.setProperty(
            SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key,
            SimpleGazetteerAnnotatorConfigurator.FALSE);

    SimpleGazetteerAnnotatorConfigurator configurator = new SimpleGazetteerAnnotatorConfigurator();
    ResourceManager localRm = configurator.getConfig(new ResourceManager(overrides));
    SimpleGazetteerAnnotator annotator = new SimpleGazetteerAnnotator(localRm);

    int loadedCaseAsIs = annotator.dictionaries.size();
    assertTrue("Wrong number of dictionaries loaded.", loadedCaseAsIs == 1);
    int loadedIgnoreCase = annotator.dictionariesIgnoreCase.size();
    assertTrue("Wrong number of dictionaries loaded.", loadedIgnoreCase == 1);
}
Also used : SimpleGazetteerAnnotatorConfigurator(edu.illinois.cs.cogcomp.edison.config.SimpleGazetteerAnnotatorConfigurator) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) Properties(java.util.Properties) Test(org.junit.Test)

Example 17 with ResourceManager

use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.

the class TestBrownClusterViewGenerator method testCharniakParseViewGenerator.

/**
 * Test the configuration of normalizing tokens in the brown clusters.
 *
 * <p>The sentence {@code "a test ."} is annotated twice: first with a generator built without an
 * explicit configuration, then with one whose {@code NORMALIZE_TOKEN} property is set to
 * {@code FALSE}. After each pass the surface forms and labels of the leading constituents of
 * the generated view are compared with fixed expected values.
 *
 * <p>Any exception raised while building a generator or adding its view fails the test
 * immediately with the original exception attached as the cause.
 */
@Test
public final void testCharniakParseViewGenerator() {
    String sentence = "a test .";
    TextAnnotation ta = TextAnnotationUtilities.createFromTokenizedString(sentence);
    // The default configuration: do normalization
    BrownClusterViewGenerator viewGenerator;
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        // Fail here with the real cause instead of a later, unrelated NullPointerException.
        throw new AssertionError("Could not create or apply the default BrownClusterViewGenerator", e);
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("111011111", view.getConstituents().get(0).getLabel());
    assertEquals("a", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(1).getLabel());
    assertEquals("test", view.getConstituents().get(2).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(2).getLabel());
    // Don't normalize tokens in the brown clusters
    Properties props = new Properties();
    props.setProperty(BrownClusterViewGeneratorConfigurator.NORMALIZE_TOKEN.key, Configurator.FALSE);
    ResourceManager rm = new ResourceManager(props);
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100, BrownClusterViewGenerator.file100, rm);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        // Otherwise the assertions below would silently run against the previous generator's view.
        throw new AssertionError("Could not create or apply the non-normalizing BrownClusterViewGenerator", e);
    }
    view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(0).getLabel());
    assertEquals("test", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(1).getLabel());
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Properties(java.util.Properties) Test(org.junit.Test)

Example 18 with ResourceManager

use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.

the class WordEmbeddings method initWithDefaults.

/**
 * Initializes the word embeddings from the default {@link WordEmbeddingsConfigurator} settings.
 *
 * <p>Each default value (file name, dimensionality, word-count threshold, lowercase flag,
 * normalization constant and normalization method) is wrapped in a single-element list and the
 * six lists are handed to {@code init(...)}.
 *
 * @throws IOException propagated from the calls made here
 */
public static void initWithDefaults() throws IOException {
    WordEmbeddingsConfigurator configurator = new WordEmbeddingsConfigurator();
    ResourceManager defaults = configurator.getDefaultConfig();

    List<String> files = new LinkedList<>();
    files.add(defaults.getString(WordEmbeddingsConfigurator.fileNames.key));

    List<Integer> dimensions = new LinkedList<>();
    dimensions.add(defaults.getInt(WordEmbeddingsConfigurator.dimensionalities.key));

    List<Integer> minAppearances = new LinkedList<>();
    minAppearances.add(defaults.getInt(WordEmbeddingsConfigurator.wordNumThreshold.key));

    List<Boolean> lowercasedFlags = new LinkedList<>();
    lowercasedFlags.add(defaults.getBoolean(WordEmbeddingsConfigurator.isLowercase.key));

    List<Double> constants = new LinkedList<>();
    constants.add(defaults.getDouble(WordEmbeddingsConfigurator.normalizationConstants.key));

    // The method is stored as the name of a NormalizationMethod constant.
    String methodName = defaults.getString(WordEmbeddingsConfigurator.normalizationMethods.key);
    List<NormalizationMethod> methods = new LinkedList<>();
    methods.add(NormalizationMethod.valueOf(methodName));

    init(files, dimensions, minAppearances, lowercasedFlags, constants, methods);
}
Also used : ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)

Example 19 with ResourceManager

use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.

the class PipelineFactory method buildPipeline.

/**
 * Creates an AnnotatorService that provides the views named in the arguments.
 *
 * <p>Every entry of {@code views} must be one of the names returned by
 * {@link ViewNames#getAllViewNames()}. A name that is valid but has no matching case below is
 * skipped with a logged warning; it does not abort the build.
 *
 * @param disableCache value written to {@code AnnotatorServiceConfigurator.DISABLE_CACHE};
 *        must not be {@code null} because it is unboxed
 * @param views the view names to enable, one name per argument
 * @return AnnotatorService with specified NLP components
 * @throws IllegalArgumentException if an entry of {@code views} is not a known view name
 * @throws IOException propagated from building the annotators
 * @throws AnnotatorException propagated from the calls made here
 */
public static BasicAnnotatorService buildPipeline(Boolean disableCache, String... views) throws IOException, AnnotatorException {
    List<String> allViewNames = ViewNames.getAllViewNames();
    Map<String, String> nonDefaultValues = new HashMap<>();
    for (String vu : views) {
        if (!allViewNames.contains(vu)) {
            throw new IllegalArgumentException("The view name " + vu + " is not a valid view name. " + "The possible view names are static members of the class `ViewNames`. ");
        }
        switch(vu) {
            case ViewNames.POS:
                nonDefaultValues.put(PipelineConfigurator.USE_POS.key, Configurator.TRUE);
                break;
            case ViewNames.LEMMA:
                nonDefaultValues.put(PipelineConfigurator.USE_LEMMA.key, Configurator.TRUE);
                break;
            case ViewNames.NER_CONLL:
                nonDefaultValues.put(PipelineConfigurator.USE_NER_CONLL.key, Configurator.TRUE);
                break;
            case ViewNames.NER_ONTONOTES:
                nonDefaultValues.put(PipelineConfigurator.USE_NER_ONTONOTES.key, Configurator.TRUE);
                break;
            case ViewNames.QUANTITIES:
                nonDefaultValues.put(PipelineConfigurator.USE_QUANTIFIER.key, Configurator.TRUE);
                break;
            case ViewNames.SHALLOW_PARSE:
                nonDefaultValues.put(PipelineConfigurator.USE_SHALLOW_PARSE.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_VERB:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_VERB.key, Configurator.TRUE);
                break;
            case ViewNames.DEPENDENCY_STANFORD:
                nonDefaultValues.put(PipelineConfigurator.USE_STANFORD_DEP.key, Configurator.TRUE);
                break;
            case ViewNames.DEPENDENCY:
                nonDefaultValues.put(PipelineConfigurator.USE_DEP.key, Configurator.TRUE);
                break;
            case ViewNames.PARSE_STANFORD:
                nonDefaultValues.put(PipelineConfigurator.USE_STANFORD_PARSE.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_PREP:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_PREP.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_COMMA:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_COMMA.key, Configurator.TRUE);
                break;
            default:
                // Valid view name, but no annotator is wired in for it here.
                logger.warn("View name " + vu + " is not supported yet. Look into the readme of the pipeline to see the list of valid annotators. ");
        }
    }
    // The cache setting is always written explicitly, whichever way the flag is set.
    nonDefaultValues.put(AnnotatorServiceConfigurator.DISABLE_CACHE.key, disableCache ? Configurator.TRUE : Configurator.FALSE);
    // using the default settings and changing the views
    ResourceManager fullRm = (new PipelineConfigurator()).getConfig(new Stanford331Configurator().getConfig(nonDefaultValues));
    boolean splitOnHyphen = fullRm.getBoolean(PipelineConfigurator.SPLIT_ON_DASH.key);
    TextAnnotationBuilder taBldr = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(splitOnHyphen));
    Map<String, Annotator> annotators = buildAnnotators(fullRm);
    return new SentencePipeline(taBldr, annotators, fullRm);
}
Also used : Stanford331Configurator(edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) HashMap(java.util.HashMap) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) PipelineConfigurator(edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)

Example 20 with ResourceManager

use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.

the class PipelineFactory method buildAnnotators.

/**
 * Instantiates the set of annotators for use in an AnnotatorService object. By default, lazy
 * initialization is requested where an annotator accepts it -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 *
 * <p>The given configuration is first completed with the {@link Stanford331Configurator} and
 * {@link PipelineConfigurator} settings; an annotator is then created for every {@code USE_*}
 * flag that reads as true.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if an SRL annotator cannot be created (the original exception is attached
 *         as the cause), or propagated from the other annotator constructors
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();
    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        // The Stanford POS tagger and parser are shared by the dependency and parse handlers.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // Must be stored as a String: Properties.getProperty reports non-String values as absent.
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        // per sentence? could be per document but no idea from stanford javadoc
        stanfordProps.setProperty("parse.maxtime", timePerSentence);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        ResourceManager verbRm = new ResourceManager(verbProps);
        // From here on rm is the merged configuration; later flag checks read from it.
        rm = Configurator.mergeProperties(rm, verbRm);
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        ResourceManager nomRm = new ResourceManager(nomProps);
        rm = Configurator.mergeProperties(rm, nomRm);
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    return viewGenerators;
}
Also used : HashMap(java.util.HashMap) Properties(java.util.Properties) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) SemanticRoleLabeler(edu.illinois.cs.cogcomp.srl.SemanticRoleLabeler) Stanford331Configurator(edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) IOException(java.io.IOException) IOException(java.io.IOException) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) PipelineConfigurator(edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator) POSTaggerAnnotator(edu.stanford.nlp.pipeline.POSTaggerAnnotator) PrepSRLAnnotator(edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator) ParserAnnotator(edu.stanford.nlp.pipeline.ParserAnnotator) ChunkerAnnotator(edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) NERAnnotator(edu.illinois.cs.cogcomp.ner.NERAnnotator) DepAnnotator(edu.illinois.cs.cogcomp.depparse.DepAnnotator) IllinoisLemmatizer(edu.illinois.cs.cogcomp.nlp.lemmatizer.IllinoisLemmatizer) Quantifier(edu.illinois.cs.cogcomp.quant.driver.Quantifier) CommaLabeler(edu.illinois.cs.cogcomp.comma.CommaLabeler) StanfordParseHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler) StanfordDepHandler(edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler)

Aggregations

ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)28 Properties (java.util.Properties)14 Test (org.junit.Test)8 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)6 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)5 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)5 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)4 ChunkerAnnotator (edu.illinois.cs.cogcomp.chunker.main.ChunkerAnnotator)3 DepAnnotator (edu.illinois.cs.cogcomp.depparse.DepAnnotator)3 NERAnnotator (edu.illinois.cs.cogcomp.ner.NERAnnotator)3 PipelineConfigurator (edu.illinois.cs.cogcomp.pipeline.common.PipelineConfigurator)3 Stanford331Configurator (edu.illinois.cs.cogcomp.pipeline.common.Stanford331Configurator)3 POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)3 PrepSRLAnnotator (edu.illinois.cs.cogcomp.prepsrl.PrepSRLAnnotator)3 POSTaggerAnnotator (edu.stanford.nlp.pipeline.POSTaggerAnnotator)3 ParserAnnotator (edu.stanford.nlp.pipeline.ParserAnnotator)3 IOException (java.io.IOException)3 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)2 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)2 InFile (edu.illinois.cs.cogcomp.ner.IO.InFile)2