use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
the class SimpleGazetteerAnnotatorTest method testSimpleGazetteerAnnotatorString.
/**
 * Checks {@link SimpleGazetteerAnnotator#SimpleGazetteerAnnotator(ResourceManager)}: with lazy
 * initialization switched off and the dictionary path pointing at {@code /testgazetteers/},
 * both the case-sensitive and the case-insensitive dictionary collections must hold exactly
 * one entry once the constructor returns.
 *
 * @throws URISyntaxException if thrown by the code under test
 * @throws IOException if thrown by the code under test
 */
@Test
public void testSimpleGazetteerAnnotatorString() throws IOException, URISyntaxException {
    // Non-default settings: where the gazetteers live, and load them eagerly.
    Properties gazetteerSettings = new Properties();
    gazetteerSettings.setProperty(
            SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key,
            "/testgazetteers/");
    gazetteerSettings.setProperty(
            SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key,
            SimpleGazetteerAnnotatorConfigurator.FALSE);

    // Let the configurator fill in every remaining default.
    SimpleGazetteerAnnotatorConfigurator configurator = new SimpleGazetteerAnnotatorConfigurator();
    ResourceManager localRm = configurator.getConfig(new ResourceManager(gazetteerSettings));

    SimpleGazetteerAnnotator annotator = new SimpleGazetteerAnnotator(localRm);

    boolean singleCaseSensitiveDictionary = annotator.dictionaries.size() == 1;
    assertTrue("Wrong number of dictionaries loaded.", singleCaseSensitiveDictionary);

    boolean singleCaseInsensitiveDictionary = annotator.dictionariesIgnoreCase.size() == 1;
    assertTrue("Wrong number of dictionaries loaded.", singleCaseInsensitiveDictionary);
}
use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
the class TestBrownClusterViewGenerator method testCharniakParseViewGenerator.
/**
 * Test the configuration of normalizing tokens in the brown clusters.
 * <p>
 * Despite its name, this test exercises {@link BrownClusterViewGenerator}. It annotates the
 * tokenized sentence {@code "a test ."} twice: first with a generator built with the default
 * configuration (token normalization on), then with one whose
 * {@code NORMALIZE_TOKEN} property is set to false. After each run it checks the surface form
 * and label of the leading constituents of the generated view.
 * <p>
 * Any exception raised while constructing a generator or adding its view fails the test
 * immediately with that exception attached as the cause, rather than being printed and
 * followed by an unrelated failure further down.
 */
@Test
public final void testCharniakParseViewGenerator() {
    String sentence = "a test .";
    TextAnnotation ta = TextAnnotationUtilities.createFromTokenizedString(sentence);

    // The default configuration: do normalization
    BrownClusterViewGenerator viewGenerator;
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100,
                BrownClusterViewGenerator.file100);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        // Fail here with the root cause; continuing would only produce a NullPointerException.
        throw new AssertionError(
                "Could not build the Brown cluster view with the default configuration", e);
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("111011111", view.getConstituents().get(0).getLabel());
    assertEquals("a", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(1).getLabel());
    assertEquals("test", view.getConstituents().get(2).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(2).getLabel());

    // Don't normalize tokens in the brown clusters
    Properties props = new Properties();
    props.setProperty(BrownClusterViewGeneratorConfigurator.NORMALIZE_TOKEN.key, Configurator.FALSE);
    ResourceManager rm = new ResourceManager(props);
    try {
        viewGenerator = new BrownClusterViewGenerator(BrownClusterViewGenerator.file100,
                BrownClusterViewGenerator.file100, rm);
        viewGenerator.addView(ta);
    } catch (Exception e) {
        // Fail here; otherwise the assertions below would silently run against the view
        // produced by the first (normalizing) generator.
        throw new AssertionError(
                "Could not build the Brown cluster view with token normalization disabled", e);
    }
    view = (SpanLabelView) ta.getView(viewGenerator.getViewName());
    assertEquals("a", view.getConstituents().get(0).getSurfaceForm());
    assertEquals("10010", view.getConstituents().get(0).getLabel());
    assertEquals("test", view.getConstituents().get(1).getSurfaceForm());
    assertEquals("001110", view.getConstituents().get(1).getLabel());
}
use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
the class WordEmbeddings method initWithDefaults.
/**
 * Initializes the embeddings from the default {@link WordEmbeddingsConfigurator} settings.
 * <p>
 * Each default (file name, dimensionality, word-count threshold, lowercase flag,
 * normalization constant and normalization method) is read from the default configuration,
 * placed in its own single-element list, and the six lists are passed to {@code init}.
 *
 * @throws IOException declared by this method; presumably propagated from {@code init}
 */
public static void initWithDefaults() throws IOException {
    ResourceManager defaults = new WordEmbeddingsConfigurator().getDefaultConfig();

    // One list per init(...) argument, each holding the single default value.
    List<String> files = new LinkedList<>();
    List<Integer> dimensions = new LinkedList<>();
    List<Integer> minAppearances = new LinkedList<>();
    List<Boolean> lowercasedFlags = new LinkedList<>();
    List<Double> normConstants = new LinkedList<>();
    List<NormalizationMethod> normMethods = new LinkedList<>();

    // Values are read in the same order as the init(...) parameters.
    String defaultFile = defaults.getString(WordEmbeddingsConfigurator.fileNames.key);
    files.add(defaultFile);
    dimensions.add(defaults.getInt(WordEmbeddingsConfigurator.dimensionalities.key));
    minAppearances.add(defaults.getInt(WordEmbeddingsConfigurator.wordNumThreshold.key));
    lowercasedFlags.add(defaults.getBoolean(WordEmbeddingsConfigurator.isLowercase.key));
    normConstants.add(defaults.getDouble(WordEmbeddingsConfigurator.normalizationConstants.key));
    String methodName = defaults.getString(WordEmbeddingsConfigurator.normalizationMethods.key);
    normMethods.add(NormalizationMethod.valueOf(methodName));

    init(files, dimensions, minAppearances, lowercasedFlags, normConstants, normMethods);
}
use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
the class PipelineFactory method buildPipeline.
/**
 * Creates an AnnotatorService that provides the views named in the arguments.
 * <p>
 * Every requested name is first checked against {@link ViewNames#getAllViewNames()}. Names
 * that map to a pipeline component switch the corresponding {@link PipelineConfigurator}
 * flag on; valid names without a mapping are only reported with a warning. All other settings
 * keep the defaults supplied by {@link PipelineConfigurator} and
 * {@link Stanford331Configurator}.
 *
 * @param disableCache whether caching should be disabled in the returned service;
 *        {@code null} is treated as {@code false}
 * @param views the names of the views to enable, one argument per view, each expected to be
 *        one of the constants in {@link ViewNames}
 * @return AnnotatorService with specified NLP components
 * @throws IllegalArgumentException if a requested name is not among the known view names
 * @throws IOException if the annotators cannot be built
 * @throws AnnotatorException declared by this method; presumably raised while assembling the
 *         service
 */
public static BasicAnnotatorService buildPipeline(Boolean disableCache, String... views) throws IOException, AnnotatorException {
    List<String> allViewNames = ViewNames.getAllViewNames();
    Map<String, String> nonDefaultValues = new HashMap<>();
    for (String vu : views) {
        if (!allViewNames.contains(vu)) {
            throw new IllegalArgumentException("The view name " + vu + " is not a valid view name. "
                    + "The possible view names are static members of the class `ViewNames`. ");
        }
        switch (vu) {
            case ViewNames.POS:
                nonDefaultValues.put(PipelineConfigurator.USE_POS.key, Configurator.TRUE);
                break;
            case ViewNames.LEMMA:
                nonDefaultValues.put(PipelineConfigurator.USE_LEMMA.key, Configurator.TRUE);
                break;
            case ViewNames.NER_CONLL:
                nonDefaultValues.put(PipelineConfigurator.USE_NER_CONLL.key, Configurator.TRUE);
                break;
            case ViewNames.NER_ONTONOTES:
                nonDefaultValues.put(PipelineConfigurator.USE_NER_ONTONOTES.key, Configurator.TRUE);
                break;
            case ViewNames.QUANTITIES:
                nonDefaultValues.put(PipelineConfigurator.USE_QUANTIFIER.key, Configurator.TRUE);
                break;
            case ViewNames.SHALLOW_PARSE:
                nonDefaultValues.put(PipelineConfigurator.USE_SHALLOW_PARSE.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_VERB:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_VERB.key, Configurator.TRUE);
                break;
            case ViewNames.DEPENDENCY_STANFORD:
                nonDefaultValues.put(PipelineConfigurator.USE_STANFORD_DEP.key, Configurator.TRUE);
                break;
            case ViewNames.DEPENDENCY:
                nonDefaultValues.put(PipelineConfigurator.USE_DEP.key, Configurator.TRUE);
                break;
            case ViewNames.PARSE_STANFORD:
                nonDefaultValues.put(PipelineConfigurator.USE_STANFORD_PARSE.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_PREP:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_PREP.key, Configurator.TRUE);
                break;
            case ViewNames.SRL_COMMA:
                nonDefaultValues.put(PipelineConfigurator.USE_SRL_COMMA.key, Configurator.TRUE);
                break;
            default:
                // NOTE(review): buildAnnotators also honors USE_SRL_NOM, but ViewNames.SRL_NOM
                // has no case here and ends up in this warning - confirm that is intentional.
                logger.warn("View name " + vu + " is not supported yet. Look into the readme of the pipeline to see the list of valid annotators. ");
        }
    }

    // Boolean.TRUE.equals(...) avoids a NullPointerException from auto-unboxing a null flag.
    boolean cacheDisabled = Boolean.TRUE.equals(disableCache);
    nonDefaultValues.put(AnnotatorServiceConfigurator.DISABLE_CACHE.key,
            cacheDisabled ? Configurator.TRUE : Configurator.FALSE);

    // using the default settings and changing the views
    ResourceManager fullRm = (new PipelineConfigurator()).getConfig(new Stanford331Configurator().getConfig(nonDefaultValues));
    boolean splitOnHyphen = fullRm.getBoolean(PipelineConfigurator.SPLIT_ON_DASH.key);
    TextAnnotationBuilder taBldr = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(splitOnHyphen));
    Map<String, Annotator> annotators = buildAnnotators(fullRm);
    return new SentencePipeline(taBldr, annotators, fullRm);
}
use of edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager in project cogcomp-nlp by CogComp.
the class PipelineFactory method buildAnnotators.
/**
 * Instantiates the set of annotators for use in an AnnotatorService object. By default, lazy
 * initialization is used where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 * <p>
 * The given configuration is completed with the defaults of {@link Stanford331Configurator}
 * and {@link PipelineConfigurator}; one annotator is then created for every {@code USE_*} flag
 * that is switched on. The two Stanford handlers share a single POS tagger and parser
 * instance.
 *
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 * @throws IOException if a semantic role labeler cannot be initialized (the original exception
 *         is attached as the cause), or if raised while constructing another annotator
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator().getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE);
    Map<String, Annotator> viewGenerators = new HashMap<>();

    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        Properties stanfordProps = new Properties();
        // setProperty (String values only) instead of put: Properties.getProperty returns
        // null for a non-String value, so a Boolean stored with put(...) is invisible to any
        // reader that goes through getProperty.
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        // per sentence? could be per document but no idea from stanford javadoc
        stanfordProps.setProperty("parse.maxtime", timePerSentence);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm.getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, SRLType.Verb.name());
        // Merged into a dedicated ResourceManager so the SRL type set here does not carry
        // over into the configuration read by the blocks below.
        ResourceManager verbSrlRm = Configurator.mergeProperties(rm, new ResourceManager(verbProps));
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(verbSrlRm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new IOException("SRL verb cannot init: " + e.getMessage(), e);
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, SRLType.Nom.name());
        ResourceManager nomSrlRm = Configurator.mergeProperties(rm, new ResourceManager(nomProps));
        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(nomSrlRm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
        } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new IOException("SRL nom cannot init .." + e.getMessage(), e);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }
    return viewGenerators;
}
Aggregations