Use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
The class KeywordLinkingEngine, method activateTextAnalyzerConfig.
/**
 * Initialises the {@link TextAnalyzer} related state of this engine from the
 * parsed component configuration.<p>
 * A new {@link TextAnalyzerConfig} is created on every call and the
 * following configuration properties are evaluated: <ul>
 * <li>{@link #PROCESSED_LANGUAGES}: A comma separated list of languages.
 *     Entries are trimmed and empty entries are ignored. If no
 *     configuration is present the default ({@link #DEFAULT_LANGUAGES},
 *     process all languages) is used. If the value is present but blank an
 *     empty set is used.</li>
 * <li>{@link #MIN_POS_TAG_PROBABILITY}: Either a {@link Number} or a value
 *     whose string representation can be parsed as a double. If no
 *     configuration is present (or the value is an empty string) the
 *     {@link #DEFAULT_MIN_POS_TAG_PROBABILITY} is used. Valid values are in
 *     the range [0..1]; values &lt; 0 deactivate this feature. Values
 *     &gt; 1 and <code>NaN</code> are rejected.</li>
 * <li>{@link #KEYWORD_TOKENIZER}: Either a {@link Boolean} or a value whose
 *     (trimmed) string representation is parsed as a boolean. If no
 *     configuration is present (or the value is an empty string) the
 *     setting of the newly created {@link TextAnalyzerConfig} is not
 *     changed.</li>
 * </ul>
 * Finally the analysed content factory is (re)created for the resulting
 * configuration.
 *
 * @param configuration the OSGI component configuration
 * @throws ConfigurationException if the value of
 *         {@link #MIN_POS_TAG_PROBABILITY} can not be parsed as a number,
 *         is <code>NaN</code> or is greater than <code>1</code>
 */
protected final void activateTextAnalyzerConfig(Dictionary<String, Object> configuration) throws ConfigurationException {
    nlpConfig = new TextAnalyzerConfig();

    // --- processed languages ---
    Object value = configuration.get(PROCESSED_LANGUAGES);
    if (value == null) {
        this.languages = DEFAULT_LANGUAGES;
    } else if (value.toString().trim().isEmpty()) {
        this.languages = Collections.emptySet();
    } else {
        languages = new HashSet<String>();
        // String#split never returns null elements, so only blank entries
        // (e.g. caused by "en,,de" or a trailing ", ") need to be skipped
        for (String language : value.toString().split(",")) {
            language = language.trim();
            if (!language.isEmpty()) {
                languages.add(language);
            }
        }
    }

    // --- min POS tag probability ---
    value = configuration.get(MIN_POS_TAG_PROBABILITY);
    double minPosTagProb;
    if (value instanceof Number) {
        minPosTagProb = ((Number) value).doubleValue();
    } else if (value != null && !value.toString().isEmpty()) {
        try {
            minPosTagProb = Double.valueOf(value.toString());
        } catch (NumberFormatException e) {
            throw new ConfigurationException(MIN_POS_TAG_PROBABILITY, "Unable to parse the min POS tag probability from the parsed value " + value, e);
        }
    } else {
        minPosTagProb = DEFAULT_MIN_POS_TAG_PROBABILITY;
    }
    // NaN is neither within [0..1] nor < 0. A plain "> 1" comparison is
    // false for NaN and would silently accept it, so check it explicitly.
    if (Double.isNaN(minPosTagProb) || minPosTagProb > 1) {
        throw new ConfigurationException(MIN_POS_TAG_PROBABILITY, "The configured min POS tag probability MUST BE in the range [0..1] " + "or < 0 to deactivate this feature (parsed value " + value + ")!");
    }
    nlpConfig.setMinPosTagProbability(minPosTagProb);

    // --- the keyword tokenizer config ---
    value = configuration.get(KEYWORD_TOKENIZER);
    if (value instanceof Boolean) {
        nlpConfig.forceKeywordTokenizer((Boolean) value);
    } else if (value != null && !value.toString().isEmpty()) {
        // Boolean.valueOf(String) does not ignore surrounding whitespace:
        // trim first so that values like " true " are not read as false
        nlpConfig.forceKeywordTokenizer(Boolean.valueOf(value.toString().trim()));
    }
    //nlpConfig.enablePosTypeChunker(false);
    //nlpConfig.enableChunker(false);
    analysedContentFactory = OpenNlpAnalysedContentFactory.getInstance(openNLP, nlpConfig);
}
Aggregations