use of org.osgi.service.cm.ConfigurationException in project stanbol by apache.
the class KeywordLinkingEngine method activateEntitySearcher.
/**
* Initialise the {@link #entitySearcher} based on the value of the
* {@link #REFERENCED_SITE_ID} property in the parsed configuration
* @param context
* @param configuration
* @throws ConfigurationException
*/
protected void activateEntitySearcher(ComponentContext context, Dictionary<String, Object> configuration) throws ConfigurationException {
Object value = configuration.get(REFERENCED_SITE_ID);
// init the EntitySource
if (value == null) {
throw new ConfigurationException(REFERENCED_SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!");
}
referencedSiteName = value.toString();
if (referencedSiteName.isEmpty()) {
throw new ConfigurationException(REFERENCED_SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!");
}
// TODO: make limit configurable!
if (Entityhub.ENTITYHUB_IDS.contains(referencedSiteName.toLowerCase())) {
entitySearcher = new EntityhubSearcher(context.getBundleContext(), 10);
} else {
entitySearcher = new ReferencedSiteSearcher(context.getBundleContext(), referencedSiteName, 10);
}
}
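The snippet above shows the canonical Config Admin contract: validate required properties during activation and throw ConfigurationException with the offending property key as the first argument, so the failure is reported against that key. A minimal, self-contained sketch of the same pattern — the component class and the example.site.id key are hypothetical, not Stanbol code:
import java.util.Dictionary;

import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;

public class SiteConfiguredComponent {

    // hypothetical property key, analogous to REFERENCED_SITE_ID above
    public static final String SITE_ID = "example.site.id";

    private String siteId;

    protected void activate(ComponentContext context) throws ConfigurationException {
        Dictionary<String, Object> config = context.getProperties();
        Object value = config.get(SITE_ID);
        // fail fast: the first constructor argument names the broken property
        if (value == null || value.toString().trim().isEmpty()) {
            throw new ConfigurationException(SITE_ID,
                "The site ID is a required parameter and MUST NOT be NULL or empty!");
        }
        siteId = value.toString().trim();
    }
}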
use of org.osgi.service.cm.ConfigurationException in project stanbol by apache.
the class FstLinkingEngineComponent method applyConfig.
/**
* Called by {@link #activate(ComponentContext)},
* {@link PlainFstLinkingComponnet#activate(ComponentContext)} and
* {@link NamedEntityFstLinkingComponnet#activate(ComponentContext)} to
* apply the parsed {@link ComponentContext#getProperties()}. The
* {@link LinkingModeEnum linking mode} is parsed separately as OSGI does not
* allow modifying the parsed config and sub-classes need to override
* the linking mode.
* @param linkingMode the linking mode
* @param properties
* @param prefixService
* @throws ConfigurationException
*/
protected void applyConfig(LinkingModeEnum linkingMode, Dictionary<String, Object> properties, NamespacePrefixService prefixService) throws ConfigurationException {
// (0) The name for the Enhancement Engine and the basic metadata
Object value = properties.get(PROPERTY_NAME);
if (value == null || value.toString().isEmpty()) {
throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
} else {
this.engineName = value.toString();
}
log.info(" - engine name: {}", engineName);
engineMetadata = new Hashtable<String, Object>();
engineMetadata.put(PROPERTY_NAME, this.engineName);
value = properties.get(Constants.SERVICE_RANKING);
engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);
// (0) set the linking mode
this.linkingMode = linkingMode;
log.info(" - linking mode: {}", linkingMode);
// (1) parse the TextProcessing configuration
// TODO: decide if we should use the TextProcessingConfig for this engine
textProcessingConfig = TextProcessingConfig.createInstance(properties);
// change default for EntityLinkerConfig.MIN_FOUND_TOKENS
value = properties.get(EntityLinkerConfig.MIN_FOUND_TOKENS);
entityLinkerConfig = EntityLinkerConfig.createInstance(properties, prefixService);
if (value == null) {
// no MIN_FOUND_TOKENS config present
// manually set the default to the value used by this engine
entityLinkerConfig.setMinFoundTokens(FST_DEFAULT_MIN_FOUND_TOKENS);
}
// (2) parse the configured IndexReference
value = properties.get(SOLR_CORE);
if (value == null) {
throw new ConfigurationException(SOLR_CORE, "Missing required configuration of the SolrCore");
} else {
indexReference = IndexReference.parse(value.toString());
}
value = properties.get(IndexConfiguration.FIELD_ENCODING);
if (value == null) {
throw new ConfigurationException(IndexConfiguration.FIELD_ENCODING, "Missing required configuration of the Solr Field Encoding");
} else {
try {
fieldEncoding = FieldEncodingEnum.valueOf(value.toString().trim());
} catch (IllegalArgumentException e) {
throw new ConfigurationException(IndexConfiguration.FIELD_ENCODING, "The configured " + "FieldEncoding MUST BE a member of " + Arrays.toString(FieldEncodingEnum.values()), e);
}
}
value = properties.get(IndexConfiguration.SKIP_ALT_TOKENS);
if (value instanceof Boolean) {
skipAltTokensConfig = ((Boolean) value);
} else if (value != null) {
skipAltTokensConfig = Boolean.valueOf(value.toString());
}
// else no config -> will use the default
// (4) parse Origin information
value = properties.get(ORIGIN);
if (value instanceof RDFTerm) {
origin = (RDFTerm) value;
} else if (value instanceof String) {
try {
URI originUri = new URI((String) value);
if (originUri.isAbsolute()) {
origin = new IRI((String) value);
} else {
origin = new PlainLiteralImpl((String) value);
}
} catch (URISyntaxException e) {
origin = new PlainLiteralImpl((String) value);
}
log.info(" - origin: {}", origin);
} else if (value != null) {
log.warn("Values of the {} property MUST BE of type RDFTerm or String " + "(parsed: {} (type:{}))", new Object[] { ORIGIN, value, value.getClass() });
}
// else no ORIGIN information provided
// (5) init the FST configuration
// We can create the default configuration only here, as it depends on the
// name of the solrIndex
String defaultConfig = "*;" + IndexConfiguration.PARAM_FST + "=" + indexReference.getIndex() + ";" + IndexConfiguration.PARAM_FIELD + "=" + IndexConfiguration.DEFAULT_FIELD;
fstConfig = new LanguageConfiguration(IndexConfiguration.FST_CONFIG, new String[] { defaultConfig });
// now set the actual configuration parsed to the engine
value = properties.get(IndexConfiguration.FST_CONFIG);
if (value != null && !StringUtils.isBlank(value.toString())) {
fstConfig.setConfiguration(properties);
}
// else keep the default
value = properties.get(IndexConfiguration.FST_FOLDER);
if (value instanceof String) {
this.fstFolder = ((String) value).trim();
if (this.fstFolder.isEmpty()) {
this.fstFolder = null;
}
} else if (value == null) {
this.fstFolder = null;
} else {
throw new ConfigurationException(IndexConfiguration.FST_FOLDER, "Values MUST BE of type String " + "(found: " + value.getClass().getName() + ")!");
}
// (6) Create the ThreadPool used for the runtime creation of FST models
value = properties.get(FST_THREAD_POOL_SIZE);
int tpSize;
if (value instanceof Number) {
tpSize = ((Number) value).intValue();
} else if (value != null) {
try {
tpSize = Integer.parseInt(value.toString());
} catch (NumberFormatException e) {
throw new ConfigurationException(FST_THREAD_POOL_SIZE, "Unable to parse the integer FST thread pool size from the " + "configured " + value.getClass().getSimpleName() + " '" + value + "'!", e);
}
} else {
tpSize = -1;
}
if (tpSize <= 0) {
// if configured value <= 0 we use the default
tpSize = DEFAULT_FST_THREAD_POOL_SIZE;
}
// build a ThreadFactoryBuilder for low-priority daemon threads that use a meaningful name
ThreadFactoryBuilder tfBuilder = new ThreadFactoryBuilder();
// should be stopped if the VM closes
tfBuilder.setDaemon(true);
// low priority
tfBuilder.setPriority(Thread.MIN_PRIORITY);
tfBuilder.setNameFormat(engineName + "-FstRuntimeCreation-thread-%d");
if (fstCreatorService != null && !fstCreatorService.isTerminated()) {
// NOTE: We cannot call shutdownNow(), because interrupting threads
// here would also close FileChannels used by the SolrCore
// and produce java.nio.channels.ClosedByInterruptException
// exceptions followed by java.nio.channels.ClosedChannelException
// on following calls to affected files of the SolrIndex.
// Because of that we just log a warning and let uncompleted tasks
// complete!
log.warn("some items in a previouse FST Runtime Creation Threadpool have " + "still not finished!");
}
fstCreatorService = Executors.newFixedThreadPool(tpSize, tfBuilder.build());
// (7) Parse the EntityCache config
int entityCacheSize;
value = properties.get(ENTITY_CACHE_SIZE);
if (value instanceof Number) {
entityCacheSize = ((Number) value).intValue();
} else if (value != null) {
try {
entityCacheSize = Integer.parseInt(value.toString());
} catch (NumberFormatException e) {
throw new ConfigurationException(ENTITY_CACHE_SIZE, "Unable to parse the integer EntityCacheSize from the " + "configured " + value.getClass().getSimpleName() + " '" + value + "'!", e);
}
} else {
entityCacheSize = -1;
}
if (entityCacheSize == 0) {
log.info(" ... EntityCache deactivated");
this.entityCacheSize = entityCacheSize;
} else {
this.entityCacheSize = entityCacheSize < 0 ? DEFAULT_ENTITY_CACHE_SIZE : entityCacheSize;
log.info(" ... EntityCache enabled (size: {})", this.entityCacheSize);
}
// (8) parse the Entity type field
value = properties.get(IndexConfiguration.SOLR_TYPE_FIELD);
if (value == null || StringUtils.isBlank(value.toString())) {
solrTypeField = null;
} else {
solrTypeField = value.toString().trim();
}
// (9) parse the Entity Ranking field
value = properties.get(IndexConfiguration.SOLR_RANKING_FIELD);
if (value == null) {
solrRankingField = null;
} else {
solrRankingField = value.toString().trim();
}
// (10) parse the NamedEntity type mappings (if linkingMode = NER)
if (linkingMode == LinkingModeEnum.NER) {
nerTypeMappings = new HashMap<String, Set<String>>();
value = properties.get(NAMED_ENTITY_TYPE_MAPPINGS);
if (value instanceof String[]) {
// support array
value = Arrays.asList((String[]) value);
} else if (value instanceof String) {
// single value
value = Collections.singleton(value);
}
if (value instanceof Collection<?>) {
// and collection
log.info(" - process Named Entity Type Mappings (used by LinkingMode: {})", linkingMode);
configs: for (Object o : (Iterable<?>) value) {
if (o != null) {
StringBuilder usage = new StringBuilder("usage: ");
usage.append("'{namedEntity-tag-or-uri} > {entityType-1}[,{entityType-n}]'");
String[] config = o.toString().split(">");
String namedEntityType = config[0].trim();
if (namedEntityType.isEmpty()) {
log.warn("Invalid Type Mapping Config '{}': Missing namedEntityType ({}) -> ignore this config", o, usage);
continue configs;
}
if (NamespaceMappingUtils.getPrefix(namedEntityType) != null) {
namedEntityType = NamespaceMappingUtils.getConfiguredUri(prefixService, NAMED_ENTITY_TYPE_MAPPINGS, namedEntityType);
}
if (config.length < 2 || config[1].isEmpty()) {
log.warn("Invalid Type Mapping Config '{}': Missing dc:type URI '{}' ({}) -> ignore this config", o, usage);
continue configs;
}
String entityTypes = config[1].trim();
if (config.length > 2) {
log.warn("Configuration after 2nd '>' gets ignored. Will use mapping '{} > {}' from config {}", new Object[] { namedEntityType, entityTypes, o });
}
Set<String> types = nerTypeMappings.get(namedEntityType);
if (types == null) {
// add new element to the mapping
types = new HashSet<String>();
nerTypeMappings.put(namedEntityType, types);
}
for (String entityType : entityTypes.split(";")) {
entityType = entityType.trim();
if (!entityType.isEmpty()) {
String typeUri;
if ("*".equals(entityType)) {
// null is used as wildcard
typeUri = null;
} else {
typeUri = NamespaceMappingUtils.getConfiguredUri(prefixService, NAMED_ENTITY_TYPE_MAPPINGS, entityType);
}
log.info(" - add {} > {}", namedEntityType, typeUri);
types.add(typeUri);
}
// else ignore empty mapping
}
}
}
} else {
// no mappings defined ... set wildcard mapping
log.info(" - No Named Entity type mappings configured. Will use wildcard mappings");
nerTypeMappings = Collections.singletonMap(null, Collections.<String>singleton(null));
}
}
// (11) start tracking the SolrCore
try {
solrServerTracker = new RegisteredSolrServerTracker(bundleContext, indexReference, null) {
@Override
public void removedService(ServiceReference reference, Object service) {
log.info(" ... SolrCore for {} was removed!", reference);
// try to get an other serviceReference from the tracker
if (reference.equals(FstLinkingEngineComponent.this.solrServerReference)) {
updateEngineRegistration(solrServerTracker.getServiceReference(), null);
} else {
log.info(" - removed SolrCore was not used for FST linking");
}
super.removedService(reference, service);
}
@Override
public void modifiedService(ServiceReference reference, Object service) {
log.info(" ... SolrCore for {} was updated!", indexReference);
updateEngineRegistration(solrServerTracker.getServiceReference(), null);
super.modifiedService(reference, service);
}
@Override
public SolrServer addingService(ServiceReference reference) {
SolrServer server = super.addingService(reference);
if (solrCore != null) {
log.info("Multiple SolrCores for name {}! Will update engine " + "with the newly added {}!", new Object[] { solrCore.getName(), indexReference, reference });
}
updateEngineRegistration(reference, server);
return server;
}
};
} catch (InvalidSyntaxException e) {
throw new ConfigurationException(SOLR_CORE, "parsed SolrCore name '" + value.toString() + "' is invalid (expected: '[{server-name}:]{indexname}'");
}
try {
solrServerTracker.open();
} catch (RuntimeException e) {
// FIX for STANBOL-1416 (see https://issues.apache.org/jira/browse/STANBOL-1416)
// If an available SolrCore can not be correctly initialized we will
// get the exception here. In this case we want this component to be
// activated and waiting for further service events. Because of that
// we catch here the exception.
log.debug("Error while processing existing SolrCore Service during " + "opening SolrServiceTracker ... waiting for further service" + "Events", e);
}
}
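applyConfig repeats the same defensive parse for FST_THREAD_POOL_SIZE and ENTITY_CACHE_SIZE: accept a Number, fall back to parsing the String form, and fail with a ConfigurationException that names the property. A hedged sketch of that pattern extracted into a reusable helper — this helper is illustrative only and not part of Stanbol:
import java.util.Dictionary;

import org.osgi.service.cm.ConfigurationException;

final class ConfigUtils {

    private ConfigUtils() {}

    /**
     * Reads an int property that may arrive as a Number or as a String
     * (both occur in OSGi configurations, as seen above). Missing values
     * yield defaultValue; unparseable values fail with the property key.
     */
    static int getIntProperty(Dictionary<String, Object> properties, String key,
            int defaultValue) throws ConfigurationException {
        Object value = properties.get(key);
        if (value == null) {
            return defaultValue;
        }
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        try {
            return Integer.parseInt(value.toString().trim());
        } catch (NumberFormatException e) {
            throw new ConfigurationException(key,
                "Unable to parse an integer from the configured "
                    + value.getClass().getSimpleName() + " '" + value + "'!", e);
        }
    }
}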
use of org.osgi.service.cm.ConfigurationException in project stanbol by apache.
the class TopicClassificationEngine method updatePerformanceEstimates.
public synchronized int updatePerformanceEstimates(boolean incremental) throws ClassifierException, TrainingSetException {
checkTrainingSet();
if (evaluationRunning) {
throw new ClassifierException("Another evaluation is already running");
}
int updatedTopics = 0;
// the temporary evaluation Solr server is now created within the #embeddedSolrServerDir
try {
evaluationRunning = true;
// 3-fold CV is hardcoded for now
int cvFoldCount = 3;
// make it possible to limit the number of folds to use
int cvIterationCount = 3;
// We will use the training set quite intensively, ensure that the index is packed and its
// statistics are up to date
getTrainingSet().optimize();
for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
updatedTopics = performCVFold(cvFoldIndex, cvFoldCount, cvIterationCount, incremental);
}
SolrServer solrServer = getActiveSolrServer();
solrServer.optimize();
} catch (ConfigurationException e) {
throw new ClassifierException(e);
} catch (IOException e) {
throw new ClassifierException(e);
} catch (SolrServerException e) {
throw new ClassifierException(e);
} finally {
FileUtils.deleteQuietly(__evaluationServerDir);
evaluationRunning = false;
}
return updatedTopics;
}
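Note how the method serializes evaluations via the evaluationRunning flag, wraps the checked Solr and IO exceptions into ClassifierException, and always cleans up in the finally block. A short, hypothetical caller illustrating what that contract looks like from the outside (the engine and log instances are assumed):
// Hypothetical usage of the evaluation entry point shown above.
try {
    int updated = engine.updatePerformanceEstimates(false); // full, non-incremental run
    log.info("Re-evaluated performance estimates for {} topics", updated);
} catch (ClassifierException e) {
    // also covers the wrapped ConfigurationException, IOException and SolrServerException
    log.error("Evaluation failed", e);
} catch (TrainingSetException e) {
    log.error("Training set not accessible", e);
}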
use of org.osgi.service.cm.ConfigurationException in project stanbol by apache.
the class TopicClassificationEngine method performCVFold.
protected int performCVFold(int cvFoldIndex, int cvFoldCount, int cvIterations, boolean incremental) throws ConfigurationException, TrainingSetException, ClassifierException {
cvIterations = cvIterations <= 0 ? cvFoldCount : cvIterations;
log.info(String.format("Performing evaluation %d-fold CV iteration %d/%d on classifier %s", cvFoldCount, cvFoldIndex + 1, cvIterations, engineName));
long start = System.currentTimeMillis();
final TopicClassificationEngine classifier = new TopicClassificationEngine();
try {
if (managedSolrServer != null) {
// OSGi setup: the evaluation server will be generated automatically using the
// managedSolrServer
classifier.bindManagedSolrServer(managedSolrServer);
// TODO: maybe we should use the SolrCoreName instead
classifier.activate(context, getCanonicalConfiguration(engineName + "-evaluation", solrCoreConfig));
} else {
if (__evaluationServer == null) {
__evaluationServerDir = new File(embeddedSolrServerDir, engineName + "-evaluation");
if (!__evaluationServerDir.exists()) {
FileUtils.forceMkdir(__evaluationServerDir);
}
__evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(__evaluationServerDir, "evaluationclassifierserver", "default-topic-model", "default-topic-model");
}
classifier.configure(getCanonicalConfiguration(__evaluationServer, solrCoreConfig));
}
} catch (Exception e) {
throw new ClassifierException(e);
}
// clean all previous concepts from the evaluation classifier in case we are reusing an existing solr
// index from OSGi.
classifier.removeAllConcepts();
// iterate over all the topics to register them in the evaluation classifier
batchOverTopics(new BatchProcessor<SolrDocument>() {
@Override
public int process(List<SolrDocument> batch) throws ClassifierException {
for (SolrDocument topicEntry : batch) {
String conceptId = topicEntry.getFirstValue(conceptUriField).toString();
Collection<Object> broader = topicEntry.getFieldValues(broaderField);
if (broader == null) {
classifier.addConcept(conceptId, null, null);
} else {
List<String> broaderConcepts = new ArrayList<String>();
for (Object broaderConcept : broader) {
broaderConcepts.add(broaderConcept.toString());
}
classifier.addConcept(conceptId, null, broaderConcepts);
}
}
return batch.size();
}
});
// build the model for the current train CV folds
classifier.setCrossValidationInfo(cvFoldIndex, cvFoldCount);
// bind our new classifier to the same training set as the parent
classifier.setTrainingSet(getTrainingSet());
classifier.updateModel(false);
final int foldCount = cvFoldCount;
final int foldIndex = cvFoldIndex;
// iterate over the topics again to compute scores on the test fold
int updatedTopics = batchOverTopics(new BatchProcessor<SolrDocument>() {
@Override
public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
int offset;
int updated = 0;
for (SolrDocument topicMetadata : batch) {
String topic = topicMetadata.getFirstValue(conceptUriField).toString();
List<String> topics = Arrays.asList(topic);
List<String> falseNegativeExamples = new ArrayList<String>();
int truePositives = 0;
int falseNegatives = 0;
int positiveSupport = 0;
offset = 0;
Batch<Example> examples = Batch.emtpyBatch(Example.class);
boolean skipTopic = false;
do {
examples = getTrainingSet().getPositiveExamples(topics, examples.nextOffset);
if (offset == 0 && examples.items.size() < MIN_EVALUATION_SAMPLES) {
// we need a minimum amount of examples, otherwise it's really not
// worth computing statistics
skipTopic = true;
break;
}
for (Example example : examples.items) {
if (!(offset % foldCount == foldIndex)) {
// this example is not part of the test fold, skip it
offset++;
continue;
}
positiveSupport++;
offset++;
List<TopicSuggestion> suggestedTopics = classifier.suggestTopics(example.contents);
boolean match = false;
for (TopicSuggestion suggestedTopic : suggestedTopics) {
if (topic.equals(suggestedTopic.conceptUri)) {
match = true;
truePositives++;
break;
}
}
if (!match) {
falseNegatives++;
if (falseNegativeExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
falseNegativeExamples.add(example.id);
}
}
}
} while (!skipTopic && examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
List<String> falsePositiveExamples = new ArrayList<String>();
int falsePositives = 0;
int negativeSupport = 0;
offset = 0;
examples = Batch.emtpyBatch(Example.class);
do {
if (skipTopic) {
break;
}
examples = getTrainingSet().getNegativeExamples(topics, examples.nextOffset);
for (Example example : examples.items) {
if (!(offset % foldCount == foldIndex)) {
// this example is not part of the test fold, skip it
offset++;
continue;
}
negativeSupport++;
offset++;
List<TopicSuggestion> suggestedTopics = classifier.suggestTopics(example.contents);
for (TopicSuggestion suggestedTopic : suggestedTopics) {
if (topic.equals(suggestedTopic.conceptUri)) {
falsePositives++;
if (falsePositiveExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
falsePositiveExamples.add(example.id);
}
break;
}
}
// we don't need to collect true negatives
}
} while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
if (skipTopic) {
log.debug("Skipping evaluation of {} because too few positive examples.", topic);
} else {
// compute precision, recall and f1 score for the current test fold and topic
float precision = 0;
if (truePositives != 0 || falsePositives != 0) {
precision = truePositives / (float) (truePositives + falsePositives);
}
float recall = 0;
if (truePositives != 0 || falseNegatives != 0) {
recall = truePositives / (float) (truePositives + falseNegatives);
}
updatePerformanceMetadata(topic, precision, recall, positiveSupport, negativeSupport, falsePositiveExamples, falseNegativeExamples);
updated += 1;
}
}
try {
getActiveSolrServer().commit();
} catch (Exception e) {
throw new ClassifierException(e);
}
return updated;
}
});
long stop = System.currentTimeMillis();
log.info(String.format("Finished CV iteration %d/%d on classifier %s in %fs.", cvFoldIndex + 1, cvFoldCount, engineName, (stop - start) / 1000.0));
if (context != null) {
// close open trackers
classifier.deactivate(context);
}
return updatedTopics;
}
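Two small building blocks carry most of the logic above: test-fold membership is decided round-robin by offset % foldCount == foldIndex, and precision/recall are guarded against division by zero. A standalone sketch of just those calculations (illustrative, not Stanbol code):
public class CvFoldMath {

    /** An example at position offset belongs to the test fold iff this holds. */
    static boolean inTestFold(int offset, int foldCount, int foldIndex) {
        return offset % foldCount == foldIndex;
    }

    static float precision(int truePositives, int falsePositives) {
        // guard: with no positive predictions, precision is defined as 0 here
        if (truePositives == 0 && falsePositives == 0) {
            return 0f;
        }
        return truePositives / (float) (truePositives + falsePositives);
    }

    static float recall(int truePositives, int falseNegatives) {
        if (truePositives == 0 && falseNegatives == 0) {
            return 0f;
        }
        return truePositives / (float) (truePositives + falseNegatives);
    }

    public static void main(String[] args) {
        // with 3 folds, offsets 0,3,6,... land in fold 0; 1,4,7,... in fold 1; etc.
        for (int offset = 0; offset < 6; offset++) {
            System.out.printf("offset %d -> in test fold 1? %b%n", offset, inTestFold(offset, 3, 1));
        }
        System.out.println("precision = " + precision(8, 2)); // 0.8
        System.out.println("recall    = " + recall(8, 4));    // ~0.667
    }
}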
use of org.osgi.service.cm.ConfigurationException in project stanbol by apache.
the class ConfiguredSolrCoreTracker method configureSolrCore.
protected void configureSolrCore(Dictionary<String, Object> config, String solrCoreProperty, String defaultCoreId, String solrCoreConfigProperty) throws ConfigurationException {
Object solrCoreInfo = config.get(solrCoreProperty);
if (solrCoreInfo instanceof SolrServer) {
// Bind a fixed Solr server client instead of doing dynamic OSGi lookup using the service tracker.
// This can be useful, for instance, for unit-testing.
solrServer = (SolrServer) config.get(solrCoreProperty);
solrCoreConfig = TopicClassificationEngine.DEFAULT_SOLR_CORE_CONFIG;
} else {
if (context == null) {
throw new ConfigurationException(solrCoreProperty, solrCoreProperty + " should be a SolrServer instance for using" + " the engine without any OSGi context. Got: " + solrCoreInfo);
}
if (solrCoreInfo != null && !solrCoreInfo.toString().trim().isEmpty()) {
this.solrCoreId = solrCoreInfo.toString().trim();
} else {
this.solrCoreId = defaultCoreId;
}
solrCoreConfig = getRequiredStringParam(config, solrCoreConfigProperty, this.solrCoreId + ".solrindex.zip");
try {
IndexReference indexReference = IndexReference.parse(solrCoreId);
// String configName = getRequiredStringParam(config, SOLR_CONFIG, defaultValue)
indexReference = checkInitSolrIndex(indexReference, solrCoreConfig);
// track the solr core OSGi updates
indexTracker = new RegisteredSolrServerTracker(context.getBundleContext(), indexReference);
indexTracker.open();
} catch (Exception e) {
throw new ConfigurationException(solrCoreProperty, e.getMessage(), e);
}
}
}
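The SolrServer-instance branch above exists so tests can inject an embedded server directly instead of going through the OSGi RegisteredSolrServerTracker. A hedged, self-contained sketch of that dual dispatch — class and method names here are hypothetical:
import java.util.Dictionary;

import org.apache.solr.client.solrj.SolrServer;
import org.osgi.service.cm.ConfigurationException;

public class SolrCoreBinder {

    private SolrServer solrServer;
    private String solrCoreId;

    void bind(Dictionary<String, Object> config, String property,
            String defaultCoreId) throws ConfigurationException {
        Object info = config.get(property);
        if (info instanceof SolrServer) {
            // test path: use the injected client directly, no OSGi lookup needed
            solrServer = (SolrServer) info;
        } else if (info != null && !info.toString().trim().isEmpty()) {
            // runtime path: remember the core id for tracker-based lookup
            solrCoreId = info.toString().trim();
        } else if (defaultCoreId != null) {
            solrCoreId = defaultCoreId;
        } else {
            throw new ConfigurationException(property,
                "Either a SolrServer instance or a non-empty core id is required");
        }
    }
}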