Search in sources :

Example 1 with LabelTokenizer

use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in project stanbol by apache.

the class EntityCoMentionEngine method computeEnhancements.

/**
 * Links co-mentions of already annotated entities within the text of the
 * parsed ContentItem.
 * <p>
 * The method runs in three phases: (1) under the read lock all resources typed
 * as {@code ENHANCER_TEXTANNOTATION} are registered with a
 * {@link ContentItemMentionBuilder}; (2) an {@link EntityLinker} is run using
 * that builder for both of the arguments it is passed as; (3) under the write
 * lock the linked entities are handed to {@code writeComentions(..)}.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if no LabelTokenizer service is available or if the
 *         EntityLinker fails with an {@link EntitySearcherException}
 * @throws IllegalStateException if no language configuration exists for the
 *         language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String lang = getLanguage(this, ci, true);
    LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang
                + "' is not configured to be processed by this Engine. "
                + "As this is already checked within the canEnhance(..) method "
                + "this may indicate an bug in the used EnhanceemntJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        Object[] logArgs = new Object[] { ci.getUri().getUnicodeString(), lang, StringUtils.abbreviate(analysedText.getSpan(), 100) };
        log.debug("compute co-mentions for ContentItem {} language {}  text={}", logArgs);
    }
    LabelTokenizer tokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (tokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }

    // in-memory index over the entity mentions already present in the metadata
    ContentItemMentionBuilder mentionIndex = new ContentItemMentionBuilder(tokenizer, lang, linkerConfig.getDefaultLanguage());
    Graph graph = ci.getMetadata();
    // remembers every TextAnnotation that was registered with the index
    Set<IRI> registeredAnnotations = new HashSet<IRI>();
    ci.getLock().readLock().lock();
    try {
        Iterator<Triple> typeTriples = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (typeTriples.hasNext()) {
            IRI textAnnotation = (IRI) typeTriples.next().getSubject();
            mentionIndex.registerTextAnnotation(textAnnotation, graph);
            registeredAnnotations.add(textAnnotation);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }

    // the mention index is passed twice, exactly as the constructor call requires
    EntityLinker linker = new EntityLinker(analysedText, lang, langConfig, mentionIndex, linkerConfig, tokenizer, mentionIndex);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }

    // write the co-mention results to the metadata of the ContentItem
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, linker.getLinkedEntities().values(), lang, registeredAnnotations);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LanguageProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityLinker(org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker) Triple(org.apache.clerezza.commons.rdf.Triple) NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) Graph(org.apache.clerezza.commons.rdf.Graph) LabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer) ContentItemMentionBuilder(org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder) HashSet(java.util.HashSet)

Example 2 with LabelTokenizer

use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in project stanbol by apache.

the class MainLabelTokenizer method tokenize.

/**
 * Tokenizes the label by delegating to the tracked {@link LabelTokenizer}
 * services returned by {@code getTokenizers(language)}, in iteration order.
 * The result of the first tokenizer that returns a non-{@code null} array is
 * used; references whose service is unavailable are skipped.
 *
 * @param label the label to tokenize
 * @param language the language of the label
 * @return the tokens, or {@code null} if none of the tokenizers produced a
 *         result (a warning is logged in that case)
 * @see org.apache.stanbol.enhancer.engines.keywordextraction.impl.LabelTokenizerManager#tokenize(java.lang.String, java.lang.String)
 */
@Override
public String[] tokenize(String label, String language) {
    for (ServiceReference candidate : getTokenizers(language)) {
        LabelTokenizer delegate = (LabelTokenizer) labelTokenizerTracker.getService(candidate);
        if (delegate == null) {
            // the service behind this reference is not available -> try the next one
            continue;
        }
        log.trace(" > use Tokenizer {} for language {}", delegate.getClass(), language);
        String[] result = delegate.tokenize(label, language);
        if (result == null) {
            // this tokenizer produced nothing for the label -> try the next one
            continue;
        }
        if (log.isTraceEnabled()) {
            log.trace("   - tokenized {} -> {}", label, Arrays.toString(result));
        }
        return result;
    }
    log.warn("No LabelTokenizer availabel for language {} -> return null", language);
    return null;
}
Also used : LabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer) ServiceReference(org.osgi.framework.ServiceReference)

Example 3 with LabelTokenizer

use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in project stanbol by apache.

the class EntityhubLinkingEngine method activate.

/**
 * Activates this component: reads the configuration, creates the entity
 * searcher for the configured site, creates the {@link EntityLinkingEngine}
 * and starts tracking {@link LabelTokenizer} services.
 * <p>
 * The engine is created without a LabelTokenizer. The tracker's customizer
 * keeps the engine in sync with the last (highest) element of
 * {@code labelTokenizersRefs} whenever a tokenizer service is added, modified
 * or removed.
 *
 * @param ctx the component context providing the configuration properties and
 *        the bundle context
 * @throws ConfigurationException if {@code SITE_ID} or {@code PROPERTY_NAME}
 *         is missing or empty (NOTE(review): the two {@code createInstance(..)}
 *         calls may throw it as well - not visible from here)
 */
@Activate
@SuppressWarnings("unchecked")
protected void activate(ComponentContext ctx) throws ConfigurationException {
    Dictionary<String, Object> properties = ctx.getProperties();
    bundleContext = ctx.getBundleContext();
    EntityLinkerConfig linkerConfig = EntityLinkerConfig.createInstance(properties, prefixService);
    TextProcessingConfig textProcessingConfig = TextProcessingConfig.createInstance(properties);
    Object value = properties.get(SITE_ID);
    // init the EntitySource
    if (value == null) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!");
    }
    siteName = value.toString();
    if (siteName.isEmpty()) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!");
    }
    // collect the metadata that are later set to the enhancement engine
    engineMetadata = new Hashtable<String, Object>();
    value = properties.get(PROPERTY_NAME);
    if (value == null || value.toString().isEmpty()) {
        throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
    }
    String engineName = value.toString();
    engineMetadata.put(PROPERTY_NAME, value);
    value = properties.get(Constants.SERVICE_RANKING);
    engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);
    // init the tracking entity searcher
    trackedServiceCount = 0;
    // Locale.ROOT keeps the comparison independent of the JVM default locale
    // (e.g. the Turkish locale lower-cases 'I' to a dotless 'i')
    if (Entityhub.ENTITYHUB_IDS.contains(siteName.toLowerCase(java.util.Locale.ROOT))) {
        entitySearcher = new EntityhubSearcher(bundleContext, 10, this);
    } else {
        entitySearcher = new ReferencedSiteSearcher(bundleContext, siteName, 10, this);
    }
    labelTokenizerTracker = new ServiceTracker(bundleContext, LabelTokenizer.class.getName(), new ServiceTrackerCustomizer() {

        @Override
        public Object addingService(ServiceReference reference) {
            Object service = bundleContext.getService(reference);
            if (service == null) {
                return service;
            }
            synchronized (labelTokenizersRefs) {
                labelTokenizersRefs.add(reference);
                syncEngineLabelTokenizer(reference, service);
            }
            return service;
        }

        @Override
        public void removedService(ServiceReference reference, Object service) {
            synchronized (labelTokenizersRefs) {
                labelTokenizersRefs.remove(reference);
                // the removed service must no longer be used -> no "changed" service
                syncEngineLabelTokenizer(null, null);
            }
            bundleContext.ungetService(reference);
        }

        @Override
        public void modifiedService(ServiceReference reference, Object service) {
            synchronized (labelTokenizersRefs) {
                // remove and re-add the reference so that it is re-positioned
                // within the set based on its modified properties
                labelTokenizersRefs.remove(reference);
                labelTokenizersRefs.add(reference);
                syncEngineLabelTokenizer(reference, service);
            }
        }

        /**
         * Sets the LabelTokenizer of the engine to the service of the last
         * (highest) reference in {@code labelTokenizersRefs}, or to
         * {@code null} if no reference is left. Must be called while holding
         * the monitor of {@code labelTokenizersRefs}.
         *
         * @param changed the reference that was just added/modified, or
         *        {@code null} if a reference was removed
         * @param changedService the service object of {@code changed}; used
         *        directly because the tracker may not yet return it
         */
        private void syncEngineLabelTokenizer(ServiceReference changed, Object changedService) {
            EntityLinkingEngine engine = entityLinkingEngine;
            ServiceTracker tracker = labelTokenizerTracker;
            if (engine == null || tracker == null) {
                // if engine or tracker is null deactivate was already called
                return;
            }
            if (labelTokenizersRefs.isEmpty()) {
                log.info(" ... setting LabelTokenizer of Engine '{}' to null", engine.getName());
                engine.setLabelTokenizer(null);
                return;
            }
            ServiceReference highest = labelTokenizersRefs.last();
            LabelTokenizer lt = (LabelTokenizer) (changed != null && changed.equals(highest)
                    ? changedService : tracker.getService(highest));
            if (lt == null) {
                // ServiceTracker#getService(ServiceReference) returns null for
                // references it does not (yet / any longer) track. Keep the
                // current tokenizer instead of failing with a NullPointerException.
                log.debug(" ... LabelTokenizer for {} is not available from the tracker; keeping the current one of Engine '{}'", highest, engine.getName());
                return;
            }
            if (!lt.equals(engine.getLabelTokenizer())) {
                log.info(" ... setting LabelTokenizer of Engine '{}' to {}", engine.getName(), lt);
                engine.setLabelTokenizer(lt);
            }
        }
    });
    // create the engine; the searcher might not be available yet and the
    // LabelTokenizer (null here) is set by the tracker customizer above
    entityLinkingEngine = new EntityLinkingEngine(engineName, entitySearcher, textProcessingConfig, linkerConfig, null);
    // start tracking
    labelTokenizerTracker.open();
    entitySearcher.open();
}
Also used : EntityLinkerConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig) ServiceTracker(org.osgi.util.tracker.ServiceTracker) ServiceTrackerCustomizer(org.osgi.util.tracker.ServiceTrackerCustomizer) ServiceReference(org.osgi.framework.ServiceReference) EntityLinkingEngine(org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine) TextProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig) ConfigurationException(org.osgi.service.cm.ConfigurationException) LabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer) NoSuchElementException(java.util.NoSuchElementException) Activate(org.apache.felix.scr.annotations.Activate)

Aggregations

LabelTokenizer (org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer)3 ServiceReference (org.osgi.framework.ServiceReference)2 HashSet (java.util.HashSet)1 NoSuchElementException (java.util.NoSuchElementException)1 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 Activate (org.apache.felix.scr.annotations.Activate)1 ContentItemMentionBuilder (org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder)1 EntitySearcherException (org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException)1 EntityLinkerConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig)1 LanguageProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig)1 TextProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig)1 EntityLinkingEngine (org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine)1 EntityLinker (org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker)1 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)1 NlpEngineHelper.getAnalysedText (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText)1 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)1 ConfigurationException (org.osgi.service.cm.ConfigurationException)1