Search in sources :

Example 1 with EntitySearcherException

use of org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException in project stanbol by apache.

In class EntityCoMentionEngine, method computeEnhancements:

/**
 * Computes co-mentions for the parsed ContentItem.
 * <p>
 * All TextAnnotations present in the metadata of the ContentItem are
 * registered with a {@link ContentItemMentionBuilder} (under a read lock).
 * An {@link EntityLinker} is then run against this in-memory index and the
 * linked entities are written back via {@code writeComentions(..)} while
 * holding the write lock.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if no LabelTokenizer service is available or if
 *         the EntityLinker fails with an EntitySearcherException
 * @throws IllegalStateException if no language processing configuration
 *         exists for the language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String lang = getLanguage(this, ci, true);
    LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate an bug in the used "
                + "EnhanceemntJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        Object[] logArgs = new Object[] {
                ci.getUri().getUnicodeString(),
                lang,
                StringUtils.abbreviate(analysedText.getSpan(), 100) };
        log.debug("compute co-mentions for ContentItem {} language {}  text={}", logArgs);
    }
    LabelTokenizer tokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (tokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }
    // in-memory index holding the entities mentioned by existing TextAnnotations
    ContentItemMentionBuilder mentionIndex = new ContentItemMentionBuilder(
            tokenizer, lang, linkerConfig.getDefaultLanguage());
    Graph metadata = ci.getMetadata();
    // remembers which TextAnnotations were registered with the index
    Set<IRI> registeredAnnotations = new HashSet<IRI>();
    ci.getLock().readLock().lock();
    try {
        // every TextAnnotation represents a mention of an Entity
        Iterator<Triple> annotationTriples = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (annotationTriples.hasNext()) {
            IRI textAnnotation = (IRI) annotationTriples.next().getSubject();
            mentionIndex.registerTextAnnotation(textAnnotation, metadata);
            registeredAnnotations.add(textAnnotation);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    // the mention index is parsed twice: as the searcher and as the last argument
    EntityLinker linker = new EntityLinker(analysedText, lang, langConfig,
            mentionIndex, linkerConfig, tokenizer, mentionIndex);
    // run the linking process
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        log.error("Unable to link Entities with " + linker, e);
        throw new EngineException(this, ci, "Unable to link Entities with " + linker, e);
    }
    // write the co-mentions (requires the write lock)
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, linker.getLinkedEntities().values(), lang, registeredAnnotations);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LanguageProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityLinker(org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker) Triple(org.apache.clerezza.commons.rdf.Triple) NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) Graph(org.apache.clerezza.commons.rdf.Graph) LabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer) ContentItemMentionBuilder(org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder) HashSet(java.util.HashSet)

Example 2 with EntitySearcherException

use of org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException in project stanbol by apache.

In class EntityhubSearcher, method lookup:

/**
 * Searches the Entityhub for entities matching the parsed search strings.
 * <p>
 * A field query is created for the parsed field, include fields, search
 * strings and languages. The parsed {@code limit} is used when it is
 * positive; otherwise the limit configured on this searcher (if any) is
 * applied. The parsed {@code offset} is only applied when positive.
 *
 * @param field the field to search
 * @param includeFields the fields to include in the query
 * @param search the search strings
 * @param languages the languages to search in. A {@code null} array no
 *        longer causes a NullPointerException when results are wrapped; as
 *        in {@code get(..)}, a {@code null} language set is handed to the
 *        created entities in that case
 * @param limit the maximum number of results or {@code null} to use the
 *        limit configured for this searcher
 * @param offset the offset of the first result or {@code null} for none
 * @return the found entities or an empty list if the query had no results
 * @throws EntitySearcherException if the Entityhub is not active or the
 *         query fails with an EntityhubException
 */
@Override
public Collection<? extends Entity> lookup(IRI field, Set<IRI> includeFields, List<String> search, String[] languages, Integer limit, Integer offset) throws EntitySearcherException {
    Entityhub entityhub = getSearchService();
    if (entityhub == null) {
        throw new EntitySearcherException("The Entityhub is currently not active");
    }
    // NOTE(review): whether createFieldQuery(..) accepts null languages is not
    // visible from here - confirm against EntitySearcherUtils
    FieldQuery query = EntitySearcherUtils.createFieldQuery(entityhub.getQueryFactory(), field, includeFields, search, languages);
    if (limit != null && limit > 0) {
        query.setLimit(limit);
    } else if (this.limit != null) {
        // fall back to the limit configured for this searcher
        query.setLimit(this.limit);
    }
    if (offset != null && offset.intValue() > 0) {
        query.setOffset(offset.intValue());
    }
    QueryResultList<Representation> results;
    try {
        results = entityhub.find(query);
    } catch (EntityhubException e) {
        // Arrays.toString(..) is null-safe; the leading space keeps the
        // language list separated from the rest of the message
        throw new EntitySearcherException("Exception while searching for " + search + '@' + Arrays.toString(languages) + " in the Entityhub", e);
    }
    if (results.isEmpty()) {
        return Collections.emptyList();
    }
    // Arrays.asList(null) would throw a NullPointerException, so a missing
    // languages array is mapped to a null set (same convention as get(..))
    Set<String> languagesSet = languages == null ? null : new HashSet<String>(Arrays.asList(languages));
    Collection<Entity> entities = new ArrayList<Entity>(results.size());
    for (Representation result : results) {
        entities.add(new EntityhubEntity(result, null, languagesSet));
    }
    return entities;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) Entityhub(org.apache.stanbol.entityhub.servicesapi.Entityhub) HashSet(java.util.HashSet)

Example 3 with EntitySearcherException

use of org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException in project stanbol by apache.

In class EntityLinkingEngine, method computeEnhancements:

/**
 * Links entities mentioned in the text of the parsed ContentItem.
 * <p>
 * The AnalysedText and language of the ContentItem are processed by an
 * {@link EntityLinker} using the configured EntitySearcher. The linked
 * entities are written via {@code writeEnhancements(..)} while holding
 * the write lock of the ContentItem.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if the engine is in offline mode but the
 *         EntitySearcher does not support it, or if the EntityLinker fails
 *         with an EntitySearcherException
 * @throws IllegalStateException if no language processing configuration
 *         exists for the language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    log.trace(" enhance ci {}", ci.getUri());
    if (isOfflineMode()) {
        // offline operation is only possible if the searcher supports it
        if (!entitySearcher.supportsOfflineMode()) {
            throw new EngineException(this, ci, "Offline mode is not supported by the used EntitySearcher!", null);
        }
    }
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    log.debug("  > AnalysedText {}", analysedText);
    String lang = getLanguage(this, ci, true);
    if (log.isDebugEnabled()) {
        Object[] logArgs = new Object[] {
                ci.getUri().getUnicodeString(),
                lang,
                StringUtils.abbreviate(analysedText.getSpan(), 100) };
        log.debug("computeEnhancements for ContentItem {} language {} text={}", logArgs);
    }
    log.debug("  > Language {}", lang);
    LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate an bug in the used "
                + "EnhanceemntJobManager implementation!");
    }
    EntityLinker linker = new EntityLinker(analysedText, lang, langConfig,
            entitySearcher, linkerConfig, labelTokenizer);
    // run the linking process
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        log.error("Unable to link Entities with " + linker, e);
        throw new EngineException(this, ci, "Unable to link Entities with " + linker, e);
    }
    if (log.isInfoEnabled()) {
        linker.logStatistics(log);
    }
    // writing the results requires the write lock
    ci.getLock().writeLock().lock();
    try {
        writeEnhancements(ci, linker.getLinkedEntities().values(), lang,
                linkerConfig.isWriteEntityRankings());
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) LanguageProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityLinker(org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker)

Example 4 with EntitySearcherException

use of org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException in project stanbol by apache.

In class EntityhubSearcher, method get:

/**
 * Retrieves the entity with the parsed id from the Entityhub.
 *
 * @param id the id of the entity. {@code null} or an empty id results in
 *        {@code null} being returned
 * @param fields the fields parsed on to the created entity
 * @param languages the languages parsed on to the created entity. If
 *        {@code null} or empty, a {@code null} language set is used
 * @return the entity or {@code null} if the id is missing/empty or the
 *         Entityhub does not know an entity with that id
 * @throws EntitySearcherException if the Entityhub is not active or the
 *         lookup fails with an EntityhubException
 */
@Override
public Entity get(IRI id, Set<IRI> fields, String... languages) throws EntitySearcherException {
    if (id == null || id.getUnicodeString().isEmpty()) {
        return null;
    }
    Entityhub hub = getSearchService();
    if (hub == null) {
        throw new EntitySearcherException("The Entityhub is currently not active");
    }
    org.apache.stanbol.entityhub.servicesapi.model.Entity found;
    try {
        found = hub.getEntity(id.getUnicodeString());
    } catch (EntityhubException e) {
        throw new EntitySearcherException("Exception while getting " + id + " from the Entityhub", e);
    }
    if (found == null) {
        return null;
    }
    // no languages -> null; one language -> singleton; otherwise a HashSet
    Set<String> langs = null;
    if (languages != null && languages.length >= 1) {
        if (languages.length == 1) {
            langs = Collections.singleton(languages[0]);
        } else {
            langs = new HashSet<String>(Arrays.asList(languages));
        }
    }
    return new EntityhubEntity(found.getRepresentation(), fields, langs);
}
Also used : EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) Entityhub(org.apache.stanbol.entityhub.servicesapi.Entityhub) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) HashSet(java.util.HashSet)

Aggregations

EntitySearcherException (org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException)4 HashSet (java.util.HashSet)3 LanguageProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig)2 EntityLinker (org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker)2 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)2 NlpEngineHelper.getAnalysedText (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText)2 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)2 Entityhub (org.apache.stanbol.entityhub.servicesapi.Entityhub)2 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)2 ArrayList (java.util.ArrayList)1 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 ContentItemMentionBuilder (org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder)1 Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity)1 LabelTokenizer (org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer)1 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)1 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)1