Search in sources :

Example 21 with Entity

use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.

the class NamedEntityTaggingEngine method computeEntityRecommentations.

/**
     * Computes the entity suggestions for a named entity by querying either the
     * parsed {@link Site} or the {@link Entityhub} for entities whose label
     * matches the name of the named entity.
     * <p>
     * Depending on the type of the named entity (person, organisation, place)
     * an additional <code>rdf:type</code> constraint is added to the query, or
     * the lookup is skipped altogether if that type is disabled. Results are
     * ranked by combining the score reported for the query result with the
     * similarity (see <code>levenshtein(..)</code>) between the entity labels
     * and the name of the named entity.
     * 
     * @param site
     *            The {@link Site} to query or <code>null</code> to use the {@link Entityhub}
     * @param namedEntity
     *            the named entity (name and type) to look up
     * @param subsumedAnnotations
     *            other text annotations for the same entity (currently not read by this method)
     * @param language
     *            the language of the analysed text or <code>null</code> if not available.
     * @return the suggestions for the parsed {@link NamedEntity}: at most
     *         <code>numSuggestions</code> entries, sorted by
     *         {@link Collections#sort(List)}. An empty list if the type of the
     *         named entity is disabled or the query returned no results.
     * @throws EntityhubException
     *             On any Error while looking up Entities via the Entityhub
     */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    // Lower-case with Locale.ROOT so that matching does not depend on the
    // default locale of the JVM (e.g. the Turkish dotless 'i').
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase(java.util.Locale.ROOT);
    Constraint labelConstraint;
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    // Restrict the query by rdf:type depending on the type of the named entity.
    // A disabled type short-circuits the whole lookup.
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
            // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
            // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
            // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
        }
    }
    // request more results than needed, because re-ranking by label
    // similarity may change the order of the results
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    if (null == site) {
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    } else {
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    }
    // if site is NULL use the Entityhub, else the referenced site
    QueryResultList<Entity> results = site == null ? entityhub.findEntities(query) : site.findEntities(query);
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score
    for (Entity guess : results) {
        Suggestion match = new Suggestion(guess);
        Representation rep = match.getEntity().getRepresentation();
        // NOTE(review): a result without a resultScore value would presumably
        // yield null here and fail below on score.doubleValue() - confirm that
        // all query results carry a score.
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            // first (= best, given the ordering assumption) score
            maxScore = score;
        }
        Iterator<Text> labels = rep.getText(nameField);
        // stop as soon as an exact match (similarity 1.0) was found
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            // accept all labels if the content language is unknown, labels
            // without a language and labels in the language of the content
            if (language == null || label.getLanguage() == null || label.getLanguage().startsWith(language)) {
                String labelText = casesensitive ? label.getText() : label.getText().toLowerCase(java.util.Locale.ROOT);
                double actMatch = levenshtein(labelText, namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() != null) {
            if (match.getLevenshtein() == 1.0) {
                if (maxExactScore == null) {
                    maxExactScore = score;
                }
                // normalise exact matches against the best exact score
                match.setScore(score.doubleValue() / maxExactScore.doubleValue());
            } else {
                // normalise partial matches against the best match and the
                // Levenshtein similarity with the label
                match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
            }
            matches.add(match);
        } else {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
        }
    }
    // now sort the results
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 22 with Entity

use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.

the class MockEntityhub method findEntities.

/**
 * Executes the parsed query against the backing yard and wraps every
 * returned {@link Representation} into an {@link Entity} of the site
 * "dbpedia" (without metadata).
 *
 * @param query the query to execute
 * @return the query results as entities, together with the query reported
 *         by the yard result
 * @throws EntityhubException declared by the implemented interface
 */
@Override
public QueryResultList<Entity> findEntities(FieldQuery query) throws EntityhubException {
    log.info("Performing Query: {}", query);
    final QueryResultList<Representation> found = yard.findRepresentation(query);
    final int resultCount = found.size();
    log.info("  ... {} results", resultCount);
    // pre-size the collection, one entity is created per representation
    final Collection<Entity> wrapped = new ArrayList<Entity>(resultCount);
    for (Representation representation : found) {
        log.info("    > {}", representation.getId());
        final Entity entity = new EntityImpl("dbpedia", representation, null);
        wrapped.add(entity);
    }
    return new QueryResultListImpl<Entity>(found.getQuery(), wrapped, Entity.class);
}
Also used : Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) EntityImpl(org.apache.stanbol.entityhub.core.model.EntityImpl) ArrayList(java.util.ArrayList) QueryResultListImpl(org.apache.stanbol.entityhub.core.query.QueryResultListImpl) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation)

Example 23 with Entity

use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.

the class CoreferenceFinder method lookupEntity.

/**
     * Gets an Entity from the configured {@link Site} based on the NER text and type.
     * <p>
     * The query constrains the <code>rdfs:label</code> to the text of the span
     * (case insensitive, in the parsed language) and the <code>rdf:type</code>
     * to the type of the NER annotation of the span. If no site is configured
     * the Entityhub is queried instead.
     * 
     * @param ner
     *            the span of the named entity; its text and the type of its
     *            NER annotation are used as query constraints
     * @param language
     *            the language parsed to the label constraint
     * @return the first Entity returned by the query or <code>null</code> if
     *         the query had no results
     * @throws EngineException
     *             as declared; NOTE(review): presumably raised by
     *             <code>getReferencedSite()</code> - confirm
     */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    final Site referencedSite = getReferencedSite();
    // without a referenced site the Entityhub is used for the lookup
    final boolean useEntityhub = referencedSite == null;
    final FieldQueryFactory factory;
    if (useEntityhub) {
        factory = entityHub.getQueryFactory();
    } else {
        factory = referencedSite.getQueryFactory();
    }
    final FieldQuery fieldQuery = factory.createFieldQuery();

    // label constraint: case insensitive match of the span text
    final String labelText = ner.getSpan();
    final Constraint byLabel = new TextConstraint(labelText, false, language, null);
    fieldQuery.setConstraint(RDFS_LABEL.getUnicodeString(), byLabel);

    // type constraint: the type of the NER annotation of the span
    final String typeUri = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType().getUnicodeString();
    fieldQuery.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(typeUri));

    // only a single result is needed
    fieldQuery.setLimit(1);

    final QueryResultList<Entity> hits;
    if (useEntityhub) {
        hits = entityHub.findEntities(fieldQuery);
    } else {
        hits = referencedSite.findEntities(fieldQuery);
    }
    // We set the limit to 1 so if it found anything it should contain just 1 entry
    return hits.isEmpty() ? null : hits.iterator().next();
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)

Example 24 with Entity

use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.

the class SiteManagerImpl method getEntity.

/**
 * Looks up the Entity with the parsed id on the Sites returned by
 * {@code getSitesByEntityPrefix(entityId)} and returns the first
 * non-<code>null</code> result.
 * <p>
 * Sites that fail with a {@link SiteException} are logged and skipped, so
 * that the remaining Sites are still asked.
 *
 * @param entityId the id of the Entity
 * @return the Entity or <code>null</code> if no Site is registered for the
 *         id or none of the Sites provided the Entity
 */
@Override
public Entity getEntity(String entityId) {
    final Collection<Site> candidates = getSitesByEntityPrefix(entityId);
    if (candidates.isEmpty()) {
        log.info("No Referenced Site registered for Entity {}", entityId);
        log.debug("Registered Prefixes {}", prefixList);
        return null;
    }
    for (Site candidate : candidates) {
        try {
            final Entity found = candidate.getEntity(entityId);
            if (found == null) {
                // not known by this site -> try the next one
                continue;
            }
            log.debug("Return Representation of Site {} for Entity {}", candidate.getConfiguration().getName(), entityId);
            return found;
        } catch (SiteException e) {
            // a failing site must not prevent lookups on the remaining sites
            log.warn("Unable to access Site " + candidate.getConfiguration().getName() + " (id = " + candidate.getId() + ")", e);
        }
    }
    log.debug("Entity {} not found on any of the following Sites {}", entityId, candidates);
    return null;
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException)

Example 25 with Entity

use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.

the class ReferencedSiteImpl method getEntity.

/**
 * Gets the Entity with the parsed id, first from the cache (if one is
 * present) and otherwise via the dereferencer (if one is present).
 * <p>
 * A Representation loaded via the dereferencer is stored in the cache when
 * a cache is available; failures to store are logged and do not fail the
 * lookup. The returned Entity carries metadata stating whether the
 * Representation is the cached version.
 *
 * @param id the id of the Entity
 * @return the Entity or <code>null</code> if no Representation was found.
 *         With {@code CacheStrategy.all} a cache miss returns
 *         <code>null</code> without any remote lookup.
 * @throws SiteException if reading the cache fails and no dereferencer is
 *         available, or if dereferencing fails with an {@link IOException}
 */
@Override
public Entity getEntity(String id) throws SiteException {
    Representation representation = null;
    boolean fromCache = false;
    if (cache != null) {
        try {
            representation = cache.getRepresentation(id);
            fromCache = representation != null;
            if (!fromCache && siteConfiguration.getCacheStrategy() == CacheStrategy.all) {
                // do no remote lookups on CacheStrategy.all!!
                return null;
            }
        } catch (YardException e) {
            if (dereferencer != null) {
                // the remote site can still be used as fallback
                log.warn(String.format("Unable to get Represetnation %s form Cache %s. Will dereference from remote site %s", id, siteConfiguration.getCacheId(), siteConfiguration.getAccessUri()), e);
            } else {
                throw new SiteException(String.format("Unable to get Represetnation %s form Cache %s", id, siteConfiguration.getCacheId()), e);
            }
        }
    }
    if (representation == null && dereferencer != null) {
        try {
            representation = dereferencer.dereference(id);
        } catch (IOException e) {
            throw new SiteException(String.format("Unable to load Representation for entity %s form remote site %s with dereferencer %s", id, siteConfiguration.getAccessUri(), siteConfiguration.getEntityDereferencerType()), e);
        }
        // representation loaded from remote site and cache is available
        // -> cache the representation
        if (representation != null && cache != null) {
            try {
                final long storeStart = System.currentTimeMillis();
                // continue with the version returned by the cache
                representation = cache.store(representation);
                fromCache = true;
                log.debug("  - cached Representation {} in {} ms", id, (System.currentTimeMillis() - storeStart));
            } catch (YardException e) {
                // best effort: the dereferenced representation is still returned
                log.warn(String.format("Unable to cache Represetnation %s in Cache %s! Representation not cached!", id, siteConfiguration.getCacheId()), e);
            }
        }
    }
    if (representation == null) {
        return null;
    }
    final Entity entity = new EntityImpl(getId(), representation, null);
    initEntityMetadata(entity, siteMetadata, singletonMap(RdfResourceEnum.isChached.getUri(), (Object) Boolean.valueOf(fromCache)));
    return entity;
}
Also used : Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) YardException(org.apache.stanbol.entityhub.servicesapi.yard.YardException) EntityImpl(org.apache.stanbol.entityhub.core.model.EntityImpl) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) IOException(java.io.IOException) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException)

Aggregations

Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)41 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)16 HashSet (java.util.HashSet)12 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)12 MediaType (javax.ws.rs.core.MediaType)11 EntityhubLDPath (org.apache.stanbol.entityhub.ldpath.EntityhubLDPath)11 MediaTypeUtil.getAcceptableMediaType (org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType)10 Path (javax.ws.rs.Path)9 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)9 Site (org.apache.stanbol.entityhub.servicesapi.site.Site)9 SiteException (org.apache.stanbol.entityhub.servicesapi.site.SiteException)9 GET (javax.ws.rs.GET)7 Viewable (org.apache.stanbol.commons.web.viewable.Viewable)7 QueryResultListImpl (org.apache.stanbol.entityhub.core.query.QueryResultListImpl)7 ArrayList (java.util.ArrayList)6 WebApplicationException (javax.ws.rs.WebApplicationException)6 IRI (org.apache.clerezza.commons.rdf.IRI)6 EntityImpl (org.apache.stanbol.entityhub.core.model.EntityImpl)6 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)6 Produces (javax.ws.rs.Produces)5