use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.
the class NamedEntityTaggingEngine method computeEntityRecommentations.
/**
 * Computes the entity suggestions for a single {@link NamedEntity}.
 * <p>
 * A label query for the name of the entity is sent to the given {@link Site}, or to
 * the {@link Entityhub} if no site is given. For persons, organisations and places
 * the query is additionally restricted to the type configured for that category;
 * if processing of the category is disabled an empty list is returned without
 * sending a query. Every result is rated by the best Levenshtein similarity between
 * one of its labels and the entity name, combined with the result score reported by
 * the query.
 *
 * @param site
 *            the {@link Site} to query or <code>null</code> to use the {@link Entityhub}
 * @param namedEntity
 *            the named entity (name and type) to look up
 * @param subsumedAnnotations
 *            other text annotations for the same entity (not read by this method)
 * @param language
 *            the language of the analysed text or <code>null</code> if not available.
 * @return the suggestions for the parsed {@link NamedEntity}, sorted by the natural
 *         ordering of {@link Suggestion} and limited to <code>numSuggestions</code>
 *         entries; an empty list if the entity type is disabled or nothing was found
 * @throws EntityhubException
 *             On any Error while looking up Entities via the Entityhub
 */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    // the objects themselves are handed to the logger so that the string
    // conversion only happens when TRACE is actually enabled
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass());
    FieldQuery query = queryFactory.createFieldQuery();
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. the Turkish dotless i) for both the query and the label comparison
    String namedEntityLabel = casesensitive ? namedEntity.getName()
            : namedEntity.getName().toLowerCase(java.util.Locale.ROOT);
    Constraint labelConstraint;
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
            // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
            // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
            // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
        }
    }
    // request more results than needed, because results may be dropped or
    // re-ranked by the label comparison below
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass(), query);
    if (null == site) {
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    } else {
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    }
    QueryResultList<Entity> results = site == null
            ? entityhub.findEntities(query) // use the Entityhub
            : site.findEntities(query); // else the referenced site
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score
    for (Entity guess : results) {
        Suggestion match = new Suggestion(guess);
        Representation rep = match.getEntity().getRepresentation();
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            // first (= best) score seen so far; stays null until a result has a score
            maxScore = score;
        }
        Iterator<Text> labels = rep.getText(nameField);
        // stop comparing labels as soon as an exact match was found
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            String labelLanguage = label.getLanguage();
            // accept all labels if the content language is unknown, labels
            // without a language and labels in the language of the content
            if (language == null || labelLanguage == null || labelLanguage.startsWith(language)) {
                String labelText = casesensitive ? label.getText()
                        : label.getText().toLowerCase(java.util.Locale.ROOT);
                double actMatch = levenshtein(labelText, namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() == null) {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
            continue;
        }
        if (score == null) {
            // without a result score the suggestion can not be normalised;
            // skip it instead of failing with a NullPointerException
            log.debug("No value of {} for Entity {} -> ignore", RdfResourceEnum.resultScore.getUri(),
                match.getEntity().getId());
            continue;
        }
        if (match.getLevenshtein() == 1.0) {
            if (maxExactScore == null) {
                maxExactScore = score;
            }
            // normalise exact matches against the best exact score
            match.setScore(score.doubleValue() / maxExactScore.doubleValue());
        } else {
            // normalise partial matches against the best match and the
            // Levenshtein similarity with the label
            match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
        }
        matches.add(match);
    }
    // now sort the results
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.
the class MockEntityhub method findEntities.
/**
 * Mock implementation: executes the query against the backing yard and wraps
 * every returned {@link Representation} into an {@link Entity} that is
 * attributed to the site "dbpedia".
 *
 * @param query
 *            the query to execute
 * @return the result list holding one entity per representation found
 * @throws EntityhubException
 *             if the lookup in the yard fails
 */
@Override
public QueryResultList<Entity> findEntities(FieldQuery query) throws EntityhubException {
    log.info("Performing Query: {}", query);
    final QueryResultList<Representation> found = yard.findRepresentation(query);
    final int hitCount = found.size();
    log.info(" ... {} results", hitCount);

    final Collection<Entity> wrapped = new ArrayList<Entity>(hitCount);
    for (Representation representation : found) {
        log.info(" > {}", representation.getId());
        // the mock does not provide any entity metadata
        final Entity entity = new EntityImpl("dbpedia", representation, null);
        wrapped.add(entity);
    }
    return new QueryResultListImpl<Entity>(found.getQuery(), wrapped, Entity.class);
}
use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.
the class CoreferenceFinder method lookupEntity.
/**
 * Looks up a single {@link Entity} for a recognised named entity.
 * <p>
 * The query constrains the RDFS label to the text of the span and the RDF type
 * to the type of the span's NER annotation. It is sent to the configured
 * {@link Site}, or to the Entityhub if no site is configured.
 *
 * @param ner
 *            the span of the named entity; its text and NER annotation are used
 * @param language
 *            the language passed to the label constraint
 * @return the first entity returned by the query or <code>null</code> if the
 *         query had no results
 * @throws EngineException
 */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    final Site referencedSite = getReferencedSite();
    final boolean useEntityhub = referencedSite == null;

    final FieldQueryFactory factory;
    if (useEntityhub) {
        factory = entityHub.getQueryFactory();
    } else {
        factory = referencedSite.getQueryFactory();
    }
    final FieldQuery entityQuery = factory.createFieldQuery();

    // label must match the text of the named entity
    final String nerText = ner.getSpan();
    final Constraint byLabel = new TextConstraint(nerText, false, language, null);
    entityQuery.setConstraint(RDFS_LABEL.getUnicodeString(), byLabel);

    // type must match the type of the NER annotation
    entityQuery.setConstraint(RDF_TYPE.getUnicodeString(),
        new ReferenceConstraint(ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType().getUnicodeString()));

    // only the first hit is of interest
    entityQuery.setLimit(1);

    final QueryResultList<Entity> hits;
    if (useEntityhub) {
        hits = entityHub.findEntities(entityQuery);
    } else {
        hits = referencedSite.findEntities(entityQuery);
    }

    // because of the limit of 1 a non-empty result holds exactly the wanted entry
    return hits.isEmpty() ? null : hits.iterator().next();
}
use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.
the class SiteManagerImpl method getEntity.
/**
 * Retrieves an entity by asking, one after the other, every site that is
 * registered for the prefix of the given id.
 * <p>
 * A site that can not be accessed is logged and skipped, so that the
 * remaining sites are still asked.
 *
 * @param entityId
 *            the id of the entity
 * @return the entity as returned by the first site that knows it, or
 *         <code>null</code> if no site is registered for the id or none of
 *         the registered sites returned the entity
 */
@Override
public Entity getEntity(String entityId) {
    final Collection<Site> candidates = getSitesByEntityPrefix(entityId);
    if (candidates.isEmpty()) {
        log.info("No Referenced Site registered for Entity {}", entityId);
        log.debug("Registered Prefixes {}", prefixList);
        return null;
    }

    for (Site candidate : candidates) {
        try {
            final Entity found = candidate.getEntity(entityId);
            if (found == null) {
                // not known by this site -> try the next one
                continue;
            }
            log.debug("Return Representation of Site {} for Entity {}", candidate.getConfiguration().getName(), entityId);
            return found;
        } catch (SiteException e) {
            // a failing site must not prevent lookups on the other sites
            log.warn("Unable to access Site " + candidate.getConfiguration().getName() + " (id = " + candidate.getId() + ")", e);
        }
    }

    log.debug("Entity {} not found on any of the following Sites {}", entityId, candidates);
    return null;
}
use of org.apache.stanbol.entityhub.servicesapi.model.Entity in project stanbol by apache.
the class ReferencedSiteImpl method getEntity.
/**
 * Retrieves the entity with the given id from this site.
 * <p>
 * The lookup proceeds in three steps:
 * <ol>
 * <li>If a cache is present it is asked first. On a cache miss with
 * {@link CacheStrategy#all} no remote lookup is done and <code>null</code> is
 * returned.</li>
 * <li>If there is still no representation and a dereferencer is present, the
 * representation is dereferenced remotely and - if a cache is present - stored
 * in the cache. A failure to store it is only logged.</li>
 * <li>A found representation is wrapped into an {@link Entity} whose metadata
 * record whether the cached version was used.</li>
 * </ol>
 *
 * @param id
 *            the id of the entity
 * @return the entity or <code>null</code> if it was not found
 * @throws SiteException
 *             if the cache can not be read and no dereferencer is available,
 *             or if dereferencing from the remote site fails
 */
@Override
public Entity getEntity(String id) throws SiteException {
    Representation representation = null;
    Boolean fromCache = Boolean.FALSE;

    if (cache != null) {
        try {
            representation = cache.getRepresentation(id);
            if (representation != null) {
                fromCache = Boolean.TRUE;
            } else if (siteConfiguration.getCacheStrategy() == CacheStrategy.all) {
                // do no remote lookups on CacheStrategy.all!!
                return null;
            }
        } catch (YardException e) {
            if (dereferencer == null) {
                // no fallback available -> report the failure to the caller
                throw new SiteException(String.format("Unable to get Represetnation %s form Cache %s", id, siteConfiguration.getCacheId()), e);
            }
            log.warn(String.format("Unable to get Represetnation %s form Cache %s. Will dereference from remote site %s", id, siteConfiguration.getCacheId(), siteConfiguration.getAccessUri()), e);
        }
    }

    if (representation == null && dereferencer != null) {
        try {
            representation = dereferencer.dereference(id);
        } catch (IOException e) {
            throw new SiteException(String.format("Unable to load Representation for entity %s form remote site %s with dereferencer %s", id, siteConfiguration.getAccessUri(), siteConfiguration.getEntityDereferencerType()), e);
        }
        // loaded from the remote site and a cache is available -> cache it
        if (representation != null && cache != null) {
            try {
                final long storeStart = System.currentTimeMillis();
                // continue with the version returned by the cache
                representation = cache.store(representation);
                fromCache = Boolean.TRUE;
                log.debug(" - cached Representation {} in {} ms", id, (System.currentTimeMillis() - storeStart));
            } catch (YardException e) {
                // the dereferenced (not cached) representation is still returned
                log.warn(String.format("Unable to cache Represetnation %s in Cache %s! Representation not cached!", id, siteConfiguration.getCacheId()), e);
            }
        }
    }

    if (representation == null) {
        return null;
    }
    final Entity entity = new EntityImpl(getId(), representation, null);
    initEntityMetadata(entity, siteMetadata, singletonMap(RdfResourceEnum.isChached.getUri(), (Object) fromCache));
    return entity;
}
Aggregations