Search in sources :

Example 6 with Site

use of org.apache.stanbol.entityhub.servicesapi.site.Site in project stanbol by apache.

the class SiteManagerImpl method findIds.

/**
 * Executes the parsed query on every referenced Site that supports search
 * and returns the union of the entity ids found.
 * <p>
 * Sites that do not support search are skipped. A Site that fails with a
 * {@link SiteException} is logged and skipped, so a single unreachable
 * Site does not fail the whole request.
 * <p>
 * The query reported in the result is chosen as follows: the query as
 * executed by the first Site that contributed results; otherwise the query
 * as executed by the first Site that answered at all; otherwise the parsed
 * query.
 *
 * @param query the query to execute on the referenced Sites
 * @return the distinct ids found over all queried Sites
 */
@Override
public QueryResultList<String> findIds(FieldQuery query) {
    log.debug("findIds for query{}", query);
    // We need to search all referenced Sites
    Set<String> entityIds = new HashSet<String>();
    // TODO: The QueryResultList expects that the query as executed is added
    // to the response. However when executing queries on multiple sites they
    // might support a different set of features and therefore execute
    // different variants. For now we simply return the query as executed by
    // the first Site that contributes results
    FieldQuery processedQuery = null;
    FieldQuery queryWithResults = null;
    for (Site site : referencedSites) {
        if (!site.supportsSearch()) {
            log.debug(" > Site {} does not support queries", site.getId());
            continue;
        }
        log.debug(" > query site {}", site.getId());
        try {
            QueryResultList<String> results = site.findReferences(query);
            if (processedQuery == null) {
                // remember the first query that was executed at all
                processedQuery = results.getQuery();
            }
            if (queryWithResults == null && !results.isEmpty()) {
                // remember the query of the first Site contributing results.
                // (This used to overwrite processedQuery, which left
                // queryWithResults permanently null.)
                queryWithResults = results.getQuery();
            }
            for (String entityId : results) {
                entityIds.add(entityId);
            }
        } catch (SiteException e) {
            // best effort: one failing Site must not break the overall search
            log.warn("Unable to access Site " + site.getConfiguration().getName() + " (id = " + site.getId() + ")", e);
        }
    }
    final FieldQuery reportedQuery;
    if (queryWithResults != null) {
        // use the query with results
        reportedQuery = queryWithResults;
    } else if (processedQuery != null) {
        // if not, a processed one
        reportedQuery = processedQuery;
    } else {
        // else the parsed one
        reportedQuery = query;
    }
    return new QueryResultListImpl<String>(reportedQuery, entityIds.iterator(), String.class);
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Site(org.apache.stanbol.entityhub.servicesapi.site.Site) QueryResultListImpl(org.apache.stanbol.entityhub.core.query.QueryResultListImpl) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException) HashSet(java.util.HashSet)

Example 7 with Site

use of org.apache.stanbol.entityhub.servicesapi.site.Site in project stanbol by apache.

the class SiteManagerImpl method getContent.

/**
 * Looks up the content of an entity in the requested content type.
 * <p>
 * The Sites returned by {@code getSitesByEntityPrefix(entityId)} are asked
 * one after the other; the first non-{@code null} content is returned. A
 * Site that fails with a {@link SiteException} is logged and skipped.
 *
 * @param entityId the id of the entity
 * @param contentType the requested content type
 * @return the content stream of the first Site that provides one, or
 *         {@code null} if no Site is registered for the entity or none of
 *         the candidate Sites returned content
 */
@Override
public InputStream getContent(String entityId, String contentType) {
    final Collection<Site> candidates = getSitesByEntityPrefix(entityId);
    if (candidates.isEmpty()) {
        log.info("No Referenced Site registered for Entity {}", entityId);
        log.debug("Registered Prefixes {}", prefixList);
        return null;
    }
    for (Site candidate : candidates) {
        try {
            final InputStream stream = candidate.getContent(entityId, contentType);
            if (stream == null) {
                // this Site has nothing for the entity -> try the next one
                continue;
            }
            log.debug("Return Content of type {} for Entity {} from referenced site {}", new Object[] { contentType, entityId, candidate.getConfiguration().getName() });
            return stream;
        } catch (SiteException e) {
            // keep going: another candidate Site may still provide the content
            log.warn("Unable to access Site " + candidate.getConfiguration().getName() + " (id = " + candidate.getId() + ")", e);
        }
    }
    log.debug("Entity {} not found on any of the following Sites {}", entityId, candidates);
    return null;
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) InputStream(java.io.InputStream) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException)

Example 8 with Site

use of org.apache.stanbol.entityhub.servicesapi.site.Site in project stanbol by apache.

the class SiteManagerImpl method findEntities.

/**
 * Executes the parsed query on every referenced Site that supports search
 * and returns the union of the entities found.
 * <p>
 * Sites that do not support search are skipped. A Site that fails with a
 * {@link SiteException} is logged and skipped. If an equal entity is
 * returned by more than one Site, the one found first is kept and later
 * ones are ignored (and logged).
 * <p>
 * The query reported in the result is chosen as follows: the query as
 * executed by the first Site that contributed results; otherwise the query
 * as executed by the first Site that answered at all; otherwise the parsed
 * query.
 *
 * @param query the query to execute on the referenced Sites
 * @return the distinct entities found over all queried Sites
 */
@Override
public QueryResultList<Entity> findEntities(FieldQuery query) {
    log.debug("findEntities for query{}", query);
    // TODO: The QueryResultList expects that the query as executed is added
    // to the response. However when executing queries on multiple sites they
    // might support a different set of features and therefore execute
    // different variants. For now we simply return the query as executed by
    // the first Site that contributes results
    FieldQuery processedQuery = null;
    FieldQuery queryWithResults = null;
    Set<Entity> entities = new HashSet<Entity>();
    for (Site site : referencedSites) {
        if (!site.supportsSearch()) {
            // do not search on sites that do not support it
            log.debug(" > Site {} does not support queries", site.getId());
            continue;
        }
        log.debug(" > query site {}", site.getId());
        try {
            QueryResultList<Entity> results = site.findEntities(query);
            if (processedQuery == null) {
                // remember the first query that was executed at all
                processedQuery = results.getQuery();
            }
            if (queryWithResults == null && !results.isEmpty()) {
                // remember the query of the first Site contributing results.
                // (This used to overwrite processedQuery, which left
                // queryWithResults permanently null.)
                queryWithResults = results.getQuery();
            }
            for (Entity rep : results) {
                // Set.add(..) returns false if an equal entity is already
                // present; the existing one is NOT overridden.
                if (!entities.add(rep)) {
                    // TODO: find a solution for this problem
                    // e.g. allow to add the site for entities
                    log.info("Entity {} found on more than one Referenced Site" + " -> Representation of Site {} is ignored", rep.getId(), site.getConfiguration().getName());
                }
            }
        } catch (SiteException e) {
            // best effort: one failing Site must not break the overall search
            log.warn("Unable to access Site " + site.getConfiguration().getName() + " (id = " + site.getId() + ")", e);
        }
    }
    final FieldQuery reportedQuery;
    if (queryWithResults != null) {
        // use the query with results
        reportedQuery = queryWithResults;
    } else if (processedQuery != null) {
        // if not, a processed one
        reportedQuery = processedQuery;
    } else {
        // else the parsed one
        reportedQuery = query;
    }
    return new QueryResultListImpl<Entity>(reportedQuery, entities, Entity.class);
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Site(org.apache.stanbol.entityhub.servicesapi.site.Site) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) QueryResultListImpl(org.apache.stanbol.entityhub.core.query.QueryResultListImpl) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException) HashSet(java.util.HashSet)

Example 9 with Site

use of org.apache.stanbol.entityhub.servicesapi.site.Site in project stanbol by apache.

the class DisambiguatorEngine method computeEnhancements.

/**
 * Disambiguates the entity suggestions of the parsed ContentItem.
 * <p>
 * The method works in three stages:
 * <ol>
 * <li>Under the read lock of the ContentItem, the content language and the
 * {@code DisambiguationData} are read from the ContentItem.</li>
 * <li>For every SavedEntity with more than one suggestion, a disambiguation
 * context is built from the other selected texts found in a window around
 * the mention. If that context is not blank, the Site of the SavedEntity is
 * queried and the results are matched against the suggestions.</li>
 * <li>Under the write lock of the ContentItem, the disambiguation results
 * are applied to the metadata graph.</li>
 * </ol>
 * Original author note: the text of the detected entities is used for a
 * DBpedia similarity (MLT) query, and the results are used to calculate new
 * confidence values that are updated in the metadata.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if querying the Site for a mention fails with a
 *         SiteException
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    String textContent;
    Entry<IRI, Blob> textBlob = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (textBlob != null) {
        try {
            textContent = ContentItemHelper.getText(textBlob.getValue());
        } catch (IOException e) {
            // failing to read the text is only logged; processing continues
            // with textContent == null
            log.warn("Unable to retieve plain text content for ContentItem " + ci.getUri(), e);
            textContent = null;
        }
    } else {
        // no blob with a supported mime type
        textContent = null;
    }
    // NOTE(review): textContent may be null from here on and is later passed
    // to getDisambiguationContext(..) - verify that helper tolerates null.
    Graph graph = ci.getMetadata();
    // (1) read the data from the content item
    String contentLangauge;
    DisambiguationData disData;
    ci.getLock().readLock().lock();
    try {
        contentLangauge = EnhancementEngineHelper.getLanguage(ci);
        // NOTE (rwesten): moved the parsing of the information from the
        // contentItem to a static method of the class holding this information
        // (similar to how it already was for SavedEntity)
        // readEntities(loseConfidence, allEntities, textAnnotations, graph);
        disData = DisambiguationData.createFromContentItem(ci);
    } finally {
        // always release the read lock, even if reading the data fails
        ci.getLock().readLock().unlock();
    }
    // (2) Disambiguate the SavedEntities
    for (SavedEntity savedEntity : disData.textAnnotations.values()) {
        if (savedEntity.getSuggestions().size() <= 1) {
            // no need to disambiguate if at most one suggestion is present
            continue;
        }
        // NOTE: the site is determined from the
        // fise:TextAnnotation <-- dc:relation --
        // fise:EntityAnnotation -- entityhub:site --> "{siteName}"^^xsd:string
        // data.
        // TODO: add configuration to include/exclude Sites by name
        // NOTE(review): site is dereferenced below (site.getId()) without a
        // null check - confirm getSite(..) cannot return null here.
        Site site = siteManager.getSite(savedEntity.getSite());
        // potential types of entities
        // NOTE(review): 'types' is never read in this method.
        Collection<String> types = null;
        // TODO: make configurable
        boolean casesensitive = false;
        // with casesensitive == false the label is always lower-cased
        // (using the default locale)
        String savedEntityLabel = casesensitive ? savedEntity.getName() : savedEntity.getName().toLowerCase();
        // Determine the context used for disambiguation
        // TODO: make this a configurable option
        String disambiguationContext;
        // (0.a) The easiest way is to just use the selection context
        // disambiguationContext = savedEntity.getContext();
        // (0.b) Calculate a context based on a moving window
        String window = getDisambiguationContext(textContent, savedEntity.getName(), savedEntity.getStart(), 100);
        log.info("Use Window: '{}' for '{}'", window, savedEntity.getName());
        // (1) The contextSelections:
        // All other selected texts within the selection context
        List<String> contextSelections = getSelectionsInContext(savedEntity.getName(), disData.allSelectedTexts, window);
        // (alternative to 'window' above: savedEntity.getContext())
        disambiguationContext = unionString(false, contextSelections);
        // (2) I do not understand this variant (see comment for the
        // EntitiesInRange(..) method)
        // List<String> L = EntitiesInRange(disData.directoryTextAnotation,
        // (savedEntity.getStart() + savedEntity.getEnd()) / 2);
        // disambiguationContext = unionString(false,contextSelections);
        // (3) one can build a combination of the above
        // disambiguationContext = unionString(true, //unique adds
        // Collections.singleton(savedEntity.getName()), //the selected text
        // Collections.singleton(context), //the context
        // contextSelections); //other selected parsed in the context
        // or just the name of the entity AND the context
        // disambiguationContext = unionString(false,
        // Collections.singleton(savedEntity.getName()),
        // contextSelections);
        // (4) TODO: I would also like to have the possibility to disambiguate
        // using URIs of Entities suggested for other TextAnnotations
        // within the context.
        // make the similarity query on the Entityhub using the collected
        // information
        QueryResultList<Entity> results;
        log.info(" - Query '{}' for {}@{} with context '{}'", new Object[] { site.getId(), savedEntityLabel, contentLangauge, disambiguationContext });
        if (!StringUtils.isBlank(disambiguationContext)) {
            try {
                results = query(site, savedEntityLabel, contentLangauge, disambiguationContext);
            } catch (SiteException e) {
                // TODO we could also try to catch those errors ...
                // a failing Site aborts the whole engine run for this ContentItem
                throw new EngineException("Unable to disambiguate Mention of '" + savedEntity.getName() + "' on Entityhub Site '" + site.getId() + "!", e);
            }
            log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
            // match the results with the suggestions
            disambiguateSuggestions(results, savedEntity);
        } else {
            // without any context there is nothing to query for
            log.debug(" - not disambiguated because of empty context!");
        }
    }
    // (3) Write back the Results of the Disambiguation process
    // NOTE (rwesten): In the original version of Kritarth this was done as
    // part of (2) - disambiguation. This is now changed as in (2) the
    // disambiguation results are stored in the Suggestions and only
    // applied to the EnhancementStructure in (3). This allows to reduce the
    // coverage of the write lock needed to be applied to the ContentItem.
    ci.getLock().writeLock().lock();
    try {
        applyDisambiguationResults(graph, disData);
    } finally {
        // always release the write lock, even if applying the results fails
        ci.getLock().writeLock().unlock();
    }
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) Graph(org.apache.clerezza.commons.rdf.Graph) SiteException(org.apache.stanbol.entityhub.servicesapi.site.SiteException)

Example 10 with Site

use of org.apache.stanbol.entityhub.servicesapi.site.Site in project stanbol by apache.

the class CoreferenceFinder method buildEntityTypeLabels.

/**
 * Collects the labels of all types of the parsed entity in the given language.
 * <p>
 * The type URIs are read from the {@code RDF_TYPE} field of the entity's
 * representation. Types excluded by the configuration are skipped. For each
 * remaining type the in-memory entity type index is consulted first. On a
 * miss the type is fetched from the referenced Site if one is available,
 * otherwise from the Entityhub; its {@code RDFS_LABEL} values in the given
 * language are collected and added to the index. Types that cannot be
 * resolved contribute no labels.
 *
 * @param entity the entity whose type labels are collected
 * @param language the language of the labels to collect
 * @return the labels of all resolvable, non-excluded types; never {@code null}
 * @throws EngineException declared by the signature; not thrown directly by
 *         the code of this method
 */
private Set<String> buildEntityTypeLabels(Entity entity, String language) throws EngineException {
    final Iterator<Object> typeIt = entity.getRepresentation().get(RDF_TYPE.getUnicodeString());
    final Set<String> collected = new HashSet<String>();
    while (typeIt.hasNext()) {
        final String typeUri = typeIt.next().toString();
        if (this.config.shouldExcludeClass(typeUri)) {
            continue;
        }
        final IRI typeIri = new IRI(typeUri);
        // First try the in memory index
        Set<String> typeLabels = this.entityTypeIndex.lookupEntityType(typeIri, language);
        if (typeLabels == null) {
            // index miss: resolve the type entity, preferring the referenced Site
            final Site referencedSite = getReferencedSite();
            final Entity typeEntity;
            if (referencedSite != null) {
                typeEntity = referencedSite.getEntity(typeUri);
            } else {
                typeEntity = this.entityHub.getEntity(typeUri);
            }
            if (typeEntity != null) {
                typeLabels = new HashSet<String>();
                for (Iterator<Text> labelIt = typeEntity.getRepresentation().get(RDFS_LABEL.getUnicodeString(), language); labelIt.hasNext();) {
                    typeLabels.add(labelIt.next().getText());
                }
                // remember the labels so the next lookup hits the index
                this.entityTypeIndex.addEntityType(typeIri, language, typeLabels);
            }
        }
        if (typeLabels != null) {
            collected.addAll(typeLabels);
        }
    }
    return collected;
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) HashSet(java.util.HashSet)

Aggregations

Site (org.apache.stanbol.entityhub.servicesapi.site.Site)20 SiteException (org.apache.stanbol.entityhub.servicesapi.site.SiteException)12 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)9 HashSet (java.util.HashSet)8 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)6 Path (javax.ws.rs.Path)5 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)5 EntityhubLDPath (org.apache.stanbol.entityhub.ldpath.EntityhubLDPath)5 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)5 ManagedSite (org.apache.stanbol.entityhub.servicesapi.site.ManagedSite)5 MediaType (javax.ws.rs.core.MediaType)4 MediaTypeUtil.getAcceptableMediaType (org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType)4 IRI (org.apache.clerezza.commons.rdf.IRI)3 QueryResultListImpl (org.apache.stanbol.entityhub.core.query.QueryResultListImpl)3 ArrayList (java.util.ArrayList)2 GET (javax.ws.rs.GET)2 WebApplicationException (javax.ws.rs.WebApplicationException)2 Graph (org.apache.clerezza.commons.rdf.Graph)2 Viewable (org.apache.stanbol.commons.web.viewable.Viewable)2 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)2