Search in sources :

Example 1 with SurfaceForm

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm in project stanbol by apache.

the class DBPSpotlightSpotEnhancementEngine method createEnhancements.

/**
 * Creates one TextAnnotation per DBpedia Spotlight surface form and relates
 * repeated surface forms to each other in the content item's metadata.
 * <p>
 * The first TextAnnotation created for a given surface form name is
 * remembered. For every later surface form carrying the same name a triple
 * {@code (first annotation, DC_RELATION, new annotation)} is added to the
 * metadata graph.
 *
 * @param occs
 *            the surface forms returned by DBpedia Spotlight
 * @param ci
 *            the content item whose metadata graph receives the triples
 * @param content
 *            the text handed on to the text enhancement helper
 * @param lang
 *            the language handed on to the text enhancement helper
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String content, Language lang) {
    Graph metadata = ci.getMetadata();
    // First TextAnnotation created for each surface form name.
    HashMap<String, IRI> firstAnnotationByName = new HashMap<String, IRI>();
    for (SurfaceForm surfaceForm : occs) {
        IRI annotation = SpotlightEngineUtils.createTextEnhancement(surfaceForm, this, ci, content, lang);
        String name = surfaceForm.name;
        if (!firstAnnotationByName.containsKey(name)) {
            // Name not seen before: only remember the annotation, no triple yet.
            firstAnnotationByName.put(name, annotation);
            continue;
        }
        metadata.add(new TripleImpl(firstAnnotationByName.get(name), DC_RELATION, annotation));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 2 with SurfaceForm

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm in project stanbol by apache.

the class DBPSpotlightSpotEnhancementEngine method computeEnhancements.

/**
 * Calculates the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and adding the returned surface forms to the content
 * item's metadata.
 * <p>
 * Nothing is written when the request yields {@code null}. Otherwise the
 * content item's write lock is held while the enhancements are created and,
 * if debug logging is enabled, while the metadata is serialized as RDF/XML
 * for the debug log.
 *
 * @param ci
 *            the {@link ContentItem}
 * @throws EngineException
 *             declared by the engine contract; presumably raised by the
 *             helper calls made here (not visible in this method)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    Collection<SurfaceForm> dbpslGraph = doPostRequest(text, ci.getUri());
    if (dbpslGraph == null) {
        // No result from the endpoint: leave the content item untouched.
        return;
    }
    // Acquire a write lock on the ContentItem when adding the
    // enhancements
    ci.getLock().writeLock().lock();
    try {
        createEnhancements(dbpslGraph, ci, text, language);
        if (log.isDebugEnabled()) {
            Serializer serializer = Serializer.getInstance();
            ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
            serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
            try {
                log.debug("DBpedia Spotlight Spot Enhancements:\n{}", debugStream.toString("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // Report through the logger instead of dumping the stack
                // trace to stderr; the enhancements are already written.
                log.warn("Unable to decode the serialized enhancements as UTF-8 for debug output", e);
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : Language(org.apache.clerezza.commons.rdf.Language) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Serializer(org.apache.clerezza.rdf.core.serializedform.Serializer)

Example 3 with SurfaceForm

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm in project stanbol by apache.

the class DBPSpotlightAnnotateEnhancementEngine method computeEnhancements.

/**
 * Calculates the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and adding the returned annotations to the content
 * item's metadata.
 * <p>
 * Nothing is written when the request yields {@code null}. Otherwise the
 * content item's write lock is held while the enhancements are created and,
 * if debug logging is enabled, while the metadata is serialized as RDF/XML
 * for the debug log.
 *
 * @param ci
 *            the {@link ContentItem}
 * @throws EngineException
 *             declared by the engine contract; presumably raised by the
 *             helper calls made here (not visible in this method)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    Collection<Annotation> dbpslGraph = doPostRequest(text, ci.getUri());
    if (dbpslGraph == null) {
        // No result from the endpoint: leave the content item untouched.
        return;
    }
    // Only allocated once there is a result to process; it is handed to
    // createEnhancements and not read again in this method.
    Map<SurfaceForm, IRI> surfaceForm2TextAnnotation = new HashMap<SurfaceForm, IRI>();
    // Acquire a write lock on the ContentItem when adding the
    // enhancements
    ci.getLock().writeLock().lock();
    try {
        createEnhancements(dbpslGraph, ci, text, language, surfaceForm2TextAnnotation);
        if (log.isDebugEnabled()) {
            Serializer serializer = Serializer.getInstance();
            ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
            serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
            try {
                log.debug("DBPedia Spotlight Enhancements:\n{}", debugStream.toString("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // Report through the logger instead of dumping the stack
                // trace to stderr; the enhancements are already written.
                log.warn("Unable to decode the serialized enhancements as UTF-8 for debug output", e);
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Language(org.apache.clerezza.commons.rdf.Language) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation) Serializer(org.apache.clerezza.rdf.core.serializedform.Serializer)

Example 4 with SurfaceForm

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementEngine method computeEnhancements.

/**
 * Calculates the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and adding the returned surface forms to the content
 * item's metadata.
 * <p>
 * Nothing is written when the request yields {@code null}. Otherwise the
 * content item's write lock is held while the enhancements are created and,
 * if debug logging is enabled, while the metadata is serialized as RDF/XML
 * for the debug log.
 *
 * @param ci
 *            the {@link ContentItem}
 * @throws EngineException
 *             declared by the engine contract; presumably raised by the
 *             helper calls made here (not visible in this method)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    Collection<SurfaceForm> dbpslGraph = doPostRequest(text, ci.getUri());
    if (dbpslGraph == null) {
        // No result from the endpoint: leave the content item untouched.
        return;
    }
    // Acquire a write lock on the ContentItem when adding the
    // enhancements
    ci.getLock().writeLock().lock();
    try {
        createEnhancements(dbpslGraph, ci, text, language);
        if (log.isDebugEnabled()) {
            Serializer serializer = Serializer.getInstance();
            ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
            serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
            try {
                // NOTE(review): the message says "Spot" although this method
                // belongs to the candidates engine - possibly copied from
                // the spot engine; confirm before changing the text.
                log.debug("DBpedia Spotlight Spot Enhancements:\n{}", debugStream.toString("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // Report through the logger instead of dumping the stack
                // trace to stderr; the enhancements are already written.
                log.warn("Unable to decode the serialized enhancements as UTF-8 for debug output", e);
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : Language(org.apache.clerezza.commons.rdf.Language) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Serializer(org.apache.clerezza.rdf.core.serializedform.Serializer)

Example 5 with SurfaceForm

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementEngine method createEnhancements.

/**
 * Generates the enhancement structures for the surface forms returned by
 * DBpedia Spotlight and adds them to the content item's metadata. For each
 * surface form a TextAnnotation is created, followed by one
 * EntityAnnotation per candidate resource of that surface form.
 * <p>
 * A single map keeps both the EntityAnnotations (keyed by the candidate's
 * local name) and the first TextAnnotation registered per surface form name.
 * When the surface form's name is already a key of that map, a triple
 * {@code (mapped annotation, DC_RELATION, new text annotation)} is added to
 * the metadata graph; otherwise the new TextAnnotation is stored under that
 * name.
 *
 * @param occs
 *            the surface forms returned by DBpedia Spotlight
 * @param ci
 *            the content item whose metadata graph receives the triples
 * @param text
 *            the text handed on to the text enhancement helper
 * @param language
 *            the language handed on to the text enhancement helper
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String text, Language language) {
    // TODO create TextEnhancement (form, start, end, type?)
    Graph metadata = ci.getMetadata();
    // Shared lookup: candidate local name -> EntityAnnotation and
    // surface form name -> TextAnnotation.
    HashMap<String, IRI> annotationsByName = new HashMap<String, IRI>();
    for (SurfaceForm surfaceForm : occs) {
        IRI textAnnotation = SpotlightEngineUtils.createTextEnhancement(surfaceForm, this, ci, text, language);
        for (Iterator<CandidateResource> candidates = surfaceForm.resources.iterator(); candidates.hasNext();) {
            CandidateResource candidate = candidates.next();
            IRI entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(candidate, this, ci, textAnnotation);
            annotationsByName.put(candidate.localName, entityAnnotation);
        }
        String name = surfaceForm.name;
        if (!annotationsByName.containsKey(name)) {
            // Name not present yet: only register the text annotation.
            annotationsByName.put(name, textAnnotation);
            continue;
        }
        metadata.add(new TripleImpl(annotationsByName.get(name), DC_RELATION, textAnnotation));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) CandidateResource(org.apache.stanbol.enhancer.engines.dbpspotlight.model.CandidateResource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

SurfaceForm (org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 HashMap (java.util.HashMap)3 IRI (org.apache.clerezza.commons.rdf.IRI)3 Language (org.apache.clerezza.commons.rdf.Language)3 Serializer (org.apache.clerezza.rdf.core.serializedform.Serializer)3 Graph (org.apache.clerezza.commons.rdf.Graph)2 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)2 Annotation (org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)1 CandidateResource (org.apache.stanbol.enhancer.engines.dbpspotlight.model.CandidateResource)1