Search in sources :

Example 1 with Annotation

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method computeEnhancements.

/**
 * Calculates the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and processing the results.
 * <p>
 * The request is built from the plain text of the content item and the XML
 * produced by {@code getSpottedXml} from the item's metadata. If the request
 * yields a non-null result, the annotations are written to the content item
 * while holding its write lock.
 *
 * @param ci
 *            the {@link ContentItem}
 * @throws EngineException
 *             declared by this method; presumably raised by the helpers it
 *             calls (not visible here)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    // Retrieve the existing text annotations (requires read lock)
    // NOTE(review): no read lock is acquired in this method; presumably
    // getSpottedXml takes it internally - confirm.
    Graph graph = ci.getMetadata();
    String xmlTextAnnotations = this.getSpottedXml(text, graph);
    Collection<Annotation> dbpslGraph = doPostRequest(text, xmlTextAnnotations, ci.getUri());
    if (dbpslGraph != null) {
        // Acquire a write lock on the ContentItem when adding the
        // enhancements
        ci.getLock().writeLock().lock();
        try {
            createEnhancements(dbpslGraph, ci, language);
            if (log.isDebugEnabled()) {
                // Serialize the whole metadata graph only when debug output
                // is actually going to be emitted.
                Serializer serializer = Serializer.getInstance();
                ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
                serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
                try {
                    log.debug("DBpedia Enhancements:\n{}", debugStream.toString("UTF-8"));
                } catch (UnsupportedEncodingException e) {
                    // Route the failure through the logger instead of
                    // printing the stack trace to stderr.
                    log.warn("Unable to decode the serialized enhancements as UTF-8 for debug output", e);
                }
            }
        } finally {
            // Always release the write lock, even if enhancement creation
            // or serialization fails.
            ci.getLock().writeLock().unlock();
        }
    }
}
Also used : Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation) Serializer(org.apache.clerezza.rdf.core.serializedform.Serializer)

Example 2 with Annotation

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.

/**
 * Adds the returned DBpedia Spotlight annotations to the content item's
 * metadata. For every annotation whose surface form has an entry in
 * {@code textAnnotationsMap}, an EntityAnnotation is created and linked to
 * that TextAnnotation; annotations without an entry are skipped.
 * <p>
 * This method performs no locking of its own.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param language
 *            the language attached to the entity label literals
 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    for (Annotation occ : occs) {
        // Look the text annotation up once; skip annotations whose surface
        // form has no known text annotation.
        IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
        if (textAnnotation == null) {
            continue;
        }
        Graph model = ci.getMetadata();
        IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
        Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
        model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
        model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
        // Type names are optional; add one type triple per name when present.
        Collection<String> typeNames = occ.getTypeNames();
        if (typeNames != null) {
            for (String typeName : typeNames) {
                model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(typeName)));
            }
        }
        model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)

Example 3 with Annotation

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation in project stanbol by apache.

the class DBPSpotlightAnnotateEnhancementEngine method createEnhancements.

/**
 * Writes enhancement structures for the entities returned by DBpedia
 * Spotlight into the content item's metadata. Each annotation gets an
 * EntityAnnotation; a TextAnnotation is created only for surface forms that
 * are not yet present in the supplied map, so several EntityAnnotations can
 * point to the same TextAnnotation.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param text
 *            the text passed on when a new TextAnnotation is created
 * @param language
 *            the language passed on to the created annotations
 * @param surfaceForm2TextAnnotation
 *            lookup of already created TextAnnotations by surface form;
 *            newly created TextAnnotations are added to it
 */
protected void createEnhancements(Collection<Annotation> occs, ContentItem ci, String text, Language language, Map<SurfaceForm, IRI> surfaceForm2TextAnnotation) {
    for (Annotation annotation : occs) {
        IRI linkedTextAnnotation = surfaceForm2TextAnnotation.get(annotation.surfaceForm);
        boolean alreadyWritten = linkedTextAnnotation != null;
        if (!alreadyWritten) {
            // First occurrence of this surface form: create its
            // TextAnnotation and remember it for later annotations.
            linkedTextAnnotation = SpotlightEngineUtils.createTextEnhancement(
                    annotation.surfaceForm, this, ci, text, language);
            surfaceForm2TextAnnotation.put(annotation.surfaceForm, linkedTextAnnotation);
        }
        SpotlightEngineUtils.createEntityAnnotation(annotation, this, ci, linkedTextAnnotation, language);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)

Example 4 with Annotation

use of org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation in project stanbol by apache.

the class DBPSpotlightAnnotateEnhancementEngine method computeEnhancements.

/**
 * Calculates the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and processing the results.
 * <p>
 * If the request yields a non-null result, the annotations are written to
 * the content item while holding its write lock.
 *
 * @param ci
 *            the {@link ContentItem}
 * @throws EngineException
 *             declared by this method; presumably raised by the helpers it
 *             calls (not visible here)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    Collection<Annotation> dbpslGraph = doPostRequest(text, ci.getUri());
    // Tracks the TextAnnotation created for each surface form so that it is
    // reused by later annotations with the same surface form.
    Map<SurfaceForm, IRI> surfaceForm2TextAnnotation = new HashMap<SurfaceForm, IRI>();
    if (dbpslGraph != null) {
        // Acquire a write lock on the ContentItem when adding the
        // enhancements
        ci.getLock().writeLock().lock();
        try {
            createEnhancements(dbpslGraph, ci, text, language, surfaceForm2TextAnnotation);
            if (log.isDebugEnabled()) {
                // Serialize the whole metadata graph only when debug output
                // is actually going to be emitted.
                Serializer serializer = Serializer.getInstance();
                ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
                serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
                try {
                    log.debug("DBPedia Spotlight Enhancements:\n{}", debugStream.toString("UTF-8"));
                } catch (UnsupportedEncodingException e) {
                    // Route the failure through the logger instead of
                    // printing the stack trace to stderr.
                    log.warn("Unable to decode the serialized enhancements as UTF-8 for debug output", e);
                }
            }
        } finally {
            // Always release the write lock, even if enhancement creation
            // or serialization fails.
            ci.getLock().writeLock().unlock();
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Language(org.apache.clerezza.commons.rdf.Language) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation) Serializer(org.apache.clerezza.rdf.core.serializedform.Serializer)

Aggregations

Annotation (org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)4 IRI (org.apache.clerezza.commons.rdf.IRI)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 HashMap (java.util.HashMap)2 Graph (org.apache.clerezza.commons.rdf.Graph)2 Language (org.apache.clerezza.commons.rdf.Language)2 Serializer (org.apache.clerezza.rdf.core.serializedform.Serializer)2 Literal (org.apache.clerezza.commons.rdf.Literal)1 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 SurfaceForm (org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm)1