Search in sources :

Example 11 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class ContentItemResource method initOccurrences.

/**
 * Fills {@code extractionsByTypeMap} from the metadata graph of the content item.
 * <p>
 * The work is done in two passes over the graph:
 * <ol>
 * <li>all resources typed as {@code ENHANCER_ENTITYANNOTATION} are read once and
 * their entity reference, label and confidence are cached; for every
 * {@code DC_RELATION} target the annotation is registered as a suggestion,</li>
 * <li>all resources typed as {@code ENHANCER_TEXTANNOTATION} are turned into
 * {@link EntityExtractionSummary} instances, one per {@code DC_TYPE} value
 * (or a single one with a {@code null} type if no type is present).</li>
 * </ol>
 * A summary that is already present in the per-type map is not replaced; the
 * new occurrence is added to the existing summary as an additional mention.
 */
private void initOccurrences() {
    Graph graph = contentItem.getMetadata();
    LiteralFactory lf = LiteralFactory.getInstance();
    Map<IRI, Collection<BlankNodeOrIRI>> suggestionMap = new HashMap<IRI, Collection<BlankNodeOrIRI>>();
    // 1) get Entity Annotations
    Map<BlankNodeOrIRI, Map<EAProps, Object>> entitySuggestionMap = new HashMap<BlankNodeOrIRI, Map<EAProps, Object>>();
    Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
    while (entityAnnotations.hasNext()) {
        BlankNodeOrIRI entityAnnotation = entityAnnotations.next().getSubject();
        // to avoid multiple lookups (e.g. if one entityAnnotation links to
        // several TextAnnotations) we cache the data in an intermediate Map
        Map<EAProps, Object> eaData = new EnumMap<EAProps, Object>(EAProps.class);
        eaData.put(EAProps.entity, getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE));
        eaData.put(EAProps.label, getString(graph, entityAnnotation, ENHANCER_ENTITY_LABEL));
        eaData.put(EAProps.confidence, EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf));
        entitySuggestionMap.put(entityAnnotation, eaData);
        // register this entity annotation as a suggestion of every related text annotation
        Iterator<IRI> textAnnotations = getReferences(graph, entityAnnotation, DC_RELATION);
        while (textAnnotations.hasNext()) {
            IRI textAnnotation = textAnnotations.next();
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions == null) {
                suggestions = new ArrayList<BlankNodeOrIRI>();
                suggestionMap.put(textAnnotation, suggestions);
            }
            suggestions.add(entityAnnotation);
        }
    }
    // 2) get the TextAnnotations
    Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
    while (textAnnotations.hasNext()) {
        BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
        // NOTE: TextAnnotations that themselves have a DC_RELATION are deliberately
        // NOT skipped; they need to be processed to show multiple mentions.
        // NOTE: TextAnnotations without a selected text are no longer ignored,
        // so selectedText may be null from here on.
        String selectedText = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
        Integer start = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class, lf);
        Integer end = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class, lf);
        Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
        Iterator<IRI> types = getReferences(graph, textAnnotation, DC_TYPE);
        if (!types.hasNext()) {
            // create an iterator over null in case no types are present
            types = Collections.singleton((IRI) null).iterator();
        }
        while (types.hasNext()) {
            IRI type = types.next();
            Map<EntityExtractionSummary, EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
            if (occurrenceMap == null) {
                occurrenceMap = new TreeMap<EntityExtractionSummary, EntityExtractionSummary>();
                extractionsByTypeMap.put(type, occurrenceMap);
            }
            // In case of a language annotation use the detected language as label.
            // The label is resolved per type (instead of overwriting the selected
            // text) so that the language label cannot leak into the summaries
            // created for the other types of the same TextAnnotation.
            String label;
            if (DC_LINGUISTIC_SYSTEM.equals(type)) {
                label = EnhancementEngineHelper.getString(graph, textAnnotation, DC_LANGUAGE);
            } else {
                label = selectedText;
            }
            EntityExtractionSummary entity = new EntityExtractionSummary(label, type, start, end, confidence, defaultThumbnails);
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions != null) {
                for (BlankNodeOrIRI entityAnnotation : suggestions) {
                    // use the data cached in step 1)
                    Map<EAProps, Object> eaData = entitySuggestionMap.get(entityAnnotation);
                    entity.addSuggestion((IRI) eaData.get(EAProps.entity), (String) eaData.get(EAProps.label), (Double) eaData.get(EAProps.confidence), graph);
                }
            }
            EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
            if (existingSummary == null) {
                // new extraction summary
                occurrenceMap.put(entity, entity);
            } else {
                // extraction summary with this text and suggestions already
                // present ... only add a mention to the existing
                existingSummary.addMention(new Mention(label, start, end, confidence));
            }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) EnumMap(java.util.EnumMap) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Collection(java.util.Collection) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 12 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class EnhancementEngineHelper method createEnhancement.

/**
 * Adds a new enhancement resource to the given metadata graph and returns its
 * IRI so that engines can attach further properties to it. The enhancement is
 * typed as {@code ENHANCER_ENHANCEMENT}, linked to the content item it was
 * extracted from and initialised with the default properties dc:created (the
 * current date) and dc:creator (the class name of the engine). <p>
 * <i>NOTE:</i> This method was protected prior to <code>0.12.1</code> (see
 * <a href="https://issues.apache.org/jira/browse/STANBOL-1321">STANBOL-1321</a>)
 *
 * @param metadata the graph the triples of the new enhancement are added to
 * @param engine the Engine performing the analysis
 * @param contentItemId the IRI of the ContentItem being under analysis
 *
 * @return the IRI of the new enhancement instance
 * @since 0.12.1
 */
public static IRI createEnhancement(Graph metadata, EnhancementEngine engine, IRI contentItemId) {
    final LiteralFactory lf = LiteralFactory.getInstance();
    final IRI enhancementIri = new IRI("urn:enhancement-" + EnhancementEngineHelper.randomUUID());

    // rdf:type of the new resource
    final TripleImpl typeTriple = new TripleImpl(enhancementIri, RDF_TYPE, ENHANCER_ENHANCEMENT);
    metadata.add(typeTriple);

    // link back to the content item the enhancement was extracted from
    final TripleImpl sourceTriple = new TripleImpl(enhancementIri, ENHANCER_EXTRACTED_FROM, contentItemId);
    metadata.add(sourceTriple);

    // dc:created - the point in time this method was called
    final Date createdAt = new Date();
    metadata.add(new TripleImpl(enhancementIri, DC_CREATED, lf.createTypedLiteral(createdAt)));

    // dc:creator - the engine that extracted the data, identified by its class name
    // TODO: add some kind of versioning info for the extractor?
    // TODO: use a public dereferencing URI instead? that would allow for
    // explicit versioning too
    /* NOTE (Rupert Westenthaler 2010-05-26):
     * The idea is to use the ComponentContext in the activate() method of
     * an Enhancer to get the bundle name/version and use that as an
     * URI for the creator.
     * We would need to add a getEnhancerID() method to the enhancer interface
     * to access this information.
     */
    final String creatorName = engine.getClass().getName();
    metadata.add(new TripleImpl(enhancementIri, DC_CREATOR, lf.createTypedLiteral(creatorName)));

    return enhancementIri;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Date(java.util.Date) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 13 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class EnhancementEngineHelper method createNewExtraction.

/**
 * Adds a new extraction resource to the metadata graph of the content item and
 * returns its IRI so that engines can attach further properties to it. The
 * extraction is typed as {@code ENHANCER_EXTRACTION}, related to the content
 * item and initialised with the default properties dc:created (the current
 * date) and dc:creator (the class name of the engine).
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the IRI of the new extraction instance
 * @deprecated will be removed with 1.0
 * @see EnhancementEngineHelper#createEntityEnhancement(ContentItem, EnhancementEngine)
 * @see EnhancementEngineHelper#createTextEnhancement(ContentItem, EnhancementEngine)
 */
@Deprecated
public static IRI createNewExtraction(ContentItem ci, EnhancementEngine engine) {
    final LiteralFactory lf = LiteralFactory.getInstance();
    final Graph graph = ci.getMetadata();
    final IRI extractionIri = new IRI("urn:extraction-" + EnhancementEngineHelper.randomUUID());

    // rdf:type of the new resource
    graph.add(new TripleImpl(extractionIri, RDF_TYPE, ENHANCER_EXTRACTION));

    // relate the extraction to the content item (via a fresh IRI built from
    // the unicode string of the content item URI)
    final IRI contentItemIri = new IRI(ci.getUri().getUnicodeString());
    graph.add(new TripleImpl(extractionIri, ENHANCER_RELATED_CONTENT_ITEM, contentItemIri));

    // dc:created - the point in time this method was called
    final Date createdAt = new Date();
    graph.add(new TripleImpl(extractionIri, DC_CREATED, lf.createTypedLiteral(createdAt)));

    // dc:creator - the engine that extracted the data, identified by its class name
    // TODO: add some kind of versioning info for the extractor?
    // TODO: use a public dereferencing URI instead? that would allow for
    // explicit versioning too
    final String creatorName = engine.getClass().getName();
    graph.add(new TripleImpl(extractionIri, DC_CREATOR, lf.createTypedLiteral(creatorName)));

    return extractionIri;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Graph(org.apache.clerezza.commons.rdf.Graph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Date(java.util.Date) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 14 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class GraphMultiplexer method buildResource.

/**
 * Creates an {@link IRI} out of an {@link OWLOntologyID}, so it can be used as an identifier. This
 * does NOT necessarily correspond to the IRI that identifies the stored graph. In order to obtain
 * that, check the objects of any MAPS_TO_GRAPH assertions.
 * <p>
 * The meta graph is searched for an {@link IRI} subject whose ontology IRI matches the one of the
 * public key and whose version IRI either matches too or, if the public key has no version IRI,
 * is absent. If no such subject exists, a new IRI is built from the encoded public key.
 *
 * @param publicKey
 *            the ontology ID to build the resource for; must be neither null nor anonymous
 * @return the matching subject found in the meta graph, or a new IRI built from
 *         {@code OntologyUtils.encode(publicKey)} if there is none
 * @throws IllegalArgumentException
 *             if the public key is null or has no ontology IRI
 */
protected IRI buildResource(final OWLOntologyID publicKey) {
    if (publicKey == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on a null public key!");
    }
    // The IRI is of the form ontologyIRI[:::versionIRI] (TODO use something less conventional?)
    // XXX should versionIRI also include the version IRI set by owners? Currently not
    // Remember not to sanitize logical identifiers.
    org.semanticweb.owlapi.model.IRI ontologyIri = publicKey.getOntologyIRI();
    org.semanticweb.owlapi.model.IRI versionIri = publicKey.getVersionIRI();
    if (ontologyIri == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on an anonymous public key!");
    }
    log.debug("Searching for a meta graph entry for public key:");
    log.debug(" -- {}", publicKey);

    LiteralFactory literals = LiteralFactory.getInstance();
    Literal ontologyLiteral = literals.createTypedLiteral(new IRI(ontologyIri.toString()));
    Literal versionLiteral = null;
    if (versionIri != null) {
        versionLiteral = literals.createTypedLiteral(new IRI(versionIri.toString()));
    }

    IRI found = null;
    Iterator<Triple> candidates = meta.filter(null, HAS_ONTOLOGY_IRI_URIREF, ontologyLiteral);
    // stop scanning as soon as the first suitable subject was found
    while (found == null && candidates.hasNext()) {
        RDFTerm candidate = candidates.next().getSubject();
        log.debug(" -- Ontology IRI match found. Scanning");
        log.debug(" -- RDFTerm : {}", candidate);
        if (!(candidate instanceof IRI)) {
            log.debug(" ---- (uncomparable: skipping...)");
            continue;
        }
        IRI subject = (IRI) candidate;
        if (versionLiteral == null) {
            // Unversioned key: the subject must not carry any version IRI
            if (meta.filter(subject, HAS_VERSION_IRI_URIREF, null).hasNext()) {
                log.debug(" ---- Unexpected version IRI found. Skipping.");
            } else {
                log.debug(" ---- Unversioned match!");
                found = subject;
            }
        } else if (meta.contains(new TripleImpl(subject, HAS_VERSION_IRI_URIREF, versionLiteral))) {
            // Versioned key: the subject must carry exactly this version IRI
            log.debug(" ---- Version IRI match!");
            found = subject;
        } else {
            // There could be another subject with the right versionIRI.
            log.debug(" ---- Expected version IRI match not found.");
        }
    }
    log.debug("Matching IRI in graph : {}", found);
    return found != null ? found : new IRI(OntologyUtils.encode(publicKey));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 15 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class MetaGraphManager method buildResource.

/**
 * Resolves the {@link IRI} to be used as identifier for the given public key.
 * <p>
 * The graph is searched for an {@link IRI} subject whose ontology IRI matches the one of the
 * public key and whose version IRI either matches too or, if the public key has no version IRI,
 * is absent. If no such subject exists, a new IRI is built from the encoded public key.
 *
 * @param publicKey
 *            the ontology ID to build the resource for; must be neither null nor anonymous
 * @return the matching subject found in the graph, or a new IRI built from
 *         {@code OntologyUtils.encode(publicKey)} if there is none
 * @throws IllegalArgumentException
 *             if the public key is null or has no ontology IRI
 */
protected IRI buildResource(final OWLOntologyID publicKey) {
    if (publicKey == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on a null public key!");
    }
    // The IRI is of the form ontologyIRI[:::versionIRI] (TODO use something less conventional?)
    // XXX should versionIRI also include the version IRI set by owners? Currently not
    // Remember not to sanitize logical identifiers.
    org.semanticweb.owlapi.model.IRI ontologyIri = publicKey.getOntologyIRI();
    org.semanticweb.owlapi.model.IRI versionIri = publicKey.getVersionIRI();
    if (ontologyIri == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on an anonymous public key!");
    }
    log.debug("Searching for a meta graph entry for public key:");
    log.debug(" -- {}", publicKey);

    LiteralFactory literals = LiteralFactory.getInstance();
    Literal ontologyLiteral = literals.createTypedLiteral(new IRI(ontologyIri.toString()));
    Literal versionLiteral = null;
    if (versionIri != null) {
        versionLiteral = literals.createTypedLiteral(new IRI(versionIri.toString()));
    }

    IRI found = null;
    Iterator<Triple> candidates = graph.filter(null, HAS_ONTOLOGY_IRI_URIREF, ontologyLiteral);
    // stop scanning as soon as the first suitable subject was found
    while (found == null && candidates.hasNext()) {
        RDFTerm candidate = candidates.next().getSubject();
        log.debug(" -- Ontology IRI match found. Scanning");
        log.debug(" -- RDFTerm : {}", candidate);
        if (!(candidate instanceof IRI)) {
            log.debug(" ---- (uncomparable: skipping...)");
            continue;
        }
        IRI subject = (IRI) candidate;
        if (versionLiteral == null) {
            // Unversioned key: the subject must not carry any version IRI
            if (graph.filter(subject, HAS_VERSION_IRI_URIREF, null).hasNext()) {
                log.debug(" ---- Unexpected version IRI found. Skipping.");
            } else {
                log.debug(" ---- Unversioned match!");
                found = subject;
            }
        } else if (graph.contains(new TripleImpl(subject, HAS_VERSION_IRI_URIREF, versionLiteral))) {
            // Versioned key: the subject must carry exactly this version IRI
            log.debug(" ---- Version IRI match!");
            found = subject;
        } else {
            // There could be another subject with the right versionIRI.
            log.debug(" ---- Expected version IRI match not found.");
        }
    }
    log.debug("Matching IRI in graph : {}", found);
    return found != null ? found : new IRI(OntologyUtils.encode(publicKey));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Aggregations

LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)24 IRI (org.apache.clerezza.commons.rdf.IRI)22 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)20 Graph (org.apache.clerezza.commons.rdf.Graph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)10 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)9 Language (org.apache.clerezza.commons.rdf.Language)8 Triple (org.apache.clerezza.commons.rdf.Triple)7 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)5 Literal (org.apache.clerezza.commons.rdf.Literal)4 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)4 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)4 ArrayList (java.util.ArrayList)3 Date (java.util.Date)3 Map (java.util.Map)3 SOAPException (javax.xml.soap.SOAPException)3 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)3