Example 16 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method validateLemmaFormProperty.

/**
     * Validates that the parsed TextAnnotation has [1..*] lemma form values,
     * each a {@link PlainLiteral} in the same language as the analyzed text.
     * @param enhancements The graph with the enhancements
     * @param textAnnotation the TextAnnotation to check
     * @param lang the language of the analyzed text
     * @return The number of lemma forms found
     */
private int validateLemmaFormProperty(Graph enhancements, BlankNodeOrIRI textAnnotation, String lang) {
    Iterator<Triple> lemmaFormsIterator = enhancements.filter(textAnnotation, hasLemmaForm, null);
    assertTrue("No lemma form value found for TextAnnotation " + textAnnotation + "!", lemmaFormsIterator.hasNext());
    int lemmaFormCount = 0;
    while (lemmaFormsIterator.hasNext()) {
        lemmaFormCount++;
        RDFTerm lemmaForms = lemmaFormsIterator.next().getObject();
        assertTrue("Lemma Forms value are expected of type Literal", lemmaForms instanceof Literal);
        assertFalse("Lemma forms MUST NOT be empty", ((Literal) lemmaForms).getLexicalForm().isEmpty());
        assertNotNull("Language of the Lemma Form literal MUST BE not null", ((Literal) lemmaForms).getLanguage());
        assertEquals("Language of the Lemma Form literal MUST BE the same as for the parsed text", lang, ((Literal) lemmaForms).getLanguage().toString());
    }
    return lemmaFormCount;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)
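
Below is a minimal, self-contained sketch of the literal pattern this test asserts on: lemma forms are written as language-tagged plain literals and read back via Graph.filter(), getLexicalForm() and getLanguage(). The hasLemmaForm property IRI and the sample triple are hypothetical stand-ins, not the engine's real constants.

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class LemmaFormLiteralSketch {

    public static void main(String[] args) {
        // Hypothetical stand-in for the engine's hasLemmaForm property constant.
        IRI hasLemmaForm = new IRI("http://example.org/celi#hasLemmaForm");
        IRI textAnnotation = new IRI("urn:example:text-annotation-1");

        // Lemma forms are stored as plain literals tagged with the language of the analyzed text.
        Graph enhancements = new SimpleGraph();
        enhancements.add(new TripleImpl(textAnnotation, hasLemmaForm,
                new PlainLiteralImpl("run", new Language("en"))));

        // The same filter / instanceof Literal / getLexicalForm() / getLanguage() checks the test performs.
        Iterator<Triple> it = enhancements.filter(textAnnotation, hasLemmaForm, null);
        while (it.hasNext()) {
            Literal lemma = (Literal) it.next().getObject();
            System.out.println(lemma.getLexicalForm() + "@" + lemma.getLanguage());
        }
    }
}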

Example 17 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class SpotlightEngineUtils method createEntityAnnotation.

/**
     * Creates a fise:EntityAnnotation for the parsed parameters and
     * adds it to the {@link ContentItem#getMetadata()}. <p>
     * This method assumes a write lock on the parsed content item.
     * @param resource the candidate resource
     * @param engine the engine
     * @param ci the content item
     * @param textAnnotation the fise:TextAnnotation to dc:relate the
     * created fise:EntityAnnotation to
     * @return the URI of the created fise:EntityAnnotation
     */
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    Graph model = ci.getMetadata();
    Literal label = new PlainLiteralImpl(resource.label, new Language("en"));
    model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE, literalFactory.createTypedLiteral(resource.contextualScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK, literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT, literalFactory.createTypedLiteral(resource.support)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PRIOR_SCORE, literalFactory.createTypedLiteral(resource.priorScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_FINAL_SCORE, literalFactory.createTypedLiteral(resource.finalScore)));
    return entityAnnotation;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)
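
The distinctive literal handling here is the mix of a language-tagged label and typed literals for the numeric scores. The following sketch reproduces that pattern on a standalone SimpleGraph; the property IRIs are illustrative placeholders, and it assumes that the Clerezza LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory) backing the engine's literalFactory field returns commons.rdf Literals for boxed Java numbers, as the snippet above implies.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class EntityAnnotationLiteralSketch {

    // Illustrative placeholders for the fise/Spotlight property constants used above.
    private static final IRI ENTITY_LABEL = new IRI("urn:example:entity-label");
    private static final IRI CONTEXTUAL_SCORE = new IRI("urn:example:contextualScore");

    public static void main(String[] args) {
        Graph model = new SimpleGraph();
        IRI entityAnnotation = new IRI("urn:example:entity-annotation-1");

        // Language-tagged label literal, as created for resource.label.
        Literal label = new PlainLiteralImpl("Paris", new Language("en"));
        model.add(new TripleImpl(entityAnnotation, ENTITY_LABEL, label));

        // Typed literal (xsd:double) for a score value, created through the LiteralFactory.
        Literal score = LiteralFactory.getInstance().createTypedLiteral(Double.valueOf(0.87));
        model.add(new TripleImpl(entityAnnotation, CONTEXTUAL_SCORE, score));

        System.out.println(model.size() + " triples; score lexical form: " + score.getLexicalForm());
    }
}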

Example 18 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.

/**
     * The method adds the returned DBpedia Spotlight annotations to the content
     * item's metadata. For each DBpedia resource an EntityAnnotation is created
     * and linked to the corresponding TextAnnotation.
     * 
     * @param occs
     *            a Collection of entity information
     * @param ci
     *            the content item
     * @param language
     *            the language of the analyzed text
     */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)
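
As a hedged usage sketch of the same pattern outside the engine: each Spotlight type name (a plain URI string) becomes an IRI object of the entity annotation, while the surface form becomes a language-tagged plain literal label. The property IRIs below are stand-ins for the ENHANCER_ENTITY_LABEL and ENHANCER_ENTITY_TYPE constants used above.

import java.util.Arrays;
import java.util.Collection;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class SpotlightTypeTripleSketch {

    // Illustrative placeholders for ENHANCER_ENTITY_LABEL and ENHANCER_ENTITY_TYPE.
    private static final IRI ENTITY_LABEL = new IRI("urn:example:entity-label");
    private static final IRI ENTITY_TYPE = new IRI("urn:example:entity-type");

    public static void main(String[] args) {
        Graph model = new SimpleGraph();
        IRI entityAnnotation = new IRI("urn:example:entity-annotation-2");

        // The surface form becomes a language-tagged plain literal label.
        model.add(new TripleImpl(entityAnnotation, ENTITY_LABEL,
                new PlainLiteralImpl("Berlin", new Language("de"))));

        // Each returned type name string is turned into an IRI object of the annotation.
        Collection<String> typeNames = Arrays.asList(
                "http://dbpedia.org/ontology/City", "http://dbpedia.org/ontology/Place");
        for (String typeName : typeNames) {
            model.add(new TripleImpl(entityAnnotation, ENTITY_TYPE, new IRI(typeName)));
        }
        System.out.println("added " + model.size() + " triples");
    }
}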

Example 19 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class OpenCalaisEngine method queryModel.

/**
     * Extracts the relevant entity information from the Calais RDF data.
     * The entities and the related information are extracted by a SPARQL query.
     *
     * @param model the Graph representing the Calais data
     *
     * @return a Collection of entity information
     * @throws EngineException on a {@link ParseException} while processing the
     * SPARQL query.
     */
public Collection<CalaisEntityOccurrence> queryModel(Graph model) throws EngineException {
    //TODO extract also Geo info (latitude/longitude)?
    String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> " + "PREFIX p: <http://s.opencalais.com/1/pred/> " + "PREFIX t: <http://s.opencalais.com/1/type/em/e/> " + "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset ?length ?exact ?context ?score WHERE { " + "?id p:name ?name ." + "?id rdf:type ?type ." + "?y p:subject ?id ." + "?y p:offset ?offset ." + "?y p:length ?length ." + "?y p:exact ?exact ." + "?y p:detection ?context ." + " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score . } " + // get disambiguated entity references if available
    " OPTIONAL { ?did p:subject ?id . ?did p:name ?name . ?did rdf:type ?dtype . } " + "FILTER (" + "?type = t:Person || " + "?type = t:City || " + "?type = t:Continent || " + "?type = t:Country || " + "?type = t:ProvinceOrState || " + "?type = t:Region || " + "?type = t:Company || " + "?type = t:Facility || " + "?type = t:Organization " + ")" + "} ";
    Collection<CalaisEntityOccurrence> result = new ArrayList<CalaisEntityOccurrence>();
    try {
        SelectQuery sQuery = (SelectQuery) QueryParser.getInstance().parse(query);
        ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
        while (rs.hasNext()) {
            SolutionMapping row = rs.next();
            CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
            RDFTerm disambiguated = row.get("did");
            occ.id = (disambiguated == null ? row.get("id") : disambiguated);
            if (onlyNERMode) {
                occ.type = row.get("type");
            } else {
                occ.type = (disambiguated == null ? row.get("type") : row.get("dtype"));
            }
            if (calaisTypeMap != null) {
                IRI mappedType = calaisTypeMap.get(occ.type);
                if (mappedType != null) {
                    occ.type = mappedType;
                }
            }
            occ.name = ((Literal) row.get("name")).getLexicalForm();
            occ.exact = ((Literal) row.get("exact")).getLexicalForm();
            //TODO for html the offsets might not be those of the original document but refer to a cleaned up version?
            occ.offset = Integer.valueOf(((Literal) row.get("offset")).getLexicalForm());
            // remove brackets
            occ.context = ((Literal) row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
            occ.length = Integer.valueOf(((Literal) row.get("length")).getLexicalForm());
            if (row.get("score") != null) {
                occ.relevance = Double.valueOf(((Literal) row.get("score")).getLexicalForm());
            }
            result.add(occ);
        }
    } catch (ParseException e) {
        throw new EngineException("Unable to parse SPARQL query for processing OpenCalais results", e);
    }
    log.info("Found {} occurences", result.size());
    return result;
}
Also used : SelectQuery(org.apache.clerezza.rdf.core.sparql.query.SelectQuery) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SolutionMapping(org.apache.clerezza.rdf.core.sparql.SolutionMapping) Literal(org.apache.clerezza.commons.rdf.Literal) ArrayList(java.util.ArrayList) ResultSet(org.apache.clerezza.rdf.core.sparql.ResultSet) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ParseException(org.apache.clerezza.rdf.core.sparql.ParseException)
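
The only Literal-specific work in the result handling is turning lexical forms back into Java values and stripping the brackets from the detection context. A tiny sketch of that conversion, using plain literals as stand-ins for the SPARQL bindings:

import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;

public class LiteralValueSketch {

    public static void main(String[] args) {
        // Stand-ins for the ?offset, ?score and ?context bindings of the SPARQL result.
        Literal offset = new PlainLiteralImpl("42");
        Literal score = new PlainLiteralImpl("0.731");
        Literal context = new PlainLiteralImpl("[Barack Obama] visited Berlin.");

        // Numeric values are parsed from the literal's lexical form, as in queryModel().
        int offsetValue = Integer.valueOf(offset.getLexicalForm());
        double relevance = Double.valueOf(score.getLexicalForm());
        // The detection context is cleaned by stripping the surrounding brackets.
        String cleaned = context.getLexicalForm().replaceAll("[\\[\\]]", "");

        System.out.println(offsetValue + " / " + relevance + " / " + cleaned);
    }
}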

Example 20 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class AbstractOntologyCollectorImpl method exportToGraph.

/**
     * This method has no conversion calls, so it can be invoked by subclasses that wish to modify the
     * result afterwards.
     * 
     * @param merge if {@code true}, the triples of the directly managed ontologies are merged into the exported graph
     * @param prefix the namespace prefix used to build the IRI of the exported ontology
     * @return the exported graph
     */
protected Graph exportToGraph(boolean merge, org.semanticweb.owlapi.model.IRI prefix) {
    // if (merge) throw new UnsupportedOperationException(
    // "Merge not implemented yet for Clerezza triple collections.");
    long before = System.currentTimeMillis();
    // No need to store, give it a name, or anything.
    Graph root = new SimpleGraph();
    IRI iri = new IRI(prefix + _id);
    // Add the import declarations for directly managed ontologies.
    if (root != null) {
        // Set the ontology ID
        root.add(new TripleImpl(iri, RDF.type, OWL.Ontology));
        if (merge) {
            log.warn("Merging of Clerezza triple collections is only implemented one level down. Import statements will be preserved for further levels.");
            Iterator<Triple> it;
            Set<RDFTerm> importTargets = new HashSet<RDFTerm>();
            for (OWLOntologyID ontologyId : managedOntologies) {
                ImmutableGraph g = getOntology(ontologyId, ImmutableGraph.class, false);
                root.addAll(g);
                it = g.filter(null, OWL.imports, null);
                while (it.hasNext()) {
                    org.semanticweb.owlapi.model.IRI tgt;
                    RDFTerm r = it.next().getObject();
                    try {
                        if (r instanceof IRI)
                            tgt = org.semanticweb.owlapi.model.IRI.create(((IRI) r).getUnicodeString());
                        else if (r instanceof Literal)
                            tgt = org.semanticweb.owlapi.model.IRI.create(((Literal) r).getLexicalForm());
                        else
                            tgt = org.semanticweb.owlapi.model.IRI.create(r.toString());
                        tgt = URIUtils.sanitize(tgt);
                        importTargets.add(new IRI(tgt.toString()));
                    } catch (Exception ex) {
                        log.error("FAILED to obtain import target from resource {}", r);
                        continue;
                    }
                }
                it = g.filter(null, RDF.type, OWL.Ontology);
                while (it.hasNext()) {
                    BlankNodeOrIRI ontology = it.next().getSubject();
                    log.debug("Removing all triples related to {} from {}", ontology, iri);
                    Iterator<Triple> it2 = g.filter(ontology, null, null);
                    while (it2.hasNext()) root.remove(it2.next());
                }
                /*
                     * Reinstate import statements, though. If imported ontologies were not merged earlier, we
                     * are not doing it now anyway.
                     */
                for (RDFTerm target : importTargets) root.add(new TripleImpl(iri, OWL.imports, target));
            }
        } else {
            String base = prefix + getID();
            for (int i = 0; i < backwardPathLength; i++) base = URIUtils.upOne(URI.create(base)).toString();
            base += "/";
            // The key set of managedOntologies contains the ontology IRIs, not their storage keys.
            for (OWLOntologyID ontologyId : managedOntologies) {
                // .create(base + ontologyId.getVersionIRI()));
                org.semanticweb.owlapi.model.IRI physIRI = org.semanticweb.owlapi.model.IRI.create(base + OntologyUtils.encode(ontologyId));
                root.add(new TripleImpl(iri, OWL.imports, new IRI(physIRI.toString())));
            }
        }
        log.debug("Clerezza export of {} completed in {} ms.", getID(), System.currentTimeMillis() - before);
    }
    return root;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) OWLOntologyCreationException(org.semanticweb.owlapi.model.OWLOntologyCreationException) MissingOntologyException(org.apache.stanbol.ontologymanager.servicesapi.collector.MissingOntologyException) OWLOntologyAlreadyExistsException(org.semanticweb.owlapi.model.OWLOntologyAlreadyExistsException) UnmodifiableOntologyCollectorException(org.apache.stanbol.ontologymanager.servicesapi.collector.UnmodifiableOntologyCollectorException) OntologyCollectorModificationException(org.apache.stanbol.ontologymanager.servicesapi.collector.OntologyCollectorModificationException) Triple(org.apache.clerezza.commons.rdf.Triple) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)
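
A compact sketch of the merge branch, assuming the OWL and RDF vocabulary constants come from org.apache.clerezza.rdf.ontologies as in the snippet above: the triples of a managed ontology are copied into the root graph, its own ontology header is stripped, and its owl:imports target is re-attached to the root ontology. All ontology IRIs are illustrative.

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.ontologies.OWL;
import org.apache.clerezza.rdf.ontologies.RDF;

public class OntologyExportSketch {

    public static void main(String[] args) {
        // A managed ontology with its own header and one import declaration (illustrative IRIs).
        IRI managedIri = new IRI("http://example.org/ontologies/managed");
        IRI importedIri = new IRI("http://example.org/ontologies/imported");
        Graph managed = new SimpleGraph();
        managed.add(new TripleImpl(managedIri, RDF.type, OWL.Ontology));
        managed.add(new TripleImpl(managedIri, OWL.imports, importedIri));

        // Root graph representing the merged export of the collector.
        IRI rootIri = new IRI("http://example.org/ontologies/collector");
        Graph root = new SimpleGraph();
        root.add(new TripleImpl(rootIri, RDF.type, OWL.Ontology));
        root.addAll(managed);

        // Strip the merged ontology's own header triples from the export ...
        Iterator<Triple> headers = managed.filter(null, RDF.type, OWL.Ontology);
        while (headers.hasNext()) {
            BlankNodeOrIRI ontology = headers.next().getSubject();
            Iterator<Triple> related = managed.filter(ontology, null, null);
            while (related.hasNext()) {
                root.remove(related.next());
            }
        }
        // ... but reinstate its import target on the root ontology.
        root.add(new TripleImpl(rootIri, OWL.imports, importedIri));

        System.out.println("exported " + root.size() + " triples");
    }
}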

Aggregations

Literal (org.apache.clerezza.commons.rdf.Literal) 71
IRI (org.apache.clerezza.commons.rdf.IRI) 35
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm) 35
Triple (org.apache.clerezza.commons.rdf.Triple) 30
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI) 22
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) 20
ArrayList (java.util.ArrayList) 16
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) 16
Language (org.apache.clerezza.commons.rdf.Language) 12
Graph (org.apache.clerezza.commons.rdf.Graph) 11
Test (org.junit.Test) 10
HashSet (java.util.HashSet) 9
Date (java.util.Date) 8
Lock (java.util.concurrent.locks.Lock) 6
Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity) 5
HashMap (java.util.HashMap) 4
SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) 4
NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException) 4
Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation) 4
Collection (java.util.Collection) 3