Search in sources :

Example 81 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class TrackingDereferencerBase method copyLdPath.

/**
 * Runs the parsed LDPath program with the parsed URI as context and copies
 * the collected field values to the parsed graph.
 * <p>
 * The values are first collected in an intermediate graph. They are only
 * copied to the target graph (while holding the parsed write lock) after
 * all field mappings were evaluated, so that an error does not leave
 * partial results in the target graph.
 * @param uri the context
 * @param rdfBackend the RdfBackend the LDPath program is executed on
 * @param ldpathProgram the {@link Program} to execute
 * @param langs the languages accepted for {@link Text} values. If
 * <code>null</code> or empty no language filtering is applied
 * @param graph the graph to store the results
 * @param writeLock the write lock for the graph
 * @throws DereferenceException on any {@link EntityhubException} while
 * executing the LDPath program and on any {@link RuntimeException} thrown
 * while evaluating a field mapping
 */
private void copyLdPath(IRI uri, RDFBackend<Object> rdfBackend, Program<Object> ldpathProgram, Set<String> langs, Graph graph, Lock writeLock) throws DereferenceException {
    //the LDPath context needs to be a RdfReference
    RdfReference context = valueFactory.createReference(uri);
    //collect the results in an intermediate graph (we do not want partial
    //results in the target graph on an error)
    Graph buffer = new SimpleGraph();
    RdfRepresentation representation = valueFactory.createRdfRepresentation(uri, buffer);
    //without any configured language all values are accepted
    boolean acceptAllLanguages = langs == null || langs.isEmpty();
    try {
        for (org.apache.marmotta.ldpath.model.fields.FieldMapping<?, Object> field : ldpathProgram.getFields()) {
            Collection<?> fieldValues;
            try {
                fieldValues = field.getValues(rdfBackend, context);
            } catch (RuntimeException e) {
                throw new DereferenceException(uri, e);
            }
            if (fieldValues == null || fieldValues.isEmpty()) {
                //nothing selected by this field mapping
                continue;
            }
            String name = field.getFieldName();
            if (acceptAllLanguages) {
                representation.add(name, fieldValues);
                continue;
            }
            //only Text values are filtered by language; everything else is kept
            for (Object fieldValue : fieldValues) {
                boolean filtered = fieldValue instanceof Text && !langs.contains(((Text) fieldValue).getLanguage());
                if (!filtered) {
                    representation.add(name, fieldValue);
                }
            }
        }
    } catch (EntityhubException e) {
        throw new DereferenceException(uri, e);
    }
    if (log.isTraceEnabled()) {
        log.trace("dereferenced via LDPath {}", ModelUtils.getRepresentationInfo(representation));
    }
    if (buffer.isEmpty()) {
        //nothing to copy ... no need to acquire the write lock
        return;
    }
    writeLock.lock();
    try {
        graph.addAll(buffer);
    } finally {
        writeLock.unlock();
    }
}
Also used : Text(org.apache.stanbol.entityhub.servicesapi.model.Text) DereferenceException(org.apache.stanbol.enhancer.engines.dereference.DereferenceException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) RdfReference(org.apache.stanbol.entityhub.model.clerezza.RdfReference)

Example 82 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.

/**
 * Sends the plain text of the content item to the CELI NER client and
 * writes a text enhancement for every returned named entity to the
 * metadata of the content item.
 * <p>
 * For each named entity the selected text and the type are added. Start,
 * end and selection context are only added if both offsets are available.
 * A {@link NoConvertorException} for a single entity is logged and does
 * not stop the processing of the remaining entities.
 * @param ci the content item to enhance
 * @throws EngineException if the text can not be read from the content
 * part or if the call to the CELI service fails
 * @throws IllegalStateException if no supported content part or no
 * language is available for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> part = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (part == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(part.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.info("No text contained in ContentPart {" + part.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    //language of the plain literals written to the TextAnnotations
    Language lang = new Language(language);
    try {
        List<NamedEntity> entities = this.client.extractEntities(text, language);
        LiteralFactory literals = LiteralFactory.getInstance();
        Graph metadata = ci.getMetadata();
        for (NamedEntity entity : entities) {
            try {
                IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                //the selected text uses the language extracted from the text
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(entity.getFormKind(), lang)));
                metadata.add(new TripleImpl(annotation, DC_TYPE, getEntityRefForType(entity.type)));
                if (entity.getFrom() == null || entity.getTo() == null) {
                    //no offsets ... no start, end and selection context
                    continue;
                }
                int start = entity.getFrom().intValue();
                metadata.add(new TripleImpl(annotation, ENHANCER_START, literals.createTypedLiteral(start)));
                metadata.add(new TripleImpl(annotation, ENHANCER_END, literals.createTypedLiteral(entity.getTo().intValue())));
                String selectionContext = getSelectionContext(text, entity.getFormKind(), start);
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(selectionContext, lang)));
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 83 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class DBPSpotlightSpotEnhancementEngine method createEnhancements.

/**
 * Creates a TextAnnotation in the metadata of the content item for every
 * surface form returned by DBpedia Spotlight.
 * <p>
 * The first TextAnnotation created for a surface form name is remembered.
 * For every further surface form with the same name a
 * {@code DC_RELATION} triple pointing from the remembered TextAnnotation
 * to the newly created one is added to the metadata.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param content
 *            the content, passed on to
 *            {@code SpotlightEngineUtils.createTextEnhancement}
 * @param lang
 *            the language, passed on to
 *            {@code SpotlightEngineUtils.createTextEnhancement}
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String content, Language lang) {
    Graph metadata = ci.getMetadata();
    //surface form name -> first TextAnnotation created for that name
    HashMap<String, IRI> firstAnnotations = new HashMap<String, IRI>();
    for (SurfaceForm surfaceForm : occs) {
        IRI annotation = SpotlightEngineUtils.createTextEnhancement(surfaceForm, this, ci, content, lang);
        String name = surfaceForm.name;
        if (!firstAnnotations.containsKey(name)) {
            //first occurrence of this name ... just remember the annotation
            firstAnnotations.put(name, annotation);
            continue;
        }
        metadata.add(new TripleImpl(firstAnnotations.get(name), DC_RELATION, annotation));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 84 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class SpotlightEngineUtils method createEntityAnnotation.

/**
 * Creates a fise:EntityAnnotation for the parsed candidate resource and
 * adds it to the {@link ContentItem#getMetadata()}. <p>
 * Besides the relation to the parsed fise:TextAnnotation the label (as
 * literal with the language "en"), the reference to the resource and the
 * contextual score, percentage of second rank, support, prior score and
 * final score of the candidate are written.<p>
 * This method assumes a write lock on the parsed content item.
 * @param resource the candidate resource
 * @param engine the engine
 * @param ci the content item
 * @param textAnnotation the fise:TextAnnotation to dc:relate the
 * created fise:EntityAnnotation
 * @return the URI of the created fise:EntityAnnotation
 */
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
    IRI annotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    Graph metadata = ci.getMetadata();
    Language english = new Language("en");
    Literal entityLabel = new PlainLiteralImpl(resource.label, english);
    //relation to the text annotation, label and reference of the entity
    metadata.add(new TripleImpl(annotation, DC_RELATION, textAnnotation));
    metadata.add(new TripleImpl(annotation, ENHANCER_ENTITY_LABEL, entityLabel));
    metadata.add(new TripleImpl(annotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
    //the scores provided for the candidate
    metadata.add(new TripleImpl(annotation, PROPERTY_CONTEXTUAL_SCORE,
        literalFactory.createTypedLiteral(resource.contextualScore)));
    metadata.add(new TripleImpl(annotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK,
        literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
    metadata.add(new TripleImpl(annotation, PROPERTY_SUPPORT,
        literalFactory.createTypedLiteral(resource.support)));
    metadata.add(new TripleImpl(annotation, PROPERTY_PRIOR_SCORE,
        literalFactory.createTypedLiteral(resource.priorScore)));
    metadata.add(new TripleImpl(annotation, PROPERTY_FINAL_SCORE,
        literalFactory.createTypedLiteral(resource.finalScore)));
    return annotation;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 85 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class DereferenceEngineTest method validateDereferencedEntities.

/**
 * Asserts that the parsed metadata contain all triples of the test data
 * about the entities referenced by the parsed fields, and none of the
 * remaining triples of the test data.
 * @param metadata the metadata to validate
 * @param entityReferenceFields the properties used to reference entities
 */
private void validateDereferencedEntities(Graph metadata, IRI... entityReferenceFields) {
    //collect the test data triples of all referenced entities
    Graph expected = new IndexedGraph();
    for (IRI field : entityReferenceFields) {
        for (Iterator<Triple> references = metadata.filter(null, field, null); references.hasNext();) {
            IRI entity = (IRI) references.next().getObject();
            for (Iterator<Triple> triples = testData.filter(entity, null, null); triples.hasNext();) {
                expected.add(triples.next());
            }
        }
    }
    //everything else in the test data must not be present in the metadata
    Graph notExpected = new IndexedGraph(testData);
    notExpected.removeAll(expected);
    Assert.assertTrue(metadata.containsAll(expected));
    Assert.assertTrue(Collections.disjoint(metadata, notExpected));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)172 IRI (org.apache.clerezza.commons.rdf.IRI)110 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)66 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)57 Triple (org.apache.clerezza.commons.rdf.Triple)45 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)43 Test (org.junit.Test)38 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)36 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)34 IOException (java.io.IOException)27 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)26 HashSet (java.util.HashSet)24 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)24 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)24 InputStream (java.io.InputStream)21 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)17 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)17 ArrayList (java.util.ArrayList)16 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)15