
Example 76 with Triple

Use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

From the class TestEntityLinkingEnhancementEngine, method validateAllEntityAnnotations.

private static int validateAllEntityAnnotations(NamedEntityTaggingEngine entityLinkingEngine, ContentItem ci) {
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(entityLinkingEngine.getClass().getName()));
    Iterator<Triple> entityAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_ENTITYANNOTATION);
    // adding null as the expected value makes fise:confidence a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    int entityAnnotationCount = 0;
    while (entityAnnotationIterator.hasNext()) {
        IRI entityAnnotation = (IRI) entityAnnotationIterator.next().getSubject();
        // test if selected Text is added
        validateEntityAnnotation(ci.getMetadata(), entityAnnotation, expectedValues);
        // fise:confidence now checked by EnhancementStructureHelper (STANBOL-630)
        // Iterator<Triple> confidenceIterator = ci.getMetadata().filter(entityAnnotation, ENHANCER_CONFIDENCE, null);
        // assertTrue("Expected fise:confidence value is missing (entityAnnotation "
        // +entityAnnotation+")",confidenceIterator.hasNext());
        // Double confidence = LiteralFactory.getInstance().createObject(Double.class,
        // (TypedLiteral)confidenceIterator.next().getObject());
        // assertTrue("fise:confidence MUST BE <= 1 (value= '"+confidence
        // + "',entityAnnotation " +entityAnnotation+")",
        // 1.0 >= confidence.doubleValue());
        // assertTrue("fise:confidence MUST BE >= 0 (value= '"+confidence
        // +"',entityAnnotation "+entityAnnotation+")",
        // 0.0 <= confidence.doubleValue());
        // Test the entityhub:site property (STANBOL-625)
        IRI ENTITYHUB_SITE = new IRI(RdfResourceEnum.site.getUri());
        Iterator<Triple> entitySiteIterator = ci.getMetadata().filter(entityAnnotation, ENTITYHUB_SITE, null);
        assertTrue("Expected entityhub:site value is missing (entityAnnotation " + entityAnnotation + ")", entitySiteIterator.hasNext());
        RDFTerm siteResource = entitySiteIterator.next().getObject();
        assertTrue("entityhub:site values MUST BE Literals", siteResource instanceof Literal);
        assertEquals("'dbpedia' is expected as entityhub:site value", "dbpedia", ((Literal) siteResource).getLexicalForm());
        assertFalse("entityhub:site MUST HAVE only a single value", entitySiteIterator.hasNext());
        entityAnnotationCount++;
    }
    return entityAnnotationCount;
}
Also used: Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) HashMap(java.util.HashMap) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)
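
The filter(subject, predicate, object) call used above is the standard Clerezza Graph lookup, with null acting as a wildcard. Below is a minimal, self-contained sketch of the same pattern; the rdf:type IRI is standard, while the fise:EntityAnnotation IRI and the urn:example subject are assumptions chosen for illustration.

import java.util.Iterator;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class FilterSketch {
    public static void main(String[] args) {
        IRI rdfType = new IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
        // assumed fise class IRI; adjust to the constants of your Stanbol version
        IRI entityAnnotationType = new IRI("http://fise.iks-project.eu/ontology/EntityAnnotation");
        IRI annotation = new IRI("urn:example:entityAnnotation1");

        Graph graph = new SimpleGraph();
        graph.add(new TripleImpl(annotation, rdfType, entityAnnotationType));

        // null acts as a wildcard: iterate all subjects typed as fise:EntityAnnotation
        Iterator<Triple> it = graph.filter(null, rdfType, entityAnnotationType);
        while (it.hasNext()) {
            System.out.println(it.next().getSubject());
        }
    }
}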

Example 77 with Triple

Use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

From the class EntityLinkingEngine, method writeEnhancements.

/**
 * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
 * extracted from the parsed ContentItem.
 * @param ci the ContentItem to write the enhancements to
 * @param linkedEntities the linked entities to write
 * @param language the language of the analysed text
 * @param writeRankings if <code>true</code> the entity rankings of the suggestions are also written
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language, boolean writeRankings) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Set<IRI> dereferencedEntities = new HashSet<IRI>();
    Graph metadata = ci.getMetadata();
    for (LinkedEntity linkedEntity : linkedEntities) {
        Collection<IRI> textAnnotations = new ArrayList<IRI>(linkedEntity.getOccurrences().size());
        // first create the TextAnnotations for the Occurrences
        for (Occurrence occurrence : linkedEntity.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            // search for existing text annotation
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            IRI textAnnotation = null;
            while (it.hasNext()) {
                Triple t = it.next();
                if (metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext() && metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                    textAnnotation = (IRI) t.getSubject();
                    break;
                }
            }
            if (textAnnotation == null) {
                // not found ... create a new one
                textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linkedEntity.getScore())));
            } else {
                // if existing add this engine as contributor
                metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
            }
            // add dc:types (even to existing)
            for (IRI dcType : linkedEntity.getTypes()) {
                metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
            }
            textAnnotations.add(textAnnotation);
        }
        // now the EntityAnnotations for the Suggestions
        for (Suggestion suggestion : linkedEntity.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // should we use the label used for the match, or search the
            // representation for the best label ... currently it's the matched one
            Literal label = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            Entity entity = suggestion.getEntity();
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, label));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entity.getUri()));
            Iterator<IRI> suggestionTypes = entity.getReferences(linkerConfig.getTypeField());
            while (suggestionTypes.hasNext()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, suggestionTypes.next()));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            for (IRI textAnnotation : textAnnotations) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            }
            // add origin information of the EntitySearcher
            for (Entry<IRI, Collection<RDFTerm>> originInfo : entitySearcher.getOriginInformation().entrySet()) {
                for (RDFTerm value : originInfo.getValue()) {
                    metadata.add(new TripleImpl(entityAnnotation, originInfo.getKey(), value));
                }
            }
            if (writeRankings) {
                Float ranking = suggestion.getEntity().getEntityRanking();
                if (ranking != null) {
                    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING,
                            // write the float value as an xsd:double typed literal
                            new TypedLiteralImpl(ranking.toString(), XSD_DOUBLE)));
                }
            }
            // add the RDF data for entities
            if (linkerConfig.isDereferenceEntitiesEnabled() && dereferencedEntities.add(entity.getUri())) {
                // NOTE: do not add all triples as there might be other data in the graph
                Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null);
                while (triples.hasNext()) {
                    metadata.add(triples.next());
                }
            }
        }
    }
}
Also used: LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) Triple(org.apache.clerezza.commons.rdf.Triple) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet)
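
The lookup-before-create logic above keeps multiple engines from producing duplicate TextAnnotations for the same character span. Here is a trimmed sketch of just that pattern; the property and class IRIs are passed in as parameters instead of the Stanbol constants, so this helper is only an illustration of the idea.

import java.util.Iterator;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.Triple;

public class TextAnnotationLookup {
    // returns an existing TextAnnotation with the given start/end offsets, or null
    static IRI findTextAnnotation(Graph metadata, Literal start, Literal end,
            IRI enhancerStart, IRI enhancerEnd, IRI rdfType, IRI textAnnotationType) {
        Iterator<Triple> it = metadata.filter(null, enhancerStart, start);
        while (it.hasNext()) {
            BlankNodeOrIRI subject = it.next().getSubject();
            // a match must also share the end offset and be typed as a TextAnnotation
            if (subject instanceof IRI
                    && metadata.filter(subject, enhancerEnd, end).hasNext()
                    && metadata.filter(subject, rdfType, textAnnotationType).hasNext()) {
                return (IRI) subject;
            }
        }
        return null; // caller creates a fresh TextAnnotation
    }
}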

Example 78 with Triple

Use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

From the class EntityCoMentionEngine, method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText at = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    LanguageProcessingConfig languageConfig = textProcessingConfig.getConfiguration(language);
    if (languageConfig == null) {
        throw new IllegalStateException("The language '" + language + "' is not configured " + "to be processed by this Engine. As this is already checked within the " + "canEnhance(..) method this may indicate a bug in the used " + "EnhancementJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        log.debug("compute co-mentions for ContentItem {} language {}  text={}", new Object[] { ci.getUri().getUnicodeString(), language, StringUtils.abbreviate(at.getSpan(), 100) });
    }
    LabelTokenizer labelTokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (labelTokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }
    // create the in-memory database for the mentioned Entities
    ContentItemMentionBuilder entityMentionIndex = new ContentItemMentionBuilder(labelTokenizer, language, linkerConfig.getDefaultLanguage());
    Graph metadata = ci.getMetadata();
    Set<IRI> textAnnotations = new HashSet<IRI>();
    ci.getLock().readLock().lock();
    try {
        // iterate over all TextAnnotations (mentions of Entities)
        for (Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
            IRI ta = (IRI) it.next().getSubject();
            entityMentionIndex.registerTextAnnotation(ta, metadata);
            // store the registered text annotations
            textAnnotations.add(ta);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    // entityMentionIndex is passed twice: once as the EntitySearcher and once as the LinkingStateAware callback
    EntityLinker entityLinker = new EntityLinker(at, language, languageConfig, entityMentionIndex, linkerConfig, labelTokenizer, entityMentionIndex);
    // process
    try {
        entityLinker.process();
    } catch (EntitySearcherException e) {
        log.error("Unable to link Entities with " + entityLinker, e);
        throw new EngineException(this, ci, "Unable to link Entities with " + entityLinker, e);
    }
    // write the results (requires the write lock)
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, entityLinker.getLinkedEntities().values(), language, textAnnotations);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used: IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LanguageProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityLinker(org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker) Triple(org.apache.clerezza.commons.rdf.Triple) NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) Graph(org.apache.clerezza.commons.rdf.Graph) LabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer) ContentItemMentionBuilder(org.apache.stanbol.enhancer.engines.entitycomention.impl.ContentItemMentionBuilder) HashSet(java.util.HashSet)
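
Note the locking discipline in this example: existing TextAnnotations are read under the read lock, the (potentially expensive) linking runs with no lock held, and the results are written under the write lock. A condensed sketch of that pattern follows, with the engine-specific work replaced by placeholder comments; ContentItem is the Stanbol servicesapi type.

import java.util.Iterator;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;

public class LockingSketch {
    static void enhanceSafely(ContentItem ci) {
        ci.getLock().readLock().lock();
        try {
            // read-only phase: inspect the existing metadata
            Iterator<Triple> it = ci.getMetadata().filter(null, null, null);
            while (it.hasNext()) {
                it.next(); // placeholder: collect whatever the engine needs
            }
        } finally {
            ci.getLock().readLock().unlock();
        }
        // ... compute the enhancement results without holding any lock ...
        ci.getLock().writeLock().lock();
        try {
            // write phase: add the computed enhancement triples here
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}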

Example 79 with Triple

Use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

From the class RdfSerializingWriter, method getAdditionalExpansionResources.

private Set<RDFTerm> getAdditionalExpansionResources(Graph tc, GraphNode recipe) {
    final Set<IRI> subjectExpansionProperties = getSubjectExpansionProperties(recipe);
    final Set<IRI> objectExpansionProperties = getObjectExpansionProperties(recipe);
    final Set<RDFTerm> result = new HashSet<RDFTerm>();
    if (!subjectExpansionProperties.isEmpty() || !objectExpansionProperties.isEmpty()) {
        for (Triple triple : tc) {
            final IRI predicate = triple.getPredicate();
            if (subjectExpansionProperties.contains(predicate)) {
                result.add(triple.getSubject());
            }
            if (objectExpansionProperties.contains(predicate)) {
                result.add(triple.getObject());
            }
        }
    }
    return result;
}
Also used: Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet)
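
Since Graph extends java.util.Collection<Triple>, a full scan like the one above is just an enhanced for loop; filter(..) is only needed when an index lookup can narrow the iteration. A small self-contained illustration of that iteration style (the urn:example IRIs are made up):

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class GraphScanSketch {
    public static void main(String[] args) {
        Graph graph = new SimpleGraph();
        IRI s = new IRI("urn:example:subject");
        IRI p = new IRI("urn:example:predicate");
        IRI o = new IRI("urn:example:object");
        graph.add(new TripleImpl(s, p, o));

        // Graph is a Collection<Triple>, so it supports the enhanced for loop
        for (Triple triple : graph) {
            System.out.println(triple.getSubject() + " " + triple.getPredicate()
                    + " " + triple.getObject());
        }
    }
}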

Example 80 with Triple

Use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

From the class EventJobManagerImpl, method logExecutionMetadata.

/**
 * Logs the ExecutionMetadata
 * @param logger the logger to log the execution metadata to
 * @param job the enhancement job to log the execution metadata for
 * @param isWarn if <code>true</code> the data are logged with <code>WARN</code> level.
 * If <code>false</code> the <code>DEBUG</code> level is used
 */
protected void logExecutionMetadata(Logger logger, EnhancementJob job, boolean isWarn) {
    // only build the (potentially large) message if the target logger will actually emit it
    if ((isWarn && logger.isWarnEnabled()) || (!isWarn && logger.isDebugEnabled())) {
        StringBuilder message = new StringBuilder(1024);
        message.append("ExecutionMetadata for ContentItem ").append(job.getContentItem().getUri()).append(" and Chain ").append(job.getChainName()).append(": \n");
        boolean serialized = false;
        if (serializer != null) {
            ByteArrayOutputStream bout = new ByteArrayOutputStream();
            try {
                serializer.serialize(bout, job.getExecutionMetadata(), SupportedFormat.TURTLE);
                message.append(bout.toString("utf-8"));
                serialized = true;
            } catch (RuntimeException | UnsupportedEncodingException e) {
                log.warn("   ... unable to serialize Execution Metadata | {}: {}", e.getClass(), e.getMessage());
            }
        }
        if (!serialized) {
            // TURTLE serialization not possible ... use the toString method of triple
            for (Triple t : job.getExecutionMetadata()) {
                message.append(t.toString()).append('\n');
            }
        }
        // finally write the serialized graph to the logger
        if (isWarn) {
            logger.warn(message.toString());
        } else {
            logger.debug(message.toString());
        }
    }
}
Also used: Triple(org.apache.clerezza.commons.rdf.Triple) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream)
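
The serialize-or-fall-back idiom above is useful whenever the Serializer service may lack a TURTLE provider. Here is a condensed sketch, assuming the Clerezza Serializer obtained via getInstance() (the non-OSGi accessor; in OSGi the service is usually injected):

import java.io.ByteArrayOutputStream;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.rdf.core.serializedform.Serializer;
import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;

public class SerializeOrFallback {
    static String toTurtleOrToString(Graph graph) {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        try {
            Serializer.getInstance().serialize(bout, graph, SupportedFormat.TURTLE);
            return bout.toString("UTF-8");
        } catch (Exception e) {
            // no TURTLE provider registered (or an encoding problem):
            // fall back to the toString() of each triple
            StringBuilder sb = new StringBuilder();
            for (Triple t : graph) {
                sb.append(t).append('\n');
            }
            return sb.toString();
        }
    }
}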

Aggregations

Triple (org.apache.clerezza.commons.rdf.Triple): 151
IRI (org.apache.clerezza.commons.rdf.IRI): 88
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 84
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 70
Graph (org.apache.clerezza.commons.rdf.Graph): 45
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 41
HashSet (java.util.HashSet): 34
Literal (org.apache.clerezza.commons.rdf.Literal): 30
ArrayList (java.util.ArrayList): 27
Lock (java.util.concurrent.locks.Lock): 21
HashMap (java.util.HashMap): 20
SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 19
OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID): 19
IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 15
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 12
Test (org.junit.Test): 12
EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 10
ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph): 9
GraphNode (org.apache.clerezza.rdf.utils.GraphNode): 8
IOException (java.io.IOException): 7