Search in sources :

Example 96 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class SpotlightEngineUtils method createTextEnhancement.

/**
 * Creates a fise:TextAnnotation describing the parsed surface form and adds
 * it to the {@link ContentItem#getMetadata() metadata} of the content item.
 * <p>
 * This method assumes a write lock on the parsed content item.
 *
 * @param occ the SurfaceForm (its name, offset and optional type are written)
 * @param engine the Engine creating the enhancement
 * @param ci the ContentItem
 * @param content the content the selection context is extracted from
 * @param lang the language of the content or <code>null</code>
 * @return the URI of the created fise:TextAnnotation
 */
public static IRI createTextEnhancement(SurfaceForm occ, EnhancementEngine engine, ContentItem ci, String content, Language lang) {
    final Graph metadata = ci.getMetadata();
    final IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, engine);
    final String surface = occ.name;

    // selected text and its character span; the end is start + text length
    metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT,
            new PlainLiteralImpl(surface, lang)));
    metadata.add(new TripleImpl(annotation, ENHANCER_START,
            literalFactory.createTypedLiteral(occ.offset)));
    metadata.add(new TripleImpl(annotation, ENHANCER_END,
            literalFactory.createTypedLiteral(occ.offset + surface.length())));

    // the dc:type is optional: only written for a non-empty type value
    final String type = occ.type;
    final boolean typed = type != null && !type.isEmpty();
    if (typed) {
        metadata.add(new TripleImpl(annotation, DC_TYPE, new IRI(type)));
    }

    final String context = getSelectionContext(content, surface, occ.offset);
    metadata.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT,
            new PlainLiteralImpl(context, lang)));
    return annotation;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 97 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementEngine method createEnhancements.

/**
 * Generates the enhancement structures for the entities returned by DBpedia
 * Spotlight and adds them to the metadata of the content item. For each
 * surface form one TextAnnotation is created, plus one EntityAnnotation per
 * candidate resource of that surface form.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param text
 *            the text passed on when creating the TextAnnotations
 * @param language
 *            the language passed on when creating the TextAnnotations
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String text, Language language) {
    // TODO create TextEnhancement (form, start, end, type?)
    final HashMap<String, IRI> annotationsByName = new HashMap<String, IRI>();
    final Graph metadata = ci.getMetadata();
    for (SurfaceForm surfaceForm : occs) {
        final IRI textAnnotation = SpotlightEngineUtils.createTextEnhancement(surfaceForm, this, ci, text, language);
        // one EntityAnnotation per candidate, remembered by the candidate's local name
        for (Iterator<CandidateResource> candidates = surfaceForm.resources.iterator(); candidates.hasNext();) {
            final CandidateResource candidate = candidates.next();
            final IRI entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(candidate, this, ci, textAnnotation);
            annotationsByName.put(candidate.localName, entityAnnotation);
        }
        if (!annotationsByName.containsKey(surfaceForm.name)) {
            // nothing registered under the surface form's name yet: register the TextAnnotation
            annotationsByName.put(surfaceForm.name, textAnnotation);
            continue;
        }
        // an annotation is already registered under this name: link it to the TextAnnotation
        metadata.add(new TripleImpl(annotationsByName.get(surfaceForm.name), DC_RELATION, textAnnotation));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) CandidateResource(org.apache.stanbol.enhancer.engines.dbpspotlight.model.CandidateResource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 98 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class SpotlightEngineUtils method createEntityAnnotation.

/**
 * Creates a fise:EntityAnnotation for the parsed annotation and adds it to
 * the {@link ContentItem#getMetadata() metadata} of the content item.
 * <p>
 * This method assumes a write lock on the parsed content item.
 *
 * @param annotation the Annotation
 * @param engine the engine
 * @param ci the content item
 * @param textAnnotation the TextAnnotation the created
 * EntityAnnotation links to by using dc:relation
 * @param language the language of the label of the referenced
 * Entity (or <code>null</code> if none).
 */
public static void createEntityAnnotation(Annotation annotation, EnhancementEngine engine, ContentItem ci, IRI textAnnotation, Language language) {
    final Graph metadata = ci.getMetadata();
    final IRI entity = EnhancementEngineHelper.createEntityEnhancement(ci, engine);

    // link to the TextAnnotation, then describe the referenced entity
    final Literal entityLabel = new PlainLiteralImpl(annotation.surfaceForm.name, language);
    metadata.add(new TripleImpl(entity, DC_RELATION, textAnnotation));
    metadata.add(new TripleImpl(entity, ENHANCER_ENTITY_LABEL, entityLabel));
    metadata.add(new TripleImpl(entity, ENHANCER_ENTITY_REFERENCE, annotation.uri));

    // set the fise:entity-type (one triple per type name)
    for (String typeName : annotation.getTypeNames()) {
        metadata.add(new TripleImpl(entity, ENHANCER_ENTITY_TYPE, new IRI(typeName)));
    }

    //TODO (rwesten): Please check: the similarityScore is used as fise:confidence value
    final Literal confidence = literalFactory.createTypedLiteral(annotation.similarityScore);
    metadata.add(new TripleImpl(entity, ENHANCER_CONFIDENCE, confidence));

    // add spotlight specific information
    final Literal secondRank = literalFactory.createTypedLiteral(annotation.percentageOfSecondRank);
    metadata.add(new TripleImpl(entity, PROPERTY_PERCENTAGE_OF_SECOND_RANK, secondRank));
    final Literal support = literalFactory.createTypedLiteral(annotation.support);
    metadata.add(new TripleImpl(entity, PROPERTY_SUPPORT, support));
    final Literal similarity = literalFactory.createTypedLiteral(annotation.similarityScore);
    metadata.add(new TripleImpl(entity, PROPERTY_SIMILARITY_SCORE, similarity));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 99 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class TextAnnotationsNewModelEngine method computeEnhancements.

/**
 * Computes the enhancements on the provided ContentItem.
 * <p>
 * For every fise:TextAnnotation in the metadata that does not yet have all
 * of fise:selection-prefix, fise:selection-suffix and fise:selected-text,
 * the missing values are derived from the plain text content by using the
 * fise:start and fise:end values of the annotation:
 * <ul>
 * <li>a missing prefix is added if a valid fise:start is present,
 * <li>a missing suffix is added if a valid fise:end is present,
 * <li>a missing selected text (or head/tail for selections longer than
 * three times the prefix/suffix size) is added if both a valid fise:start
 * and fise:end were read. Both are only read when prefix and suffix are
 * missing.
 * </ul>
 * Offsets outside of <code>[0, text.length()]</code>, or an end before the
 * start, are logged and ignored so that they can not cause an
 * {@link IndexOutOfBoundsException} when the text is sliced.
 * <p>
 * The metadata is only read while holding the read lock. The collected
 * triples are added afterwards while holding the write lock.
 *
 * @param contentItem the content item to process
 * @throws EngineException if the plain text Blob can not be read
 */
@Override
public void computeEnhancements(ContentItem contentItem) throws EngineException {
    Entry<IRI, Blob> textBlob = getBlob(contentItem, supportedMimeTypes);
    if (textBlob == null) {
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(contentItem);
    Language lang = language == null ? null : new Language(language);
    String text;
    try {
        text = ContentItemHelper.getText(textBlob.getValue());
    } catch (IOException e) {
        throw new EngineException(this, contentItem, "Unable to read Plain Text Blob", e);
    }
    Set<Triple> addedTriples = new HashSet<Triple>();
    Graph metadata = contentItem.getMetadata();
    //extract all the necessary information within a read lock
    contentItem.getLock().readLock().lock();
    try {
        Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (it.hasNext()) {
            BlankNodeOrIRI ta = it.next().getSubject();
            boolean hasPrefix = metadata.filter(ta, ENHANCER_SELECTION_PREFIX, null).hasNext();
            boolean hasSuffix = metadata.filter(ta, ENHANCER_SELECTION_SUFFIX, null).hasNext();
            boolean hasSelected = metadata.filter(ta, ENHANCER_SELECTED_TEXT, null).hasNext();
            if (hasPrefix && hasSuffix && hasSelected) {
                //this TextAnnotation already uses the new model
                continue;
            }
            //fise:start is only needed (and validated) if the prefix is missing
            Integer start;
            if (!hasPrefix) {
                start = EnhancementEngineHelper.get(metadata, ta, ENHANCER_START, Integer.class, lf);
                if (start == null) {
                    log.debug("unable to add fise:selection-prefix to TextAnnotation {} " + "because fise:start is not present", ta);
                } else if (start < 0) {
                    log.warn("fise:start {} of TextAnnotation {} < 0! " + "Will not transform this TextAnnotation", start, ta);
                    //ignore the invalid value (as logged) instead of slicing the
                    //text with an offset the annotation never referred to
                    start = null;
                } else if (start > text.length()) {
                    //would cause an IndexOutOfBoundsException in substring(..)
                    log.warn("fise:start {} of TextAnnotation {} > as the content length {}! " + "Will not transform this TextAnnotation", start, ta, text.length());
                    start = null;
                }
            } else {
                start = null;
            }
            //fise:end is only needed (and validated) if the suffix is missing
            Integer end;
            if (!hasSuffix) {
                end = EnhancementEngineHelper.get(metadata, ta, ENHANCER_END, Integer.class, lf);
                if (end == null) {
                    log.debug("unable to add fise:selection-suffix to TextAnnotation {} " + "because fise:end is not present", ta);
                } else if (end < 0) {
                    //would cause an IndexOutOfBoundsException in substring(..)
                    log.warn("fise:end {} of TextAnnotation {} < 0! " + "Will not transform this TextAnnotation", end, ta);
                    end = null;
                } else if (end > text.length()) {
                    log.warn("fise:end {} of TextAnnotation {} > as the content length {}! " + "Will not transform this TextAnnotation", end, ta, text.length());
                    end = null;
                } else if (start != null && end < start) {
                    log.warn("fise:end {} < fise:start {} of TextAnnotation {}! " + "Will not transform this TextAnnotation", end, start, ta);
                    end = null;
                    start = null;
                }
            } else {
                end = null;
            }
            //at this point: 0 <= start <= text.length() and 0 <= end <= text.length()
            //(if not null) and start <= end if both are present
            if (!hasPrefix && start != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_PREFIX, new PlainLiteralImpl(text.substring(Math.max(0, start - prefixSuffixSize), start), lang)));
            }
            if (!hasSuffix && end != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_SUFFIX, new PlainLiteralImpl(text.substring(end, Math.min(text.length(), end + prefixSuffixSize)), lang)));
            }
            if (!hasSelected && start != null && end != null) {
                //This adds missing fise:selected or fise:head/fise:tail if the selected text is to long
                int length = end - start;
                if (length > 3 * prefixSuffixSize) {
                    //add prefix/suffix
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_HEAD, new PlainLiteralImpl(text.substring(start, start + prefixSuffixSize), lang)));
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_TAIL, new PlainLiteralImpl(text.substring(end - prefixSuffixSize, end), lang)));
                } else {
                    //add missing fise:selected
                    String selection = text.substring(start, end);
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selection, lang)));
                    //check if we should also add an selection context
                    if (!metadata.filter(ta, ENHANCER_SELECTION_CONTEXT, null).hasNext()) {
                        addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(EnhancementEngineHelper.getSelectionContext(text, selection, start), lang)));
                    }
                }
            }
        }
    } finally {
        contentItem.getLock().readLock().unlock();
    }
    //finally write the prefix/suffix triples within a write lock
    if (!addedTriples.isEmpty()) {
        contentItem.getLock().writeLock().lock();
        try {
            metadata.addAll(addedTriples);
        } finally {
            contentItem.getLock().writeLock().unlock();
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItemHelper.getBlob(org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.getBlob) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IOException(java.io.IOException) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)

Example 100 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class SentimentSummarizationEngine method writeSentimentEnhancements.

/**
 * Writes the sentiment enhancements for the parsed phrases to the metadata
 * of the content item. Depending on the engine configuration this writes
 * one fise:TextAnnotation per sentiment phrase, the sentiment of each
 * sentence (phrases are grouped by consecutive equal sentences) and the
 * sentiment of the whole document.
 *
 * @param ci the content item
 * @param sentimentPhrases the sentiment phrases to write
 * @param at the analysed text the phrases refer to
 * @param lang the language used for the created literals
 */
private void writeSentimentEnhancements(ContentItem ci, List<SentimentPhrase> sentimentPhrases, AnalysedText at, Language lang) {
    final Graph graph = ci.getMetadata();
    final List<SentimentPhrase> phrasesOfSentence = new ArrayList<SentimentPhrase>();
    Sentence activeSentence = null;
    for (SentimentPhrase phrase : sentimentPhrases) {
        final Sentence sentence = phrase.getSentence();
        if (log.isDebugEnabled()) {
            //debug sentiment info
            CharSequence loggedPhrase = at.getText().subSequence(phrase.getStartIndex(), phrase.getEndIndex());
            final Object sentenceInfo;
            if (sentence == null) {
                sentenceInfo = "none";
            } else if (sentence.getSpan().length() > 17) {
                sentenceInfo = sentence.getSpan().subSequence(0, 17) + "...";
            } else {
                sentenceInfo = sentence.getSpan();
            }
            log.debug("Write SentimentPhrase for {} (sentence: {})", loggedPhrase, sentenceInfo);
            List<Sentiment> sentiments = phrase.getSentiments();
            log.debug(" > {} Sentiments:", sentiments.size());
            int position = 0;
            for (Sentiment sentiment : sentiments) {
                position++;
                log.debug("    {}. {}", position, sentiment);
            }
        }
        if (writeSentimentPhrases) {
            final IRI enhancement = createTextEnhancement(ci, this);
            final String selected = at.getSpan().substring(phrase.getStartIndex(), phrase.getEndIndex());
            graph.add(new TripleImpl(enhancement, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selected, lang)));
            // the sentence (if known) is the context; otherwise one is cut out of the text
            final String context = sentence == null
                    ? getSelectionContext(at.getSpan(), selected, phrase.getStartIndex())
                    : sentence.getSpan();
            graph.add(new TripleImpl(enhancement, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(context, lang)));
            graph.add(new TripleImpl(enhancement, ENHANCER_START, lf.createTypedLiteral(phrase.getStartIndex())));
            graph.add(new TripleImpl(enhancement, ENHANCER_END, lf.createTypedLiteral(phrase.getEndIndex())));
            // positive/negative sentiments are optional
            if (phrase.getPositiveSentiment() != null) {
                graph.add(new TripleImpl(enhancement, POSITIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(phrase.getPositiveSentiment())));
            }
            if (phrase.getNegativeSentiment() != null) {
                graph.add(new TripleImpl(enhancement, NEGATIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(phrase.getNegativeSentiment())));
            }
            graph.add(new TripleImpl(enhancement, SENTIMENT_PROPERTY, lf.createTypedLiteral(phrase.getSentiment())));
            //add the Sentiment type as well as the type of the SSO Ontology
            graph.add(new TripleImpl(enhancement, DC_TYPE, SENTIMENT_TYPE));
            final IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(SpanTypeEnum.Chunk);
            if (ssoType != null) {
                graph.add(new TripleImpl(enhancement, DC_TYPE, ssoType));
            }
        }
        if (writeSentencesSentimet && sentence != null) {
            if (!sentence.equals(activeSentence)) {
                // a new sentence starts: flush the phrases collected so far and reset
                writeSentiment(ci, activeSentence, phrasesOfSentence);
                activeSentence = sentence;
                phrasesOfSentence.clear();
            }
            phrasesOfSentence.add(phrase);
        }
    }
    // flush the phrases of the last sentence
    if (!phrasesOfSentence.isEmpty()) {
        writeSentiment(ci, activeSentence, phrasesOfSentence);
    }
    if (writeDocumentSentiment) {
        writeSentiment(ci, at, sentimentPhrases);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) Graph(org.apache.clerezza.commons.rdf.Graph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence)

Aggregations

TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)143 IRI (org.apache.clerezza.commons.rdf.IRI)104 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)69 Graph (org.apache.clerezza.commons.rdf.Graph)66 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)49 Triple (org.apache.clerezza.commons.rdf.Triple)41 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)26 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)23 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)20 Literal (org.apache.clerezza.commons.rdf.Literal)20 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)20 IOException (java.io.IOException)18 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)17 Test (org.junit.Test)16 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)15 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)14 HashSet (java.util.HashSet)13 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)13 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)11