Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in the Apache Stanbol project:
class CeliMorphoFeatures, method featuresAsTriples.
/**
 * Converts the morphological features held by this instance into RDF triples
 * attached to the given fise:TextAnnotation.
 * <p>
 * The lemma is written as a plain literal in the parsed language; all other
 * features (POS categories, number, person, gender, definiteness, case,
 * verb mood and tense) are written as resource links to the URIs of the
 * corresponding ontology concepts. Unmapped/partial tags (e.g. a NumberTag
 * without a number) are silently skipped.
 *
 * @param textAnnotation the fise:TextAnnotation the features are added to
 * @param lang the language used for the lemma literal
 * @return the collection of created triples (never null, possibly empty
 *         apart from the lemma triple)
 */
public Collection<? extends Triple> featuresAsTriples(IRI textAnnotation, Language lang) {
    // ArrayList instead of the legacy synchronized Vector: the collection is
    // built locally and returned, so no synchronization is needed.
    Collection<TripleImpl> result = new ArrayList<TripleImpl>();
    result.add(new TripleImpl(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm,
            new PlainLiteralImpl(getLemma(), lang)));
    // POS tags: only tags mapped to Olia LexicalCategories produce rdf:type triples
    for (PosTag pos : getPosList()) {
        if (pos.isMapped()) {
            for (LexicalCategory cat : pos.getCategories()) {
                result.add(new TripleImpl(textAnnotation, RDF_TYPE, cat.getUri()));
            }
        }
    }
    for (NumberTag num : getNumberList()) {
        if (num.getNumber() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_NUMBER, num.getNumber().getUri()));
        }
    }
    for (Person pers : getPersonList()) {
        result.add(new TripleImpl(textAnnotation, HAS_PERSON, pers.getUri()));
    }
    for (GenderTag gender : getGenderList()) {
        if (gender.getGender() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_GENDER, gender.getGender().getUri()));
        }
    }
    for (Definitness def : getDefinitnessList()) {
        result.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, def.getUri()));
    }
    for (CaseTag caseFeat : getCaseList()) {
        if (caseFeat.getCase() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_CASE, caseFeat.getCase().getUri()));
        }
    }
    for (VerbMoodTag vf : getVerbMoodList()) {
        if (vf.getVerbForm() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_MOOD, vf.getVerbForm().getUri()));
        }
    }
    for (TenseTag tense : getTenseList()) {
        if (tense.getTense() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_TENSE, tense.getTense().getUri()));
        }
    }
    return result;
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in the Apache Stanbol project:
class CeliClassificationEnhancementEngine, method computeEnhancements.
/**
 * Enhances the parsed {@link ContentItem} by sending its plain-text content
 * to the CELI classification service and converting the returned concepts
 * into fise:TopicAnnotations (as defined by STANBOL-617).
 *
 * @param ci the content item to enhance
 * @throws EngineException on IO or SOAP level errors while calling the
 *         remote CELI classification service
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    //NOTE: in the computeEnhancements method one can re-check metadata already
    // checked within the canEnhance method. This is not required, but it
    // may help to identify potential bugs in the EnhancementJobManager
    // implementation
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (!isLangSupported(language)) {
        // fixed message: added missing closing quote and corrected grammar
        throw new IllegalStateException("Call to computeEnhancement with unsupported language '"
                + language + "' for ContentItem " + ci.getUri() + ": This is also checked "
                + "in the canEnhance method! -> This indicates a bug in the "
                + "implementation of the EnhancementJobManager!");
    }
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE
                + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance "
                + "method! -> This indicates a bug in the implementation of "
                + "the EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        // nothing to classify; not an error, simply skip this ContentItem
        log.info("No text contained in ContentPart {} of ContentItem {}",
                contentPart.getKey(), ci.getUri());
        return;
    }
    //NOTE: EnhancementEngine implementations should pass all Exceptions
    // (RuntimeExceptions as is and others wrapped as EngineExceptions).
    // The EnhancementJobManager implementation has to catch and
    // process all those. Handling depends on the configuration of the
    // EnhancementChain (e.g. if this engine is optional enhancement of
    // the ContentItem will continue).
    // This is important as otherwise Users would get "200 ok" replies
    // for enhancement requests that have failed!
    //
    // This means that:
    // * Http clients should pass on IOExceptions and SOAPExceptions
    // * No try/catch that also includes RuntimeExceptions
    List<Concept> lista;
    try {
        lista = this.client.extractConcepts(text, language);
    } catch (IOException e) {
        //re-throw exceptions as EngineException
        throw new EngineException("Error while calling the CELI classification"
                + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        // fixed message typo: "wile" -> "while"
        throw new EngineException("Error while encoding/decoding the request/"
                + "response to the CELI classification service!", e);
    }
    if (lista.isEmpty()) {
        //nothing to do
        return;
    }
    Graph g = ci.getMetadata();
    //NOTE: EnhancementEngines that use "ENHANCE_ASYNC" need to acquire a
    // writeLock before modifications to the enhancement metadata
    ci.getLock().writeLock().lock();
    try {
        //see STANBOL-617 for rules how to encode extracted topics
        //we need a single TextAnnotation to link all TopicAnnotations
        IRI textAnnotation = createTextEnhancement(ci, this);
        // add the dc:type skos:Concept
        g.add(new TripleImpl(textAnnotation, DC_TYPE, SKOS_CONCEPT));
        //now create the fise:TopicAnnotations (one per extracted concept)
        for (Concept ne : lista) {
            IRI topicAnnotation = EnhancementEngineHelper.createTopicEnhancement(ci, this);
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_REFERENCE, ne.getUri()));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_LABEL,
                    new PlainLiteralImpl(ne.getLabel())));
            //TODO: currently owl:Class is used as entity-type, because that is
            // what the linked dbpedia ontology resources are.
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_TYPE, OWL_CLASS));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_CONFIDENCE,
                    literalFactory.createTypedLiteral(ne.getConfidence())));
            //link to the TextAnnotation
            g.add(new TripleImpl(topicAnnotation, DC_RELATION, textAnnotation));
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in the Apache Stanbol project:
class ZemantaEnhancementEngine, method processRecognition.
/**
* Processes all Zemanta Recognitions and converts them to the according
* FISE enhancements
*
* @param results the results of the Zemanta enhancement process
* @param enhancements the graph containing the current Stanbol Enhancer
* enhancements
* @param text the content of the content item as string
*/
/**
 * Processes all Zemanta Recognitions and converts them to the according
 * FISE enhancements (one fise:EntityAnnotation per recognition, linked to
 * the fise:TextAnnotations created for the anchor text).
 *
 * @param results the results of the Zemanta enhancement process
 * @param enhancements the graph containing the current Stanbol Enhancer
 * enhancements
 * @param text the content of the content item as string
 * @param ciId the IRI of the enhanced content item
 */
protected void processRecognition(Graph results, Graph enhancements, String text, IRI ciId) {
    Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Recognition.getUri());
    while (recognitions.hasNext()) {
        BlankNodeOrIRI recognition = recognitions.next().getSubject();
        log.debug("process recognition " + recognition);
        //first get everything we need for the textAnnotations
        Double confidence = parseConfidence(results, recognition);
        log.debug(" > confidence :" + confidence);
        String anchor = EnhancementEngineHelper.getString(results, recognition, ZemantaOntologyEnum.anchor.getUri());
        log.debug(" > anchor :" + anchor);
        Collection<BlankNodeOrIRI> textAnnotations = processTextAnnotation(enhancements, text, ciId, anchor, confidence);
        log.debug(" > number of textAnnotations :" + textAnnotations.size());
        //second we need to create the EntityAnnotation that represent the
        //recognition
        BlankNodeOrIRI object = EnhancementEngineHelper.getReference(results, recognition, ZemantaOntologyEnum.object.getUri());
        log.debug(" > object :" + object);
        //The targets represent the linked entities
        // ... and yes there can be more of them!
        //TODO: can we create an EntityAnnotation with several referred entities?
        // Should we use the owl:sameAs to decide that!
        // collect the owl:sameAs references with an explicit loop (the former
        // empty-bodied for statement was easy to misread)
        Set<IRI> sameAsSet = new HashSet<IRI>();
        Iterator<IRI> sameAs = getReferences(results, object, ZemantaOntologyEnum.owlSameAs.getUri());
        while (sameAs.hasNext()) {
            sameAsSet.add(sameAs.next());
        }
        log.debug(" > sameAs :" + sameAsSet);
        //now parse the targets and look if there are others than the one
        //merged by using sameAs
        Iterator<IRI> targets = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.target.getUri());
        String title = null;
        while (targets.hasNext()) {
            //the entityRef is the URL of the target
            IRI entity = targets.next();
            log.debug(" - target :" + entity);
            IRI targetType = EnhancementEngineHelper.getReference(results, entity, ZemantaOntologyEnum.targetType.getUri());
            log.debug(" o type :" + targetType);
            if (ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) {
                String targetTitle = EnhancementEngineHelper.getString(results, entity, ZemantaOntologyEnum.title.getUri());
                log.debug(" o title :" + targetTitle);
                if (sameAsSet.contains(entity)) {
                    if (title == null) {
                        title = targetTitle;
                    } else if (!title.equals(targetTitle)) {
                        log.warn("Entities marked with owl:sameAs do use different labels '" + title + "' != '" + targetTitle + "'!");
                    }
                    //else the same label used by both -> thats expected
                } else {
                    //maybe we should create an second entityEnhancement, but I think, that such a case should
                    //not happen. So write an warning for now
                    log.warn("Found Target with type RDF, that is not linked with owl:sameAs to the others (this: '" + entity + " | sameAs: " + sameAsSet + ")");
                    log.warn(" - no Enhancement for " + entity + " will be created");
                }
            }
            //else -> do not process -> RDF Entities only
            //TODO: targetTypes are not parsed by Zemanta, therefore we can not set
            // any entity types!
        }
        //create the entityEnhancement
        IRI entityEnhancement = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
        if (confidence != null) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
        }
        for (BlankNodeOrIRI relatedTextAnnotation : textAnnotations) {
            enhancements.add(new TripleImpl(entityEnhancement, DC_RELATION, relatedTextAnnotation));
        }
        for (IRI entity : sameAsSet) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_REFERENCE, entity));
        }
        // only write the label when an RDF target actually provided a title:
        // the previous unconditional call created an invalid literal (or
        // failed) when title was still null
        if (title != null) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title)));
        } else {
            log.warn("No RDF target with a title found for " + object + " -> no fise:entity-label written");
        }
    }
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in the Apache Stanbol project:
class ZemantaEnhancementEngine, method processTextAnnotation.
/**
 * Searches/creates text annotations for anchor points of Zemanta
 * extractions.
 * <p>
 * First this method searches for existing text annotations that use the
 * anchor as selected text. Second it searches for occurrences of the anchor
 * within the content item's text and checks whether a text annotation
 * already exists for that occurrence. If not, it creates a new one,
 * including fise:start/fise:end offsets, the selected text and a selection
 * context snippet of up to SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE characters
 * on each side (snapped to word boundaries where possible).
 *
 * @param enhancements the graph containing the enhancement metadata
 * @param text the content as string
 * @param ciId the ID of the content item
 * @param anchor the anchor text to search for
 * @param confidence the confidence to be used for newly created text
 *        annotations (may be null; then no confidence triple is written)
 *
 * @return a collection of all existing/created text annotations for the
 *         parsed anchor
 */
private Collection<BlankNodeOrIRI> processTextAnnotation(Graph enhancements, String text, IRI ciId, String anchor, Double confidence) {
Collection<BlankNodeOrIRI> textAnnotations = new ArrayList<BlankNodeOrIRI>();
int anchorLength = anchor.length();
Literal anchorLiteral = new PlainLiteralImpl(anchor);
//first search for existing TextAnnotations for the anchor
//(map key is the fise:start offset of the existing annotation)
Map<Integer, Collection<BlankNodeOrIRI>> existingTextAnnotationsMap = searchExistingTextAnnotations(enhancements, anchorLiteral);
//iterate over every occurrence of the anchor within the text
for (int current = text.indexOf(anchor); current >= 0; current = text.indexOf(anchor, current + 1)) {
Collection<BlankNodeOrIRI> existingTextAnnotations = existingTextAnnotationsMap.get(current);
if (existingTextAnnotations != null) {
//use the existing once
textAnnotations.addAll(existingTextAnnotations);
} else {
//we need to create an new one!
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId);
textAnnotations.add(textAnnotation);
//write the selection (start/end character offsets and the selected text)
enhancements.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(current)));
enhancements.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(current + anchorLength)));
enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, anchorLiteral));
//extract the selection context: up to SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE
//characters before and after the anchor, snapped to word boundaries
int beginPos;
if (current <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE) {
//anchor is close to the start of the text -> context starts at 0
beginPos = 0;
} else {
int start = current - SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
//search forward from 'start' for the next space so the context
//begins at a word boundary
beginPos = text.indexOf(' ', start);
if (beginPos < 0 || beginPos >= current) {
//no words
//begin within a word
beginPos = start;
}
}
int endPos;
if (current + anchorLength + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= text.length()) {
//anchor is close to the end of the text -> context ends at text end
endPos = text.length();
} else {
int start = current + anchorLength + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
//search backward from 'start' for the previous space so the context
//ends at a word boundary (note the asymmetry with beginPos above)
endPos = text.lastIndexOf(' ', start);
if (endPos <= current + anchorLength) {
//end within a word;
endPos = start;
}
}
enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(text.substring(beginPos, endPos))));
//NOTE(review): the confidence written here presumably relates to the
// annotated Entity rather than to the selected text.
if (confidence != null) {
enhancements.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
}
//TODO: No idea about the type of the Annotation, because we do not have an type of the entity!
// One would need to get the types from the referred Source
}
}
return textAnnotations;
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in the Apache Stanbol project:
class ExistingClerezzaGraphTest, method initTestData.
/**
* Initialises the {@link #entityData} used for this test (called in BeforeClass)
*/
/**
 * Populates {@link #entityData} with the two SKOS concepts used by this
 * test (called from the BeforeClass setup).
 */
private static void initTestData() {
    // entity1: top-level concept with English and German preferred labels
    IRI broaderConcept = new IRI("http://www.test.org/entity1");
    Graph broaderData = new SimpleGraph();
    broaderData.add(new TripleImpl(broaderConcept, RDF.type, SKOS.Concept));
    broaderData.add(new TripleImpl(broaderConcept, SKOS.prefLabel, new PlainLiteralImpl("test", EN)));
    broaderData.add(new TripleImpl(broaderConcept, SKOS.prefLabel, new PlainLiteralImpl("Test", DE)));
    entityData.put(broaderConcept, broaderData);

    // entity2: narrower concept linked to entity1 via skos:broader
    IRI narrowerConcept = new IRI("http://www.test.org/entity2");
    Graph narrowerData = new SimpleGraph();
    narrowerData.add(new TripleImpl(narrowerConcept, RDF.type, SKOS.Concept));
    narrowerData.add(new TripleImpl(narrowerConcept, SKOS.prefLabel, new PlainLiteralImpl("sub-test", EN)));
    narrowerData.add(new TripleImpl(narrowerConcept, SKOS.prefLabel, new PlainLiteralImpl("Untertest", DE)));
    narrowerData.add(new TripleImpl(narrowerConcept, SKOS.broader, broaderConcept));
    entityData.put(narrowerConcept, narrowerData);
}
Aggregations