use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class TopicClassificationEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with a supported Mime Type" + "found for ContentItem " + ci.getUri() + "(supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was" + "NOT called and indicates a bug in the used EnhancementJobManager!");
}
String language = EnhancementEngineHelper.getLanguage(ci);
if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language) || acceptedLanguageSet.contains(""))) {
throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as " + " active for this Engine (active: " + acceptedLanguageSet + ").");
}
String text;
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(String.format("Unable to extract " + " textual content from ContentPart %s of ContentItem %s!", contentPart.getKey(), ci.getUri()), e);
}
if (text.trim().isEmpty()) {
log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
return;
}
Graph metadata = ci.getMetadata();
List<TopicSuggestion> topics;
try {
topics = suggestTopics(text);
if (topics.isEmpty()) {
return;
}
} catch (ClassifierException e) {
throw new EngineException(e);
}
IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
LiteralFactory lf = LiteralFactory.getInstance();
ci.getLock().writeLock().lock();
try {
// Global text annotation to attach all the topic annotation to it.
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
for (TopicSuggestion topic : topics) {
IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
// add link to entity
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
// add confidence information
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
// add performance estimates of the classifier if available
ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
if (perf.uptodate) {
metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
}
// fetch concept label from the entityhub or a referenced site if available
Entity entity = entityhub.getEntity(topic.conceptUri);
if (entity == null) {
entity = referencedSiteManager.getEntity(topic.conceptUri);
}
if (entity != null) {
Representation representation = entity.getRepresentation();
// TODO: extract all languages based on some configuration instead of hardcoding English
Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
if (label == null) {
label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
}
if (label != null) {
metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
}
}
}
} catch (ClassifierException e) {
throw new EngineException(e);
} catch (IllegalArgumentException e) {
throw new EngineException(e);
} catch (EntityhubException e) {
throw new EngineException(e);
} finally {
ci.getLock().writeLock().unlock();
}
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class UIMAToTriples method computeEnhancements.
public void computeEnhancements(ContentItem ci) throws EngineException {
FeatureStructureListHolder holder;
LiteralFactory literalFactory = LiteralFactory.getInstance();
try {
IRI uimaIRI = new IRI(uimaUri);
logger.info(new StringBuilder("Trying to load holder for ref:").append(uimaUri).toString());
holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
for (String source : sourceNames) {
logger.info(new StringBuilder("Processing UIMA source:").append(source).toString());
List<FeatureStructure> sourceList = holder.getFeatureStructureList(source);
if (sourceList != null) {
logger.info(new StringBuilder("UIMA source:").append(source).append(" contains ").append(sourceList.size()).append(" annotations.").toString());
} else {
logger.info(new StringBuilder("Source list is null:").append(source).toString());
continue;
}
for (FeatureStructure fs : sourceList) {
String typeName = fs.getTypeName();
logger.debug(new StringBuilder("Checking ").append(typeName).toString());
if (tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
logger.debug(new StringBuilder("Adding ").append(typeName).toString());
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
Graph metadata = ci.getMetadata();
String uriRefStr = uimaUri + ":" + typeName;
if (mappings.containsKey(typeName)) {
uriRefStr = mappings.get(typeName);
}
metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(uriRefStr)));
if (fs.getFeature("begin") != null) {
metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
}
if (fs.getFeature("end") != null) {
metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
}
if (fs.getCoveredText() != null && !fs.getCoveredText().isEmpty()) {
metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(fs.getCoveredText())));
}
for (Feature f : fs.getFeatures()) {
if (!f.getName().equals("begin") && !f.getName().equals("end") && tnfs.checkFeatureToConvert(typeName, f)) {
String predRefStr = uimaUri + ":" + f.getName();
if (mappings.containsKey(f.getName())) {
predRefStr = mappings.get(f.getName());
}
IRI predicate = new IRI(predRefStr);
metadata.add(new TripleImpl(textAnnotation, predicate, new PlainLiteralImpl(f.getValueAsString())));
}
}
}
}
}
} catch (NoSuchPartException e) {
logger.error(new StringBuilder("No UIMA results found with ref:").append(uimaUri).toString(), e);
}
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class CeliSentimentAnalysisEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text = "";
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
if (text.trim().length() == 0) {
log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
return;
}
String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
// used for the palin literals in TextAnnotations
Language lang = new Language(language);
try {
List<SentimentExpression> lista = this.client.extractSentimentExpressions(text, language);
LiteralFactory literalFactory = LiteralFactory.getInstance();
Graph g = ci.getMetadata();
for (SentimentExpression se : lista) {
try {
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
// add selected text as PlainLiteral in the language extracted from the text
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(se.getSnippetStr(), lang)));
g.add(new TripleImpl(textAnnotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));
if (se.getStartSnippet() != null && se.getEndSnippet() != null) {
g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(se.getStartSnippet().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(se.getEndSnippet().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, se.getSnippetStr(), se.getStartSnippet()), lang)));
g.add(new TripleImpl(textAnnotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literalFactory.createTypedLiteral(se.getSentimentPolarityAsDoubleValue())));
}
} catch (NoConvertorException e) {
log.error(e.getMessage(), e);
}
}
} catch (IOException e) {
throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
} catch (SOAPException e) {
throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
}
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text = "";
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
if (text.trim().length() == 0) {
log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
return;
}
String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
// used for the palin literals in TextAnnotations
Language lang = new Language(language);
try {
List<NamedEntity> lista = this.client.extractEntities(text, language);
LiteralFactory literalFactory = LiteralFactory.getInstance();
Graph g = ci.getMetadata();
for (NamedEntity ne : lista) {
try {
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
// add selected text as PlainLiteral in the language extracted from the text
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(ne.getFormKind(), lang)));
g.add(new TripleImpl(textAnnotation, DC_TYPE, getEntityRefForType(ne.type)));
if (ne.getFrom() != null && ne.getTo() != null) {
g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(ne.getFrom().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(ne.getTo().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
}
} catch (NoConvertorException e) {
log.error(e.getMessage(), e);
}
}
} catch (IOException e) {
throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
} catch (SOAPException e) {
throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
}
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class ContentItemResource method getPlacesAsJSON.
/**
* @return an RDF/JSON descriptions of places for the word map widget
*/
public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
Graph g = new IndexedGraph();
LiteralFactory lf = LiteralFactory.getInstance();
Graph metadata = contentItem.getMetadata();
for (EntityExtractionSummary p : getPlaceOccurrences()) {
EntitySuggestion bestGuess = p.getBestGuess();
if (bestGuess == null) {
continue;
}
IRI uri = new IRI(bestGuess.getUri());
Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
if (latitudes.hasNext()) {
g.add(latitudes.next());
}
Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
if (longitutes.hasNext()) {
g.add(longitutes.next());
g.add(new TripleImpl(uri, Properties.RDFS_LABEL, lf.createTypedLiteral(bestGuess.getLabel())));
}
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
serializer.serialize(out, g, SupportedFormat.RDF_JSON);
String rdfString = out.toString("utf-8");
return rdfString;
}
Aggregations