Use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
The class MetaGraphManager, method updateCreateEntry.
public void updateCreateEntry(OWLOntologyID publicKey) {
    if (publicKey == null || publicKey.isAnonymous())
        throw new IllegalArgumentException(
                "An anonymous ontology cannot be mapped. A non-anonymous ontology ID must be forged in these cases.");
    Triple tType, tHasOiri = null, tHasViri = null;
    org.semanticweb.owlapi.model.IRI ontologyIRI = publicKey.getOntologyIRI(), versionIri = publicKey.getVersionIRI();
    IRI entry = buildResource(publicKey);
    tType = new TripleImpl(entry, RDF.type, ENTRY_URIREF);
    LiteralFactory lf = LiteralFactory.getInstance();
    tHasOiri = new TripleImpl(entry, HAS_ONTOLOGY_IRI_URIREF, lf.createTypedLiteral(new IRI(ontologyIRI.toString())));
    if (versionIri != null)
        tHasViri = new TripleImpl(entry, HAS_VERSION_IRI_URIREF, lf.createTypedLiteral(new IRI(versionIri.toString())));
    synchronized (graph) {
        graph.add(tType);
        if (tHasViri != null)
            graph.add(tHasViri);
        graph.add(tHasOiri);
    }
}
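What this snippet relies on from LiteralFactory is the conversion of a Clerezza IRI into a typed literal (typically typed as xsd:anyURI) so the ontology and version IRIs can be stored as triple objects. Below is a minimal, self-contained sketch of just that step, assuming the Clerezza commons-rdf package layout used by these snippets; the import paths and the urn:example names are illustrative, not taken from MetaGraphManager.

import org.apache.clerezza.commons.rdf.IRI;                   // assumed package (Clerezza 1.x layout)
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class TypedIriLiteralSketch {
    public static void main(String[] args) {
        LiteralFactory lf = LiteralFactory.getInstance();
        // hypothetical subject and predicate; only the literal creation mirrors updateCreateEntry(..)
        IRI entry = new IRI("urn:example:entry");
        IRI hasOntologyIri = new IRI("urn:example:hasOntologyIRI");
        // wrap an IRI as a typed literal, as done for HAS_ONTOLOGY_IRI_URIREF above
        Literal value = lf.createTypedLiteral(new IRI("http://example.org/ontology"));
        System.out.println(new TripleImpl(entry, hasOntologyIri, value));
    }
}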
Use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
The class RestfulNlpAnalysisEngine, method computeEnhancements.
/**
* Compute enhancements for supplied ContentItem. The results of the process
* are expected to be stored in the metadata of the content item.
* <p/>
* The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
* persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
* <p/>
* This method creates a new POSContentPart using {@link org.apache.stanbol.enhancer.engines.pos.api.POSTaggerHelper#createContentPart} from a text/plain part and
* stores it as a new part in the content item. The metadata is not changed.
*
* @throws org.apache.stanbol.enhancer.servicesapi.EngineException
* if the underlying process failed to work as
* expected
*/
@Override
public void computeEnhancements(final ContentItem ci) throws EngineException {
    // validate that the service is active
    checkRESTfulNlpAnalysisService();
    // get/create the AnalysedText
    final AnalysedText at = NlpEngineHelper.initAnalysedText(this, analysedTextFactory, ci);
    final Blob blob = at.getBlob();
    // send the text to the server
    final String language = getLanguage(this, ci, true);
    final HttpPost request = new HttpPost(analysisServiceUrl);
    request.addHeader(HttpHeaders.CONTENT_LANGUAGE, language);
    request.setEntity(new InputStreamEntity(blob.getStream(), blob.getContentLength(),
            ContentType.create(blob.getMimeType(), blob.getParameter().get("charset"))));
    // execute the request
    try {
        AccessController.doPrivileged(new PrivilegedExceptionAction<AnalysedText>() {

            public AnalysedText run() throws ClientProtocolException, IOException {
                return httpClient.execute(request, new AnalysisResponseHandler(at));
            }
        });
    } catch (PrivilegedActionException pae) {
        Exception e = pae.getException();
        if (e instanceof ClientProtocolException) {
            // force re-initialisation upon error
            setRESTfulNlpAnalysisServiceUnavailable();
            throw new EngineException(this, ci, "Exception while executing Request "
                    + "on RESTful NLP Analysis Service at " + analysisServiceUrl, e);
        } else if (e instanceof IOException) {
            // force re-initialisation upon error
            setRESTfulNlpAnalysisServiceUnavailable();
            throw new EngineException(this, ci, "Exception while executing Request "
                    + "on RESTful NLP Analysis Service at " + analysisServiceUrl, e);
        } else {
            throw RuntimeException.class.cast(e);
        }
    }
    if (writeTextAnnotations) {
        // if enabled, fise:TextAnnotations are created for Named Entities and Sentiments
        double positiveSent = 0.0;
        int positiveCount = 0;
        double negativeSent = 0.0;
        int negativeCount = 0;
        int sentimentCount = 0;
        Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Chunk));
        Sentence context = null;
        Graph metadata = ci.getMetadata();
        Language lang = new Language(language);
        LiteralFactory lf = LiteralFactory.getInstance();
        ci.getLock().writeLock().lock();
        try {
            // write TextAnnotations for Named Entities
            while (spans.hasNext()) {
                Span span = spans.next();
                switch (span.getType()) {
                    case Sentence:
                        context = (Sentence) span;
                        // fall-through intended!!
                    default:
                        Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                        if (nerAnno != null) {
                            IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                            // add span related data
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                    new PlainLiteralImpl(span.getSpan(), lang)));
                            metadata.add(new TripleImpl(ta, ENHANCER_START, lf.createTypedLiteral(span.getStart())));
                            metadata.add(new TripleImpl(ta, ENHANCER_END, lf.createTypedLiteral(span.getEnd())));
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(
                                    context == null
                                            ? getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart())
                                            : context.getSpan(), lang)));
                            // add the NER type
                            if (nerAnno.value().getType() != null) {
                                metadata.add(new TripleImpl(ta, DC_TYPE, nerAnno.value().getType()));
                            }
                            if (nerAnno.probability() >= 0) {
                                metadata.add(new TripleImpl(ta, ENHANCER_CONFIDENCE,
                                        lf.createTypedLiteral(nerAnno.probability())));
                            }
                        }
                        Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                        if (sentimentAnnotation != null) {
                            // this span has a sentiment assigned
                            Double sentiment = sentimentAnnotation.value();
                            // Create a fise:TextAnnotation for the sentiment
                            IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                            metadata.add(new TripleImpl(ta, ENHANCER_START, lf.createTypedLiteral(span.getStart())));
                            metadata.add(new TripleImpl(ta, ENHANCER_END, lf.createTypedLiteral(span.getEnd())));
                            metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY, lf.createTypedLiteral(sentiment)));
                            // add the generic dc:type used for all Sentiment annotations
                            metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
                            // determine the specific dc:type for the sentiment annotation
                            IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(span.getType());
                            if (ssoType != null) {
                                metadata.add(new TripleImpl(ta, DC_TYPE, ssoType));
                            }
                            // keep statistics for the overall sentiment of the document
                            sentimentCount++;
                            if (sentiment > 0) {
                                positiveSent += sentiment;
                                positiveCount++;
                            } else if (sentiment < 0) {
                                negativeSent += sentiment;
                                negativeCount++;
                            }
                        }
                        break;
                }
            }
            // Add the annotation for the overall sentiment of the document
            if (sentimentCount > 0) {
                IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // calculate the average sentiment for a document
                // TODO: Think on a better way to calculate a general sentiment value for a document.
                metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY,
                        lf.createTypedLiteral((positiveSent + negativeSent) / sentimentCount)));
                if (positiveCount > 0) {
                    // average positive sentiment calculation for the document
                    metadata.add(new TripleImpl(ta, POSITIVE_SENTIMENT_PROPERTY,
                            lf.createTypedLiteral(positiveSent / positiveCount)));
                }
                if (negativeCount > 0) {
                    // average negative sentiment calculation for the document
                    metadata.add(new TripleImpl(ta, NEGATIVE_SENTIMENT_PROPERTY,
                            lf.createTypedLiteral(negativeSent / negativeCount)));
                }
                metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
                metadata.add(new TripleImpl(ta, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
            }
            // no sentiment annotation present ... nothing to do
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
    // else do not write fise:TextAnnotations
}
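Within this engine, LiteralFactory is only used to turn Java primitives (int span offsets, double confidence and sentiment values) into typed XSD literals, while language-tagged text goes through PlainLiteralImpl. The following stripped-down sketch shows that pattern in isolation, with urn:example IRIs standing in for the fise/enhancer property constants and package names assumed from the Clerezza 1.x layout.

import java.util.ArrayList;
import java.util.List;

import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class TextAnnotationLiteralSketch {
    public static void main(String[] args) {
        LiteralFactory lf = LiteralFactory.getInstance();
        IRI ta = new IRI("urn:example:text-annotation-1");       // hypothetical annotation node
        IRI selectedText = new IRI("urn:example:selected-text"); // stands in for ENHANCER_SELECTED_TEXT
        IRI start = new IRI("urn:example:start");                // stands in for ENHANCER_START
        IRI end = new IRI("urn:example:end");                    // stands in for ENHANCER_END
        IRI confidence = new IRI("urn:example:confidence");      // stands in for ENHANCER_CONFIDENCE

        List<Triple> triples = new ArrayList<Triple>();
        // language-tagged plain literal for the selected text
        triples.add(new TripleImpl(ta, selectedText, new PlainLiteralImpl("Angela Merkel", new Language("en"))));
        // int offsets become typed (xsd:int) literals
        triples.add(new TripleImpl(ta, start, lf.createTypedLiteral(42)));
        triples.add(new TripleImpl(ta, end, lf.createTypedLiteral(55)));
        // double scores become typed (xsd:double) literals
        triples.add(new TripleImpl(ta, confidence, lf.createTypedLiteral(0.87d)));
        for (Triple t : triples) {
            System.out.println(t);
        }
    }
}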
Use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
The class OntologyMappings, method addMediaResourceOntologyMappings.
public static void addMediaResourceOntologyMappings(OntologyMappings mappings) {
    mappings.addMappings(new PropertyMapping(ma + "hasContributor",
            DublinCore.CONTRIBUTOR.getName(), XMPDM.ARTIST.getName(), XMPDM.COMPOSER.getName()));
    mappings.addMapping(new ResourceMapping(ma + "hasLocation",
            new TypeMapping(ma + "Location"),
            new PropertyMapping(ma + "locationName", DublinCore.COVERAGE.getName())));
    mappings.addMappings(new PropertyMapping(ma + "hasCreator",
            DublinCore.CREATOR.getName(), MSOffice.AUTHOR, "initial-creator"));
    mappings.addMappings(new PropertyMapping(ma + "description", DublinCore.DESCRIPTION.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasFormat",
            DublinCore.FORMAT.getName(), HttpHeaders.CONTENT_TYPE));
    /*
     * Excerpt of the MA recommendation:
     * The identifier of a media resource is represented in RDF by the URI
     * of the node representing that media resource. If a resource is
     * identified by several URI, owl:sameAs should be used.
     */
    mappings.addMappings(new PropertyMapping(OWL.sameAs, RDFS.Resource, DublinCore.IDENTIFIER.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasLanguage",
            DublinCore.LANGUAGE.getName(), HttpHeaders.CONTENT_LANGUAGE));
    mappings.addMappings(new PropertyMapping(ma + "editDate", XSD.dateTime,
            DublinCore.MODIFIED.getName(), MSOffice.LAST_SAVED.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasPublisher", DublinCore.PUBLISHER.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasRelatedResource", DublinCore.RELATION.getName()));
    // DC:rights and cc:license
    mappings.addMappings(new PropertyMapping(ma + "copyright", RDFS.Resource,
            DublinCore.RIGHTS.getName(), CreativeCommons.LICENSE_LOCATION,
            CreativeCommons.LICENSE_URL, XMPDM.COPYRIGHT.getName()));
    mappings.addMappings(new PropertyMapping(ma + "isMemberOf", DublinCore.SOURCE.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasKeyword",
            DublinCore.SUBJECT.getName(), MSOffice.KEYWORDS));
    mappings.addMappings(new PropertyMapping(ma + "title", DublinCore.TITLE.getName(),
            XMPDM.SCENE.getName(), XMPDM.TAPE_NAME.getName(), XMPDM.SHOT_NAME.getName()));
    mappings.addMapping(new PropertyMapping(ma + "alternativeTitle", XMPDM.ALT_TAPE_NAME.getName()));
    mappings.addMapping(new PropertyMapping(ma + "mainOriginalTitle", XMPDM.ALBUM.getName()));
    mappings.addMappings(new PropertyMapping(ma + "hasGenre",
            DublinCore.TYPE.getName(), XMPDM.GENRE.getName()));
    mappings.addMappings(new PropertyMapping(ma + "creationDate", XSD.dateTime,
            DublinCore.DATE.getName(), MSOffice.CREATION_DATE.getName(), "created"));
    mappings.addMapping(new PropertyMapping(ma + "description",
            DublinCore.DESCRIPTION.getName(), MSOffice.COMMENTS));
    mappings.addMappings(new PropertyMapping(ma + "hasContributor",
            MSOffice.LAST_AUTHOR, MSOffice.AUTHOR, XMPDM.ENGINEER.getName()));
    // other properties -> Media Ontology
    mappings.addMappings(new PropertyMapping(ma + "hasCreator", "producer", "initial-creator"));
    // EXIF -> Media Ontology
    mappings.addMappings(new PropertyMapping(ma + "frameHeight", XSD.int_, TIFF.IMAGE_LENGTH.getName()));
    mappings.addMappings(new PropertyMapping(ma + "frameWidth", XSD.int_, TIFF.IMAGE_WIDTH.getName()));
    mappings.addMappings(new PropertyMapping(ma + "creationDate", XSD.dateTime,
            TIFF.ORIGINAL_DATE.getName(), XMPDM.SHOT_DATE.getName()));
    // XMP -> Media Ontology
    // here we need to split up the metadata for the audio and video
    mappings.addMapping(new PropertyMapping(ma + "releaseDate", XSD.dateTime, XMPDM.RELEASE_DATE.getName()));
    mappings.addMapping(new ResourceMapping(ma + "hasTrack",
            new Mapping[] { /* no required mappings */ },
            new Mapping[] { // optional
                    new PropertyMapping(ma + "hasFormat", XSD.string, XMPDM.AUDIO_CHANNEL_TYPE.getName()),
                    new PropertyMapping(ma + "hasCompression", XSD.string, XMPDM.AUDIO_COMPRESSOR.getName()),
                    new PropertyMapping(ma + "editDate", XMPDM.AUDIO_MOD_DATE.getName()),
                    new PropertyMapping(ma + "samplingRate", XSD.int_, XMPDM.AUDIO_SAMPLE_RATE.getName()) },
            new Mapping[] { // additional
                    new TypeMapping(ma + "MediaFragment"),
                    new TypeMapping(ma + "Track"),
                    new TypeMapping(ma + "AudioTrack") }));
    mappings.addMapping(new ResourceMapping(ma + "hasTrack",
            new Mapping[] { /* no required mappings */ },
            new Mapping[] { // optional
                    new PropertyMapping(ma + "hasCompression", XSD.string, XMPDM.VIDEO_COMPRESSOR.getName()),
                    new PropertyMapping(ma + "editDate", XMPDM.VIDEO_MOD_DATE.getName()),
                    new PropertyMapping(ma + "frameRate", XSD.double_, XMPDM.VIDEO_FRAME_RATE.getName()) },
            new Mapping[] { // additional
                    new TypeMapping(ma + "MediaFragment"),
                    new TypeMapping(ma + "Track"),
                    new TypeMapping(ma + "VideoTrack"),
                    new PropertyMapping(ma + "frameHeight", XSD.int_, TIFF.IMAGE_LENGTH.getName()),
                    new PropertyMapping(ma + "frameWidth", XSD.int_, TIFF.IMAGE_WIDTH.getName()) }));
    mappings.addMapping(new PropertyMapping(ma + "numberOfTracks", XSD.int_, XMPDM.TRACK_NUMBER.getName()));
    // we need to convert from MByte/min to kByte/sec
    mappings.addMapping(new PropertyMapping(ma + "averageBitRate", XSD.double_,
            new Mapping.Converter() {

                @Override
                public RDFTerm convert(RDFTerm value) {
                    if (value instanceof Literal && XSD.double_.equals(((Literal) value).getDataType())) {
                        LiteralFactory lf = LiteralFactory.getInstance();
                        double mm = lf.createObject(Double.class, (Literal) value);
                        return lf.createTypedLiteral(Double.valueOf(mm * 1024 / 60));
                    } else {
                        // do not convert
                        return value;
                    }
                }
            }, XMPDM.FILE_DATA_RATE.getName()));
    // GEO -> Media Ontology
    mappings.addMapping(new ResourceMapping(ma + "hasLocation",
            new Mapping[] { // required
                    new PropertyMapping(ma + "locationLatitude", XSD.double_, Geographic.LATITUDE.getName()),
                    new PropertyMapping(ma + "locationLongitude", XSD.double_, Geographic.LONGITUDE.getName()) },
            new Mapping[] { // optional
                    new PropertyMapping(ma + "locationAltitude", XSD.double_, Geographic.ALTITUDE.getName()) },
            new Mapping[] { // additional
                    new TypeMapping(ma + "Location") }));
}
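The averageBitRate converter above is the one place in these examples where LiteralFactory is used in both directions: createObject(..) parses an existing typed literal back into a double, and createTypedLiteral(..) wraps the converted number (MByte/min * 1024 / 60 = kByte/sec) again. The following standalone sketch of that round trip skips the XSD datatype check from the original and simply catches conversion failures; the class name and import paths are assumptions.

import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class DataRateConversionSketch {

    // convert an xsd:double literal from MByte/min to kByte/sec; anything else passes through unchanged
    static RDFTerm convert(RDFTerm value) {
        LiteralFactory lf = LiteralFactory.getInstance();
        if (value instanceof Literal) {
            try {
                double mbytePerMin = lf.createObject(Double.class, (Literal) value);
                return lf.createTypedLiteral(Double.valueOf(mbytePerMin * 1024 / 60));
            } catch (RuntimeException e) {
                // the literal could not be read as a double -> do not convert
            }
        }
        return value;
    }

    public static void main(String[] args) {
        LiteralFactory lf = LiteralFactory.getInstance();
        // 6 MByte/min should come back as a typed literal of 102.4 kByte/sec
        System.out.println(convert(lf.createTypedLiteral(6.0d)));
    }
}

Keeping the explicit datatype check, as the original converter does, avoids accidentally converting literals that merely happen to be parseable as numbers.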
Use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
The class NamedEntityTaggingEngine, method computeEnhancements.
public void computeEnhancements(ContentItem ci) throws EngineException {
    final Site site;
    if (referencedSiteID != null) {
        // lookup the referenced site
        site = siteManager.getSite(referencedSiteID);
        // ensure that it is present
        if (site == null) {
            String msg = String.format("Unable to enhance %s because Referenced Site %s is currently not active!",
                    ci.getUri().getUnicodeString(), referencedSiteID);
            log.warn(msg);
            // throw new EngineException(msg);
            return;
        }
        // and that it supports offline mode if required
        if (isOfflineMode() && !site.supportsLocalMode()) {
            log.warn("Unable to enhance ci {} because OfflineMode is not supported by ReferencedSite {}.",
                    ci.getUri().getUnicodeString(), site.getId());
            return;
        }
    } else {
        // null indicates to use the Entityhub to lookup Entities
        site = null;
    }
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // Retrieve the existing text annotations (requires read lock)
    Map<NamedEntity, List<IRI>> textAnnotations = new HashMap<NamedEntity, List<IRI>>();
    // the language extracted for the parsed content or NULL if not available
    String contentLangauge;
    ci.getLock().readLock().lock();
    try {
        contentLangauge = EnhancementEngineHelper.getLanguage(ci);
        for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
            IRI uri = (IRI) it.next().getSubject();
            if (graph.filter(uri, Properties.DC_RELATION, null).hasNext()) {
                // skip
                continue;
            }
            NamedEntity namedEntity = NamedEntity.createFromTextAnnotation(graph, uri);
            if (namedEntity != null) {
                // This is a first occurrence, collect any subsumed annotations
                List<IRI> subsumed = new ArrayList<IRI>();
                for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2.hasNext(); ) {
                    subsumed.add((IRI) it2.next().getSubject());
                }
                textAnnotations.put(namedEntity, subsumed);
            }
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    // search the suggestions
    Map<NamedEntity, List<Suggestion>> suggestions = new HashMap<NamedEntity, List<Suggestion>>(textAnnotations.size());
    for (Entry<NamedEntity, List<IRI>> entry : textAnnotations.entrySet()) {
        try {
            List<Suggestion> entitySuggestions = computeEntityRecommentations(site, entry.getKey(), entry.getValue(), contentLangauge);
            if (entitySuggestions != null && !entitySuggestions.isEmpty()) {
                suggestions.put(entry.getKey(), entitySuggestions);
            }
        } catch (EntityhubException e) {
            throw new EngineException(this, ci, e);
        }
    }
    // now write the results (requires write lock)
    ci.getLock().writeLock().lock();
    try {
        RdfValueFactory factory = RdfValueFactory.getInstance();
        Map<String, Representation> entityData = new HashMap<String, Representation>();
        for (Entry<NamedEntity, List<Suggestion>> entitySuggestions : suggestions.entrySet()) {
            List<IRI> subsumed = textAnnotations.get(entitySuggestions.getKey());
            List<BlankNodeOrIRI> annotationsToRelate = new ArrayList<BlankNodeOrIRI>(subsumed);
            annotationsToRelate.add(entitySuggestions.getKey().getEntity());
            for (Suggestion suggestion : entitySuggestions.getValue()) {
                log.debug("Add Suggestion {} for {}", suggestion.getEntity().getId(), entitySuggestions.getKey());
                EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, graph, ci.getUri(),
                        annotationsToRelate, suggestion, nameField, // header)?!
                        contentLangauge == null ? DEFAULT_LANGUAGE : contentLangauge);
                if (dereferenceEntities) {
                    entityData.put(suggestion.getEntity().getId(), suggestion.getEntity().getRepresentation());
                }
            }
        }
        // add the collected Representations (if dereferenceEntities is false, entityData will be empty)
        for (Representation rep : entityData.values()) {
            graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
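Here the factory is obtained once per computeEnhancements(..) call and handed to EnhancementRDFUtils.writeEntityAnnotation(..), which uses it to write the typed literals of the resulting fise:EntityAnnotation (typically the confidence value). Below is a sketch of that "create once, pass to the writer" pattern with a hypothetical helper; the real EnhancementRDFUtils signature is not reproduced, and a plain collection stands in for the ContentItem metadata Graph.

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class EntityAnnotationWriterSketch {

    // hypothetical stand-in for EnhancementRDFUtils.writeEntityAnnotation(..)
    static void writeConfidence(LiteralFactory lf, Collection<Triple> metadata, IRI annotation, double confidence) {
        IRI confidenceProp = new IRI("urn:example:confidence"); // stands in for ENHANCER_CONFIDENCE
        metadata.add(new TripleImpl(annotation, confidenceProp, lf.createTypedLiteral(confidence)));
    }

    public static void main(String[] args) {
        // one shared factory instance, as in computeEnhancements(..)
        LiteralFactory lf = LiteralFactory.getInstance();
        List<Triple> metadata = new ArrayList<Triple>();
        writeConfidence(lf, metadata, new IRI("urn:example:entity-annotation-1"), 0.92d);
        System.out.println(metadata.size() + " triple(s) written");
    }
}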
Use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
The class DBPSpotlightDisambiguateEnhancementTest, method initTest.
@Before
public void initTest() throws IOException {
    // create the contentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    // add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
    LiteralFactory lf = LiteralFactory.getInstance();
    // we also need to create a fise:TextAnnotation to test disambiguation
    String selected = "Angela Merkel";
    Language en = new Language("en");
    IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, new DBPSpotlightSpotEnhancementEngine());
    Graph model = ci.getMetadata();
    model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selected, en)));
    model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(TEST_TEXT, en)));
    model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, lf.createTypedLiteral(TEST_TEXT.indexOf(selected))));
    model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, lf.createTypedLiteral(TEST_TEXT.indexOf(selected) + selected.length())));
    model.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, OntologicalClasses.DBPEDIA_PERSON));
    // validate that the created TextAnnotation is valid (test the test ...)
    EnhancementStructureHelper.validateAllTextAnnotations(model, TEST_TEXT, null);
}
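In this test, LiteralFactory turns the character offsets computed with String.indexOf(..) into the typed literals expected for the fise:start and fise:end properties of the TextAnnotation. A compact sketch of just that offset-to-literal step follows; the sample text and the urn:example property IRIs are placeholders, and the import paths are assumed.

import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;

public class SelectionOffsetSketch {
    public static void main(String[] args) {
        String text = "Angela Merkel visited Paris."; // placeholder, not the test's TEST_TEXT
        String selected = "Angela Merkel";
        int start = text.indexOf(selected);
        int end = start + selected.length();

        LiteralFactory lf = LiteralFactory.getInstance();
        IRI textAnnotation = new IRI("urn:example:text-annotation"); // placeholder node
        IRI startProp = new IRI("urn:example:start");                // stands in for ENHANCER_START
        IRI endProp = new IRI("urn:example:end");                    // stands in for ENHANCER_END

        System.out.println(new TripleImpl(textAnnotation, startProp, lf.createTypedLiteral(start)));
        System.out.println(new TripleImpl(textAnnotation, endProp, lf.createTypedLiteral(end)));
    }
}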