use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class EnhancementEngineHelper method addContributingEngine.
/**
 * Adds the parsed {@link EnhancementEngine} as dc:contributor to the
 * enhancement and also sets the dc:modified property accordingly.
 * @param metadata the {@link ContentItem#getMetadata()}
 * @param enhancement the enhancement
 * @param engine the engine
 */
public static void addContributingEngine(Graph metadata, IRI enhancement, EnhancementEngine engine) {
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // TODO: use a public dereferencing URI instead?
    metadata.add(new TripleImpl(enhancement, DC_CONTRIBUTOR, literalFactory.createTypedLiteral(engine.getClass().getName())));
    // set the modification date to the current date
    set(metadata, enhancement, DC_MODIFIED, new Date(), literalFactory);
}
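For context, LiteralFactory.getInstance() maps common Java types to XSD-typed RDF literals (String to xsd:string, Date to xsd:dateTime, and so on). A minimal sketch of that mapping, assuming the commons-rdf based Clerezza API these snippets use; the demo class name is made up:

import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import java.util.Date;

public class LiteralFactoryDemo {
    public static void main(String[] args) {
        LiteralFactory lf = LiteralFactory.getInstance();
        // engine class names become xsd:string typed literals
        Literal engineName = lf.createTypedLiteral("org.example.MyEngine");
        // java.util.Date is converted to an xsd:dateTime literal
        Literal modified = lf.createTypedLiteral(new Date());
        System.out.println(engineName.getDataType()); // ...XMLSchema#string
        System.out.println(modified.getDataType());   // ...XMLSchema#dateTime
    }
}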
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class NEREngineCore method findNamedEntities.
protected void findNamedEntities(final ContentItem ci, final AnalysedText at, final String text, final String lang, final TokenNameFinderModel nameFinderModel) {
    if (ci == null) {
        throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL");
    }
    if (at == null && text == null) {
        log.warn("NULL was parsed as AnalysedText AND Text for content item " + ci.getUri() + ". One of the two MUST BE present! -> call ignored");
        return;
    }
    final Language language;
    if (lang != null && !lang.isEmpty()) {
        language = new Language(lang);
    } else {
        language = null;
    }
    if (log.isDebugEnabled()) {
        log.debug("findNamedEntities model={}, language={}, text={}", new Object[] { nameFinderModel, language, StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
    }
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Graph g = ci.getMetadata();
    Map<String, List<NameOccurrence>> entityNames;
    if (at != null) {
        entityNames = extractNameOccurrences(nameFinderModel, at, lang);
    } else {
        entityNames = extractNameOccurrences(nameFinderModel, text, lang);
    }
    // lock the ContentItem while writing the RDF data for found Named Entities
    ci.getLock().writeLock().lock();
    try {
        Map<String, IRI> previousAnnotations = new LinkedHashMap<String, IRI>();
        for (Map.Entry<String, List<NameOccurrence>> nameInContext : entityNames.entrySet()) {
            String name = nameInContext.getKey();
            List<NameOccurrence> occurrences = nameInContext.getValue();
            IRI firstOccurrenceAnnotation = null;
            for (NameOccurrence occurrence : occurrences) {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(name, language)));
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.context, language)));
                if (occurrence.type != null) {
                    g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                }
                if (occurrence.confidence != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(occurrence.confidence)));
                }
                if (occurrence.start != null && occurrence.end != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occurrence.start)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occurrence.end)));
                }
                // link all occurrences of the same name to its first (most specific) occurrence
                if (firstOccurrenceAnnotation == null) {
                    // check already created annotations for a more specific occurrence of this name
                    for (Map.Entry<String, IRI> entry : previousAnnotations.entrySet()) {
                        if (entry.getKey().contains(name)) {
                            // we have found a most specific previous
                            // occurrence, use it as subsumption target
                            firstOccurrenceAnnotation = entry.getValue();
                            g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                            break;
                        }
                    }
                    if (firstOccurrenceAnnotation == null) {
                        // no most specific previous occurrence, I am the first,
                        // most specific occurrence to be later used as a target
                        firstOccurrenceAnnotation = textAnnotation;
                        previousAnnotations.put(name, textAnnotation);
                    }
                } else {
                    // I am referring to a most specific first occurrence of the
                    // same name
                    g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                }
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
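The write lock above is one half of the ContentItem locking contract; readers of the metadata graph are expected to hold the corresponding read lock. A minimal sketch of the reading side, assuming the same property constants are in scope; the logging is purely illustrative:

// hold the read lock so a concurrently running engine cannot
// modify the graph while we iterate the TextAnnotations written above
ci.getLock().readLock().lock();
try {
    Iterator<Triple> it = ci.getMetadata().filter(null, ENHANCER_SELECTED_TEXT, null);
    while (it.hasNext()) {
        Triple t = it.next();
        log.debug("selected text {} on {}", t.getObject(), t.getSubject());
    }
} finally {
    ci.getLock().readLock().unlock();
}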
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class IndexedGraphTest method createGraph.
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    // randoms are in the range [0..3]
    double l = 1.0; // literal
    double i = l / 3; // int
    double d = l * 2 / 3; // double
    double b = 2.0; // bNode
    double nb = b - (l * 2 / 3); // create new bNode
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) { // literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                Literal text;
                if (random <= i) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (random <= d) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) { // bNode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                // > nb and <= b: reuse an existing bNode
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else { // IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
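A usage sketch: because the seed is honored when non-null, a fixed seed makes the generated test graph structurally reproducible across runs, which keeps performance comparisons meaningful. The IndexedGraph copy constructor used here is an assumption about Stanbol's commons.indexedgraph module:

// build a reproducible random graph of 1000 triples (fixed seed)
Collection<Triple> tc = new ArrayList<Triple>();
createGraph(tc, 1000, 42L);
// copy the triples into an IndexedGraph (assumed copy constructor) so
// that filter(...) lookups use the index instead of a full scan
Graph g = new IndexedGraph(tc);
Iterator<Triple> labels = g.filter(null, RDFS.label, null);
while (labels.hasNext()) {
    System.out.println("label triple: " + labels.next());
}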
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class LocationEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    IRI contentItemId = ci.getUri();
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // get all the textAnnotations
    /*
     * This Map holds the name as key and all the text annotations of
     * dc:type dbpedia:Place that select this name as value. It is used
     * to avoid multiple lookups for text annotations selecting the
     * same name.
     */
    Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
    Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
    while (iterator.hasNext()) {
        // the enhancement annotating a place
        BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
        // this can still be a TextAnnotation or an EntityAnnotation,
        // so we need to filter for TextAnnotations
        Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        if (graph.contains(isTextAnnotation)) {
            // now get the name
            String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
            if (name == null) {
                log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property " + ENHANCER_SELECTED_TEXT + " is not present");
            } else {
                Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
                if (placeEnhancements == null) {
                    placeEnhancements = new ArrayList<BlankNodeOrIRI>();
                    name2placeEnhancementMap.put(name, placeEnhancements);
                }
                placeEnhancements.add(placeEnhancement);
            }
        } else {
            // TODO: if we also want to process EntityAnnotations with the dc:type dbpedia:Place,
            // then we need to parse the name based on the enhancer:entity-name property
        }
    }
    // now we do have all the names we need to look up
    Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
    if (getMaxLocationEnhancements() != null) {
        requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
    }
    for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
        List<Toponym> results;
        try {
            requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
            results = geonamesService.searchToponyms(requestParams);
        } catch (Exception e) {
            /*
             * TODO: Review whether it makes sense to catch here for each name,
             * or to catch around the whole loop. This depends on whether single
             * requests can result in Exceptions (e.g. because of encoding
             * problems) or whether Exceptions are usually thrown because of
             * general issues like connection problems or service unavailability.
             */
            throw new EngineException(this, ci, e);
        }
        if (results != null) {
            Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
            for (Toponym result : results) {
                log.debug("process result {} {}", result.getGeoNameId(), result.getName());
                Double score = getToponymScore(result, maxScore);
                log.debug(" > score {}", score);
                if (score != null) {
                    if (score < minScore) {
                        // if the score is lower than the lower bound, then stop
                        break;
                    }
                } else {
                    log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
                    /*
                     * NOTE: If the score is not present, all suggestions are
                     * added as enhancements to the metadata of the content
                     * item.
                     */
                }
                // write the enhancement!
                BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
                log.debug(" > {} >= {}", score, minHierarchyScore);
                if (score != null && score >= minHierarchyScore) {
                    log.debug(" > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
                    // get the hierarchy
                    try {
                        Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
                        for (int level = 0; hierarchy.hasNext(); level++) {
                            Toponym hierarchyEntry = hierarchy.next();
                            // TODO: maybe add a configuration option for this
                            if (level == 0) {
                                // Mother earth -> ignore
                                continue;
                            }
                            // write it as dependent on the locationEnhancement
                            if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
                                // TODO: add additional checks based on possible
                                // configuration here!
                                log.debug(" - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
                                /*
                                 * The hierarchy service does not provide a score
                                 * (it would always be 1.0), so we need to set the
                                 * score ourselves. Currently it is set to 1.0.
                                 */
                                writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
                            }
                        }
                    } catch (Exception e) {
                        log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
                    }
                }
            }
        }
    }
}
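The getToponymScore(result, maxScore) call above normalizes each geonames relevance score against the score of the best result. A hedged sketch of what such a normalization can look like; this is a hypothetical re-implementation for illustration, not Stanbol's actual code:

// Hypothetical sketch: geonames returns relevance scores only for
// fulltext searches, so a missing score is passed through as null
// and handled by the caller (see the NULL score branch above).
private Double getToponymScore(Toponym toponym, Double maxScore) {
    if (toponym.getScore() == null || maxScore == null || maxScore <= 0) {
        return null;
    }
    return toponym.getScore() / maxScore; // normalize into [0..1]
}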
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class CeliLemmatizerEnhancementEngine method addMorphoAnalysisEnhancement.
private void addMorphoAnalysisEnhancement(ContentItem ci, String text, String language, Graph g) throws EngineException {
    // Clerezza language for PlainLiterals
    Language lang = new Language(language);
    List<LexicalEntry> terms;
    try {
        terms = this.client.performMorfologicalAnalysis(text, language);
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error while encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
    }
    // get a write lock before writing the enhancements
    ci.getLock().writeLock().lock();
    try {
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        for (LexicalEntry le : terms) {
            List<CeliMorphoFeatures> mFeatures = this.convertLexicalEntryToMorphFeatures(le, language);
            for (CeliMorphoFeatures feat : mFeatures) {
                // create a text annotation for each interpretation produced by the morphological analyzer
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(le.getWordForm(), lang)));
                if (le.from >= 0 && le.to > 0) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(le.from)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(le.to)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, le.getWordForm(), le.from), lang)));
                }
                g.addAll(feat.featuresAsTriples(textAnnotation, lang));
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
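The getSelectionContext(text, le.getWordForm(), le.from) call above extracts a snippet of the surrounding text for the enhancer:selection-context value. A hedged sketch of such a helper; the window size and boundary handling are assumptions, not Stanbol's actual implementation:

// Hypothetical helper for illustration: return up to 50 characters of
// context before and after the selected word form.
static String selectionContext(String content, String selection, int start) {
    int begin = Math.max(0, start - 50);
    int end = Math.min(content.length(), start + selection.length() + 50);
    return content.substring(begin, end);
}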