use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
the class TestEntityLinkingEnhancementEngine method initContentItem.
/**
 * Creates and initialises a new content item using {@link #CONTEXT} as
 * content and adds the text annotations consumed by this test.
 * @return the initialised content item
 * @throws IOException if creating the content item fails
 */
private ContentItem initContentItem() throws IOException {
    ContentItem ci = ciFactory.createContentItem(new IRI("urn:iks-project:enhancer:text:content-item:person"), new StringSource(CONTEXT));
    // add three text annotations to be consumed by this test
    getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
    // add the language
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    return ci;
}
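For orientation, a test consuming this fixture would typically run the engine against the returned content item and assert on the resulting metadata graph. The sketch below is illustrative, not part of the original test: it assumes the test class exposes the engine under test in an engine field, and the assertions are examples of what such a test might check.

// hypothetical consumer of initContentItem(); `engine` is an assumed field
// holding the EnhancementEngine under test
@Test
public void testEntityLinking() throws IOException, EngineException {
    ContentItem ci = initContentItem();
    // the fixture sets dc:language, so the engine should accept the item
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
    engine.computeEnhancements(ci);
    // entity annotations created by the engine end up in the metadata graph
    Assert.assertTrue(ci.getMetadata().filter(null, Properties.RDF_TYPE,
        TechnicalClasses.ENHANCER_ENTITYANNOTATION).hasNext());
}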
use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
the class LocationEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    IRI contentItemId = ci.getUri();
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // get all the textAnnotations
    /*
     * This Map holds the name as key and all the text annotations of
     * dc:type dbpedia:Place that select this name as value.
     * It is used to avoid multiple lookups for text annotations
     * selecting the same name.
     */
    Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
    Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
    while (iterator.hasNext()) {
        // the enhancement annotating a place
        BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
        // this can still be a TextAnnotation or an EntityAnnotation,
        // so we need to filter for TextAnnotations
        Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        if (graph.contains(isTextAnnotation)) {
            // now get the name
            String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
            if (name == null) {
                log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property " + ENHANCER_SELECTED_TEXT + " is not present");
            } else {
                Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
                if (placeEnhancements == null) {
                    placeEnhancements = new ArrayList<BlankNodeOrIRI>();
                    name2placeEnhancementMap.put(name, placeEnhancements);
                }
                placeEnhancements.add(placeEnhancement);
            }
        } else {
            // TODO: if we also want to process EntityAnnotations with the dc:type dbpedia:Place
            // then we need to parse the name based on the enhancer:entity-name property
        }
    }
    // Now we have all the names we need to look up
    Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
    if (getMaxLocationEnhancements() != null) {
        requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
    }
    for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
        List<Toponym> results;
        try {
            requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
            results = geonamesService.searchToponyms(requestParams);
        } catch (Exception e) {
            /*
             * TODO: Review whether it makes sense to catch here for each name,
             * or to catch around the whole loop.
             * This depends on whether single requests can result in Exceptions
             * (e.g. because of encoding problems) or whether Exceptions are
             * usually thrown because of general problems like connection issues
             * or service unavailability.
             */
            throw new EngineException(this, ci, e);
        }
        if (results != null) {
            Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
            for (Toponym result : results) {
                log.debug("process result {} {}", result.getGeoNameId(), result.getName());
                Double score = getToponymScore(result, maxScore);
                log.debug(" > score {}", score);
                if (score != null) {
                    if (score < minScore) {
                        // if the score is lower than the lower bound, then stop
                        break;
                    }
                } else {
                    log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
                    /*
                     * NOTE: If the score is not present all suggestions are
                     * added as enhancements to the metadata of the content
                     * item.
                     */
                }
                // write the enhancement!
                BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
                log.debug(" > {} >= {}", score, minHierarchyScore);
                if (score != null && score >= minHierarchyScore) {
                    log.debug(" > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
                    // get the hierarchy
                    try {
                        Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
                        for (int level = 0; hierarchy.hasNext(); level++) {
                            Toponym hierarchyEntry = hierarchy.next();
                            // maybe add a configuration option for this
                            if (level == 0) {
                                // Mother earth -> ignore
                                continue;
                            }
                            // write it as dependent on the locationEnhancement
                            if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
                                // TODO: add additional checks based on possible
                                // configuration here!
                                log.debug(" - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
                                /*
                                 * The hierarchy service does not provide a score,
                                 * so we need to set one ourselves. Currently it is
                                 * hard-coded to 1.0.
                                 */
                                writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
                            }
                        }
                    } catch (Exception e) {
                        log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
                    }
                }
            }
        }
    }
}
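Two Clerezza idioms carry this method: graph.filter(subject, predicate, object) with null wildcards for iteration, and graph.contains(new TripleImpl(...)) for a membership test on a fully grounded triple. A minimal sketch of the latter, with an illustrative helper name:

// membership test on a fully grounded triple; cheaper to read than
// iterating a filter(..) result when only existence matters
static boolean isTextAnnotation(Graph graph, BlankNodeOrIRI enhancement) {
    return graph.contains(new TripleImpl(enhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION));
}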
use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
the class EntityCoReferenceEngineTest method testSpatialCoref.
@Test
public void testSpatialCoref() throws EngineException, IOException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(SPATIAL_TEXT));
    Graph graph = ci.getMetadata();
    IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, engine);
    graph.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl("en")));
    graph.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, new PlainLiteralImpl("100.0")));
    graph.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
    Entry<IRI, Blob> textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    AnalysedText at = atFactory.createAnalysedText(ci, textBlob.getValue());
    Sentence sentence1 = at.addSentence(0, SPATIAL_SENTENCE_1.indexOf(".") + 1);
    Chunk angelaMerkel = sentence1.addChunk(0, "Angela Merkel".length());
    angelaMerkel.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(new NerTag("Angela Merkel", OntologicalClasses.DBPEDIA_PERSON)));
    Sentence sentence2 = at.addSentence(SPATIAL_SENTENCE_1.indexOf(".") + 1, SPATIAL_SENTENCE_1.length() + SPATIAL_SENTENCE_2.indexOf(".") + 1);
    int theStartIdx = sentence2.getSpan().indexOf("The");
    int germanStartIdx = sentence2.getSpan().indexOf("German");
    int chancellorStartIdx = sentence2.getSpan().indexOf("politician");
    Token the = sentence2.addToken(theStartIdx, theStartIdx + "The".length());
    the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("The", LexicalCategory.PronounOrDeterminer, Pos.Determiner)));
    Token german = sentence2.addToken(germanStartIdx, germanStartIdx + "German".length());
    german.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("German", LexicalCategory.Adjective)));
    Token politician = sentence2.addToken(chancellorStartIdx, chancellorStartIdx + "politician".length());
    politician.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("politician", LexicalCategory.Noun)));
    Chunk theGermanChancellor = sentence2.addChunk(theStartIdx, chancellorStartIdx + "politician".length());
    theGermanChancellor.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION, Value.value(new PhraseTag("The German politician", LexicalCategory.Noun)));
    engine.computeEnhancements(ci);
    Value<CorefFeature> representativeCorefValue = angelaMerkel.getAnnotation(NlpAnnotations.COREF_ANNOTATION);
    Assert.assertNotNull(representativeCorefValue);
    CorefFeature representativeCoref = representativeCorefValue.value();
    Assert.assertTrue(representativeCoref.isRepresentative());
    Assert.assertTrue(representativeCoref.getMentions().contains(theGermanChancellor));
    Value<CorefFeature> subordinateCorefValue = theGermanChancellor.getAnnotation(NlpAnnotations.COREF_ANNOTATION);
    Assert.assertNotNull(subordinateCorefValue);
    CorefFeature subordinateCoref = subordinateCorefValue.value();
    Assert.assertTrue(!subordinateCoref.isRepresentative());
    Assert.assertTrue(subordinateCoref.getMentions().contains(angelaMerkel));
}
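The three graph.add(new TripleImpl(...)) calls that prime the text enhancement are a recurring pattern in these tests; a hypothetical helper bundling them might look as follows (not part of the Stanbol API, just a refactoring sketch):

// hypothetical refactoring of the metadata setup above; note the test
// writes the confidence as a plain literal, while production code would
// usually write a typed double literal via LiteralFactory
private static void primeTextEnhancement(Graph graph, IRI enhancement, String language, String confidence) {
    graph.add(new TripleImpl(enhancement, DC_LANGUAGE, new PlainLiteralImpl(language)));
    graph.add(new TripleImpl(enhancement, ENHANCER_CONFIDENCE, new PlainLiteralImpl(confidence)));
    graph.add(new TripleImpl(enhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
}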
use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
the class EntityLinkingEngineTest method setUpServices.
@BeforeClass
public static void setUpServices() throws IOException {
    searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME, NAME, new SimpleLabelTokenizer());
    // add some terms to the searcher
    Graph graph = new IndexedGraph();
    IRI uri = new IRI("urn:test:PatrickMarshall");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Patrick Marshall")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PERSON));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:Geologist");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologist")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    graph.add(new TripleImpl(uri, REDIRECT, new IRI("urn:test:redirect:Geologist")));
    searcher.addEntity(new Entity(uri, graph));
    // a redirect
    uri = new IRI("urn:test:redirect:Geologist");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologe (redirect)")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:NewZealand");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("New Zealand")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:UniversityOfOtago");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:University");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:Otago");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    // add a 2nd Otago (Place and University)
    uri = new IRI("urn:test:Otago_Texas");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago (Texas)")));
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:UniversityOfOtago_Texas");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago (Texas)")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(uri, graph));
    TEST_ANALYSED_TEXT = AnalysedTextFactory.getDefaultInstance().createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT)));
    TEST_ANALYSED_TEXT_WO = AnalysedTextFactory.getDefaultInstance().createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));
    initAnalyzedText(TEST_ANALYSED_TEXT);
    TEST_ANALYSED_TEXT.addChunk(0, "Dr. Patrick Marshall".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT.addToken(4, 11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT.addToken(12, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    initAnalyzedText(TEST_ANALYSED_TEXT_WO);
    TEST_ANALYSED_TEXT_WO.addChunk(0, "Dr. Marshall Patrick".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT_WO.addToken(4, 12).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT_WO.addToken(13, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
}
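The setup repeats the same add-name, add-type, addEntity sequence for every test entity, so a hypothetical helper could collapse it. Note one deliberate difference: the fixture above shares a single IndexedGraph across all entities, while this sketch gives each entity its own graph, keeping the entity data isolated:

// hypothetical helper, not part of the original fixture; NAME and TYPE are
// the property IRIs defined by the test class
private static void addEntity(TestSearcherImpl searcher, String uriString, String label, IRI type) {
    Graph graph = new IndexedGraph();
    IRI uri = new IRI(uriString);
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl(label)));
    graph.add(new TripleImpl(uri, TYPE, type));
    searcher.addEntity(new Entity(uri, graph));
}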
use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
the class NIFHelper method writePhrase.
/**
 * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the
 * passed RDF graph, using the segmentUri as subject.
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Chunk})
 * @param segmentUri the URI of the resource representing the passed
 * annotated element in the graph
 */
public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
    if (phraseTag != null) {
        IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory());
        if (phraseTypeUri != null) {
            // add the oliaLink for the Phrase
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri));
            graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability())));
        }
    }
}
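A call site first derives a URI for the text segment and then delegates to writePhrase. The sketch below is illustrative: the RFC 5147 style "#char=start,end" fragment is only one of the offset schemes used with NIF, and the inputs are assumed.

// hypothetical call site; `graph`, `chunk` and `base` are assumed inputs
IRI segmentUri = new IRI(base.getUnicodeString()
        + "#char=" + chunk.getStart() + "," + chunk.getEnd());
NIFHelper.writePhrase(graph, chunk, segmentUri);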