Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
The class DBPSpotlightAnnotateEnhancementTest, method initTest.
@Before
public void initTest() throws IOException {
    // create the ContentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    // add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
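For reference, a minimal sketch of the two PlainLiteralImpl constructors that the snippets on this page rely on, one without and one with a language tag. The import paths follow the Clerezza commons-rdf API used above; the accessor calls at the end are assumptions about that API, not taken from the snippets.

import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;

// a plain literal without a language tag, as used for dc:language above
Literal langValue = new PlainLiteralImpl("en");
// a plain literal carrying a language tag, as used for labels and selected text below
Literal label = new PlainLiteralImpl("Paris", new Language("en"));
// assumption: the Literal interface exposes getLexicalForm() and getLanguage()
String lexicalForm = label.getLexicalForm(); // "Paris"
Language tag = label.getLanguage();          // "en"; null for the untagged literal above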
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
The class DBPSpotlightCandidatesEnhancementTest, method initTest.
@Before
public void initTest() throws IOException {
    // create the ContentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    // add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
The class CeliLanguageIdentifierEnhancementEngine, method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE
                + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance "
                + "method! -> This indicates a Bug in the implementation of the EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    try {
        String[] tmps = text.split(" ");
        List<GuessedLanguage> lista = null;
        if (tmps.length > 5) {
            lista = this.client.guessLanguage(text);
        } else {
            lista = this.client.guessQueryLanguage(text);
        }
        Graph g = ci.getMetadata();
        // in ENHANCE_ASYNC we need to use read/write locks on the ContentItem
        ci.getLock().writeLock().lock();
        try {
            GuessedLanguage gl = lista.get(0);
            IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
            g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(gl.getLang())));
            g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
            g.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI language identifier service (configured URL: "
                + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error while encoding/decoding the request/response to the CELI language identifier service!", e);
    }
}
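The engine above mixes the two literal kinds: the guessed language goes into the graph as a PlainLiteralImpl, while the numeric confidence is created through LiteralFactory as a typed literal. A minimal sketch of that distinction, assuming the LiteralFactory from org.apache.clerezza.rdf.core that Stanbol engines use elsewhere and treating the exact XSD datatype mapping as an assumption:

import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;

// plain literal: just a lexical form (optionally with a language tag)
RDFTerm languageValue = new PlainLiteralImpl("en");
// typed literal: the factory maps the Java value to an RDF datatype
// (a Double is assumed to map to xsd:double)
LiteralFactory literalFactory = LiteralFactory.getInstance();
RDFTerm confidenceValue = literalFactory.createTypedLiteral(Double.valueOf(0.95));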
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
The class DBPSpotlightDisambiguateEnhancementEngine, method createEnhancements.
/**
 * The method adds the returned DBpedia Spotlight annotations to the content
 * item's metadata. For each DBpedia resource an EntityAnnotation is created
 * and linked to the corresponding TextAnnotation.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param language
 *            the language used for the entity label literals
 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) {
                    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
                }
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
Use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
The class NEREngineCore, method findNamedEntities.
protected void findNamedEntities(final ContentItem ci, final AnalysedText at, final String text,
        final String lang, final TokenNameFinderModel nameFinderModel) {
    if (ci == null) {
        throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL");
    }
    if (at == null && text == null) {
        log.warn("NULL was parsed as AnalysedText AND Text for content item " + ci.getUri()
                + ". One of the two MUST BE present! -> call ignored");
        return;
    }
    final Language language;
    if (lang != null && !lang.isEmpty()) {
        language = new Language(lang);
    } else {
        language = null;
    }
    if (log.isDebugEnabled()) {
        log.debug("findNamedEntities model={}, language={}, text={}",
                new Object[] { nameFinderModel, language, StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
    }
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Graph g = ci.getMetadata();
    Map<String, List<NameOccurrence>> entityNames;
    if (at != null) {
        entityNames = extractNameOccurrences(nameFinderModel, at, lang);
    } else {
        entityNames = extractNameOccurrences(nameFinderModel, text, lang);
    }
    // lock the ContentItem while writing the RDF data for found Named Entities
    ci.getLock().writeLock().lock();
    try {
        Map<String, IRI> previousAnnotations = new LinkedHashMap<String, IRI>();
        for (Map.Entry<String, List<NameOccurrence>> nameInContext : entityNames.entrySet()) {
            String name = nameInContext.getKey();
            List<NameOccurrence> occurrences = nameInContext.getValue();
            IRI firstOccurrenceAnnotation = null;
            for (NameOccurrence occurrence : occurrences) {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(name, language)));
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.context, language)));
                if (occurrence.type != null) {
                    g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                }
                if (occurrence.confidence != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(occurrence.confidence)));
                }
                if (occurrence.start != null && occurrence.end != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occurrence.start)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occurrence.end)));
                }
                // link occurrences of the same name via dc:relation
                if (firstOccurrenceAnnotation == null) {
                    // check previously created annotations for a more specific occurrence of this name
                    for (Map.Entry<String, IRI> entry : previousAnnotations.entrySet()) {
                        if (entry.getKey().contains(name)) {
                            // we have found a most specific previous
                            // occurrence, use it as subsumption target
                            firstOccurrenceAnnotation = entry.getValue();
                            g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                            break;
                        }
                    }
                    if (firstOccurrenceAnnotation == null) {
                        // no most specific previous occurrence, I am the first,
                        // most specific occurrence to be later used as a target
                        firstOccurrenceAnnotation = textAnnotation;
                        previousAnnotations.put(name, textAnnotation);
                    }
                } else {
                    // I am referring to a most specific first occurrence of the
                    // same name
                    g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                }
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
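Since findNamedEntities adds its triples under the ContentItem's write lock, code that reads them back in ENHANCE_ASYNC mode would typically hold the corresponding read lock. A minimal sketch of that pattern, assuming the Graph.filter(subject, predicate, object) lookup of the Clerezza commons-rdf API and the ENHANCER_SELECTED_TEXT constant used above:

// hypothetical consumer: read back the selected-text literals written above
ci.getLock().readLock().lock();
try {
    Iterator<Triple> it = ci.getMetadata().filter(null, ENHANCER_SELECTED_TEXT, null);
    while (it.hasNext()) {
        RDFTerm selected = it.next().getObject();
        if (selected instanceof Literal) {
            log.debug("selected text: {}", ((Literal) selected).getLexicalForm());
        }
    }
} finally {
    ci.getLock().readLock().unlock();
}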