Example usage of org.apache.clerezza.rdf.core.LiteralFactory in the Apache Stanbol project:
class OpenCalaisEngine, method createEnhancements.
/**
 * Generates enhancement structures for the entities returned by OpenCalais
 * and adds them to the content item's metadata.
 * For each entity a TextAnnotation and an EntityAnnotation are created.
 * An EntityAnnotation can relate to several TextAnnotations.
 *
 * @param occs a Collection of entity information
 * @param ci the content item
 */
public void createEnhancements(Collection<CalaisEntityOccurrence> occs, ContentItem ci) {
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // language tag for plain literals that quote parts of the content
    String langString = EnhancementEngineHelper.getLanguage(ci);
    final Language lang = (langString == null || langString.isEmpty())
            ? null
            : new Language(langString);
    // TODO create TextEnhancement (form, start, end, type?) and EntityAnnotation (id, name, type)
    HashMap<RDFTerm, IRI> firstAnnotationByEntityId = new HashMap<RDFTerm, IRI>();
    for (CalaisEntityOccurrence occ : occs) {
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        Graph metadata = ci.getMetadata();
        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, occ.type));
        // in NER-only mode use the entity name rather than the matched term
        // (the matched term might be a pronoun!)
        String selectedText = onlyNERMode ? occ.name : occ.exact;
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                new PlainLiteralImpl(selectedText, lang)));
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
                literalFactory.createTypedLiteral(occ.offset)));
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
                literalFactory.createTypedLiteral(occ.offset + occ.length)));
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
                new PlainLiteralImpl(occ.context, lang)));
        // use the relevance as confidence; the relevance may be unavailable
        // (NULL) or the relevance feature may be deactivated (value -1),
        // so only non-null, non-negative values are written
        if (occ.relevance != null && Double.valueOf(0).compareTo(occ.relevance) <= 0) {
            metadata.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE,
                    literalFactory.createTypedLiteral(occ.relevance)));
        }
        // create the EntityAnnotation only once per entity id, but link every
        // later TextAnnotation of the same entity to the first one
        if (firstAnnotationByEntityId.containsKey(occ.id)) {
            metadata.add(new TripleImpl(firstAnnotationByEntityId.get(occ.id),
                    DC_RELATION, textAnnotation));
        } else {
            if (onlyNERMode) {
                // don't create Calais-specific entity annotations; let the
                // autotagger do its own, but remember the first text
                // annotation with that name
                firstAnnotationByEntityId.put(occ.id, textAnnotation);
            } else {
                // entity annotation creation is currently disabled:
                // IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
                // firstAnnotationByEntityId.put(occ.id, entityAnnotation);
                // model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
                // model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, occ.name));
                // model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, occ.type));
                // model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.id));
            }
        }
    }
}
Example usage of org.apache.clerezza.rdf.core.LiteralFactory in the Apache Stanbol project:
class TestKuromojiNlpEngine, method testEngine.
@Test
public void testEngine() throws EngineException {
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Assert.assertEquals(EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(contentItem));
    engine.computeEnhancements(contentItem);
    // validate the enhancement structures written by the engine
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.DC_CREATOR,
            literalFactory.createTypedLiteral(engine.getClass().getName()));
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    Assert.assertEquals(16, EnhancementStructureHelper.validateAllTextAnnotations(
            contentItem.getMetadata(), text, expected));
    AnalysedText at = AnalysedTextUtils.getAnalysedText(contentItem);
    Assert.assertNotNull(at);
    List<Sentence> sentences = AnalysedTextUtils.asList(at.getSentences());
    Assert.assertNotNull(sentences);
    Assert.assertEquals(7, sentences.size());
    // TODO: the values in the following arrays are based on the first run of
    // the engine, so they only detect changes in results. They can not
    // validate that the tokenization and NER detections are correct - sorry,
    // I do not speak Japanese ...
    int[] expectedChunks = new int[] { 5, 3, 1, 0, 1, 2, 4 };
    int[] expectedTokens = new int[] { 25, 25, 25, 24, 33, 17, 32 };
    for (int i = 0; i < sentences.size(); i++) {
        Sentence sentence = sentences.get(i);
        // every detected NER chunk must carry a NER annotation with a type
        List<Chunk> nerChunks = AnalysedTextUtils.asList(sentence.getChunks());
        Assert.assertEquals(expectedChunks[i], nerChunks.size());
        for (Chunk nerChunk : nerChunks) {
            Value<NerTag> nerValue = nerChunk.getAnnotation(NlpAnnotations.NER_ANNOTATION);
            Assert.assertNotNull(nerValue);
            Assert.assertNotNull(nerValue.value().getType());
        }
        // every token must carry a POS annotation
        List<Token> tokens = AnalysedTextUtils.asList(sentence.getTokens());
        Assert.assertEquals(expectedTokens[i], tokens.size());
        for (Token token : tokens) {
            Value<PosTag> posValue = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
            Assert.assertNotNull(posValue);
        }
    }
}
Example usage of org.apache.clerezza.rdf.core.LiteralFactory in the Apache Stanbol project:
class ResourceAdapterTest, method testFloat.
/**
 * Tests that the special float values NaN and +/-infinity survive the
 * round trip through typed literals in the graph and back out of the
 * {@link Representation}.
 * <p>
 * Float counterpart of {@link #testDouble()} (see STANBOL-698).
 */
@Test
public void testFloat() {
    Graph graph = new IndexedGraph();
    IRI id = new IRI("http://www.example.org/test");
    // NOTE: the field URI intentionally keeps the ".../double" path so the
    // test data stays identical to the original; only the (previously
    // misleading) local names were changed to reflect the Float type.
    IRI floatTestField = new IRI("http://www.example.org/field/double");
    LiteralFactory lf = LiteralFactory.getInstance();
    // write the three special float values as typed literals
    graph.add(new TripleImpl(id, floatTestField, lf.createTypedLiteral(Float.NaN)));
    graph.add(new TripleImpl(id, floatTestField, lf.createTypedLiteral(Float.POSITIVE_INFINITY)));
    graph.add(new TripleImpl(id, floatTestField, lf.createTypedLiteral(Float.NEGATIVE_INFINITY)));
    RdfValueFactory vf = new RdfValueFactory(graph);
    Representation r = vf.createRepresentation(id.getUnicodeString());
    // every stored value must be read back exactly once
    Set<Float> expected = new HashSet<Float>(Arrays.asList(
            Float.NaN, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY));
    Iterator<Float> floatValues = r.get(floatTestField.getUnicodeString(), Float.class);
    while (floatValues.hasNext()) {
        Float value = floatValues.next();
        Assert.assertNotNull(value);
        Assert.assertTrue(expected.remove(value));
    }
    Assert.assertTrue(expected.isEmpty());
}
Example usage of org.apache.clerezza.rdf.core.LiteralFactory in the Apache Stanbol project:
class ResourceAdapterTest, method testDouble.
/**
 * Test related to STANBOL-698: the special double values NaN and
 * +/-infinity must survive the round trip through typed literals in the
 * graph and back out of the {@link Representation}.
 */
@Test
public void testDouble() {
    Graph graph = new IndexedGraph();
    IRI id = new IRI("http://www.example.org/test");
    IRI doubleTestField = new IRI("http://www.example.org/field/double");
    LiteralFactory lf = LiteralFactory.getInstance();
    // write the three special double values as typed literals
    graph.add(new TripleImpl(id, doubleTestField, lf.createTypedLiteral(Double.NaN)));
    graph.add(new TripleImpl(id, doubleTestField, lf.createTypedLiteral(Double.POSITIVE_INFINITY)));
    graph.add(new TripleImpl(id, doubleTestField, lf.createTypedLiteral(Double.NEGATIVE_INFINITY)));
    RdfValueFactory vf = new RdfValueFactory(graph);
    Representation r = vf.createRepresentation(id.getUnicodeString());
    // every stored value must be read back exactly once
    Set<Double> remaining = new HashSet<Double>(Arrays.asList(
            Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
    Iterator<Double> doubleValues = r.get(doubleTestField.getUnicodeString(), Double.class);
    while (doubleValues.hasNext()) {
        Double value = doubleValues.next();
        Assert.assertNotNull(value);
        Assert.assertTrue(remaining.remove(value));
    }
    Assert.assertTrue(remaining.isEmpty());
}
Aggregations