use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testEngine.
/**
 * Runs the lemmatizer engine (initialised via {@code initEngine(false)}) on
 * the Italian test text and checks that exactly one TextAnnotation carrying
 * exactly one lemma form is written to the content item's metadata.
 * <p>
 * The engine is shut down in a {@code finally} block so that it is released
 * even when the remote service is unavailable or an assertion fails.
 *
 * @throws Exception declared because the engine set-up and shutdown helpers
 *         are called from here
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    // Add a simple triple to statically define the language of the test content.
    // Unit tests should not depend on each other (if possible), hence the
    // language is NOT detected via
    // CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci).
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // NOTE(review): checkServiceUnavailable presumably rethrows when the
            // failure is not a remote-service outage - confirm. If it returns
            // normally the remaining checks are skipped.
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        // values every enhancement created by this engine is expected to carry
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        // validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        // validate the lemma form of the TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        // always release the engine, also on early return or assertion failure
        shutdownEngine(morphoAnalysisEngine);
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.
/**
 * Feeds the given text to {@code nerEngine} and runs the text- and
 * entity-annotation validators over the resulting metadata, logging how many
 * annotations of each kind were found.
 *
 * @param txt the text wrapped into the content item
 * @param lang the language code written as {@code DC_LANGUAGE} literal
 * @throws EngineException declared by the signature; exceptions of this type
 *         raised inside the method are handed to
 *         {@code RemoteServiceHelper.checkServiceUnavailable}
 * @throws IOException declared by the signature
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    final ContentItem contentItem = wrapAsContentItem(txt);
    try {
        // statically define the language of the test content with a simple triple
        TripleImpl languageTriple = new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang));
        contentItem.getMetadata().add(languageTriple);

        nerEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // values expected on every annotation produced by the engine
        String engineClassName = nerEngine.getClass().getName();
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));

        int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        log.info(entityAnnotationCount + " EntityAnnotations found ...");
    } catch (EngineException engineFailure) {
        // NOTE(review): presumably tolerates an unreachable remote service and
        // rethrows anything else - confirm against RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.
/**
 * The method adds the returned DBpedia Spotlight annotations to the content
 * item's metadata. For each annotation whose surface form has an entry in
 * {@code textAnnotationsMap} an EntityAnnotation is created and linked to the
 * according TextAnnotation. Annotations without such an entry are skipped.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param language
 *            the language set on the entity label literal
 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    // the metadata graph is the same for every annotation, so look it up once
    Graph model = ci.getMetadata();
    for (Annotation occ : occs) {
        // single lookup: a missing entry means there is nothing to link to
        IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
        if (textAnnotation == null) {
            continue;
        }
        IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
        Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
        model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
        model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
        Collection<String> typeNames = occ.getTypeNames();
        if (typeNames != null) {
            for (String typeName : typeNames) {
                model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(typeName)));
            }
        }
        model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class TestNamedEntityExtractionEnhancementEngine method testCustomModel.
/**
 * Configures {@code nerEngine} to use only the custom
 * {@code bionlp2004-DNA-en.bin} name finder model for English, runs it on the
 * {@code EHEALTH} text and expects seven validated TextAnnotations whose
 * dc:type is the IRI mapped to the "DNA" type.
 *
 * @throws EngineException declared by the signature
 * @throws IOException declared by the signature
 */
@Test
public void testCustomModel() throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem("urn:test:content-item:single:sentence", EHEALTH, "en");
    IRI dnaType = new IRI("http://www.bootstrep.eu/ontology/GRO#DNA");

    // no default models for this test ...
    nerEngine.config.getDefaultModelTypes().clear();
    // ... only the custom model shipped with the test data, mapped to the DNA type
    nerEngine.config.addCustomNameFinderModel("en", "bionlp2004-DNA-en.bin");
    nerEngine.config.setMappedType("DNA", dnaType);

    nerEngine.computeEnhancements(contentItem);

    String engineClassName = nerEngine.getClass().getName();
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));
    // a null expected value makes the confidence a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    // dc:type values MUST be the URI set as mapped type
    expected.put(Properties.DC_TYPE, dnaType);

    int validatedTextAnnotations = validateAllTextAnnotations(contentItem.getMetadata(), EHEALTH, expected);
    assertEquals(7, validatedTextAnnotations);
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class TestMetaxaCore method printTriples.
/**
 * This logs (at debug level) the Stanbol Enhancer triples that would be created for the
 * metadata contained in the given model. The triples are only constructed and logged;
 * this method does not add them to any graph.
 * <p>
 * The statement iterator is closed in a {@code finally} block so that it is released even
 * if converting a statement throws.
 *
 * @param m a {@link Model}
 *
 * @return an {@code int} with the number of statements for which subject, predicate and
 *         object could all be converted (statements with a {@code null} part are skipped)
 */
private int printTriples(Model m) {
    int tripleCounter = 0;
    // passed to every conversion call so all statements share the same blank node mapping
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = m.iterator();
    try {
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            BlankNodeOrIRI subject = (BlankNodeOrIRI) MetaxaEngine.asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
            IRI predicate = (IRI) MetaxaEngine.asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
            RDFTerm object = MetaxaEngine.asClerezzaResource(oneStmt.getObject(), blankNodeMap);
            if (null != subject && null != predicate && null != object) {
                Triple t = new TripleImpl(subject, predicate, object);
                LOG.debug("adding " + t);
                tripleCounter++;
            } else {
                LOG.debug("skipped " + oneStmt.toString());
            }
        }
    } finally {
        // release the underlying iterator on every exit path
        it.close();
    }
    return tripleCounter;
}
Aggregations