use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.
/**
 * Runs the lemmatizer engine created by {@code initEngine(true)} on {@code TERM}
 * (declared as Italian content) and validates every TextAnnotation it produces,
 * including the additional morphological feature properties. No EntityAnnotations
 * are expected.
 * <p>
 * If {@code computeEnhancements} fails with an {@link EngineException} the decision
 * is delegated to {@code RemoteServiceHelper.checkServiceUnavailable(e)}; when that
 * helper returns normally the test ends without validating anything.
 * <p>
 * The engine is shut down in a {@code finally} block so it is released on the
 * early-return path and when a validation fails, not only on success.
 *
 * @throws Exception on any unexpected failure while initialising, running or
 *         shutting down the engine
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    // add a simple triple to statically define the language of the test content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    // NOTE(review): 'true' presumably enables the full morphological analysis - confirm against initEngine
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // the helper decides whether this failure may be ignored; if it
            // returns normally there is nothing to validate
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);

        // values every enhancement created by this engine is expected to carry
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));

        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // The presence of a TextAnnotation is intentionally not asserted here:
        // the loop below also accepts the case that none was created.
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            // perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);

        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        // no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // always release the engine, also on early return or assertion failure
        shutdownEngine(morphoAnalysisEngine);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testEngine.
/**
 * Runs the lemmatizer engine created by {@code initEngine(false)} on {@code TEXT}
 * (declared as Italian content) and expects exactly one TextAnnotation, identified
 * by an IRI, that carries exactly one lemma form property.
 * <p>
 * If {@code computeEnhancements} fails with an {@link EngineException} the decision
 * is delegated to {@code RemoteServiceHelper.checkServiceUnavailable(e)}; when that
 * helper returns normally the test ends without validating anything.
 * <p>
 * The engine is shut down in a {@code finally} block so it is released on the
 * early-return path and when a validation fails, not only on success.
 *
 * @throws Exception on any unexpected failure while initialising, running or
 *         shutting down the engine
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    // add a simple triple to statically define the language of the test content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    // unit tests should not depend on each other (if possible), so the language
    // is set above instead of running the language identification engine
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // the helper decides whether this failure may be ignored; if it
            // returns normally there is nothing to validate
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);

        // values every enhancement created by this engine is expected to carry
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));

        // exactly one TextAnnotation, identified by an IRI, is expected
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());

        // validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);

        // validate the lemma form TextAnnotation; assertEquals reports the
        // actual number of lemma forms when the expectation is not met
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        Assert.assertEquals("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", 1, lemmaForms);
    } finally {
        // always release the engine, also on early return or assertion failure
        shutdownEngine(morphoAnalysisEngine);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.
/**
 * Wraps {@code txt} in a ContentItem, tags it with the language {@code lang},
 * runs {@code nerEngine} on it and validates all resulting Text- and
 * EntityAnnotations; the number of annotations found is only logged.
 * <p>
 * An {@link EngineException} raised inside the try block is handed to
 * {@code RemoteServiceHelper.checkServiceUnavailable(e)}.
 *
 * @param txt the text to analyse
 * @param lang the language code set as {@code DC_LANGUAGE} on the content item
 * @throws EngineException declared by this helper; see the catch block for handling
 * @throws IOException declared by this helper
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem(txt);
    try {
        // a single triple statically defines the language of the test content
        contentItem.getMetadata().add(
            new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));

        nerEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // values every enhancement created by the engine is expected to carry
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        String creatorName = nerEngine.getClass().getName();
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(creatorName));

        int textAnnotations = validateAllTextAnnotations(contentItem.getMetadata(), txt, expected);
        log.info(textAnnotations + " TextAnnotations found ...");

        int entityAnnotations =
            EnhancementStructureHelper.validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        log.info(entityAnnotations + " EntityAnnotations found ...");
    } catch (EngineException engineFailure) {
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class DereferenceEngineTest method testSyncDereferencing.
/**
 * Configures an {@link EntityDereferenceEngine} named "sync" on top of
 * {@code syncDereferencer}, with both language filter options set to
 * {@code false}, runs it on a test ContentItem and validates the entities
 * dereferenced for {@code ENHANCER_ENTITY_REFERENCE}.
 *
 * @throws Exception on any unexpected failure while configuring or running the engine
 */
@Test
public void testSyncDereferencing() throws Exception {
    ContentItem ci = getContentItem("urn:test:testSyncDereferencing");

    // the engine configuration is passed as a Dictionary
    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "sync");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);
    EntityDereferenceEngine engine = new EntityDereferenceEngine(syncDereferencer, new DereferenceEngineConfig(dict, null));

    // JUnit expects (unexpected, actual): the forbidden value goes first
    Assert.assertNotEquals("The engine must be able to enhance the test ContentItem",
        EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
    engine.computeEnhancements(ci);
    validateDereferencedEntities(ci.getMetadata(), ENHANCER_ENTITY_REFERENCE);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class DereferenceEngineTest method testAsyncMultipleEntityReferenceDereferencing.
/**
 * Test for <a href="https://issues.apache.org/jira/browse/STANBOL-1334">STANBOL-1334</a>.
 * <p>
 * Configures an {@link EntityDereferenceEngine} named "async" on top of
 * {@code asyncDereferencer} with two entity reference properties
 * ({@code OTHER_ENTITY_REFERENCE} and {@code ENHANCER_ENTITY_REFERENCE}), runs it
 * on a test ContentItem and validates the entities dereferenced for both properties.
 *
 * @throws Exception on any unexpected failure while configuring or running the engine
 */
@Test
public void testAsyncMultipleEntityReferenceDereferencing() throws Exception {
    // NOTE(review): this URI is the same one used by testSyncDereferencing -
    // presumably a copy/paste left-over; confirm whether getContentItem depends
    // on the id before renaming it
    ContentItem ci = getContentItem("urn:test:testSyncDereferencing");

    // the engine configuration is passed as a Dictionary
    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "async");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);
    // more than one entity reference property is the scenario under test
    dict.put(ENTITY_REFERENCES, new String[] { OTHER_ENTITY_REFERENCE.getUnicodeString(), ENHANCER_ENTITY_REFERENCE.getUnicodeString() });
    DereferenceEngineConfig config = new DereferenceEngineConfig(dict, null);
    EntityDereferenceEngine engine = new EntityDereferenceEngine(asyncDereferencer, config);

    // JUnit expects (unexpected, actual): the forbidden value goes first
    Assert.assertNotEquals("The engine must be able to enhance the test ContentItem",
        EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
    engine.computeEnhancements(ci);
    validateDereferencedEntities(ci.getMetadata(), OTHER_ENTITY_REFERENCE, ENHANCER_ENTITY_REFERENCE);
}
Aggregations