Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in the Apache Stanbol project.
Class CeliNamedEntityExtractionEnhancementEngineTest, method testInput.
/**
 * Enhances the given text with the NER engine and validates the annotations
 * written to the content item's metadata.
 * <p>
 * The content language is declared up front by adding a {@code DC_LANGUAGE}
 * triple carrying {@code lang} to the metadata. An {@code EngineException}
 * raised inside the try block is handed to
 * {@code RemoteServiceHelper.checkServiceUnavailable(..)} rather than being
 * rethrown directly from here.
 *
 * @param txt  the text wrapped as content item and passed to the text annotation validation
 * @param lang the language literal stored as {@code DC_LANGUAGE} of the content item
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem(txt);

    // values every validated annotation is expected to carry
    HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(
        Properties.DC_CREATOR,
        LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));

    try {
        // a single triple statically defines the language of the test content
        contentItem.getMetadata().add(
            new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));

        nerEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        log.info(validateAllTextAnnotations(contentItem.getMetadata(), txt, expected)
            + " TextAnnotations found ...");
        log.info(EnhancementStructureHelper.validateAllEntityAnnotations(contentItem.getMetadata(), expected)
            + " EntityAnnotations found ...");
    } catch (EngineException engineFailure) {
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in the Apache Stanbol project.
Class DereferenceEngineTest, method testSyncDereferencing.
/**
 * Runs an {@code EntityDereferenceEngine} backed by {@code syncDereferencer}
 * over a test content item and validates the entities dereferenced for
 * {@code ENHANCER_ENTITY_REFERENCE}.
 * <p>
 * Both language filter options are set to {@code false} in the engine
 * configuration.
 *
 * @throws Exception on any failure while configuring or running the engine
 */
@Test
public void testSyncDereferencing() throws Exception {
    ContentItem ci = getContentItem("urn:test:testSyncDereferencing");

    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "sync");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);

    EntityDereferenceEngine engine =
        new EntityDereferenceEngine(syncDereferencer, new DereferenceEngineConfig(dict, null));

    // JUnit's contract is (unexpected, actual): the constant goes first
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));

    engine.computeEnhancements(ci);
    validateDereferencedEntities(ci.getMetadata(), ENHANCER_ENTITY_REFERENCE);
}
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in the Apache Stanbol project.
Class DereferenceEngineTest, method testAsyncMultipleEntityReferenceDereferencing.
/**
 * Test for <a href="https://issues.apache.org/jira/browse/STANBOL-1334">STANBOL-1334</a>.
 * <p>
 * Configures an {@code EntityDereferenceEngine} backed by
 * {@code asyncDereferencer} with two entity reference properties
 * ({@code OTHER_ENTITY_REFERENCE} and {@code ENHANCER_ENTITY_REFERENCE}) and
 * validates the dereferenced entities for both of them.
 *
 * @throws Exception on any failure while configuring or running the engine
 */
@Test
public void testAsyncMultipleEntityReferenceDereferencing() throws Exception {
    // NOTE(review): the URI names the sync test - looks like a copy/paste leftover;
    // confirm against getContentItem(..) before renaming it.
    ContentItem ci = getContentItem("urn:test:testSyncDereferencing");

    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "async");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);
    dict.put(ENTITY_REFERENCES, new String[] {
        OTHER_ENTITY_REFERENCE.getUnicodeString(),
        ENHANCER_ENTITY_REFERENCE.getUnicodeString() });

    DereferenceEngineConfig config = new DereferenceEngineConfig(dict, null);
    EntityDereferenceEngine engine = new EntityDereferenceEngine(asyncDereferencer, config);

    // JUnit's contract is (unexpected, actual): the constant goes first
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));

    engine.computeEnhancements(ci);
    validateDereferencedEntities(ci.getMetadata(), OTHER_ENTITY_REFERENCE, ENHANCER_ENTITY_REFERENCE);
}
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in the Apache Stanbol project.
Class DereferenceEngineTest, method testOfflineMode.
/**
 * Test {@link OfflineMode} functionality.
 * <p>
 * Uses a dereferencer whose {@code supportsOfflineMode()} returns
 * {@code false}. The engine must not report {@code CANNOT_ENHANCE} while
 * online, and must report {@code CANNOT_ENHANCE} once
 * {@code setOfflineMode(true)} has been called.
 *
 * @throws Exception on any failure while configuring the engine
 */
@Test
public void testOfflineMode() throws Exception {
    ContentItem ci = getContentItem("urn:test:testOfflineMode");

    // dereferencer that explicitly declares it cannot work offline
    EntityDereferencer onlineDereferencer = new TestDereferencer(null) {
        @Override
        public boolean supportsOfflineMode() {
            return false;
        }
    };

    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "online");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);

    EntityDereferenceEngine engine =
        new EntityDereferenceEngine(onlineDereferencer, new DereferenceEngineConfig(dict, null));

    // engine in online mode; JUnit's contract is (unexpected, actual)
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));

    // set engine in offline mode; JUnit's contract is (expected, actual), so a
    // failure reports CANNOT_ENHANCE as the expected value
    engine.setOfflineMode(true);
    Assert.assertEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
}
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in the Apache Stanbol project.
Class TestNamedEntityExtractionEnhancementEngine, method testCustomModel.
/**
 * Runs the NER engine with only a custom name finder model registered for
 * {@code "en"} (the default model types are cleared first) and checks that the
 * text annotations validated against the {@code EHEALTH} text carry the mapped
 * {@code dc:type}, a confidence value, and number exactly seven.
 */
@Test
public void testCustomModel() throws EngineException, IOException {
    IRI dnaType = new IRI("http://www.bootstrep.eu/ontology/GRO#DNA");
    ContentItem contentItem = wrapAsContentItem("urn:test:content-item:single:sentence", EHEALTH, "en");

    // no default models for this test ...
    nerEngine.config.getDefaultModelTypes().clear();
    // ... only the custom model shipped with the test data
    nerEngine.config.addCustomNameFinderModel("en", "bionlp2004-DNA-en.bin");
    nerEngine.config.setMappedType("DNA", dnaType);

    nerEngine.computeEnhancements(contentItem);

    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(
        Properties.DC_CREATOR,
        LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
    // a null expected value marks confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    // dc:type values MUST be the URI configured as the mapped type
    expected.put(Properties.DC_TYPE, dnaType);

    assertEquals(7, validateAllTextAnnotations(contentItem.getMetadata(), EHEALTH, expected));
}
Aggregations