Search in sources :

Example 21 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.

/**
 * Runs the NER engine on the given text and validates every text and entity
 * annotation written to the content item's metadata.
 *
 * @param txt the plain text that is wrapped as a content item and analysed
 * @param lang the language stored as {@code DC_LANGUAGE} value of the content item
 * @throws EngineException declared by the engine call; exceptions thrown inside
 *         the try block are passed to {@code RemoteServiceHelper.checkServiceUnavailable}
 *         (presumably that helper tolerates an unreachable remote service - confirm)
 * @throws IOException declared for the content item creation
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem(txt);
    try {
        // statically declare the language of the test content with a single triple
        TripleImpl languageTriple = new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang));
        contentItem.getMetadata().add(languageTriple);

        nerEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // values that every validated annotation is checked against
        String engineClassName = nerEngine.getClass().getName();
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));

        int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        log.info(entityAnnotationCount + " EntityAnnotations found ...");
    } catch (EngineException engineFailure) {
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 22 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class DereferenceEngineTest method testSyncDereferencing.

/**
 * Builds an {@link EntityDereferenceEngine} on top of {@code syncDereferencer},
 * asserts that it does not report {@code CANNOT_ENHANCE} for the test content
 * item, runs it and validates the dereferenced entities.
 *
 * @throws Exception on any failure while creating or running the engine
 */
@Test
public void testSyncDereferencing() throws Exception {
    ContentItem contentItem = getContentItem("urn:test:testSyncDereferencing");

    // engine configuration: name "sync", both FILTER_* options set to false
    Dictionary<String, Object> engineProps = new Hashtable<String, Object>();
    engineProps.put(EnhancementEngine.PROPERTY_NAME, "sync");
    engineProps.put(FILTER_CONTENT_LANGUAGES, Boolean.FALSE);
    engineProps.put(FILTER_ACCEPT_LANGUAGES, Boolean.FALSE);
    DereferenceEngineConfig engineConfig = new DereferenceEngineConfig(engineProps, null);

    EntityDereferenceEngine derefEngine = new EntityDereferenceEngine(syncDereferencer, engineConfig);
    Assert.assertNotEquals(derefEngine.canEnhance(contentItem), EnhancementEngine.CANNOT_ENHANCE);

    derefEngine.computeEnhancements(contentItem);
    validateDereferencedEntities(contentItem.getMetadata(), ENHANCER_ENTITY_REFERENCE);
}
Also used : Hashtable(java.util.Hashtable) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 23 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class DereferenceEngineTest method testAsyncMultipleEntityReferenceDereferencing.

/**
 * Test for <a href="https://issues.apache.org/jira/browse/STANBOL-1334">STANBOL-1334</a>:
 * runs an {@link EntityDereferenceEngine} on top of {@code asyncDereferencer}
 * that is configured with two entity reference properties and validates the
 * dereferenced entities for both of them.
 *
 * @throws Exception on any failure while creating or running the engine
 */
@Test
public void testAsyncMultipleEntityReferenceDereferencing() throws Exception {
    // NOTE(review): the id reuses the "testSyncDereferencing" URN - presumably a
    // copy/paste leftover; confirm whether a dedicated id is wanted here.
    ContentItem contentItem = getContentItem("urn:test:testSyncDereferencing");

    // the two properties configured as entity references of the engine
    String[] referenceProperties = new String[] {
        OTHER_ENTITY_REFERENCE.getUnicodeString(),
        ENHANCER_ENTITY_REFERENCE.getUnicodeString()
    };

    Dictionary<String, Object> engineProps = new Hashtable<String, Object>();
    engineProps.put(EnhancementEngine.PROPERTY_NAME, "async");
    engineProps.put(FILTER_CONTENT_LANGUAGES, Boolean.FALSE);
    engineProps.put(FILTER_ACCEPT_LANGUAGES, Boolean.FALSE);
    engineProps.put(ENTITY_REFERENCES, referenceProperties);

    EntityDereferenceEngine derefEngine = new EntityDereferenceEngine(asyncDereferencer, new DereferenceEngineConfig(engineProps, null));
    Assert.assertNotEquals(derefEngine.canEnhance(contentItem), EnhancementEngine.CANNOT_ENHANCE);

    derefEngine.computeEnhancements(contentItem);
    validateDereferencedEntities(contentItem.getMetadata(), OTHER_ENTITY_REFERENCE, ENHANCER_ENTITY_REFERENCE);
}
Also used : Hashtable(java.util.Hashtable) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 24 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class DereferenceEngineTest method testOfflineMode.

/**
 * Test {@link OfflineMode} functionality: an engine whose dereferencer reports
 * {@code supportsOfflineMode() == false} must not return
 * {@code CANNOT_ENHANCE} while online, and must return it once
 * {@code setOfflineMode(true)} was called.
 *
 * @throws Exception on any failure while creating the engine or calling
 *         {@code canEnhance}
 */
@Test
public void testOfflineMode() throws Exception {
    ContentItem ci = getContentItem("urn:test:testOfflineMode");
    // dereferencer that explicitly declares it can not work in offline mode
    EntityDereferencer onlineDereferencer = new TestDereferencer(null) {

        @Override
        public boolean supportsOfflineMode() {
            return false;
        }
    };
    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "online");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);
    EntityDereferenceEngine engine = new EntityDereferenceEngine(onlineDereferencer, new DereferenceEngineConfig(dict, null));
    //engine in online mode
    // JUnit convention: the reference value goes first, the value under test second
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
    //set engine in offline mode
    engine.setOfflineMode(true);
    // expected value first, so that a failure reports "expected"/"actual" correctly
    Assert.assertEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
}
Also used : Hashtable(java.util.Hashtable) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 25 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TestNamedEntityExtractionEnhancementEngine method testCustomModel.

/**
 * Runs the NER engine with only the custom {@code bionlp2004-DNA-en.bin} name
 * finder model registered for "en" and expects exactly seven text annotations,
 * each carrying the mapped DNA type as {@code dc:type}.
 *
 * @throws EngineException declared by the engine call
 * @throws IOException declared for the content item creation
 */
@Test
public void testCustomModel() throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem("urn:test:content-item:single:sentence", EHEALTH, "en");
    // type the "DNA" tag is mapped to; also the expected dc:type of all annotations
    IRI dnaType = new IRI("http://www.bootstrep.eu/ontology/GRO#DNA");

    // default models are not used by this test ...
    nerEngine.config.getDefaultModelTypes().clear();
    // ... a custom model shipped with the test data is registered instead
    nerEngine.config.addCustomNameFinderModel("en", "bionlp2004-DNA-en.bin");
    nerEngine.config.setMappedType("DNA", dnaType);

    nerEngine.computeEnhancements(contentItem);

    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
    // a null expected value turns confidence into a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    // dc:type values MUST equal the URI configured as mapped type
    expected.put(Properties.DC_TYPE, dnaType);

    int annotationCount = validateAllTextAnnotations(contentItem.getMetadata(), EHEALTH, expected);
    assertEquals(7, annotationCount);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)73 Test (org.junit.Test)62 IRI (org.apache.clerezza.commons.rdf.IRI)46 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)18 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)18 HashMap (java.util.HashMap)15 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)15 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)15 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)13 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)12 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)11 Graph (org.apache.clerezza.commons.rdf.Graph)8 Date (java.util.Date)6 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)6 Hashtable (java.util.Hashtable)5 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)4 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 MediaType (javax.ws.rs.core.MediaType)3 Triple (org.apache.clerezza.commons.rdf.Triple)3