Example 16 with ContentItem

Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache, from the class CeliClassificationEnhancementEngineTest, method tesetEngine.

@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        //add a simple triple to statically define the language of the test
        //content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
        //unit tests should not depend on each other (if possible)
        //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
        classificationEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(classificationEngine.getClass().getName()));
        int textAnnoNum = EnhancementStructureHelper.validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
        assertEquals("Only a single fise:TextAnnotation is expeted", 1, textAnnoNum);
        int numTopicAnnotations = validateAllTopicAnnotations(ci.getMetadata(), expectedValues);
        assertTrue("No TpocisAnnotations found", numTopicAnnotations > 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
}
Also used: IRI (org.apache.clerezza.commons.rdf.IRI), PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl), HashMap (java.util.HashMap), EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException), RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm), TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)
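
The tests above call a wrapAsContentItem(..) helper that is defined in the test class but not shown here. The sketch below is only an assumption about how such a helper can be written, using Stanbol's ContentItemFactory together with StringSource; the actual test base class may wire the factory differently.

//sketch only: assumes org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory
//and org.apache.stanbol.enhancer.servicesapi.impl.StringSource; the real test base class may
//use a different ContentItemFactory implementation
private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

public static ContentItem wrapAsContentItem(String text) throws IOException {
    //StringSource wraps the plain text as an in-memory "text/plain" content source
    return ciFactory.createContentItem(new StringSource(text));
}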

Example 17 with ContentItem

Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache, from the class CeliLanguageIdentifierEnhancementEngineTest, method tesetEngine.

@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(langIdentifier.getClass().getName()));
        int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
        assertEquals("A single TextAnnotation is expected by this Test", 1, numTextAnnotations);
        //even though these tests do not validate service quality but rather
        //the correct integration of the CELI service as an EnhancementEngine,
        //we expect "fr" to be detected for the parsed text
        assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", EnhancementEngineHelper.getLanguage(ci));
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        assertEquals("No EntityAnnotations are expected", 0, entityAnnoNum);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used: IRI (org.apache.clerezza.commons.rdf.IRI), HashMap (java.util.HashMap), EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException), RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)

Example 18 with ContentItem

Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache, from the class CeliAnalyzedTextLemmatizerEngineTest, method testEngineDe.

@Test
public void testEngineDe() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
    Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
    //Add some Tokens with POS annotations to test the usage of
    //existing POS annotations by the lemmatizer
    Token verbrachten = at.addToken(de_verbStart, de_verbStart + de_verb.length());
    verbrachten.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb), de_verbProb));
    Token schonen = at.addToken(de_adjectiveStart, de_adjectiveStart + de_adjective.length());
    schonen.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ADJ", LexicalCategory.Adjective), de_adjectiveProb));
    Token urlaub = at.addToken(de_nounStart, de_nounStart + de_noun.length());
    urlaub.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NC", LexicalCategory.Noun), de_nounProb));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    //compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        //deactivate test
        return;
    }
    //now validate the enhancements
    boolean foundVerb = false;
    boolean foundAdjective = false;
    boolean foundNoun = false;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if (de_verb.equals(token.getSpan())) {
            foundVerb = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Verb, de_verbProb);
        } else if (de_adjective.equals(token.getSpan())) {
            foundAdjective = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Adjective, de_adjectiveProb);
        } else if (de_noun.equals(token.getSpan())) {
            foundNoun = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Noun, de_nounProb);
        }
        for (Value<MorphoFeatures> mf : mfs) {
            log.info("  - {}", mf);
            Assert.assertNotNull(mf.value().getLemma());
        }
    }
    Assert.assertTrue("No MorphoFeatures found for '" + de_verb + "'!", foundVerb);
    Assert.assertTrue("No MorphoFeatures found for '" + de_adjective + "'!", foundAdjective);
    Assert.assertTrue("No MorphoFeatures found for '" + de_noun + "'!", foundNoun);
}
Also used: PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl), EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException), Token (org.apache.stanbol.enhancer.nlp.model.Token), AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText), PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag), Value (org.apache.stanbol.enhancer.nlp.model.annotation.Value), StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource), TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl), MorphoFeatures (org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)
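
The validateMorphFeatureProbability(..) helper used in this test belongs to the test class and is not shown. The following is only a plausible sketch of what it asserts, assuming MorphoFeatures exposes its POS tags via getPosList() and that Value.probability() carries the probability of the parsed POS annotation; the project's actual helper may check more or different details.

//sketch only: illustrates the kind of check the helper performs, not the project's actual code
static void validateMorphFeatureProbability(List<Value<MorphoFeatures>> mfs,
        LexicalCategory expectedCategory, double expectedProb) {
    Assert.assertFalse("no MorphoFeatures annotation present", mfs.isEmpty());
    boolean found = false;
    for (Value<MorphoFeatures> mf : mfs) {
        //each MorphoFeatures value carries a lemma plus the POS tags it was derived from
        for (PosTag pos : mf.value().getPosList()) {
            if (pos.getCategories().contains(expectedCategory)) {
                found = true;
                //the lemmatizer is expected to propagate the probability of the
                //POS annotation that was added before calling computeEnhancements
                Assert.assertEquals(expectedProb, mf.probability(), 0.001);
            }
        }
    }
    Assert.assertTrue("no MorphoFeatures with category " + expectedCategory + " found", found);
}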

Example 19 with ContentItem

Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache, from the class CeliLemmatizerEnhancementEngineTest, method testCompleteMorphoAnalysis.

@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        morphoAnalysisEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    TestUtils.logEnhancements(ci);
    //validate enhancements
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
    Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    // test if a textAnnotation is present
    //assertTrue(textAnnotationIterator.hasNext()); 
    //  -> this might be used to test that there are no TextAnnotations
    int textAnnotationCount = 0;
    while (textAnnotationIterator.hasNext()) {
        IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
        // test if selected Text is added
        validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
        textAnnotationCount++;
        //perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
        validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
    }
    log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
    int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    //no EntityAnnotations expected
    Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    shutdownEngine(morphoAnalysisEngine);
}
Also used: Triple (org.apache.clerezza.commons.rdf.Triple), IRI (org.apache.clerezza.commons.rdf.IRI), BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI), PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl), HashMap (java.util.HashMap), EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException), RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm), TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)

Example 20 with ContentItem

Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache, from the class CeliLemmatizerEnhancementEngineTest, method testEngine.

@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    //unit tests should not depend on each other (if possible)
    //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        morphoAnalysisEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    TestUtils.logEnhancements(ci);
    //validate enhancement
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
    Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
    BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
    assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
    assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
    //validate the enhancement metadata
    validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
    //validate the lemma form TextAnnotation
    int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
    assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    shutdownEngine(morphoAnalysisEngine);
}
Also used: IRI (org.apache.clerezza.commons.rdf.IRI), BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI), PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl), HashMap (java.util.HashMap), EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException), RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm), Triple (org.apache.clerezza.commons.rdf.Triple), TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)
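
All five examples follow the same skeleton for testing an engine that calls the remote CELI service: wrap the test text as a ContentItem, optionally pin the language with a dc:language triple, run computeEnhancements(..) while treating an unreachable service as a skipped test, then validate the created enhancements. The condensed sketch below restates that shared pattern; engine, TEXT and expectedValues stand in for the concrete fields of the individual test classes.

//condensed sketch of the shared pattern; `engine`, TEXT and expectedValues are placeholders
ContentItem ci = ciFactory.createContentItem(new StringSource(TEXT));
//statically define the language so the test does not depend on the language identifier engine
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
try {
    engine.computeEnhancements(ci);
} catch (EngineException e) {
    //if the remote CELI service is unreachable the test is skipped rather than failed
    RemoteServiceHelper.checkServiceUnavailable(e);
    return;
}
//validate the created fise:TextAnnotations against the expected dc:creator and related values
EnhancementStructureHelper.validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);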

Aggregations

ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 73
Test (org.junit.Test): 62
IRI (org.apache.clerezza.commons.rdf.IRI): 46
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 18
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 18
HashMap (java.util.HashMap): 15
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 15
Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 15
StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource): 13
EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 12
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 11
Graph (org.apache.clerezza.commons.rdf.Graph): 8
Date (java.util.Date): 6
SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 6
Hashtable (java.util.Hashtable): 5
AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText): 4
IOException (java.io.IOException): 3
InputStream (java.io.InputStream): 3
MediaType (javax.ws.rs.core.MediaType): 3
Triple (org.apache.clerezza.commons.rdf.Triple): 3