Search in sources :

Example 6 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliSentimentAnalysisEngine method computeEnhancements.

/**
 * Sends the text of the given ContentItem to the CELI client and writes one
 * text enhancement per returned sentiment expression into the ContentItem's
 * metadata graph.
 * <p>
 * Returns without adding anything when the text is empty after trimming.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if the call to the CELI client fails with an
 *         {@code IOException} or a {@code SOAPException}
 * @throws IllegalStateException if no supported content part or no language
 *         is available for the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (textPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE
                + "' found for ContentItem " + ci.getUri()
                + ": This is also checked in the canEnhance method! -> This "
                + "indicated an Bug in the implementation of the "
                + "EnhancementJobManager!");
    }

    // read the text; a failure to read is reported as invalid content
    final String text;
    try {
        text = ContentItemHelper.getText(textPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.info("No text contained in ContentPart {" + textPart.getKey()
                + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }

    final String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri()
                + ": This is also checked in the canEnhance "
                + "method! -> This indicated an Bug in the implementation of the "
                + "EnhancementJobManager!");
    }
    // language tag attached to the plain literals of the TextAnnotations
    final Language lang = new Language(language);

    try {
        final List<SentimentExpression> expressions = this.client.extractSentimentExpressions(text, language);
        final LiteralFactory literals = LiteralFactory.getInstance();
        final Graph metadata = ci.getMetadata();
        for (SentimentExpression expression : expressions) {
            try {
                final IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // the selected text is stored as a PlainLiteral in the language of the content
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT,
                        new PlainLiteralImpl(expression.getSnippetStr(), lang)));
                metadata.add(new TripleImpl(annotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));

                // position, context and polarity are only written when both offsets are known
                if (expression.getStartSnippet() == null || expression.getEndSnippet() == null) {
                    continue;
                }
                metadata.add(new TripleImpl(annotation, ENHANCER_START,
                        literals.createTypedLiteral(expression.getStartSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_END,
                        literals.createTypedLiteral(expression.getEndSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(
                                getSelectionContext(text, expression.getSnippetStr(), expression.getStartSnippet()),
                                lang)));
                metadata.add(new TripleImpl(annotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY,
                        literals.createTypedLiteral(expression.getSentimentPolarityAsDoubleValue())));
            } catch (NoConvertorException e) {
                // a literal conversion failure is logged; the remaining expressions are still processed
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 7 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.

/**
 * Runs the lemmatizer engine (initialised with {@code initEngine(true)}) on
 * {@code TERM} and validates every TextAnnotation it produced, including the
 * morphological feature properties. No EntityAnnotations are expected.
 * <p>
 * The engine is shut down in a {@code finally} block so that it is released
 * even when the test returns early or a validation fails.
 *
 * @throws Exception if engine setup, enhancement or validation fails
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // NOTE(review): presumably only returns normally when the remote
            // service is unavailable, in which case the test is skipped - confirm
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancements
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // The presence of a TextAnnotation is intentionally not asserted here,
        // so the loop below also accepts results without any TextAnnotation.
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            // perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        // no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // always release the engine, also on early return or failed validation
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 8 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testEngine.

/**
 * Runs the lemmatizer engine (initialised with {@code initEngine(false)}) on
 * {@code TEXT} and expects exactly one TextAnnotation, identified by an IRI,
 * carrying exactly one lemma form property.
 * <p>
 * The engine is shut down in a {@code finally} block so that it is released
 * even when the test returns early or an assertion fails.
 *
 * @throws Exception if engine setup, enhancement or validation fails
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    // unit tests should not depend on each other (if possible), so the
    // language is set above instead of running the language identifier test
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // NOTE(review): presumably only returns normally when the remote
            // service is unavailable, in which case the test is skipped - confirm
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancement
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        // validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        // validate the lemma form TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        // always release the engine, also on early return or failed assertion
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 9 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.

/**
 * Enhances a ContentItem built from {@code text} (language statically set to
 * "it") and checks that exactly two tokens of the AnalysedText carry a
 * sentiment annotation.
 *
 * @throws IOException declared by the test signature; may be raised while
 *         creating the ContentItem or the AnalysedText
 * @throws EngineException declared by the test signature; exceptions raised
 *         by {@code computeEnhancements} itself are caught and handed to
 *         {@code RemoteServiceHelper.checkServiceUnavailable}
 */
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    // compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        // NOTE(review): presumably only returns normally when the remote
        // service is unavailable - confirm
        RemoteServiceHelper.checkServiceUnavailable(e);
        // deactivate test
        return;
    }
    // now validate the enhancements: count tokens with at least one sentiment annotation
    int sentimentExpressionCnt = 0;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        if (sentimentExpressionsList != null && !sentimentExpressionsList.isEmpty()) {
            sentimentExpressionCnt++;
        }
    }
    // assertEquals reports the actual count on failure, unlike assertTrue(cnt == 2)
    Assert.assertEquals("2 sentiment expressions should be recognized in: " + text, 2, sentimentExpressionCnt);
}
Also used : AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value) Token(org.apache.stanbol.enhancer.nlp.model.Token) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) CeliAnalyzedTextLemmatizerEngineTest(org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngineTest) Test(org.junit.Test)

Example 10 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliClassificationEnhancementEngineTest method tesetEngine.

/**
 * Runs the classification engine on {@code TEXT} (language statically set to
 * "fr") and expects exactly one text annotation and at least one topic
 * annotation in the resulting metadata.
 *
 * @throws Exception if enhancement or validation fails
 */
@Test
public void tesetEngine() throws Exception {
    ContentItem contentItem = wrapAsContentItem(TEXT);
    try {
        // The language is defined statically with a single triple so that this
        // test does not depend on the language identifier engine test.
        contentItem.getMetadata().add(
                new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));

        classificationEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // values every enhancement created by the engine is validated against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        String engineClassName = classificationEngine.getClass().getName();
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));

        int textAnnotations = EnhancementStructureHelper.validateAllTextAnnotations(
                contentItem.getMetadata(), TEXT, expected);
        assertEquals("Only a single fise:TextAnnotation is expeted", 1, textAnnotations);

        int topicAnnotations = validateAllTopicAnnotations(contentItem.getMetadata(), expected);
        assertTrue("No TpocisAnnotations found", topicAnnotations > 0);
    } catch (EngineException e) {
        // the helper decides how an EngineException is treated; the test ends here either way
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)143 IRI (org.apache.clerezza.commons.rdf.IRI)104 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)69 Graph (org.apache.clerezza.commons.rdf.Graph)66 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)49 Triple (org.apache.clerezza.commons.rdf.Triple)41 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)26 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)23 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)20 Literal (org.apache.clerezza.commons.rdf.Literal)20 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)20 IOException (java.io.IOException)18 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)17 Test (org.junit.Test)16 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)15 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)14 HashSet (java.util.HashSet)13 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)13 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)11