Search in sources :

Example 51 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method getSpottedXml.

/**
 * Builds the {@code <annotation>} XML document for the given text, with one
 * {@code <surfaceForm>} child for every TextAnnotation in the graph that has
 * a selected text.
 * <p>
 * As a side effect {@code textAnnotationsMap} is replaced by a fresh map from
 * the (unescaped) surface form to the IRI of the TextAnnotation it was read
 * from. The text and the surface forms are XML-escaped before being written
 * into attribute values, so that quotes, ampersands or angle brackets in the
 * content cannot produce a malformed document.
 * <p>
 * Any exception raised while reading the graph is logged and ends the
 * iteration; the surface forms collected up to that point are still returned.
 *
 * @param text the text written to the {@code text} attribute
 * @param graph the graph searched for TextAnnotations
 * @return the XML document as String
 */
private String getSpottedXml(String text, Graph graph) {
    StringBuilder xml = new StringBuilder();
    textAnnotationsMap = new Hashtable<String, IRI>();
    xml.append(String.format("<annotation text=\"%s\">", escapeXmlAttribute(text)));
    try {
        for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
            IRI uri = (IRI) it.next().getSubject();
            String surfaceForm = EnhancementEngineHelper.getString(graph, uri, ENHANCER_SELECTED_TEXT);
            if (surfaceForm != null) {
                String offset = EnhancementEngineHelper.getString(graph, uri, ENHANCER_START);
                // the map is keyed by the raw surface form; only the XML output is escaped
                textAnnotationsMap.put(surfaceForm, uri);
                xml.append(String.format("<surfaceForm name=\"%s\" offset=\"%s\"/>", escapeXmlAttribute(surfaceForm), offset));
            }
        }
    } catch (Exception e) {
        // pass the exception itself so the stack trace is not lost
        log.error(e.getMessage(), e);
    }
    return xml.append("</annotation>").toString();
}

/**
 * Replaces the five characters that are special in XML attribute values
 * ({@code & < > " '}) by their predefined entities.
 *
 * @param value the raw value, may be {@code null}
 * @return the escaped value, or {@code null} if the value was {@code null}
 */
private static String escapeXmlAttribute(String value) {
    if (value == null) {
        // String.format renders this as "null", exactly as before the escaping was added
        return null;
    }
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&apos;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) ConfigurationException(org.osgi.service.cm.ConfigurationException) SAXException(org.xml.sax.SAXException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException)

Example 52 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class CeliSentimentAnalysisEngine method computeEnhancements.

/**
 * Reads the text of the content item, sends it together with the language of
 * the content item to the sentiment analysis client and adds one
 * TextAnnotation per returned sentiment expression to the metadata of the
 * content item.
 *
 * @param ci the content item to enhance
 * @throws EngineException if the text can not be read or the call to the
 *         sentiment analysis service fails
 * @throws IllegalStateException if no supported content part or no language
 *         is available for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (textPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This indicated an Bug in the implementation of the EnhancementJobManager!");
    }
    final String text;
    try {
        text = ContentItemHelper.getText(textPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.info("No text contained in ContentPart {" + textPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    final String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This indicated an Bug in the implementation of the EnhancementJobManager!");
    }
    // language tag attached to every plain literal written below
    final Language literalLanguage = new Language(language);
    try {
        final List<SentimentExpression> expressions = this.client.extractSentimentExpressions(text, language);
        final LiteralFactory literals = LiteralFactory.getInstance();
        final Graph metadata = ci.getMetadata();
        for (SentimentExpression expression : expressions) {
            try {
                final IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // the snippet is stored as plain literal in the language of the text
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(expression.getSnippetStr(), literalLanguage)));
                metadata.add(new TripleImpl(annotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));
                if (expression.getStartSnippet() == null || expression.getEndSnippet() == null) {
                    // without both offsets neither position, context nor polarity are written
                    continue;
                }
                metadata.add(new TripleImpl(annotation, ENHANCER_START, literals.createTypedLiteral(expression.getStartSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_END, literals.createTypedLiteral(expression.getEndSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, expression.getSnippetStr(), expression.getStartSnippet()), literalLanguage)));
                metadata.add(new TripleImpl(annotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literals.createTypedLiteral(expression.getSentimentPolarityAsDoubleValue())));
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 53 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class CeliLanguageIdentifierEnhancementEngineTest method tesetEngine.

/**
 * Runs the language identifier engine on {@code TEXT} and validates the
 * resulting enhancements: exactly one TextAnnotation, the detected language
 * "fr" and no EntityAnnotations. An {@link EngineException} is handed to
 * {@code RemoteServiceHelper.checkServiceUnavailable}.
 */
@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);

        // values every enhancement created by the engine is validated against
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(langIdentifier.getClass().getName());
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expected.put(Properties.DC_CREATOR, creator);

        int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), TEXT, expected);
        assertEquals("A single TextAnnotation is expected by this Test", 1, textAnnotationCount);

        // This test validates the integration of the CELI service as
        // EnhancementEngine rather than the quality of the service; still
        // "fr" is expected to be detected for the parsed text.
        String detectedLanguage = EnhancementEngineHelper.getLanguage(ci);
        assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", detectedLanguage);

        int entityAnnotationCount = validateAllEntityAnnotations(ci.getMetadata(), expected);
        assertEquals("No EntityAnnotations are expected", 0, entityAnnotationCount);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 54 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method validateMorphoFeatureProperty.

/**
 * Validates the morphological information attached to a TextAnnotation for
 * the constant input {@code TERM}:
 * <ul>
 * <li>at least one {@code RDF_TYPE} value; all values must be non-empty IRIs
 * and those in the {@code OLIA_NAMESPACE} must denote
 * {@link LexicalCategory#Noun}</li>
 * <li>a gender value that is a non-empty IRI and, if in the
 * {@code OLIA_NAMESPACE}, denotes {@link Gender#Feminine}</li>
 * <li>a number value that is a non-empty IRI and, if in the
 * {@code OLIA_NAMESPACE}, denotes {@link NumberFeature#Singular}</li>
 * <li>a lemma form that is a non-empty {@link Literal} equal to
 * {@code TERM}</li>
 * </ul>
 * For gender, number and lemma only the first value is checked.
 *
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 */
private void validateMorphoFeatureProperty(Graph enhancements, BlankNodeOrIRI textAnnotation) {
    // This test checks for known morpho features of a given input (constant TERM)
    Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
    assertTrue("No POS Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    while (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be IRIs", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        // only rdf:type values of the OLIA namespace encode a lexical category
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            LexicalCategory cat = LexicalCategory.valueOf(key);
            assertTrue("Part of Speech of " + TERM + " should be " + LexicalCategory.Noun, (cat == LexicalCategory.Noun));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
    assertTrue("No Gender Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be IRIs", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            Gender cat = Gender.valueOf(key);
            assertTrue("Gender of " + TERM + " should be " + Gender.Feminine, (cat == Gender.Feminine));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
    assertTrue("No Number Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Morpho Feature values are expected to be IRIs", morphoFeature instanceof IRI);
        String feature = ((IRI) morphoFeature).getUnicodeString();
        assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
        if (feature.startsWith(OLIA_NAMESPACE)) {
            String key = feature.substring(OLIA_NAMESPACE.length());
            NumberFeature cat = NumberFeature.valueOf(key);
            // the message names the value that is actually checked (was Gender.Feminine)
            assertTrue("Number of " + TERM + " should be " + NumberFeature.Singular, (cat == NumberFeature.Singular));
        }
    }
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
    assertTrue("No Lemma Form value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    if (morphoFeatureIterator.hasNext()) {
        RDFTerm morphoFeature = morphoFeatureIterator.next().getObject();
        assertTrue("Lemma Forms value are expected of type Literal", morphoFeature instanceof Literal);
        assertFalse("Lemma forms MUST NOT be empty", ((Literal) morphoFeature).getLexicalForm().isEmpty());
        String feature = ((Literal) morphoFeature).getLexicalForm();
        assertTrue("Lemma of " + TERM + " should be " + TERM, (feature.equals(TERM)));
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) NumberFeature(org.apache.stanbol.enhancer.nlp.morpho.NumberFeature) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Gender(org.apache.stanbol.enhancer.nlp.morpho.Gender) LexicalCategory(org.apache.stanbol.enhancer.nlp.pos.LexicalCategory)

Example 55 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.

/**
 * Runs the lemmatizer engine (initialised via {@code initEngine(true)}) on
 * {@code TERM} with the language statically set to "it" and validates every
 * created TextAnnotation, including its morphological features and lemma
 * form. No EntityAnnotations are expected.
 * <p>
 * If the engine fails with an {@link EngineException} the exception is handed
 * to {@code RemoteServiceHelper.checkServiceUnavailable} and the test ends
 * without further validation. The engine is shut down in every case.
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancements
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // The presence of a TextAnnotation is intentionally not asserted here;
        // all TextAnnotations that are found are validated.
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            // perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        // no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // also reached on the early return and on assertion failures, so the
        // engine created above is never left running
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI)346 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)113 Graph (org.apache.clerezza.commons.rdf.Graph)109 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)104 Triple (org.apache.clerezza.commons.rdf.Triple)88 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)84 Test (org.junit.Test)78 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)58 HashSet (java.util.HashSet)50 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)46 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)39 HashMap (java.util.HashMap)38 IOException (java.io.IOException)37 ArrayList (java.util.ArrayList)37 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)36 Literal (org.apache.clerezza.commons.rdf.Literal)35 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)31 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)29 Recipe (org.apache.stanbol.rules.base.api.Recipe)29 Language (org.apache.clerezza.commons.rdf.Language)24