Search in sources :

Example 1 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliAnalyzedTextLemmatizerEngineTest method testEngineDe.

/**
 * Runs the engine on the German test text after three tokens (verb,
 * adjective, noun) were pre-populated with POS annotations, and checks that
 * each of those tokens ends up with at least one MorphoFeatures annotation
 * and that every MorphoFeatures value found on any token has a lemma.
 * <p>
 * When {@code computeEnhancements} throws an {@code EngineException}, the
 * exception is handed to {@code RemoteServiceHelper.checkServiceUnavailable}
 * and, if that call returns normally, the test ends without validation.
 */
@Test
public void testEngineDe() throws IOException, EngineException {
    // set up the ContentItem, its AnalysedText and the language metadata
    ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    TripleImpl languageTriple = new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de"));
    ci.getMetadata().add(languageTriple);
    Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));

    // Pre-register Tokens carrying POS annotations so that the lemmatizer
    // is exercised with already existing POS information
    Token verbToken = at.addToken(de_verbStart, de_verbStart + de_verb.length());
    PosTag verbTag = new PosTag("V", LexicalCategory.Verb);
    verbToken.addAnnotation(POS_ANNOTATION, Value.value(verbTag, de_verbProb));

    Token adjectiveToken = at.addToken(de_adjectiveStart, de_adjectiveStart + de_adjective.length());
    PosTag adjectiveTag = new PosTag("ADJ", LexicalCategory.Adjective);
    adjectiveToken.addAnnotation(POS_ANNOTATION, Value.value(adjectiveTag, de_adjectiveProb));

    Token nounToken = at.addToken(de_nounStart, de_nounStart + de_noun.length());
    PosTag nounTag = new PosTag("NC", LexicalCategory.Noun);
    nounToken.addAnnotation(POS_ANNOTATION, Value.value(nounTag, de_nounProb));

    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));

    // run the engine; an EngineException ends the test early (see Javadoc)
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }

    // inspect every token of the AnalysedText
    boolean verbHasFeatures = false;
    boolean adjectiveHasFeatures = false;
    boolean nounHasFeatures = false;
    Iterator<Token> tokenIt = at.getTokens();
    while (tokenIt.hasNext()) {
        Token token = tokenIt.next();
        log.info("Token: {}", token);
        List<Value<MorphoFeatures>> morphoValues = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if (de_verb.equals(token.getSpan())) {
            verbHasFeatures = !morphoValues.isEmpty();
            validateMorphFeatureProbability(morphoValues, LexicalCategory.Verb, de_verbProb);
        } else if (de_adjective.equals(token.getSpan())) {
            adjectiveHasFeatures = !morphoValues.isEmpty();
            validateMorphFeatureProbability(morphoValues, LexicalCategory.Adjective, de_adjectiveProb);
        } else if (de_noun.equals(token.getSpan())) {
            nounHasFeatures = !morphoValues.isEmpty();
            validateMorphFeatureProbability(morphoValues, LexicalCategory.Noun, de_nounProb);
        }
        // every MorphoFeatures value, on any token, must provide a lemma
        for (Value<MorphoFeatures> morphoValue : morphoValues) {
            log.info("  - {}", morphoValue);
            Assert.assertNotNull(morphoValue.value().getLemma());
        }
    }
    Assert.assertTrue("No MorphoFeatures found for '" + de_verb + "'!", verbHasFeatures);
    Assert.assertTrue("No MorphoFeatures found for '" + de_adjective + "'!", adjectiveHasFeatures);
    Assert.assertTrue("No MorphoFeatures found for '" + de_noun + "'!", nounHasFeatures);
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Token(org.apache.stanbol.enhancer.nlp.model.Token) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 2 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.

/**
 * Wraps the given text as a ContentItem, declares its language, runs the
 * NER engine on it and validates all resulting text and entity annotations
 * against the expected extracted-from and creator values.
 * <p>
 * An {@code EngineException} raised inside the try block is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable}.
 *
 * @param txt the text to analyse
 * @param lang the language written to the metadata as dc:language literal
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // statically declare the language of the test content via a single triple
        PlainLiteralImpl languageLiteral = new PlainLiteralImpl(lang);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, languageLiteral));

        nerEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);

        // values every enhancement created by the engine is validated against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName());
        expected.put(Properties.DC_CREATOR, creator);

        int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expected);
        log.info(entityAnnotationCount + " EntityAnnotations found ...");
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 3 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliSentimentAnalysisEngineTest method testInput.

/**
 * Wraps the given text as a ContentItem, declares its language, runs the
 * sentiment analysis engine on it and validates the resulting annotations:
 * exactly two text annotations (typed as sentiment expression) and no
 * entity annotations are expected.
 * <p>
 * An {@code EngineException} raised inside the try block is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable}.
 *
 * @param txt the text to analyse
 * @param lang the language written to the metadata as dc:language literal
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // statically declare the language of the test content via a single triple
        PlainLiteralImpl languageLiteral = new PlainLiteralImpl(lang);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, languageLiteral));

        sentimentAnalysisEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);

        // values every enhancement created by the engine is validated against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(sentimentAnalysisEngine.getClass().getName());
        expected.put(Properties.DC_CREATOR, creator);
        expected.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);

        int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnotationCount == 2);

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expected);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnotationCount == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 4 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method doPostRequest.

/**
 * Sends a POST request to the configured DBpedia Spotlight URL and parses
 * the XML response into {@link Annotation}s.
 * <p>
 * The form-encoded request body always starts with
 * <code>spotter=SpotXmlParser</code> and ends with the <code>text</code>
 * parameter. The <code>disambiguator</code>, <code>types</code>,
 * <code>support</code> and <code>confidence</code> parameters are added
 * when the corresponding configuration value is neither <code>null</code>
 * nor empty. The <code>sparql</code> parameter is only added when no
 * <code>types</code> parameter is sent.
 *
 * @param text
 *            a <code>String</code> with the text to be analyzed. NOTE: this
 *            value is not used by this method; only
 *            <code>xmlTextAnnotations</code> is transmitted
 * @param xmlTextAnnotations
 *            the value sent URL-encoded as the <code>text</code> request
 *            parameter
 * @param contentItemUri the URI of the {@link ContentItem} (only
 * used for logging in case of an error)
 * @return the {@link Annotation}s parsed from the response document
 * @throws EngineException
 *             if the connection cannot be opened, the request cannot be
 *             written, or the response cannot be read or parsed
 */
protected Collection<Annotation> doPostRequest(String text, String xmlTextAnnotations, IRI contentItemUri) throws EngineException {
    HttpURLConnection connection = null;
    BufferedWriter wr = null;
    try {
        connection = (HttpURLConnection) spotlightUrl.openConnection();
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.setRequestProperty("Accept", "text/xml");
        // set ConnectionTimeout (if configured); the value is in seconds
        if (connectionTimeout > 0) {
            connection.setConnectTimeout(connectionTimeout * 1000);
            connection.setReadTimeout(connectionTimeout * 1000);
        }
        connection.setUseCaches(false);
        connection.setDoInput(true);
        connection.setDoOutput(true);
        // Send request
        wr = new BufferedWriter(new OutputStreamWriter(connection.getOutputStream(), UTF8));
    } catch (IOException e) {
        // 'wr' is assigned by the last statement of the try block, so no
        // writer exists that would need to be closed here
        throw new EngineException("Unable to open connection to " + spotlightUrl, e);
    }
    // the 'types' parameter is only sent for a non-empty restriction; the
    // 'sparql' parameter must be skipped in exactly that case
    boolean hasTypesRestriction = spotlightTypesRestriction != null && !spotlightTypesRestriction.isEmpty();
    try {
        wr.write("spotter=SpotXmlParser&");
        writeOptionalParameter(wr, "disambiguator", spotlightDisambiguator);
        writeOptionalParameter(wr, "types", spotlightTypesRestriction);
        writeOptionalParameter(wr, "support", spotlightSupport);
        writeOptionalParameter(wr, "confidence", spotlightConfidence);
        if (!hasTypesRestriction) {
            writeOptionalParameter(wr, "sparql", spotlightSparql);
        }
        wr.write("text=");
        wr.write(URLEncoder.encode(xmlTextAnnotations, "UTF-8"));
        // flush explicitly: closeQuietly(..) in the finally block would
        // otherwise swallow an IOException raised while flushing the buffer
        wr.flush();
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("The platform does not support encoding " + UTF8.name(), e);
    } catch (IOException e) {
        throw new EngineException("Unable to write 'plain/text' content " + "for ContentItem " + contentItemUri + " to " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(wr);
    }
    InputStream is = null;
    Document xmlDoc;
    try {
        // Get Response
        is = connection.getInputStream();
        xmlDoc = loadXMLFromInputStream(is);
    } catch (IOException e) {
        throw new EngineException("Unable to spot Entities with Dbpedia Spotlight Annotate RESTful Service running at " + spotlightUrl, e);
    } catch (SAXException e) {
        throw new EngineException("Unable to parse Response from Dbpedia Spotlight Annotate RESTful Service running at " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(is);
    }
    return Annotation.parseAnnotations(xmlDoc);
}

/**
 * Writes <code>name=value&amp;</code> (value URL-encoded as UTF-8) to the
 * request body; writes nothing when the value is <code>null</code> or empty.
 *
 * @param wr the writer of the request body
 * @param name the name of the request parameter
 * @param value the (not yet encoded) value or <code>null</code>
 * @throws IOException if writing fails
 */
private void writeOptionalParameter(BufferedWriter wr, String name, String value) throws IOException {
    if (value == null || value.isEmpty()) {
        return;
    }
    wr.write(name);
    wr.write('=');
    wr.write(URLEncoder.encode(value, "UTF-8"));
    wr.write('&');
}
Also used : HttpURLConnection(java.net.HttpURLConnection) XMLParser.loadXMLFromInputStream(org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream) InputStream(java.io.InputStream) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) Document(org.w3c.dom.Document) BufferedWriter(java.io.BufferedWriter) SAXException(org.xml.sax.SAXException)

Example 5 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliSentimentAnalysisEngine method computeEnhancements.

/**
 * Extracts sentiment expressions from the text of the content item via the
 * configured client and writes one text annotation per expression to the
 * metadata of the content item.
 * <p>
 * Each annotation gets the selected text and the sentiment expression type.
 * Start, end, selection context and polarity are only added when both the
 * start and the end offset of the expression are available. A
 * {@code NoConvertorException} raised for a single expression is logged and
 * processing continues with the next one.
 *
 * @param ci the content item to enhance
 * @throws EngineException if reading the text fails (as
 *             {@code InvalidContentException}) or the call to the service
 *             fails with an {@code IOException} or {@code SOAPException}
 * @throws IllegalStateException if no supported content part or no
 *             language is available for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (textPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This indicated an Bug in the implementation of the EnhancementJobManager!");
    }

    String text;
    try {
        text = ContentItemHelper.getText(textPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    // nothing to analyse for blank content
    if (text.trim().isEmpty()) {
        log.info("No text contained in ContentPart {" + textPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }

    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This indicated an Bug in the implementation of the EnhancementJobManager!");
    }
    // language tag used for the plain literals of the TextAnnotations
    Language lang = new Language(language);

    try {
        List<SentimentExpression> expressions = this.client.extractSentimentExpressions(text, language);
        LiteralFactory literals = LiteralFactory.getInstance();
        Graph metadata = ci.getMetadata();
        for (SentimentExpression expression : expressions) {
            try {
                IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // the selected text is stored as PlainLiteral in the language of the content
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(expression.getSnippetStr(), lang)));
                metadata.add(new TripleImpl(annotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));

                // position, context and polarity require both offsets
                if (expression.getStartSnippet() == null || expression.getEndSnippet() == null) {
                    continue;
                }
                metadata.add(new TripleImpl(annotation, ENHANCER_START, literals.createTypedLiteral(expression.getStartSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_END, literals.createTypedLiteral(expression.getEndSnippet().intValue())));
                metadata.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, expression.getSnippetStr(), expression.getStartSnippet()), lang)));
                metadata.add(new TripleImpl(annotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literals.createTypedLiteral(expression.getSentimentPolarityAsDoubleValue())));
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)55 IRI (org.apache.clerezza.commons.rdf.IRI)37 IOException (java.io.IOException)33 Graph (org.apache.clerezza.commons.rdf.Graph)24 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)23 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)20 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)15 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)15 HashMap (java.util.HashMap)13 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)13 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)12 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)10 InvalidContentException (org.apache.stanbol.enhancer.servicesapi.InvalidContentException)10 Test (org.junit.Test)10 Triple (org.apache.clerezza.commons.rdf.Triple)9 InputStream (java.io.InputStream)8 SOAPException (javax.xml.soap.SOAPException)8 Token (org.apache.stanbol.enhancer.nlp.model.Token)8 Language (org.apache.clerezza.commons.rdf.Language)7 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)7