Search in sources :

Example 6 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliLanguageIdentifierEnhancementEngineTest method tesetEngine.

/**
 * Runs the language identifier engine on {@code TEXT} and validates the
 * enhancements written to the content item: exactly one TextAnnotation,
 * the detected language "fr" and no EntityAnnotations.
 * <p>
 * Only the call to the engine is guarded against {@link EngineException};
 * the exception is handed to {@code RemoteServiceHelper.checkServiceUnavailable}
 * and, if that returns normally, the test ends without validating anything.
 *
 * @throws Exception
 *             on any unexpected failure
 */
@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(ci);
    } catch (EngineException e) {
        // keep the guarded region limited to the engine call so that the
        // validation below is never routed through the availability check
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    TestUtils.logEnhancements(ci);
    // values every enhancement created by this engine is expected to carry
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(langIdentifier.getClass().getName()));
    int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
    assertEquals("A single TextAnnotation is expected by this Test", 1, numTextAnnotations);
    // even though this test does not validate service quality but rather
    // the correct integration of the CELI service as EnhancementEngine,
    // we expect that "fr" is detected for the parsed text
    assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", EnhancementEngineHelper.getLanguage(ci));
    int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    assertEquals("No EntityAnnotations are expected", 0, entityAnnoNum);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 7 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.

/**
 * Runs the lemmatizer engine, initialised via {@code initEngine(true)}, on
 * {@code TERM} and validates every TextAnnotation it produces, including
 * the morphological feature properties. No EntityAnnotations are expected.
 * <p>
 * The engine is shut down in a {@code finally} block so that it is also
 * released when the test returns early or a validation fails.
 *
 * @throws Exception
 *             on any unexpected failure
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            // nothing to validate if the engine call failed
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancements
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // the presence of a TextAnnotation is intentionally not asserted;
        // zero annotations only results in a count of 0 being logged
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            // perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        // no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // always release the engine, also on early return or failed assertion
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 8 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testEngine.

/**
 * Runs the lemmatizer engine, initialised via {@code initEngine(false)}, on
 * {@code TEXT} and expects exactly one TextAnnotation with an IRI subject
 * that carries a single lemma form property.
 * <p>
 * The engine is shut down in a {@code finally} block so that it is also
 * released when the test returns early or a validation fails.
 *
 * @throws Exception
 *             on any unexpected failure
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    // unit tests should not depend on each other (if possible), so the
    // language is set above instead of running the language identifier
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            // nothing to validate if the engine call failed
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancement
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        // validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        // validate the lemma form TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        // always release the engine, also on early return or failed assertion
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 9 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.

/**
 * Creates a content item with an AnalysedText for {@code text}, marks it
 * as Italian ("it"), runs the engine and counts the tokens that carry at
 * least one sentiment annotation. Exactly two such tokens are expected.
 * <p>
 * If the engine call throws an {@link EngineException} it is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable} and the test ends
 * without validating anything.
 */
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText analysedText = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(analysedText);
    // statically define the language of the test content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));

    // run the engine; on failure skip the validation below
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }

    // count the tokens that received a sentiment annotation
    int annotatedTokens = 0;
    Iterator<Token> tokenIterator = analysedText.getTokens();
    while (tokenIterator.hasNext()) {
        Token current = tokenIterator.next();
        log.info("Token: {}", current);
        List<Value<Double>> sentiments = current.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        if (sentiments == null || sentiments.isEmpty()) {
            continue;
        }
        annotatedTokens++;
    }
    Assert.assertTrue("2 sentiment expressions should be recognized in: " + text, annotatedTokens == 2);
}
Also used : AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value) Token(org.apache.stanbol.enhancer.nlp.model.Token) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) CeliAnalyzedTextLemmatizerEngineTest(org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngineTest) Test(org.junit.Test)

Example 10 with EngineException

use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementEngine method doPostRequest.

/**
 * Sends a POST request to the DBpediaSpotlight url and parses the XML
 * response.
 * <p>
 * The request body is form-urlencoded: each configured option (spotter,
 * disambiguator, types, support, confidence, sparql) is sent only if it is
 * neither <code>null</code> nor empty, followed by the mandatory
 * <code>text</code> parameter. The sparql option is additionally skipped
 * unless the types restriction is <code>null</code>.
 *
 * @param text
 *            a <code>String</code> with the text to be analyzed
 * @param contentItemUri
 *            only used in the message of a thrown exception
 * @return the <code>SurfaceForm</code>s parsed from the server response by
 *         <code>CandidateResource.parseCandidates</code>
 * @throws EngineException
 *             if the connection cannot be opened, the request cannot be
 *             written, or the response cannot be read or parsed
 */
protected Collection<SurfaceForm> doPostRequest(String text, IRI contentItemUri) throws EngineException {
    HttpURLConnection connection = null;
    BufferedWriter wr = null;
    try {
        connection = (HttpURLConnection) spotlightUrl.openConnection();
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.setRequestProperty("Accept", "text/xml");
        // set ConnectionTimeout (if configured); the value is multiplied
        // by 1000 and used for both the connect and the read timeout
        if (connectionTimeout > 0) {
            connection.setConnectTimeout(connectionTimeout * 1000);
            connection.setReadTimeout(connectionTimeout * 1000);
        }
        connection.setUseCaches(false);
        connection.setDoInput(true);
        connection.setDoOutput(true);
        // Send request
        wr = new BufferedWriter(new OutputStreamWriter(connection.getOutputStream(), UTF8));
    } catch (IOException e) {
        IOUtils.closeQuietly(wr);
        throw new EngineException("Unable to open connection to " + spotlightUrl, e);
    }
    try {
        writeOptionalParameter(wr, "spotter", spotlightSpotter);
        writeOptionalParameter(wr, "disambiguator", spotlightDisambiguator);
        writeOptionalParameter(wr, "types", spotlightTypesRestriction);
        writeOptionalParameter(wr, "support", spotlightSupport);
        writeOptionalParameter(wr, "confidence", spotlightConfidence);
        // the sparql filter is only sent if no types restriction is set
        if (spotlightTypesRestriction == null) {
            writeOptionalParameter(wr, "sparql", spotlightSparql);
        }
        // the text is always sent and is the last parameter (no trailing '&')
        wr.write("text=");
        wr.write(URLEncoder.encode(text, "UTF-8"));
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("The platform does not support encoding " + UTF8.name(), e);
    } catch (IOException e) {
        throw new EngineException("Unable to write 'plain/text' content " + "for ContentItem " + contentItemUri + " to " + spotlightUrl, e);
    } finally {
        // closing the writer also flushes the buffered request body
        IOUtils.closeQuietly(wr);
    }
    InputStream is = null;
    Document xmlDoc;
    try {
        // Get Response
        is = connection.getInputStream();
        xmlDoc = loadXMLFromInputStream(is);
    } catch (IOException e) {
        throw new EngineException("Unable to spot Entities with " + "Dbpedia Spotlight Spot RESTful Serice running at " + spotlightUrl, e);
    } catch (SAXException e) {
        throw new EngineException("Unable to parse Response from " + "Dbpedia Spotlight Spot RESTful Serice running at " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(is);
    }
    return CandidateResource.parseCandidates(xmlDoc);
}

/**
 * Writes <code>name=value&amp;</code> with the value URL-encoded as UTF-8,
 * or nothing at all if the value is <code>null</code> or empty.
 *
 * @param out
 *            the writer for the request body
 * @param name
 *            the name of the form parameter
 * @param value
 *            the raw (not yet encoded) value; may be <code>null</code>
 * @throws IOException
 *             if writing fails or the encoding is not supported
 */
private static void writeOptionalParameter(BufferedWriter out, String name, String value) throws IOException {
    if (value == null || value.isEmpty()) {
        return;
    }
    out.write(name);
    out.write('=');
    out.write(URLEncoder.encode(value, "UTF-8"));
    out.write('&');
}
Also used : HttpURLConnection(java.net.HttpURLConnection) XMLParser.loadXMLFromInputStream(org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream) InputStream(java.io.InputStream) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) Document(org.w3c.dom.Document) BufferedWriter(java.io.BufferedWriter) SAXException(org.xml.sax.SAXException)

Aggregations

EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)55 IRI (org.apache.clerezza.commons.rdf.IRI)37 IOException (java.io.IOException)33 Graph (org.apache.clerezza.commons.rdf.Graph)24 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)23 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)20 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)15 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)15 HashMap (java.util.HashMap)13 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)13 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)12 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)10 InvalidContentException (org.apache.stanbol.enhancer.servicesapi.InvalidContentException)10 Test (org.junit.Test)10 Triple (org.apache.clerezza.commons.rdf.Triple)9 InputStream (java.io.InputStream)8 SOAPException (javax.xml.soap.SOAPException)8 Token (org.apache.stanbol.enhancer.nlp.model.Token)8 Language (org.apache.clerezza.commons.rdf.Language)7 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)7