use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class CeliLanguageIdentifierEnhancementEngineTest method tesetEngine.
/**
 * Runs the language identifier engine over {@code TEXT} and validates the
 * enhancements written to the ContentItem metadata: exactly one
 * TextAnnotation, the detected language "fr" and no EntityAnnotations.
 * An {@link EngineException} raised anywhere in the test body is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable(..)}.
 */
@Test
public void tesetEngine() throws Exception {
    final ContentItem contentItem = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // property values every enhancement created by the engine has to carry
        final String engineClassName = langIdentifier.getClass().getName();
        final HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        expected.put(Properties.DC_CREATOR,
                LiteralFactory.getInstance().createTypedLiteral(engineClassName));

        final int textAnnotationCount =
                validateAllTextAnnotations(contentItem.getMetadata(), TEXT, expected);
        assertEquals("A single TextAnnotation is expected by this Test", 1, textAnnotationCount);

        // Even though these tests do not validate the quality of the service,
        // but rather the correct integration of the CELI service as an
        // EnhancementEngine, "fr" is expected to be detected for the parsed text.
        final String detectedLanguage = EnhancementEngineHelper.getLanguage(contentItem);
        assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", detectedLanguage);

        final int entityAnnotationCount =
                validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        assertEquals("No EntityAnnotations are expected", 0, entityAnnotationCount);
    } catch (EngineException engineFailure) {
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.
/**
 * Runs an engine created via {@code initEngine(true)} over {@code TERM}
 * (language statically set to "it") and validates every TextAnnotation found
 * in the ContentItem metadata, including the morphological feature
 * properties. No EntityAnnotations are expected.
 * <p>
 * If {@code computeEnhancements} throws an {@link EngineException} it is
 * passed to {@code RemoteServiceHelper.checkServiceUnavailable(..)} and the
 * remaining validation is skipped. The engine is shut down on every exit
 * path (success, skipped, or failed assertion).
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    // add a simple triple to statically define the language of the test
    // content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            // nothing to validate; the finally block still shuts the engine down
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancements
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // The presence of at least one TextAnnotation is deliberately NOT
        // asserted here (an assertTrue(textAnnotationIterator.hasNext()) could
        // be added to do so); every annotation that is present gets validated.
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            // perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        // no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // previously only reached on the success path, leaving the engine
        // active after an early return or a failed assertion
        shutdownEngine(morphoAnalysisEngine);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testEngine.
/**
 * Runs an engine created via {@code initEngine(false)} over {@code TEXT}
 * (language statically set to "it") and expects exactly one TextAnnotation,
 * identified by an IRI, that carries exactly one lemma form.
 * <p>
 * If {@code computeEnhancements} throws an {@link EngineException} it is
 * passed to {@code RemoteServiceHelper.checkServiceUnavailable(..)} and the
 * remaining validation is skipped. The engine is shut down on every exit
 * path (success, skipped, or failed assertion).
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    // Add a simple triple to statically define the language of the test
    // content. Unit tests should not depend on each other (if possible), so
    // the language is not taken from the language identifier engine test.
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            // nothing to validate; the finally block still shuts the engine down
            return;
        }
        TestUtils.logEnhancements(ci);
        // validate enhancement
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        // validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        // validate the lemma form TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        // previously only reached on the success path, leaving the engine
        // active after an early return or a failed assertion
        shutdownEngine(morphoAnalysisEngine);
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.
/**
 * Creates a ContentItem and AnalysedText for {@code text} (language
 * statically set to "it"), runs the engine and counts the tokens that carry
 * at least one {@code NlpAnnotations.SENTIMENT_ANNOTATION}. Exactly two such
 * tokens are expected.
 * <p>
 * If {@code computeEnhancements} throws an {@link EngineException} it is
 * passed to {@code RemoteServiceHelper.checkServiceUnavailable(..)} and the
 * validation is skipped.
 *
 * @throws IOException declared by the ContentItem / AnalysedText creation calls
 * @throws EngineException declared by {@code canEnhance} / the service check
 */
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    // compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        // deactivate test
        return;
    }
    // now validate the enhancements
    int sentimentExpressionCnt = 0;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        if (sentimentExpressionsList != null && !sentimentExpressionsList.isEmpty()) {
            sentimentExpressionCnt++;
        }
    }
    // assertEquals (instead of assertTrue(cnt == 2)) so a failure reports the
    // number of sentiment expressions that were actually found
    Assert.assertEquals("2 sentiment expressions should be recognized in: " + text, 2, sentimentExpressionCnt);
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class DBPSpotlightCandidatesEnhancementEngine method doPostRequest.
/**
 * Sends a POST request to the DBpediaSpotlight url and parses the XML
 * response.
 * <p>
 * The request body is form-urlencoded: each configured (non-null, non-empty)
 * option is written as {@code name=value&}, followed by the mandatory
 * {@code text} parameter.
 *
 * @param text
 *            a <code>String</code> with the text to be analyzed
 * @param contentItemUri
 *            only used in the error message if writing the request fails
 * @return the {@link SurfaceForm}s returned by
 *         {@code CandidateResource.parseCandidates(..)} for the XML response
 * @throws EngineException
 *             if the connection cannot be opened, the request cannot be
 *             written, or the response cannot be read or parsed
 */
protected Collection<SurfaceForm> doPostRequest(String text, IRI contentItemUri) throws EngineException {
    HttpURLConnection connection;
    BufferedWriter wr;
    try {
        connection = (HttpURLConnection) spotlightUrl.openConnection();
        connection.setRequestMethod("POST");
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.setRequestProperty("Accept", "text/xml");
        // set ConnectionTimeout (if configured); the same value (seconds)
        // is used for both the connect and the read timeout
        if (connectionTimeout > 0) {
            connection.setConnectTimeout(connectionTimeout * 1000);
            connection.setReadTimeout(connectionTimeout * 1000);
        }
        connection.setUseCaches(false);
        connection.setDoInput(true);
        connection.setDoOutput(true);
        // Send request
        wr = new BufferedWriter(new OutputStreamWriter(connection.getOutputStream(), UTF8));
    } catch (IOException e) {
        // the writer is only assigned by the last statement of the try
        // block, so there is nothing to close here
        throw new EngineException("Unable to open connection to " + spotlightUrl, e);
    }
    try {
        writeFormParameter(wr, "spotter", spotlightSpotter);
        writeFormParameter(wr, "disambiguator", spotlightDisambiguator);
        writeFormParameter(wr, "types", spotlightTypesRestriction);
        writeFormParameter(wr, "support", spotlightSupport);
        writeFormParameter(wr, "confidence", spotlightConfidence);
        // The SPARQL filter is only sent when no types restriction is
        // configured at all (null).
        // NOTE(review): an empty (non-null) types restriction suppresses
        // both "types" and "sparql" - confirm this is intended.
        if (spotlightTypesRestriction == null) {
            writeFormParameter(wr, "sparql", spotlightSparql);
        }
        wr.write("text=");
        wr.write(URLEncoder.encode(text, "UTF-8"));
        // Flush explicitly: closeQuietly(..) in the finally block would
        // silently swallow an IOException raised while flushing the buffer.
        wr.flush();
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("The platform does not support encoding " + UTF8.name(), e);
    } catch (IOException e) {
        throw new EngineException("Unable to write 'plain/text' content for ContentItem " + contentItemUri + " to " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(wr);
    }
    InputStream is = null;
    Document xmlDoc;
    try {
        // Get Response
        is = connection.getInputStream();
        xmlDoc = loadXMLFromInputStream(is);
    } catch (IOException e) {
        throw new EngineException("Unable to spot Entities with Dbpedia Spotlight Spot RESTful Service running at " + spotlightUrl, e);
    } catch (SAXException e) {
        throw new EngineException("Unable to parse Response from Dbpedia Spotlight Spot RESTful Service running at " + spotlightUrl, e);
    } finally {
        IOUtils.closeQuietly(is);
    }
    return CandidateResource.parseCandidates(xmlDoc);
}

/**
 * Writes <code>name=value&amp;</code> to the request body, with the value
 * URL-encoded as UTF-8. Nothing is written if the value is <code>null</code>
 * or empty.
 *
 * @param out
 *            the writer of the request body
 * @param name
 *            the name of the form parameter
 * @param value
 *            the (unencoded) value; may be <code>null</code>
 * @throws IOException
 *             if writing fails (or the encoding is not supported)
 */
private static void writeFormParameter(BufferedWriter out, String name, String value) throws IOException {
    if (value == null || value.isEmpty()) {
        return;
    }
    out.write(name);
    out.write('=');
    out.write(URLEncoder.encode(value, "UTF-8"));
    out.write('&');
}
Aggregations