Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
From the class CeliAnalyzedTextSentimentAnalysisEngineTest, method testEngine.
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    //compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        //deactivate test
        return;
    }
    //now validate the enhancements
    int sentimentExpressionCnt = 0;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        if (sentimentExpressionsList != null && sentimentExpressionsList.size() > 0) {
            sentimentExpressionCnt++;
        }
    }
    Assert.assertTrue("2 sentiment expressions should be recognized in: " + text, sentimentExpressionCnt == 2);
}
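The test above relies on two fixtures, ciFactory and atFactory, whose initialisation is not part of the shown method. Below is a minimal sketch of how such fixtures can be set up; the use of InMemoryContentItemFactory and AnalysedTextFactory.getDefaultInstance(), as well as the import paths, are assumptions modelled on the in-memory implementations that ship with Stanbol, not the verbatim setup of this test class.

// Hypothetical fixture setup (assumption) -- the actual test class may wire these differently
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;

public class SentimentTestFixtures {
    // factories referenced as ciFactory/atFactory in the test above
    static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
    static final AnalysedTextFactory atFactory = AnalysedTextFactory.getDefaultInstance();
}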
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
From the class CeliSentimentAnalysisEngineTest, method testInput.
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(sentimentAnalysisEngine.getClass().getName()));
        expectedValues.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnoNum == 2);
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnoNum == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
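wrapAsContentItem(txt) is a private helper of the test class that is not shown in this listing. A plausible sketch follows, assuming it mirrors the ContentItemFactory/StringSource usage from the first example above; the ciFactory field is an assumption borrowed from that example.

// Plausible sketch of the wrapAsContentItem helper (assumption: mirrors the
// ciFactory/StringSource usage from the first example above)
private ContentItem wrapAsContentItem(String text) throws IOException {
    return ciFactory.createContentItem(new StringSource(text));
}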
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
From the class CeliClassificationEnhancementEngineTest, method tesetEngine.
@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        //add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
        //unit tests should not depend on each other (if possible)
        //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
        classificationEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(classificationEngine.getClass().getName()));
        int textAnnoNum = EnhancementStructureHelper.validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
        assertEquals("Only a single fise:TextAnnotation is expected", 1, textAnnoNum);
        int numTopicAnnotations = validateAllTopicAnnotations(ci.getMetadata(), expectedValues);
        assertTrue("No TopicAnnotations found", numTopicAnnotations > 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
}
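All of these tests call the remote CELI service, so computeEnhancements(..) is wrapped in a try/catch that delegates to RemoteServiceHelper.checkServiceUnavailable(e). The sketch below shows the intent of that helper purely for illustration: return silently when the exception was caused by the service being unreachable (so the test is effectively skipped), and rethrow everything else. Both the signature and the set of inspected causes are assumptions; the actual Stanbol implementation may differ.

// Illustrative sketch only (assumption) -- not the verbatim Stanbol implementation
public static void checkServiceUnavailable(EngineException e) throws EngineException {
    Throwable cause = e.getCause();
    if (cause instanceof java.net.UnknownHostException
            || cause instanceof java.net.SocketTimeoutException
            || cause instanceof java.net.ConnectException) {
        // remote CELI service not reachable: return so the calling test can exit early
        return;
    }
    // anything else is a real failure and must surface
    throw e;
}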
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
From the class CeliLanguageIdentifierEnhancementEngineTest, method tesetEngine.
@Test
public void tesetEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(langIdentifier.getClass().getName()));
        int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
        assertEquals("A single TextAnnotation is expected by this Test", 1, numTextAnnotations);
        //even though these tests do not validate service quality but rather the correct
        //integration of the CELI service as an EnhancementEngine, we expect "fr" to be
        //detected for the parsed text
        assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", EnhancementEngineHelper.getLanguage(ci));
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        assertEquals("No EntityAnnotations are expected", 0, entityAnnoNum);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
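Most of these tests statically define the language of the test content by adding a single dc:language triple to the ContentItem metadata and reading it back via EnhancementEngineHelper.getLanguage(ci); the language-identifier test above is the exception, where the engine itself is expected to produce the "fr" value. The minimal set-and-read round trip, reusing identifiers from the examples above, looks like this:

// Minimal language round trip, reusing identifiers from the tests above
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
String lang = EnhancementEngineHelper.getLanguage(ci); // expected to return "fr"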
Use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
From the class CeliAnalyzedTextLemmatizerEngineTest, method testEngineDe.
@Test
public void testEngineDe() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
    Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
    //Add some Tokens with POS annotations to test the usage of
    //existing POS annotations by the lemmatizer
    Token verbrachten = at.addToken(de_verbStart, de_verbStart + de_verb.length());
    verbrachten.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb), de_verbProb));
    Token schonen = at.addToken(de_adjectiveStart, de_adjectiveStart + de_adjective.length());
    schonen.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ADJ", LexicalCategory.Adjective), de_adjectiveProb));
    Token urlaub = at.addToken(de_nounStart, de_nounStart + de_noun.length());
    urlaub.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NC", LexicalCategory.Noun), de_nounProb));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    //compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        //deactivate test
        return;
    }
    //now validate the enhancements
    boolean foundVerb = false;
    boolean foundAdjective = false;
    boolean foundNoun = false;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if (de_verb.equals(token.getSpan())) {
            foundVerb = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Verb, de_verbProb);
        } else if (de_adjective.equals(token.getSpan())) {
            foundAdjective = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Adjective, de_adjectiveProb);
        } else if (de_noun.equals(token.getSpan())) {
            foundNoun = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Noun, de_nounProb);
        }
        for (Value<MorphoFeatures> mf : mfs) {
            log.info(" - {}", mf);
            Assert.assertNotNull(mf.value().getLemma());
        }
    }
    Assert.assertTrue("No MorphoFeatures found for '" + de_verb + "'!", foundVerb);
    Assert.assertTrue("No MorphoFeatures found for '" + de_adjective + "'!", foundAdjective);
    Assert.assertTrue("No MorphoFeatures found for '" + de_noun + "'!", foundNoun);
}
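validateMorphFeatureProbability(..) is another private helper that is not part of this listing. The sketch below shows what it plausibly asserts, assuming it compares the probability of each MorphoFeatures annotation against the probability of the POS tag added earlier in the test; the method body, the probability accessor, and the comparison delta are all assumptions.

// Hypothetical sketch of the validation helper (assumption -- the real implementation is not shown)
private void validateMorphFeatureProbability(List<Value<MorphoFeatures>> mfs,
        LexicalCategory category, double expectedProb) {
    Assert.assertFalse("no MorphoFeatures found for expected " + category, mfs.isEmpty());
    for (Value<MorphoFeatures> mf : mfs) {
        // each annotation is expected to carry (roughly) the probability of the parsed POS tag
        Assert.assertEquals(expectedProb, mf.probability(), 0.01);
    }
}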