use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method validateMorphoFeatureProperty.
/**
 * Validates the morphological features and the lemma form attached to a
 * TextAnnotation against the values expected for the constant {@code TERM}.
 * <p>
 * The following properties of the TextAnnotation are checked:
 * <ul>
 * <li>{@code RDF_TYPE}: at least one value; every value must be a non-empty
 * {@link IRI}, and every value in the {@code OLIA_NAMESPACE} must denote
 * {@link LexicalCategory#Noun}</li>
 * <li>{@link CeliMorphoFeatures#HAS_GENDER}: the first value must be a
 * non-empty {@link IRI}; if in the {@code OLIA_NAMESPACE} it must denote
 * {@link Gender#Feminine}</li>
 * <li>{@link CeliMorphoFeatures#HAS_NUMBER}: the first value must be a
 * non-empty {@link IRI}; if in the {@code OLIA_NAMESPACE} it must denote
 * {@link NumberFeature#Singular}</li>
 * <li>{@code CeliLemmatizerEnhancementEngine.hasLemmaForm}: the first value
 * must be a non-empty {@link Literal} whose lexical form equals
 * {@code TERM}</li>
 * </ul>
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 */
private void validateMorphoFeatureProperty(Graph enhancements, BlankNodeOrIRI textAnnotation) {
    // This test checks for known morpho features of a given input (constant TERM)

    // Part of speech: all rdf:type values are inspected, not only the first one
    Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
    assertTrue("No POS Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    while (morphoFeatureIterator.hasNext()) {
        String key = extractOliaFeatureKey(morphoFeatureIterator.next().getObject());
        if (key != null) {
            LexicalCategory cat = LexicalCategory.valueOf(key);
            assertTrue("Part of Speech of " + TERM + " should be " + LexicalCategory.Noun, (cat == LexicalCategory.Noun));
        }
    }

    // Gender: only the first value is inspected (the assertion guarantees it exists)
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
    assertTrue("No Gender Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    String genderKey = extractOliaFeatureKey(morphoFeatureIterator.next().getObject());
    if (genderKey != null) {
        Gender cat = Gender.valueOf(genderKey);
        assertTrue("Gender of " + TERM + " should be " + Gender.Feminine, (cat == Gender.Feminine));
    }

    // Number: only the first value is inspected
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
    assertTrue("No Number Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    String numberKey = extractOliaFeatureKey(morphoFeatureIterator.next().getObject());
    if (numberKey != null) {
        NumberFeature cat = NumberFeature.valueOf(numberKey);
        assertTrue("Number of " + TERM + " should be " + NumberFeature.Singular, (cat == NumberFeature.Singular));
    }

    // Lemma form: only the first value is inspected
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
    assertTrue("No Lemma Form value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    RDFTerm lemmaForm = morphoFeatureIterator.next().getObject();
    assertTrue("Lemma Forms value are expected of type Literal", lemmaForm instanceof Literal);
    String lemma = ((Literal) lemmaForm).getLexicalForm();
    assertFalse("Lemma forms MUST NOT be empty", lemma.isEmpty());
    assertTrue("Lemma of " + TERM + " should be " + TERM, (lemma.equals(TERM)));
}

/**
 * Asserts that the parsed morpho feature value is a non-empty {@link IRI}
 * and extracts the part following the {@code OLIA_NAMESPACE}.
 * @param morphoFeature the value of a morpho feature property
 * @return the IRI with the {@code OLIA_NAMESPACE} prefix removed, or
 * <code>null</code> if the IRI does not start with that namespace
 */
private String extractOliaFeatureKey(RDFTerm morphoFeature) {
    assertTrue("Morpho Feature values are expected to be IRIs", morphoFeature instanceof IRI);
    String feature = ((IRI) morphoFeature).getUnicodeString();
    assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
    if (!feature.startsWith(OLIA_NAMESPACE)) {
        return null;
    }
    return feature.substring(OLIA_NAMESPACE.length());
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliSentimentAnalysisEngineTest method testInput.
/**
 * Runs the sentiment analysis engine on the parsed text and validates the
 * created enhancements: exactly two TextAnnotations and no
 * EntityAnnotations are expected.
 * @param txt the text to analyse
 * @param lang the language that is statically set for the text
 * @throws EngineException declared for engine failures; those raised inside
 * the try block are handed to {@link RemoteServiceHelper#checkServiceUnavailable}
 * @throws IOException declared by this test helper
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem(txt);
    try {
        // the language is not detected here; it is defined statically via dc:language
        contentItem.getMetadata().add(
            new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // property values every validated enhancement is checked against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(
            sentimentAnalysisEngine.getClass().getName());
        expected.put(Properties.DC_CREATOR, creator);
        expected.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);

        int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnotationCount == 2);

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(
            contentItem.getMetadata(), expected);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnotationCount == 0);
    } catch (EngineException engineFailure) {
        // NOTE(review): presumably tolerates an unreachable remote CELI service - confirm in RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliClassificationEnhancementEngineTest method tesetEngine.
/**
 * Runs the classification engine on {@code TEXT} (statically marked as
 * French) and validates the result: exactly one TextAnnotation and at
 * least one TopicAnnotation are expected.
 */
@Test
public void tesetEngine() throws Exception {
    ContentItem contentItem = wrapAsContentItem(TEXT);
    try {
        // The language is set statically so that this unit test does not
        // depend on the language identifier engine (or on its test).
        contentItem.getMetadata().add(
            new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
        classificationEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // property values every validated enhancement is checked against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(
            classificationEngine.getClass().getName());
        expected.put(Properties.DC_CREATOR, creator);

        int textAnnotationCount = EnhancementStructureHelper.validateAllTextAnnotations(
            contentItem.getMetadata(), TEXT, expected);
        assertEquals("Only a single fise:TextAnnotation is expeted", 1, textAnnotationCount);

        int topicAnnotationCount = validateAllTopicAnnotations(contentItem.getMetadata(), expected);
        assertTrue("No TpocisAnnotations found", topicAnnotationCount > 0);
    } catch (EngineException engineFailure) {
        // NOTE(review): presumably tolerates an unreachable remote CELI service - confirm in RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliLanguageIdentifierEnhancementEngineTest method tesetEngine.
/**
 * Runs the language identifier engine on {@code TEXT} and validates the
 * result: exactly one TextAnnotation, the detected language "fr" and no
 * EntityAnnotations are expected.
 */
@Test
public void tesetEngine() throws Exception {
    ContentItem contentItem = wrapAsContentItem(TEXT);
    try {
        langIdentifier.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // property values every validated enhancement is checked against
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(
            langIdentifier.getClass().getName());
        expected.put(Properties.DC_CREATOR, creator);

        int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), TEXT, expected);
        assertEquals("A single TextAnnotation is expected by this Test", 1, textAnnotationCount);

        // This test is about the correct integration of the CELI service as
        // an EnhancementEngine rather than about service quality; still,
        // "fr" is expected to be detected for the parsed text.
        String detectedLanguage = EnhancementEngineHelper.getLanguage(contentItem);
        assertEquals("The detected language for text '" + TEXT + "' MUST BE 'fr'", "fr", detectedLanguage);

        int entityAnnotationCount = validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        assertEquals("No EntityAnnotations are expected", 0, entityAnnotationCount);
    } catch (EngineException engineFailure) {
        // NOTE(review): presumably tolerates an unreachable remote CELI service - confirm in RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(engineFailure);
    }
}
use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method validateLemmaFormProperty.
/**
 * Validates the lemma form values of a TextAnnotation. At least one value
 * is required, and each value must be a non-empty {@link Literal} with a
 * non-null language that equals the language of the analyzed text.
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 * @param lang the language of the analyzed text
 * @return The number of lemma forms found
 */
private int validateLemmaFormProperty(Graph enhancements, BlankNodeOrIRI textAnnotation, String lang) {
    Iterator<Triple> lemmaTriples = enhancements.filter(textAnnotation, hasLemmaForm, null);
    assertTrue("No lemma form value found for TextAnnotation " + textAnnotation + "!", lemmaTriples.hasNext());
    int found = 0;
    do {
        // the assertion above guarantees the first element exists
        found++;
        RDFTerm value = lemmaTriples.next().getObject();
        assertTrue("Lemma Forms value are expected of type Literal", value instanceof Literal);
        Literal lemma = (Literal) value;
        assertFalse("Lemma forms MUST NOT be empty", lemma.getLexicalForm().isEmpty());
        assertNotNull("Language of the Lemma Form literal MUST BE not null", lemma.getLanguage());
        assertEquals("Language of the Lemma Form literal MUST BE the same as for the parsed text", lang, lemma.getLanguage().toString());
    } while (lemmaTriples.hasNext());
    return found;
}
Aggregations