Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
The class IndexedGraphTest, method bNodeConsitency.
@Test
public void bNodeConsitency() {
    Graph mGraph = getEmptyGraph();
    // Two distinct BlankNode instances that nevertheless compare equal to
    // any other BlankNode: a worst case for the graph's node mapping.
    final BlankNode bNode = new BlankNode() {

        @Override
        public int hashCode() {
            return -1;
        }

        @Override
        public boolean equals(Object o) {
            return o instanceof BlankNode;
        }
    };
    final BlankNode bNodeClone = new BlankNode() {

        @Override
        public int hashCode() {
            return -1;
        }

        @Override
        public boolean equals(Object o) {
            return o instanceof BlankNode;
        }
    };
    mGraph.add(new TripleImpl(bNode, uriRef1, uriRef2));
    mGraph.add(new TripleImpl(bNodeClone, uriRef2, uriRef3));
    BlankNodeOrIRI bNodeBack = mGraph.filter(null, uriRef1, uriRef2).next().getSubject();
    Assert.assertEquals("The bnode we get back is not equal to the one we added", bNode, bNodeBack);
    BlankNodeOrIRI bNodeBack2 = mGraph.filter(null, uriRef2, uriRef3).next().getSubject();
    Assert.assertEquals("The returned bnodes are no longer equal", bNodeBack, bNodeBack2);
    Assert.assertTrue("Not finding a triple when searching with an equal bNode", mGraph.filter(bNodeBack, uriRef2, null).hasNext());
}
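For readers new to the Clerezza API, a minimal self-contained sketch of the add/filter round trip that every snippet on this page relies on. Nothing here is from the Stanbol sources: the IRIs are made up, and SimpleGraph is assumed to be available as the in-memory Graph implementation from the same impl.utils bundle.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class TripleImplDemo {

    public static void main(String[] args) {
        // Add one triple and read it back; a null argument to filter()
        // acts as a wildcard for that position.
        Graph graph = new SimpleGraph();
        IRI subject = new IRI("http://example.org/s"); // illustrative IRIs only
        IRI predicate = new IRI("http://example.org/p");
        IRI object = new IRI("http://example.org/o");
        graph.add(new TripleImpl(subject, predicate, object));
        Triple back = graph.filter(subject, null, null).next();
        System.out.println(back.getObject().equals(object)); // true
    }
}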
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
The class CeliAnalyzedTextLemmatizerEngineTest, method testEngineDe.
@Test
public void testEngineDe() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
    Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
    // Add some Tokens with POS annotations to test the usage of
    // existing POS annotations by the lemmatizer
    Token verbrachten = at.addToken(de_verbStart, de_verbStart + de_verb.length());
    verbrachten.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb), de_verbProb));
    Token schonen = at.addToken(de_adjectiveStart, de_adjectiveStart + de_adjective.length());
    schonen.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ADJ", LexicalCategory.Adjective), de_adjectiveProb));
    Token urlaub = at.addToken(de_nounStart, de_nounStart + de_noun.length());
    urlaub.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NC", LexicalCategory.Noun), de_nounProb));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    // compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        // deactivate the test if the remote service is not available
        return;
    }
    // now validate the enhancements
    boolean foundVerb = false;
    boolean foundAdjective = false;
    boolean foundNoun = false;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if (de_verb.equals(token.getSpan())) {
            foundVerb = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Verb, de_verbProb);
        } else if (de_adjective.equals(token.getSpan())) {
            foundAdjective = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Adjective, de_adjectiveProb);
        } else if (de_noun.equals(token.getSpan())) {
            foundNoun = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Noun, de_nounProb);
        }
        for (Value<MorphoFeatures> mf : mfs) {
            log.info(" - {}", mf);
            Assert.assertNotNull(mf.value().getLemma());
        }
    }
    Assert.assertTrue("No MorphoFeatures found for '" + de_verb + "'!", foundVerb);
    Assert.assertTrue("No MorphoFeatures found for '" + de_adjective + "'!", foundAdjective);
    Assert.assertTrue("No MorphoFeatures found for '" + de_noun + "'!", foundNoun);
}
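validateMorphFeatureProbability is a helper defined elsewhere in the test class. A hedged sketch of the kind of check it plausibly performs, assuming Value exposes the probability passed to Value.value(tag, prob) via probability(); this is not the actual Stanbol helper:

// Hedged sketch, not the actual Stanbol helper: assert that at least one
// MorphoFeatures annotation is present and that its probability matches
// the one attached to the corresponding POS annotation above.
private void validateMorphFeatureProbability(List<Value<MorphoFeatures>> mfs,
        LexicalCategory category, double expectedProb) {
    Assert.assertFalse("no MorphoFeatures for " + category, mfs.isEmpty());
    for (Value<MorphoFeatures> mf : mfs) {
        Assert.assertEquals(expectedProb, mf.probability(), 0.001); // assumed accessor
    }
}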
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
The class CeliNamedEntityExtractionEnhancementEngineTest, method testInput.
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        nerEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        log.info(entityAnnoNum + " EntityAnnotations found ...");
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
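wrapAsContentItem is another test helper not shown on this page. A hedged sketch of what it plausibly does, reusing the ContentItemFactory pattern from the lemmatizer test above (the ciFactory field is an assumption for this sketch):

// Hedged sketch of the helper; assumes a ContentItemFactory field named
// ciFactory, as in the lemmatizer test above.
private ContentItem wrapAsContentItem(String text) throws IOException {
    return ciFactory.createContentItem(new StringSource(text));
}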
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
The class CeliSentimentAnalysisEngineTest, method testInput.
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(sentimentAnalysisEngine.getClass().getName()));
        expectedValues.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnoNum == 2);
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnoNum == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
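The language-setting idiom is identical in both Celi tests. A hedged sketch of a small helper that factors it out (the name setLanguage is hypothetical, not from the Stanbol sources):

// Hedged sketch: statically define the language of a test ContentItem by
// adding a single dc:language triple to its metadata.
private void setLanguage(ContentItem ci, String lang) {
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
}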
Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.
The class DBPSpotlightDisambiguateEnhancementEngine, method createEnhancements.
/**
 * Adds the returned DBpedia Spotlight annotations to the content item's
 * metadata. For each DBpedia resource an EntityAnnotation is created and
 * linked to the corresponding TextAnnotation.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param language
 *            the language of the entity labels
 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) {
                    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
                }
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
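The loop body above looks up textAnnotationsMap.get(occ.surfaceForm) twice and walks the type names with an explicit Iterator. A hedged, behavior-preserving cleanup of the same loop (a sketch against the same fields and helpers, not a change in the Stanbol sources):

// Hedged sketch: a single map lookup and a for-each loop over the type names.
for (Annotation occ : occs) {
    IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
    if (textAnnotation == null) {
        // no TextAnnotation to link this entity to
        continue;
    }
    Graph model = ci.getMetadata();
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
    entityAnnotationMap.put(occ.uri, entityAnnotation);
    Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
    model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
    Collection<String> typeNames = occ.getTypeNames();
    if (typeNames != null) {
        for (String typeName : typeNames) {
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(typeName)));
        }
    }
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
}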