use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.
@Test
public void testCompleteMorphoAnalysis() throws Exception {
ContentItem ci = wrapAsContentItem(TERM);
//add a simple triple to statically define the language of the test
//content
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
try {
morphoAnalysisEngine.computeEnhancements(ci);
} catch (EngineException e) {
RemoteServiceHelper.checkServiceUnavailable(e);
return;
}
TestUtils.logEnhancements(ci);
//validate enhancements
HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
// test if a textAnnotation is present
//assertTrue(textAnnotationIterator.hasNext());
// -> this might be used to test that there are no TextAnnotations
int textAnnotationCount = 0;
while (textAnnotationIterator.hasNext()) {
IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
// test if selected Text is added
validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
textAnnotationCount++;
//perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
}
log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
//no EntityAnnotations expected
Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
shutdownEngine(morphoAnalysisEngine);
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method testEngine.
@Test
public void testEngine() throws Exception {
ContentItem ci = wrapAsContentItem(TEXT);
//add a simple triple to statically define the language of the test
//content
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
//unit test should not depend on each other (if possible)
//CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
try {
morphoAnalysisEngine.computeEnhancements(ci);
} catch (EngineException e) {
RemoteServiceHelper.checkServiceUnavailable(e);
return;
}
TestUtils.logEnhancements(ci);
//validate enhancement
HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
//validate the enhancement metadata
validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
//validate the lemma form TextAnnotation
int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
shutdownEngine(morphoAnalysisEngine);
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.
private void testInput(String txt, String lang) throws EngineException, IOException {
ContentItem ci = wrapAsContentItem(txt);
try {
//add a simple triple to statically define the language of the test content
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
nerEngine.computeEnhancements(ci);
TestUtils.logEnhancements(ci);
HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
log.info(textAnnoNum + " TextAnnotations found ...");
int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
log.info(entityAnnoNum + " EntityAnnotations found ...");
} catch (EngineException e) {
RemoteServiceHelper.checkServiceUnavailable(e);
}
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text = "";
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
if (text.trim().length() == 0) {
log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
return;
}
String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
//used for the palin literals in TextAnnotations
Language lang = new Language(language);
try {
List<NamedEntity> lista = this.client.extractEntities(text, language);
LiteralFactory literalFactory = LiteralFactory.getInstance();
Graph g = ci.getMetadata();
for (NamedEntity ne : lista) {
try {
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
//add selected text as PlainLiteral in the language extracted from the text
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(ne.getFormKind(), lang)));
g.add(new TripleImpl(textAnnotation, DC_TYPE, getEntityRefForType(ne.type)));
if (ne.getFrom() != null && ne.getTo() != null) {
g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(ne.getFrom().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(ne.getTo().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
}
} catch (NoConvertorException e) {
log.error(e.getMessage(), e);
}
}
} catch (IOException e) {
throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
} catch (SOAPException e) {
throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
}
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class SpotlightEngineUtils method createEntityAnnotation.
/**
* Creates a fise:EntityAnnotation for the parsed parameters and
* adds it the the {@link ContentItem#getMetadata()}. <p>
* This method assumes a write lock on the parsed content item.
* @param resource the candidate resource
* @param engine the engine
* @param ci the content item
* @param textAnnotation the fise:TextAnnotation to dc:relate the
* created fise:EntityAnnotation
* @return the URI of the created fise:TextAnnotation
*/
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
Graph model = ci.getMetadata();
Literal label = new PlainLiteralImpl(resource.label, new Language("en"));
model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE, literalFactory.createTypedLiteral(resource.contextualScore)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK, literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT, literalFactory.createTypedLiteral(resource.support)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_PRIOR_SCORE, literalFactory.createTypedLiteral(resource.priorScore)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_FINAL_SCORE, literalFactory.createTypedLiteral(resource.finalScore)));
return entityAnnotation;
}
Aggregations