use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class Mapping method toResource.
/**
 * Converts the parsed value to an RDF {@link RDFTerm} based on the
 * {@link Mapping#ontType ontology type} of this mapping. Optionally also
 * validates that the parsed value is valid for that ontology type.
 * <ul>
 * <li>No ontology type: a plain literal is created.</li>
 * <li>{@code RDFS.Resource}: an {@link IRI} is created; if validation is
 * enabled and the value is not a valid URI a plain literal is created
 * instead.</li>
 * <li>Any other type: a typed literal is created. Values that can not be
 * parsed as date, or that fail validation, fall back to
 * {@code xsd:string}.</li>
 * </ul>
 * If a converter is configured the created term is passed through it
 * before it is returned.
 * @param value the value
 * @param validate if <code>true</code> the value is validated against the
 * ontology type of this mapping
 * @return the {@link RDFTerm} or <code>null</code> if the parsed value is
 * <code>null</code> or {@link String#isEmpty() empty}.
 */
protected RDFTerm toResource(String value, boolean validate) {
    if (value == null || value.isEmpty()) {
        //ignore null and empty values
        return null;
    }
    RDFTerm object;
    if (ontType == null) {
        object = new PlainLiteralImpl(value);
    } else if (RDFS.Resource.equals(ontType)) {
        //compare by value so that an equal IRI that is not the very same
        //instance as the constant is also treated as a reference
        try {
            if (validate) {
                //only used to check the syntax; the result is not needed
                new URI(value);
            }
            object = new IRI(value);
        } catch (URISyntaxException e) {
            log.warn("Unable to create Reference for value {} (not a valid URI)" + " -> create a literal instead", value);
            object = new PlainLiteralImpl(value);
        }
    } else {
        //typed literal
        Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
        //clazz is null if the ontology type is not registered in the map;
        //Date.class.equals(..) is null-safe in that case
        boolean isDate = Date.class.equals(clazz);
        if (isDate) {
            //the Metadata instance is only used to get access to its
            //date parsing; any Property with the Date type could be used
            Metadata dummy = new Metadata();
            dummy.add(DATE.getName(), value);
            Date date = dummy.getDate(DublinCore.DATE);
            if (date != null) {
                //now use the Clerezza Literal factory
                object = lf.createTypedLiteral(date);
            } else {
                //fall back to xsd:string
                object = new TypedLiteralImpl(value, XSD.string);
            }
        } else {
            object = new TypedLiteralImpl(value, ontType);
        }
        //dates need no validation as they were already parsed above
        if (validate && clazz != null && !isDate) {
            try {
                lf.createObject(clazz, (Literal) object);
            } catch (NoConvertorException e) {
                log.info("Unable to validate typed literals of type {} because " + "there is no converter for Class {} registered with Clerezza", ontType, clazz);
            } catch (InvalidLiteralTypeException e) {
                log.info("The value '{}' is not valid for dataType {}! " + "create literal with type 'xsd:string' instead", value, ontType);
                object = new TypedLiteralImpl(value, XSD.string);
            }
        }
        //else no validation needed
    }
    if (converter != null) {
        object = converter.convert(object);
    }
    return object;
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class RestfulLangidentEngine method computeEnhancements.
/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are stored in the metadata of the content item.
 * <p/>
 * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
 * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
 * <p/>
 * This method sends the plain text Blob of the content item as HTTP POST
 * request to the configured service URL. For every language suggestion
 * contained in the response a text enhancement is added to the metadata
 * of the content item. It holds the detected language, the
 * linguistic system type and - if available - the probability as
 * confidence. The metadata are modified while holding the write lock of
 * the content item.
 *
 * @param ci the content item to enhance
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 *             if the request to the language identification service
 *             fails with an {@link IOException}
 */
@Override
public void computeEnhancements(final ContentItem ci) throws EngineException {
    //get the plain text Blob
    //NOTE(review): getPlainText(..) is called with 'false' as last argument;
    //confirm that it can not return null here as the result is dereferenced
    Map.Entry<IRI, Blob> textBlob = getPlainText(this, ci, false);
    Blob blob = textBlob.getValue();
    //send the text to the server
    final HttpPost request = new HttpPost(serviceUrl);
    request.setEntity(new InputStreamEntity(blob.getStream(), blob.getContentLength(), ContentType.create(blob.getMimeType(), blob.getParameter().get("charset"))));
    //execute the request
    List<LangSuggestion> detected;
    try {
        detected = AccessController.doPrivileged(new PrivilegedExceptionAction<List<LangSuggestion>>() {
            public List<LangSuggestion> run() throws ClientProtocolException, IOException {
                return httpClient.execute(request, new LangIdentResponseHandler(ci, objectMapper));
            }
        });
    } catch (PrivilegedActionException pae) {
        Exception e = pae.getException();
        //both checked exceptions of the request are reported the same way
        if (e instanceof ClientProtocolException || e instanceof IOException) {
            throw new EngineException(this, ci, "Exception while executing Request " + "on RESTful Language Identification Service at " + serviceUrl, e);
        } else {
            //everything else is rethrown as RuntimeException
            throw RuntimeException.class.cast(e);
        }
    }
    Graph metadata = ci.getMetadata();
    log.debug("Detected Languages for ContentItem {} and Blob {}", ci.getUri(), textBlob.getKey());
    ci.getLock().writeLock().lock();
    try {
        //write TextAnnotations for the detected languages
        for (LangSuggestion suggestion : detected) {
            // add a hypothesis
            log.debug(" > {}@{}", suggestion.getLanguage(), suggestion.hasProbability() ? suggestion.getProbability() : "-,--");
            IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
            metadata.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(suggestion.getLanguage())));
            metadata.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
            if (suggestion.hasProbability()) {
                metadata.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getProbability())));
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.
/**
 * Runs the engine on the test text (language statically set to "it") and
 * counts the tokens that carry at least one sentiment annotation. The test
 * returns early when the engine fails with an {@link EngineException}
 * that passes the service-unavailable check.
 */
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    // statically define the language of the test content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    // compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        // deactivate test
        return;
    }
    // now validate the enhancements: count tokens with sentiment annotations
    int annotatedTokens = 0;
    Iterator<Token> tokenIt = at.getTokens();
    while (tokenIt.hasNext()) {
        Token current = tokenIt.next();
        log.info("Token: {}", current);
        List<Value<Double>> sentiments = current.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        boolean hasSentiment = sentiments != null && !sentiments.isEmpty();
        if (hasSentiment) {
            annotatedTokens++;
        }
    }
    Assert.assertTrue("2 sentiment expressions should be recognized in: " + text, annotatedTokens == 2);
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliSentimentAnalysisEngineTest method testInput.
/**
 * Enhances the parsed text with the sentiment analysis engine and validates
 * the created annotations: exactly two text annotations and no entity
 * annotations are expected. An {@link EngineException} thrown while
 * enhancing is handed to the service-unavailable check.
 *
 * @param txt the text to analyse
 * @param lang the language set for the test content
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // statically define the language of the test content by a simple triple
        PlainLiteralImpl languageLiteral = new PlainLiteralImpl(lang);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, languageLiteral));
        sentimentAnalysisEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);

        // values every created annotation is expected to have
        String engineName = sentimentAnalysisEngine.getClass().getName();
        RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(engineName);
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expected.put(Properties.DC_CREATOR, creator);
        expected.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);

        int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnotationCount == 2);

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expected);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnotationCount == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.
the class CeliSentimentAnalysisEngine method computeEnhancements.
/**
 * Extracts sentiment expressions from the text of the content item by
 * calling the client of the CELI sentiment analysis service and writes a
 * text annotation for each of them to the metadata of the content item.
 * Nothing is written if the text is empty after trimming.
 *
 * @param ci the content item to enhance
 * @throws EngineException if calling the service fails with an
 * {@link IOException} or a {@link SOAPException}, or if the text can not
 * be read from the content part
 * @throws IllegalStateException if no supported content part or no
 * language is available for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }

    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }

    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    // used for the plain literals in TextAnnotations
    Language lang = new Language(language);

    List<SentimentExpression> expressions;
    try {
        expressions = this.client.extractSentimentExpressions(text, language);
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }

    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Graph metadata = ci.getMetadata();
    for (SentimentExpression expression : expressions) {
        try {
            IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            // add selected text as PlainLiteral in the language extracted from the text
            metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(expression.getSnippetStr(), lang)));
            metadata.add(new TripleImpl(textAnnotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));
            if (expression.getStartSnippet() == null || expression.getEndSnippet() == null) {
                // position, context and polarity are only written when both offsets are present
                continue;
            }
            metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(expression.getStartSnippet().intValue())));
            metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(expression.getEndSnippet().intValue())));
            metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, expression.getSnippetStr(), expression.getStartSnippet()), lang)));
            metadata.add(new TripleImpl(textAnnotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literalFactory.createTypedLiteral(expression.getSentimentPolarityAsDoubleValue())));
        } catch (NoConvertorException e) {
            // logged only; processing continues with the next expression
            log.error(e.getMessage(), e);
        }
    }
}
Aggregations