use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class OpenCalaisEngine method computeEnhancements.
/**
 * Reads the text of the first content part with a supported mime type, passes it
 * to {@code getCalaisAnalysis} and, if a model is returned, writes the derived
 * enhancements to the ContentItem while holding its write lock.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException declared by this method; an {@code InvalidContentException}
 *         is raised when reading the text of the content part fails with an IOException
 * @throws IllegalStateException if no content part with a supported mime type is found
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    Graph calaisModel = getCalaisAnalysis(text, contentPart.getValue().getMimeType());
    if (calaisModel == null) {
        // no analysis result available -> nothing to add to the ContentItem
        return;
    }
    // Acquire a write lock on the ContentItem when adding the enhancements
    ci.getLock().writeLock().lock();
    try {
        createEnhancements(queryModel(calaisModel), ci);
        if (log.isDebugEnabled()) {
            // Serialization of the metadata is only done when debug output is requested
            Serializer serializer = Serializer.getInstance();
            ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
            serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
            try {
                log.debug("Calais Enhancements:\n{}", debugStream.toString("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // report through the logger instead of printing the stack trace to stderr
                log.warn("Unable to decode the serialized Calais enhancements as UTF-8 for debug logging", e);
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class CeliClassificationEnhancementEngine method computeEnhancements.
/**
 * Extracts concepts for the text of the ContentItem via the configured client and
 * adds one TextAnnotation plus one TopicAnnotation per returned concept to the
 * metadata of the ContentItem.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if the call to the client fails with an IOException or
 *         SOAPException; an {@code InvalidContentException} is raised when the text
 *         of the content part cannot be read
 * @throws IllegalStateException if the language of the ContentItem is not supported
 *         or no content part with a supported mime type is found
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    //NOTE: in the computeEnhancements method one can check metadata already
    //      checked within the canEnhance method. This is not required, but it
    //      may help to identify potential bugs in the EnhancementJobManager
    //      implementation
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (!isLangSupported(language)) {
        // the quote opened before the language is now closed after it
        throw new IllegalStateException("Call to computeEnhancement with unsupported language '" + language + "' for ContentItem " + ci.getUri() + ": This is also checked " + "in the canEnhance method! -> This indicated an Bug in the " + "implementation of the " + "EnhancementJobManager!");
    }
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        // report the mime types that were actually used for the lookup
        throw new IllegalStateException("No ContentPart with a supported Mimetype '" + SUPPORTED_MIMTYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicates an Bug in the implementation of " + "the EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {} of ContentItem {}", contentPart.getKey(), ci.getUri());
        return;
    }
    //NOTE: EnhancementEngine implementations should pass all Exceptions
    //      (RuntimeExceptions as is and others wrapped as EngineExceptions).
    //      The EnhancementJobManager implementation has to catch and
    //      process all those. Handling depends on the configuration of the
    //      EnhancementChain (e.g. if this engine is optional enhancement of
    //      the ContentItem will continue).
    //      This is important as otherwise Users would get "200 ok" replies
    //      for enhancement requests that have failed!
    //
    //      This means that:
    //      * Http clients should pass on IOExceptions and SOAPExceptions
    //      * No try/catch that also includes RuntimeExceptions
    List<Concept> lista;
    try {
        lista = this.client.extractConcepts(text, language);
    } catch (IOException e) {
        //re-throw exceptions as EngineException
        throw new EngineException("Error while calling the CELI classification" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI classification service!", e);
    }
    if (lista.isEmpty()) {
        //nothing to do
        return;
    }
    Graph g = ci.getMetadata();
    //NOTE: EnhancementEngines that use "ENHANCE_ASYNC" need to acquire a
    //      writeLock before modifications to the enhancement metadata
    ci.getLock().writeLock().lock();
    try {
        //see STANBOL-617 for rules how to encode extracted topics
        //we need a single TextAnnotation to link all TopicAnnotations
        IRI textAnnotation = createTextEnhancement(ci, this);
        // add the dc:type skos:Concept
        g.add(new TripleImpl(textAnnotation, DC_TYPE, SKOS_CONCEPT));
        //now create the fise:TopicAnnotations
        for (Concept ne : lista) {
            IRI topicAnnotation = EnhancementEngineHelper.createTopicEnhancement(ci, this);
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_REFERENCE, ne.getUri()));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(ne.getLabel())));
            //TODO: currently I use owl:class as entity-type, because that is
            //      what the linked dbpedia ontology resources are.
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_TYPE, OWL_CLASS));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(ne.getConfidence())));
            //link to the TextAnnotation
            g.add(new TripleImpl(topicAnnotation, DC_RELATION, textAnnotation));
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class UIMARemoteClient method computeEnhancements.
/**
 * Processes the text of the ContentItem with every client in {@code usscList} and
 * stores each resulting feature structure list, keyed by the client's source name,
 * in the FeatureStructureListHolder content part registered under {@code uimaUri}.
 * The holder part is created and added to the ContentItem if it does not exist yet.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException declared by this method; an {@code InvalidContentException}
 *         is raised when reading the text of the content part fails with an IOException
 * @throws IllegalStateException if no content part with a supported mime type is found
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    for (UIMASimpleServletClient ussc : usscList) {
        logger.info("Accessing uima source:" + ussc.getSourceName() + " endpoint:" + ussc.getUri());
        // the text is processed before the write lock is taken
        List<FeatureStructure> featureSetList = ussc.process(text);
        IRI uimaIRI = new IRI(uimaUri);
        // Look up (or create) the holder and update it within a single critical
        // section, so the lock is not released between the two steps.
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                // the part is missing: create it and register it with the ContentItem
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(ussc.getSourceName(), featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class UIMALocal method computeEnhancements.
/**
 * Processes the text of the ContentItem via {@code processText} and, for every
 * configured type name, stores the converted feature structure list under
 * {@code uimaSourceName} in the FeatureStructureListHolder content part registered
 * under {@code uimaUri}. The holder part is created and added to the ContentItem
 * if it does not exist yet.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if {@code processText} fails with a
 *         ResourceInitializationException or AnalysisEngineProcessException; an
 *         {@code InvalidContentException} is raised when the text cannot be read
 * @throws IllegalStateException if no content part with a supported mime type is found
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    JCas jcas;
    try {
        logger.info("Processing text with UIMA AE...");
        jcas = processText(text);
    } catch (ResourceInitializationException ex) {
        logger.error("Error initializing UIMA AE", ex);
        throw new EngineException("Error initializing UIMA AE", ex);
    } catch (AnalysisEngineProcessException ex) {
        logger.error("Error running UIMA AE", ex);
        throw new EngineException("Error running UIMA AE", ex);
    }
    //just for being sure
    if (jcas == null) {
        return;
    }
    for (String typeName : uimaTypeNames) {
        // the conversion is done before the write lock is taken
        List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
        IRI uimaIRI = new IRI(uimaUri);
        // Look up (or create) the holder and update it within a single critical
        // section, so the lock is not released between the two steps.
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                // the part is missing: create it and register it with the ContentItem
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(uimaSourceName, featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class CustomDirFileContentItemFactoryTest method testCustomDir.
/**
 * Checks that creating a Blob through the factory adds exactly one file to the
 * configured custom directory.
 */
@Test
public void testCustomDir() throws IOException {
    // precondition: the custom directory is present and really a directory
    assertTrue("The custom dir '" + customDir + "'MUST exist", customDir.exists());
    assertTrue("The custom dir '" + customDir + "'MUST be an directory", customDir.isDirectory());
    final int filesBefore = customDir.list().length;
    final Blob created = contentItemFactory.createBlob(new StringSource("ensure a file exist"));
    assertNotNull(created);
    // count again after the Blob was created
    final int filesAfter = customDir.list().length;
    Assert.assertEquals("Creating a new Blob has not increased the number of files by one!", filesBefore, filesAfter - 1);
}
Aggregations