use of org.apache.uima.jcas.JCas in project stanbol by apache.
the class UIMALocal method processText.
/**
 * Runs the UIMA analysis engine over a single field value.
 *
 * <p>The value is set as the document text of a newly created JCas, the
 * engine obtained from {@code aeProvider} processes that JCas, and the
 * processed JCas is handed back to the caller.
 *
 * @param textFieldValue the text to analyze
 * @return the JCas holding the text after the engine has processed it
 * @throws ResourceInitializationException declared by the engine lookup / JCas creation
 * @throws AnalysisEngineProcessException declared by the engine's process call
 */
private JCas processText(String textFieldValue) throws ResourceInitializationException, AnalysisEngineProcessException {
    logger.info("Analazying text");

    // Look up the engine and let it create the JCas that will carry the text.
    AnalysisEngine engine = aeProvider.getAE();
    JCas cas = engine.newJCas();
    cas.setDocumentText(textFieldValue);

    // Run the analysis on the text field.
    engine.process(cas);

    logger.info("Text processing completed");
    return cas;
}
use of org.apache.uima.jcas.JCas in project lucene-solr by apache.
the class UIMAUpdateRequestProcessor method processAdd.
/**
 * Analyzes the texts selected from the document being added and maps the
 * resulting type features onto Solr fields, then delegates to the next
 * processor in the chain.
 *
 * <p>Every non-empty text returned by {@code getTextsToAnalyze} is processed
 * with a JCas taken from {@code pool}; the JCas is always released again,
 * even when processing fails.
 *
 * <p>If anything fails, a diagnostic message is built from the exception
 * message, the configured log field (falling back to the schema's unique key
 * field) and up to the first 100 characters of the text being processed.
 * Depending on {@code solrUIMAConfiguration.isIgnoreErrors()} the failure is
 * either logged as a warning or rethrown as a {@link SolrException}.
 *
 * @param cmd the add command carrying the Solr input document
 * @throws IOException declared by the overridden method
 * @throws SolrException with {@code SERVER_ERROR} when processing fails and
 *         errors are not ignored
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    // Tracks the text currently being processed so the error handler can report it.
    String text = null;
    try {
        /* get Solr document */
        SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
        /* get the fields to analyze */
        String[] texts = getTextsToAnalyze(solrInputDocument);
        for (String currentText : texts) {
            text = currentText;
            if (text != null && text.length() > 0) {
                /* take a JCas from the pool to hold the text to analyze */
                JCas jcas = pool.getJCas(0);
                try {
                    /* process the text value */
                    processText(text, jcas);
                    UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
                    /* get field mapping from config */
                    Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration.getTypesFeaturesFieldsMapping();
                    /* map type features on fields */
                    for (Entry<String, Map<String, MapField>> entry : typesAndFeaturesFieldsMap.entrySet()) {
                        uimaToSolrMapper.map(entry.getKey(), entry.getValue());
                    }
                } finally {
                    /* always hand the JCas back to the pool */
                    pool.releaseJCas(jcas);
                }
            }
        }
    } catch (Exception e) {
        String logField = solrUIMAConfiguration.getLogField();
        if (logField == null) {
            // No explicit log field configured: fall back to the unique key field, if any.
            SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField();
            if (uniqueKeyField != null) {
                logField = uniqueKeyField.getName();
            }
        }
        String optionalFieldInfo;
        if (logField == null) {
            optionalFieldInfo = ".";
        } else {
            // The document (or the log field in it) may be missing; looking it up
            // must not throw here, otherwise the original failure would be masked
            // and the ignoreErrors setting bypassed.
            Object logFieldValue = null;
            SolrInputDocument failedDocument = cmd.getSolrInputDocument();
            if (failedDocument != null && failedDocument.getField(logField) != null) {
                logFieldValue = failedDocument.getField(logField).getValue();
            }
            optionalFieldInfo = ". " + logField + "=" + logFieldValue + ", ";
        }
        String debugString;
        if (text != null && text.length() > 0) {
            // Only the first 100 characters of the text are reported.
            int len = Math.min(text.length(), 100);
            debugString = " text=\"" + text.substring(0, len) + "...\"";
        } else {
            debugString = " null text";
        }
        if (solrUIMAConfiguration.isIgnoreErrors()) {
            log.warn("skip the text processing due to {}", e.getLocalizedMessage() + optionalFieldInfo + debugString);
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, "processing error " + e.getLocalizedMessage() + optionalFieldInfo + debugString, e);
        }
    }
    super.processAdd(cmd);
}
use of org.apache.uima.jcas.JCas in project stanbol by apache.
the class UIMALocal method computeEnhancements.
/**
 * Processes the text of the content item with the UIMA analysis engine and
 * stores, for every configured UIMA type name, the extracted feature
 * structures in a {@code FeatureStructureListHolder} content part.
 *
 * <p>The holder part is looked up under {@code uimaUri}; if it does not exist
 * yet it is created and added to the content item. Looking up (or creating)
 * the holder and adding the feature structure list to it happen inside a
 * single write-locked section of the content item.
 *
 * @param ci the content item to enhance
 * @throws EngineException if the UIMA engine cannot be initialized or fails
 *         while running; an {@code InvalidContentException} is thrown when
 *         the text of the content part cannot be read
 * @throws IllegalStateException if the content item has no part with a
 *         supported mime type
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    JCas jcas;
    try {
        logger.info("Processing text with UIMA AE...");
        jcas = processText(text);
    } catch (ResourceInitializationException ex) {
        logger.error("Error initializing UIMA AE", ex);
        throw new EngineException("Error initializing UIMA AE", ex);
    } catch (AnalysisEngineProcessException ex) {
        logger.error("Error running UIMA AE", ex);
        throw new EngineException("Error running UIMA AE", ex);
    }
    //just for being sure
    if (jcas == null) {
        return;
    }
    // The part URI is the same for every type name, so build it once.
    IRI uimaIRI = new IRI(uimaUri);
    for (String typeName : uimaTypeNames) {
        List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
        // Resolve (or create) the holder and add the list to it while holding the
        // write lock once, instead of releasing and re-acquiring it between the two steps.
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                // No holder part yet: create one and register it on the content item.
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(uimaSourceName, featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Aggregations