use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.
the class SaxUIMAServletResult2Offsets method startElement.
/**
 * SAX callback invoked for every opening tag.
 * <p>
 * Elements whose local name is {@code result} are skipped. Any other element
 * is recorded as a {@link FeatureStructure} identified by
 * {@code sourceName + "." + localName + "#" + elementCounter}, with the local
 * name as its type. Each XML attribute is attached to it as a {@link Feature},
 * and the structure is then appended to {@code fsList}.
 *
 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
 *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
 */
@Override
public void startElement(String uri, String localName, String qname, Attributes attrs) throws SAXException {
    if (localName.equals("result")) {
        return;
    }

    elementCounter++;
    String structureId = sourceName + "." + localName + "#" + elementCounter;
    FeatureStructure structure = new FeatureStructure(structureId, localName);

    for (int index = 0, count = attrs.getLength(); index < count; index++) {
        String attrName = attrs.getQName(index);
        String attrValue = attrs.getValue(index);
        if (!checkIfInteger(attrValue)) {
            Feature<String> textFeature = new Feature(attrName, attrValue);
            structure.addFeature(textFeature);
            continue;
        }
        // NOTE(review): the value is still handed over as a String even though the
        // feature is declared Feature<Integer> (raw constructor call) - confirm that
        // Feature performs the conversion when the value is read as an integer.
        Feature<Integer> numericFeature = new Feature(attrName, attrValue);
        structure.addFeature(numericFeature);
    }

    fsList.add(structure);
}
use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.
the class UIMAToTriples method computeEnhancements.
/**
 * Turns the feature structures stored on the content item into triples in the
 * item's metadata graph.
 * <p>
 * The {@link FeatureStructureListHolder} is read from the content part named by
 * {@code uimaUri}. For every configured source name, each feature structure
 * accepted by {@code tnfs} yields a new text enhancement carrying:
 * <ul>
 * <li>a {@code DC_TYPE} triple (the mapped IRI for the type name, or
 * {@code uimaUri + ":" + typeName} when no mapping exists),</li>
 * <li>{@code ENHANCER_START} / {@code ENHANCER_END} when the "begin" / "end"
 * features are present,</li>
 * <li>{@code ENHANCER_SELECTED_TEXT} when the covered text is non-empty,</li>
 * <li>one plain-literal triple per remaining feature accepted by {@code tnfs}.</li>
 * </ul>
 * A missing content part is logged as an error and otherwise ignored.
 * <p>
 * NOTE(review): the metadata graph is modified here without explicitly taking
 * the content item's write lock - confirm whether that is handled elsewhere.
 *
 * @param ci the content item whose metadata is extended
 * @throws EngineException declared by the engine contract
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    FeatureStructureListHolder holder;
    try {
        IRI uimaIRI = new IRI(uimaUri);
        logger.info("Trying to load holder for ref:" + uimaUri);
        holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);

        for (String source : sourceNames) {
            logger.info("Processing UIMA source:" + source);
            List<FeatureStructure> annotations = holder.getFeatureStructureList(source);
            if (annotations == null) {
                logger.info("Source list is null:" + source);
                continue;
            }
            logger.info("UIMA source:" + source + " contains " + annotations.size() + " annotations.");

            for (FeatureStructure fs : annotations) {
                String typeName = fs.getTypeName();
                logger.debug("Checking " + typeName);
                if (!tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
                    continue;
                }
                logger.debug("Adding " + typeName);

                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                Graph metadata = ci.getMetadata();

                // An explicit mapping wins over the generated "<uimaUri>:<typeName>" reference.
                String typeRef = mappings.containsKey(typeName)
                        ? mappings.get(typeName)
                        : uimaUri + ":" + typeName;
                metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(typeRef)));

                if (fs.getFeature("begin") != null) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
                            literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
                }
                if (fs.getFeature("end") != null) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
                            literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
                }

                String coveredText = fs.getCoveredText();
                if (coveredText != null && !coveredText.isEmpty()) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                            new PlainLiteralImpl(coveredText)));
                }

                for (Feature f : fs.getFeatures()) {
                    String featureName = f.getName();
                    // "begin" and "end" were already emitted as start/end offsets above.
                    boolean isOffset = featureName.equals("begin") || featureName.equals("end");
                    if (isOffset || !tnfs.checkFeatureToConvert(typeName, f)) {
                        continue;
                    }
                    String predicateRef = mappings.containsKey(featureName)
                            ? mappings.get(featureName)
                            : uimaUri + ":" + featureName;
                    metadata.add(new TripleImpl(textAnnotation, new IRI(predicateRef),
                            new PlainLiteralImpl(f.getValueAsString())));
                }
            }
        }
    } catch (NoSuchPartException e) {
        logger.error("No UIMA results found with ref:" + uimaUri, e);
    }
}
use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.
the class UIMARemoteClient method computeEnhancements.
/**
 * Extracts the text of the content item, passes it to every configured
 * {@link UIMASimpleServletClient} and stores each returned feature structure
 * list in the {@link FeatureStructureListHolder} content part named by
 * {@code uimaUri}, creating that part when it does not exist yet.
 *
 * @param ci the content item to process
 * @throws IllegalStateException if the item has no blob with a supported mime type
 * @throws InvalidContentException if the text cannot be read from the blob
 * @throws EngineException declared by the engine contract
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }

    // The holder part URI is the same for every source, so build it once.
    IRI uimaIRI = new IRI(uimaUri);
    for (UIMASimpleServletClient ussc : usscList) {
        logger.info("Accessing uima source:" + ussc.getSourceName() + " endpoint:" + ussc.getUri());
        // Processing happens before the lock is taken so the lock is held only briefly.
        List<FeatureStructure> featureSetList = ussc.process(text);

        // Look up (or create) the holder and register the list in ONE critical
        // section; releasing and re-acquiring the write lock in between would
        // only open a window for other writers without any benefit.
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(ussc.getSourceName(), featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.
the class UIMAServletClient method getFSList.
/**
 * Queries the UIMA Simple Servlet and returns the FeatureSet list.
 * <p>
 * The text is posted form-encoded as {@code text=<input>&mode=xml}. The XML
 * response is parsed with a {@link SaxUIMAServletResult2Offsets} handler, and the
 * covered text of every resulting feature structure is filled in from
 * {@code input}.
 *
 * @param servletURI The uri of the servlet
 * @param sourceName The source name of this processor
 * @param input The Sofa String
 * @return The generated FeatureSet list, or {@code null} if {@code input} is
 *         {@code null} or the request / response parsing failed (the failure
 *         is logged)
 */
public List<FeatureStructure> getFSList(String servletURI, String sourceName, String input) {
    if (input == null) {
        logger.error("input (sofaString) is null!");
        return null;
    }
    try {
        // Construct data
        String data = URLEncoder.encode("text", "UTF-8") + "=" + URLEncoder.encode(input, "UTF-8")
                + "&" + URLEncoder.encode("mode", "UTF-8") + "=" + URLEncoder.encode("xml", "UTF-8");

        // Send data
        URL url = new URL(servletURI);
        URLConnection conn = url.openConnection();
        conn.setDoOutput(true);
        // Explicit charset instead of the platform default; the writer is closed
        // even if writing fails.
        OutputStreamWriter wr = new OutputStreamWriter(conn.getOutputStream(), "UTF-8");
        try {
            wr.write(data);
            wr.flush();
        } finally {
            wr.close();
        }

        // Get the response
        // NOTE(review): the parser runs with its default configuration; if the
        // servlet endpoint is not trusted, consider disabling DTDs / external entities.
        XMLReader xr = XMLReaderFactory.createXMLReader();
        SaxUIMAServletResult2Offsets handler = new SaxUIMAServletResult2Offsets();
        handler.setSourceName(sourceName);
        xr.setContentHandler(handler);
        xr.setErrorHandler(handler);

        // Hand the raw byte stream to the parser so it can determine the encoding
        // from the XML declaration / BOM; wrapping it in a Reader with the platform
        // default charset could corrupt non-ASCII content.
        InputSource response = new InputSource(conn.getInputStream());
        try {
            xr.parse(response);
        } finally {
            response.getByteStream().close();
        }

        List<FeatureStructure> fsList = handler.getFsList();
        for (FeatureStructure fs : fsList) {
            fs.setCoveredText(fs.getSofaChunk(input));
        }
        return fsList;
    } catch (SAXException ex) {
        logger.error("Error in UIMAClient", ex);
    } catch (IOException ex) {
        logger.error("Error in UIMAClient", ex);
    }
    return null;
}
use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.
the class UIMALocal method computeEnhancements.
/**
 * Extracts the text of the content item, runs it through {@code processText}
 * and, for every configured UIMA type name, stores the list produced by
 * {@code concertToCasLight} in the {@link FeatureStructureListHolder} content
 * part named by {@code uimaUri}, creating that part when it does not exist yet.
 *
 * @param ci the content item to process
 * @throws IllegalStateException if the item has no blob with a supported mime type
 * @throws InvalidContentException if the text cannot be read from the blob
 * @throws EngineException if initializing or running the UIMA analysis engine fails
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    JCas jcas;
    try {
        logger.info("Processing text with UIMA AE...");
        jcas = processText(text);
    } catch (ResourceInitializationException ex) {
        logger.error("Error initializing UIMA AE", ex);
        throw new EngineException("Error initializing UIMA AE", ex);
    } catch (AnalysisEngineProcessException ex) {
        logger.error("Error running UIMA AE", ex);
        throw new EngineException("Error running UIMA AE", ex);
    }
    //just for being sure
    if (jcas == null) {
        return;
    }

    // The holder part URI is the same for every type, so build it once.
    IRI uimaIRI = new IRI(uimaUri);
    for (String typeName : uimaTypeNames) {
        List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);

        // Look up (or create) the holder and register the list in ONE critical
        // section; releasing and re-acquiring the write lock in between would
        // only open a window for other writers without any benefit.
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(uimaSourceName, featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Aggregations