Search in sources :

Example 1 with FeatureStructure

use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.

the class SaxUIMAServletResult2Offsets method startElement.

/**
 * Records one {@link FeatureStructure} for every element that is not named {@code result}.
 *
 * <p>The element counter is incremented, a feature structure is created from the
 * source name, the element's local name and that counter, and each XML attribute
 * is attached to it as a {@link Feature}. The finished structure is appended to
 * {@code fsList}. Elements whose local name is {@code result} are ignored.
 *
 * @param uri       namespace URI of the element (not used here)
 * @param localName local name of the element; also used as the structure's type
 * @param qname     qualified name of the element (not used here)
 * @param attrs     attributes of the element, one feature is created per attribute
 * @throws SAXException declared by the handler contract
 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
 *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
 */
@Override
public void startElement(String uri, String localName, String qname, Attributes attrs) throws SAXException {
    if (localName.equals("result")) {
        // "result" elements do not produce a feature structure.
        return;
    }

    elementCounter++;
    FeatureStructure structure =
            new FeatureStructure(sourceName + "." + localName + "#" + elementCounter, localName);

    for (int idx = 0; idx < attrs.getLength(); idx++) {
        String attrName = attrs.getQName(idx);
        String attrValue = attrs.getValue(idx);
        // The two branches differ only in the declared type argument; the
        // constructor call itself is raw and receives the attribute's String value.
        if (checkIfInteger(attrValue)) {
            Feature<Integer> integerFeature = new Feature(attrName, attrValue);
            structure.addFeature(integerFeature);
        } else {
            Feature<String> stringFeature = new Feature(attrName, attrValue);
            structure.addFeature(stringFeature);
        }
    }

    fsList.add(structure);
}
Also used : FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) Feature(org.apache.stanbol.commons.caslight.Feature)

Example 2 with FeatureStructure

use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.

the class UIMAToTriples method computeEnhancements.

/**
 * Writes the UIMA feature structures attached to the content item into its
 * metadata graph as text enhancements.
 *
 * <p>The {@link FeatureStructureListHolder} part registered under {@code uimaUri}
 * is read, and for every configured source name each feature structure accepted by
 * {@code tnfs} yields one text enhancement carrying its type, its {@code begin} /
 * {@code end} values (when present), its covered text (when non-empty) and one
 * plain literal per remaining feature that {@code tnfs} allows to be converted.
 * Type and feature names are replaced by the entry in {@code mappings} when one
 * exists; otherwise {@code uimaUri + ":" + name} is used.
 *
 * <p>If the holder part is missing, the error is logged and the method returns.
 *
 * @param ci the content item whose metadata is extended
 * @throws EngineException declared by the engine contract
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    try {
        IRI uimaIRI = new IRI(uimaUri);
        logger.info("Trying to load holder for ref:" + uimaUri);
        FeatureStructureListHolder holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);

        for (String source : sourceNames) {
            logger.info("Processing UIMA source:" + source);
            List<FeatureStructure> annotations = holder.getFeatureStructureList(source);
            if (annotations == null) {
                logger.info("Source list is null:" + source);
                continue;
            }
            logger.info("UIMA source:" + source + " contains " + annotations.size() + " annotations.");

            for (FeatureStructure fs : annotations) {
                String typeName = fs.getTypeName();
                logger.debug("Checking " + typeName);
                if (!tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
                    continue;
                }
                logger.debug("Adding " + typeName);

                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                Graph metadata = ci.getMetadata();

                // A configured mapping wins over the default "<uimaUri>:<type>" form.
                String typeRef = mappings.containsKey(typeName)
                        ? mappings.get(typeName)
                        : uimaUri + ":" + typeName;
                metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(typeRef)));

                if (fs.getFeature("begin") != null) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
                            literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
                }
                if (fs.getFeature("end") != null) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
                            literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
                }
                if (fs.getCoveredText() != null && !fs.getCoveredText().isEmpty()) {
                    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                            new PlainLiteralImpl(fs.getCoveredText())));
                }

                for (Feature feature : fs.getFeatures()) {
                    // "begin" and "end" were already emitted above as typed literals.
                    if (feature.getName().equals("begin") || feature.getName().equals("end")) {
                        continue;
                    }
                    if (!tnfs.checkFeatureToConvert(typeName, feature)) {
                        continue;
                    }
                    String predicateRef = mappings.containsKey(feature.getName())
                            ? mappings.get(feature.getName())
                            : uimaUri + ":" + feature.getName();
                    IRI predicate = new IRI(predicateRef);
                    metadata.add(new TripleImpl(textAnnotation, predicate,
                            new PlainLiteralImpl(feature.getValueAsString())));
                }
            }
        }
    } catch (NoSuchPartException e) {
        logger.error("No UIMA results found with ref:" + uimaUri, e);
    }
}
Also used : FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Feature(org.apache.stanbol.commons.caslight.Feature) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 3 with FeatureStructure

use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.

the class UIMARemoteClient method computeEnhancements.

/**
 * Runs the content item's text through every configured UIMA servlet client and
 * stores each client's feature structure list on the content item.
 *
 * <p>The lists are kept in a {@link FeatureStructureListHolder} content part
 * registered under {@code uimaUri}; the part is created and added on first use.
 * Looking up (or creating) the holder and adding the list to it happen inside a
 * single write-locked section, so no other thread can interleave between the two
 * steps.
 *
 * @param ci the content item to process
 * @throws EngineException         declared by the engine contract
 * @throws InvalidContentException if the text of the content part cannot be read
 * @throws IllegalStateException   if the content item has no part with a supported mime type
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    for (UIMASimpleServletClient ussc : usscList) {
        logger.info("Accessing uima source:" + ussc.getSourceName() + " endpoint:" + ussc.getUri());
        // Processing is done before taking the lock so the content item is not
        // locked while the client works.
        List<FeatureStructure> featureSetList = ussc.process(text);
        IRI uimaIRI = new IRI(uimaUri);
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                // First result for this content item: create and register the holder part.
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            holder.addFeatureStructureList(ussc.getSourceName(), featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Also used : FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) UIMASimpleServletClient(org.apache.stanbol.enhancer.engines.uimaremote.tools.UIMASimpleServletClient) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) IOException(java.io.IOException)

Example 4 with FeatureStructure

use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.

the class UIMAServletClient method getFSList.

/**
 * Queries the UIMA Simple Servlet and returns the FeatureSet list.
 *
 * <p>The input is posted as the form fields {@code text=<input>&mode=xml}; the XML
 * response is parsed with {@code SaxUIMAServletResult2Offsets}, and the covered
 * text of every resulting feature structure is set from {@code input}.
 *
 * @param servletURI The uri of the servlet
 * @param sourceName The source name of this processor
 * @param input The Sofa String
 * @return The generated FeatureSet list, or {@code null} if {@code input} is
 *         {@code null} or the request/parsing fails (the error is logged)
 */
public List<FeatureStructure> getFSList(String servletURI, String sourceName, String input) {
    if (input == null) {
        logger.error("input (sofaString) is null!");
        return null;
    }
    try {
        // Construct data
        String data = URLEncoder.encode("text", "UTF-8") + "=" + URLEncoder.encode(input, "UTF-8");
        data += "&" + URLEncoder.encode("mode", "UTF-8") + "=" + URLEncoder.encode("xml", "UTF-8");

        // Send data. The writer is closed even when writing fails, and the charset
        // is explicit instead of the platform default.
        URL url = new URL(servletURI);
        URLConnection conn = url.openConnection();
        conn.setDoOutput(true);
        try (OutputStreamWriter wr = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")) {
            wr.write(data);
            wr.flush();
        }

        // NOTE(review): the XMLReader uses default settings; if the servlet endpoint
        // is not fully trusted, consider disabling DTDs / external entities here.
        XMLReader xr = XMLReaderFactory.createXMLReader();
        SaxUIMAServletResult2Offsets handler = new SaxUIMAServletResult2Offsets();
        handler.setSourceName(sourceName);
        xr.setContentHandler(handler);
        xr.setErrorHandler(handler);

        // Get the response. The parser is handed the raw byte stream so that it
        // determines the encoding from the XML document itself rather than from the
        // platform default charset. The type is fully qualified so this method
        // needs no additional import.
        try (java.io.InputStream response = conn.getInputStream()) {
            xr.parse(new InputSource(response));
        }

        List<FeatureStructure> fsList = handler.getFsList();
        for (FeatureStructure fs : fsList) {
            fs.setCoveredText(fs.getSofaChunk(input));
        }
        return fsList;
    } catch (SAXException | IOException ex) {
        logger.error("Error in UIMAClient", ex);
    }
    return null;
}
Also used : InputSource(org.xml.sax.InputSource) InputStreamReader(java.io.InputStreamReader) IOException(java.io.IOException) URL(java.net.URL) URLConnection(java.net.URLConnection) SAXException(org.xml.sax.SAXException) FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) XMLReader(org.xml.sax.XMLReader)

Example 5 with FeatureStructure

use of org.apache.stanbol.commons.caslight.FeatureStructure in project stanbol by apache.

the class UIMALocal method computeEnhancements.

/**
 * Runs the local UIMA analysis engine over the content item's text and stores the
 * resulting feature structures on the content item.
 *
 * <p>For every configured UIMA type name the annotations are converted with
 * {@code concertToCasLight} and added to the {@link FeatureStructureListHolder}
 * content part registered under {@code uimaUri}; the part is created and added on
 * first use. Looking up (or creating) the holder and adding the list to it happen
 * inside a single write-locked section, so no other thread can interleave between
 * the two steps.
 *
 * @param ci the content item to process
 * @throws EngineException         if the analysis engine cannot be initialized or fails while running
 * @throws InvalidContentException if the text of the content part cannot be read
 * @throws IllegalStateException   if the content item has no part with a supported mime type
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    JCas jcas;
    try {
        logger.info("Processing text with UIMA AE...");
        jcas = processText(text);
    } catch (ResourceInitializationException ex) {
        logger.error("Error initializing UIMA AE", ex);
        throw new EngineException("Error initializing UIMA AE", ex);
    } catch (AnalysisEngineProcessException ex) {
        logger.error("Error running UIMA AE", ex);
        throw new EngineException("Error running UIMA AE", ex);
    }
    // Defensive: nothing to store when processing produced no CAS.
    if (jcas == null) {
        return;
    }
    for (String typeName : uimaTypeNames) {
        // Conversion is done before taking the lock to keep the locked section short.
        List<FeatureStructure> featureSetList = concertToCasLight(jcas, typeName);
        IRI uimaIRI = new IRI(uimaUri);
        ci.getLock().writeLock().lock();
        try {
            FeatureStructureListHolder holder;
            try {
                holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
            } catch (NoSuchPartException e) {
                // First result for this content item: create and register the holder part.
                holder = new FeatureStructureListHolder();
                logger.info("Adding FeatureSet List Holder content part with uri:" + uimaUri);
                ci.addPart(uimaIRI, holder);
                logger.info(uimaUri + " content part added.");
            }
            // NOTE(review): every type's list is added under the same uimaSourceName;
            // whether later lists replace or extend earlier ones depends on the
            // holder implementation - confirm this is intended.
            holder.addFeatureStructureList(uimaSourceName, featureSetList);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) JCas(org.apache.uima.jcas.JCas) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder)

Aggregations

FeatureStructure (org.apache.stanbol.commons.caslight.FeatureStructure)6 IOException (java.io.IOException)3 IRI (org.apache.clerezza.commons.rdf.IRI)3 Feature (org.apache.stanbol.commons.caslight.Feature)3 FeatureStructureListHolder (org.apache.stanbol.commons.caslight.FeatureStructureListHolder)3 NoSuchPartException (org.apache.stanbol.enhancer.servicesapi.NoSuchPartException)3 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)2 InvalidContentException (org.apache.stanbol.enhancer.servicesapi.InvalidContentException)2 BufferedReader (java.io.BufferedReader)1 InputStreamReader (java.io.InputStreamReader)1 OutputStreamWriter (java.io.OutputStreamWriter)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 ArrayList (java.util.ArrayList)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)1 UIMASimpleServletClient (org.apache.stanbol.enhancer.engines.uimaremote.tools.UIMASimpleServletClient)1 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)1