Search in sources :

Example 1 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class TopicClassificationEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with a supported Mime Type" + "found for ContentItem " + ci.getUri() + "(supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was" + "NOT called and indicates a bug in the used EnhancementJobManager!");
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language) || acceptedLanguageSet.contains(""))) {
        throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as " + " active for this Engine (active: " + acceptedLanguageSet + ").");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(String.format("Unable to extract " + " textual content from ContentPart %s of ContentItem %s!", contentPart.getKey(), ci.getUri()), e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph metadata = ci.getMetadata();
    List<TopicSuggestion> topics;
    try {
        topics = suggestTopics(text);
        if (topics.isEmpty()) {
            return;
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    }
    IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
    IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
    IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
    LiteralFactory lf = LiteralFactory.getInstance();
    ci.getLock().writeLock().lock();
    try {
        // Global text annotation to attach all the topic annotation to it.
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
        for (TopicSuggestion topic : topics) {
            IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
            // add link to entity
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            // add confidence information
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
            // add performance estimates of the classifier if available
            ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
            if (perf.uptodate) {
                metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
                metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
                metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
            }
            // fetch concept label from the entityhub or a referenced site if available
            Entity entity = entityhub.getEntity(topic.conceptUri);
            if (entity == null) {
                entity = referencedSiteManager.getEntity(topic.conceptUri);
            }
            if (entity != null) {
                Representation representation = entity.getRepresentation();
                // TODO: extract all languages based on some configuration instead of hardcoding English
                Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
                if (label == null) {
                    label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
                }
                if (label != null) {
                    metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                }
            }
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    } catch (IllegalArgumentException e) {
        throw new EngineException(e);
    } catch (EntityhubException e) {
        throw new EngineException(e);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) IOException(java.io.IOException) TopicSuggestion(org.apache.stanbol.enhancer.topic.api.TopicSuggestion) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ClassificationReport(org.apache.stanbol.enhancer.topic.api.ClassificationReport) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 2 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class UIMAToTriples method computeEnhancements.

public void computeEnhancements(ContentItem ci) throws EngineException {
    FeatureStructureListHolder holder;
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    try {
        IRI uimaIRI = new IRI(uimaUri);
        logger.info(new StringBuilder("Trying to load holder for ref:").append(uimaUri).toString());
        holder = ci.getPart(uimaIRI, FeatureStructureListHolder.class);
        for (String source : sourceNames) {
            logger.info(new StringBuilder("Processing UIMA source:").append(source).toString());
            List<FeatureStructure> sourceList = holder.getFeatureStructureList(source);
            if (sourceList != null) {
                logger.info(new StringBuilder("UIMA source:").append(source).append(" contains ").append(sourceList.size()).append(" annotations.").toString());
            } else {
                logger.info(new StringBuilder("Source list is null:").append(source).toString());
                continue;
            }
            for (FeatureStructure fs : sourceList) {
                String typeName = fs.getTypeName();
                logger.debug(new StringBuilder("Checking ").append(typeName).toString());
                if (tnfs.checkFeatureStructureAllowed(typeName, fs.getFeatures())) {
                    logger.debug(new StringBuilder("Adding ").append(typeName).toString());
                    IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    Graph metadata = ci.getMetadata();
                    String uriRefStr = uimaUri + ":" + typeName;
                    if (mappings.containsKey(typeName)) {
                        uriRefStr = mappings.get(typeName);
                    }
                    metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(uriRefStr)));
                    if (fs.getFeature("begin") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(fs.getFeature("begin").getValueAsInteger())));
                    }
                    if (fs.getFeature("end") != null) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(fs.getFeature("end").getValueAsInteger())));
                    }
                    if (fs.getCoveredText() != null && !fs.getCoveredText().isEmpty()) {
                        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(fs.getCoveredText())));
                    }
                    for (Feature f : fs.getFeatures()) {
                        if (!f.getName().equals("begin") && !f.getName().equals("end") && tnfs.checkFeatureToConvert(typeName, f)) {
                            String predRefStr = uimaUri + ":" + f.getName();
                            if (mappings.containsKey(f.getName())) {
                                predRefStr = mappings.get(f.getName());
                            }
                            IRI predicate = new IRI(predRefStr);
                            metadata.add(new TripleImpl(textAnnotation, predicate, new PlainLiteralImpl(f.getValueAsString())));
                        }
                    }
                }
            }
        }
    } catch (NoSuchPartException e) {
        logger.error(new StringBuilder("No UIMA results found with ref:").append(uimaUri).toString(), e);
    }
}
Also used : FeatureStructure(org.apache.stanbol.commons.caslight.FeatureStructure) IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) FeatureStructureListHolder(org.apache.stanbol.commons.caslight.FeatureStructureListHolder) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Feature(org.apache.stanbol.commons.caslight.Feature) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 3 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class CeliSentimentAnalysisEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    // used for the palin literals in TextAnnotations
    Language lang = new Language(language);
    try {
        List<SentimentExpression> lista = this.client.extractSentimentExpressions(text, language);
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        Graph g = ci.getMetadata();
        for (SentimentExpression se : lista) {
            try {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // add selected text as PlainLiteral in the language extracted from the text
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(se.getSnippetStr(), lang)));
                g.add(new TripleImpl(textAnnotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));
                if (se.getStartSnippet() != null && se.getEndSnippet() != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(se.getStartSnippet().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(se.getEndSnippet().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, se.getSnippetStr(), se.getStartSnippet()), lang)));
                    g.add(new TripleImpl(textAnnotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literalFactory.createTypedLiteral(se.getSentimentPolarityAsDoubleValue())));
                }
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 4 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    // used for the palin literals in TextAnnotations
    Language lang = new Language(language);
    try {
        List<NamedEntity> lista = this.client.extractEntities(text, language);
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        Graph g = ci.getMetadata();
        for (NamedEntity ne : lista) {
            try {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // add selected text as PlainLiteral in the language extracted from the text
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(ne.getFormKind(), lang)));
                g.add(new TripleImpl(textAnnotation, DC_TYPE, getEntityRefForType(ne.type)));
                if (ne.getFrom() != null && ne.getTo() != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(ne.getFrom().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(ne.getTo().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
                }
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 5 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class ContentItemResource method getPlacesAsJSON.

/**
 * @return an RDF/JSON descriptions of places for the word map widget
 */
public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
    Graph g = new IndexedGraph();
    LiteralFactory lf = LiteralFactory.getInstance();
    Graph metadata = contentItem.getMetadata();
    for (EntityExtractionSummary p : getPlaceOccurrences()) {
        EntitySuggestion bestGuess = p.getBestGuess();
        if (bestGuess == null) {
            continue;
        }
        IRI uri = new IRI(bestGuess.getUri());
        Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
        if (latitudes.hasNext()) {
            g.add(latitudes.next());
        }
        Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
        if (longitutes.hasNext()) {
            g.add(longitutes.next());
            g.add(new TripleImpl(uri, Properties.RDFS_LABEL, lf.createTypedLiteral(bestGuess.getLabel())));
        }
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.serialize(out, g, SupportedFormat.RDF_JSON);
    String rdfString = out.toString("utf-8");
    return rdfString;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ByteArrayOutputStream(java.io.ByteArrayOutputStream) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Aggregations

LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)24 IRI (org.apache.clerezza.commons.rdf.IRI)22 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)20 Graph (org.apache.clerezza.commons.rdf.Graph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)10 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)9 Language (org.apache.clerezza.commons.rdf.Language)8 Triple (org.apache.clerezza.commons.rdf.Triple)7 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)5 Literal (org.apache.clerezza.commons.rdf.Literal)4 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)4 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)4 ArrayList (java.util.ArrayList)3 Date (java.util.Date)3 Map (java.util.Map)3 SOAPException (javax.xml.soap.SOAPException)3 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)3