Search in sources :

Example 1 with EntityhubException

use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.

the class TopicClassificationEngine method computeEnhancements.

/**
 * Extracts the text of the parsed content item, asks the classifier for topic
 * suggestions and writes one topic annotation per suggestion to the metadata
 * graph of the content item.
 * <p>
 * All topic annotations are linked (via {@code dc:relation}) to a single text
 * annotation that is created for the whole content item. For every suggested
 * topic the entity reference, entity type and confidence are written; the
 * classifier performance estimates are added when the report is flagged as
 * up to date, and an English label is added when the concept can be resolved
 * via the entityhub or the referenced site manager.
 *
 * @param ci the content item to enhance
 * @throws IllegalStateException if no content part with a supported mime type
 *         is present, or if the language of the content item is not accepted
 *         by this engine
 * @throws InvalidContentException if the text can not be read from the
 *         selected content part
 * @throws EngineException if the classification fails or if writing the
 *         annotations raises a {@link ClassifierException},
 *         {@link IllegalArgumentException} or {@link EntityhubException}
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with a supported Mime Type found for ContentItem " + ci.getUri() + " (supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was NOT called and indicates a bug in the used EnhancementJobManager!");
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    // an empty set, or a set containing the empty string, accepts every language
    if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language) || acceptedLanguageSet.contains(""))) {
        throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as active for this Engine (active: " + acceptedLanguageSet + ").");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(String.format("Unable to extract textual content from ContentPart %s of ContentItem %s!", contentPart.getKey(), ci.getUri()), e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph metadata = ci.getMetadata();
    List<TopicSuggestion> topics;
    try {
        topics = suggestTopics(text);
        if (topics.isEmpty()) {
            // nothing to annotate: return before creating any enhancement
            return;
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    }
    IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
    IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
    IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
    LiteralFactory lf = LiteralFactory.getInstance();
    // all modifications of the metadata graph happen while holding the write lock
    ci.getLock().writeLock().lock();
    try {
        // Global text annotation to attach all the topic annotation to it.
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
        for (TopicSuggestion topic : topics) {
            IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
            // add link to entity
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            // add confidence information
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
            // add performance estimates of the classifier if available
            ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
            if (perf.uptodate) {
                metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
                metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
                metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
            }
            // fetch concept label from the entityhub or a referenced site if available
            Entity entity = entityhub.getEntity(topic.conceptUri);
            if (entity == null) {
                entity = referencedSiteManager.getEntity(topic.conceptUri);
            }
            if (entity != null) {
                Representation representation = entity.getRepresentation();
                // TODO: extract all languages based on some configuration instead of hardcoding English
                Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
                if (label == null) {
                    // fall back to rdfs:label when no skos:prefLabel is present
                    label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
                }
                if (label != null) {
                    metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                }
            }
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    } catch (IllegalArgumentException e) {
        throw new EngineException(e);
    } catch (EntityhubException e) {
        throw new EngineException(e);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) IOException(java.io.IOException) TopicSuggestion(org.apache.stanbol.enhancer.topic.api.TopicSuggestion) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ClassificationReport(org.apache.stanbol.enhancer.topic.api.ClassificationReport) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 2 with EntityhubException

use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.

the class AbstractBackend method listSubjects.

/**
 * Lists the subjects that have the parsed value for the parsed property.
 * <p>
 * The kind of constraint depends on the parsed object: a reference constraint
 * if {@code isURI(object)} holds, a text constraint for {@link Text} values
 * (language specific if the text has a language), and a value constraint
 * otherwise (restricted to the primary data types of the value's class when
 * such types are known).
 * <p>
 * Results are read in pages of {@code DEFAULT_MAX_SELECT}. Further pages are
 * requested as long as the last page was full and the number of collected
 * subjects does not exceed {@code DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT}.
 *
 * @param property the property; its {@code toString()} is used as field name
 * @param object the value to search for
 * @return the references of the matching subjects (empty if none matched)
 * @throws IllegalStateException if executing the query fails with an
 *         {@link EntityhubException}
 */
@Override
public Collection<Object> listSubjects(Object property, Object object) {
    final FieldQuery query = createQuery();
    if (this.isURI(object)) {
        query.setConstraint(property.toString(), new ReferenceConstraint(object.toString()));
    } else if (object instanceof Text) {
        final Text literal = (Text) object;
        final TextConstraint textConstraint = literal.getLanguage() == null
                ? new TextConstraint(literal.getText(), PatternType.none, true)
                : new TextConstraint(literal.getText(), PatternType.none, true, literal.getLanguage());
        query.setConstraint(property.toString(), textConstraint);
    } else {
        final Set<DataTypeEnum> primaryTypes = DataTypeEnum.getPrimaryDataTypes(object.getClass());
        if (primaryTypes != null && !primaryTypes.isEmpty()) {
            // restrict the value constraint to the URIs of the known data types
            final Collection<String> typeUris = new ArrayList<String>(primaryTypes.size());
            for (DataTypeEnum primaryType : primaryTypes) {
                typeUris.add(primaryType.getUri());
            }
            query.setConstraint(property.toString(), new ValueConstraint(object, typeUris));
        } else {
            query.setConstraint(property.toString(), new ValueConstraint(object));
        }
    }
    query.setLimit(Integer.valueOf(DEFAULT_MAX_SELECT));

    QueryResultList<String> page;
    try {
        page = query(query);
    } catch (EntityhubException e) {
        throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
    }

    if (page.isEmpty()) {
        return Collections.<Object>emptySet();
    }
    if (page.size() == 1) {
        // a single result is assumed to be a likely case
        return Collections.singleton((Object) getValueFactory().createReference(page.iterator().next()));
    }

    final Collection<Object> subjects = new HashSet<Object>(page.size());
    int offset = 0;
    for (;;) {
        for (String id : page) {
            subjects.add(getValueFactory().createReference(id));
        }
        // stop when the last page was not full or the result budget is used up
        if (page.size() < DEFAULT_MAX_SELECT || subjects.size() > DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT) {
            break;
        }
        offset += page.size();
        query.setOffset(offset);
        try {
            page = query(query);
        } catch (EntityhubException e) {
            throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
        }
    }
    return subjects;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) DataTypeEnum(org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 3 with EntityhubException

use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.

the class TrackingDereferencerBase method dereference.

/**
 * Dereferences the entity with the parsed URI by using the tracked service
 * and copies the resulting information to the parsed graph.
 * <p>
 * If neither a field mapper, an LDPath program nor languages are configured
 * on the context, the whole representation is copied. Otherwise the mapped
 * copy (field mapper and/or languages) and the LDPath program are applied;
 * both are executed when both are configured.
 *
 * @param uri the URI of the entity to dereference
 * @param graph the graph the dereferenced information is written to
 * @param writeLock the write lock for the graph
 * @param dc the dereference context; cast to {@link EntityhubDereferenceContext}
 * @return {@code true} if a representation was found for the URI,
 *         {@code false} otherwise
 * @throws DereferenceException if the service is not available or if looking
 *         up the representation fails with an {@link EntityhubException}
 */
@Override
public final boolean dereference(IRI uri, Graph graph, Lock writeLock, DereferenceContext dc) throws DereferenceException {
    T service = getService();
    if (service == null) {
        // NOTE(review): if serviceClass is a java.lang.Class this prints "Class"
        // rather than the service type; serviceClass.getSimpleName() is probably
        // what was intended -- confirm against the field declaration.
        throw new DereferenceException(uri, serviceClass.getClass().getSimpleName() + " service is currently not available");
    }
    EntityhubDereferenceContext derefContext = (EntityhubDereferenceContext) dc;
    Representation rep;
    try {
        rep = getRepresentation(service, uri.getUnicodeString(), derefContext.isOfflineMode());
    } catch (EntityhubException e) {
        throw new DereferenceException(uri, e);
    }
    //we need the languages as strings
    final Set<String> langs = derefContext.getLanguages();
    final FieldMapper fieldMapper = derefContext.getFieldMapper();
    final Program<Object> ldpathProgram = derefContext.getProgram();
    if (rep != null) {
        if (fieldMapper == null && ldpathProgram == null && (langs == null || langs.isEmpty())) {
            // no filter configured: copy everything
            copyAll(uri, rep, graph, writeLock);
        } else {
            //we need to apply some filters while dereferencing
            if (fieldMapper != null || (langs != null && !langs.isEmpty())) {
                //this considers specified fields and included languages
                copyMapped(uri, rep, fieldMapper, langs, graph, writeLock);
            }
            if (ldpathProgram != null) {
                //this executes LDPath statements
                copyLdPath(uri, getRdfBackend(service), ldpathProgram, langs, graph, writeLock);
            }
        }
        return true;
    } else {
        return false;
    }
}
Also used : DereferenceException(org.apache.stanbol.enhancer.engines.dereference.DereferenceException) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) FieldMapper(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper)

Example 4 with EntityhubException

use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.

the class TrackingDereferencerBase method copyLdPath.

/**
 * Executes the parsed LDPath program with the parsed URI as context and
 * writes the results to the parsed graph.
 * <p>
 * Results are first collected in an intermediate graph and only copied to the
 * target graph (while holding the write lock) after all field mappings have
 * been evaluated, so that an error does not leave partial results behind.
 *
 * @param uri the context
 * @param rdfBackend the RDFBackend the LDPath program is executed on
 * @param ldpathProgram the {@link Program} parsed via the dereference context
 * @param langs the set of languages to dereference; if {@code null} or empty
 *        no language filtering is applied
 * @param graph the graph to store the results
 * @param writeLock the write lock for the graph
 * @throws DereferenceException if evaluating a field mapping throws a
 *         {@link RuntimeException}, or on any {@link EntityhubException}
 *         while executing the LDPath program
 */
private void copyLdPath(IRI uri, RDFBackend<Object> rdfBackend, Program<Object> ldpathProgram, Set<String> langs, Graph graph, Lock writeLock) throws DereferenceException {
    // the LDPath context needs to be a RdfReference
    final RdfReference ldpathContext = valueFactory.createReference(uri);
    // intermediate graph backing the representation that collects the results
    final Graph collected = new SimpleGraph();
    final RdfRepresentation target = valueFactory.createRdfRepresentation(uri, collected);
    try {
        for (org.apache.marmotta.ldpath.model.fields.FieldMapping<?, Object> field : ldpathProgram.getFields()) {
            Collection<?> fieldValues;
            try {
                fieldValues = field.getValues(rdfBackend, ldpathContext);
            } catch (RuntimeException e) {
                throw new DereferenceException(uri, e);
            }
            if (fieldValues == null || fieldValues.isEmpty()) {
                // nothing selected by this mapping
                continue;
            }
            final String name = field.getFieldName();
            if (langs == null || langs.isEmpty()) {
                // no language filter: add all values at once
                target.add(name, fieldValues);
                continue;
            }
            for (Object fieldValue : fieldValues) {
                if (fieldValue instanceof Text && !langs.contains(((Text) fieldValue).getLanguage())) {
                    // text in a language that is not requested ... do not add
                    continue;
                }
                target.add(name, fieldValue);
            }
        }
    } catch (EntityhubException e) {
        throw new DereferenceException(uri, e);
    }
    if (log.isTraceEnabled()) {
        log.trace("dereferenced via LDPath {}", ModelUtils.getRepresentationInfo(target));
    }
    if (collected.isEmpty()) {
        return;
    }
    // copy the collected results to the target graph
    writeLock.lock();
    try {
        graph.addAll(collected);
    } finally {
        writeLock.unlock();
    }
}
Also used : Text(org.apache.stanbol.entityhub.servicesapi.model.Text) DereferenceException(org.apache.stanbol.enhancer.engines.dereference.DereferenceException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) RdfReference(org.apache.stanbol.entityhub.model.clerezza.RdfReference)

Example 5 with EntityhubException

use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.

the class EntityhubRootResource method getMapping.

/**
 * Returns the mapping with the parsed id.
 * <p>
 * Responses:
 * <ul>
 * <li>if no id is parsed and HTML is acceptable: the "mapping" documentation
 * view</li>
 * <li>if no id is parsed and HTML is not acceptable: {@code 400 Bad Request}</li>
 * <li>if the entityhub lookup fails: {@code 500 Internal Server Error}</li>
 * <li>if no mapping exists for the id: {@code 404 Not Found}</li>
 * <li>otherwise: {@code 200 OK} with the mapping as entity</li>
 * </ul>
 *
 * @param reference the id of the mapping (query parameter {@code id})
 * @param headers the HTTP headers of the request, used to select the media type
 * @return the response as described above
 * @throws WebApplicationException declared by the signature; not thrown
 *         directly by this method
 */
@GET
@Path("mapping")
@Produces({ APPLICATION_JSON, RDF_XML, N3, TURTLE, X_TURTLE, RDF_JSON, N_TRIPLE, TEXT_HTML })
public Response getMapping(@QueryParam("id") String reference, @Context HttpHeaders headers) throws WebApplicationException {
    log.debug("get mapping for request > id : {} > accept: {}", reference, headers.getAcceptableMediaTypes());
    Set<String> supported = new HashSet<String>(JerseyUtils.REPRESENTATION_SUPPORTED_MEDIA_TYPES);
    supported.add(TEXT_HTML);
    MediaType acceptedMediaType = getAcceptableMediaType(headers, supported, APPLICATION_JSON_TYPE);
    if (reference == null || reference.isEmpty()) {
        //if HTML -> print the docu of the restfull service
        if (TEXT_HTML_TYPE.isCompatible(acceptedMediaType)) {
            ResponseBuilder rb = Response.ok(new Viewable("mapping", this));
            rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML + "; charset=utf-8");
            //addCORSOrigin(servletContext, rb, headers);
            // return the documentation page; without this return the request
            // would fall through to the entityhub lookup with a missing id
            return rb.build();
        } else {
            return Response.status(Status.BAD_REQUEST).entity("The mapping id (URI) is missing.\n").header(HttpHeaders.ACCEPT, acceptedMediaType).build();
        }
    }
    //Entityhub entityhub = ContextHelper.getServiceFromContext(Entityhub.class, servletContext);
    Entity mapping;
    try {
        mapping = entityhub.getMappingById(reference);
    } catch (EntityhubException e) {
        log.error("error while getting the mapping for {}", reference, e);
        return Response.status(Status.INTERNAL_SERVER_ERROR).build();
    }
    if (mapping == null) {
        return Response.status(Status.NOT_FOUND).entity("No mapping found for '" + reference + "'.\n").header(HttpHeaders.ACCEPT, acceptedMediaType).build();
    } else {
        ResponseBuilder rb = Response.ok(mapping);
        rb.header(HttpHeaders.CONTENT_TYPE, acceptedMediaType + "; charset=utf-8");
        //addCORSOrigin(servletContext, rb, headers);
        return rb.build();
    }
}
Also used : Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Viewable(org.apache.stanbol.commons.web.viewable.Viewable) MediaType(javax.ws.rs.core.MediaType) MediaTypeUtil.getAcceptableMediaType(org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder) HashSet(java.util.HashSet) Path(javax.ws.rs.Path) EntityhubLDPath(org.apache.stanbol.entityhub.ldpath.EntityhubLDPath) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Aggregations

EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)19 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)10 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)9 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)8 MediaType (javax.ws.rs.core.MediaType)7 MediaTypeUtil.getAcceptableMediaType (org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType)7 EntityhubLDPath (org.apache.stanbol.entityhub.ldpath.EntityhubLDPath)7 HashSet (java.util.HashSet)6 GET (javax.ws.rs.GET)6 Path (javax.ws.rs.Path)6 Viewable (org.apache.stanbol.commons.web.viewable.Viewable)6 Produces (javax.ws.rs.Produces)5 WebApplicationException (javax.ws.rs.WebApplicationException)5 Graph (org.apache.clerezza.commons.rdf.Graph)3 Entityhub (org.apache.stanbol.entityhub.servicesapi.Entityhub)3 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)3 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 IRI (org.apache.clerezza.commons.rdf.IRI)2