Search in sources :

Example 16 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class EntityhubImpl method importEntity.

/**
 * Imports the parsed remote Entity: a locally managed Entity and a mapping
 * Entity are created via the Entityhub Yard, initialised from the remote
 * Entity and finally stored.
 * @param remoteEntity the Entity to import
 * @return the locally managed Entity created and stored within the
 * Entityhub, or <code>null</code> if the parsed Entity is <code>null</code>
 * or the Site of the parsed Entity is not available
 * @throws YardException declared for the Yard operations used by this
 * method (the exact failure conditions are not visible here)
 */
protected Entity importEntity(Entity remoteEntity) throws YardException {
    if (remoteEntity == null) {
        return null;
    }
    final Site remoteSite = siteManager.getSite(remoteEntity.getSite());
    if (remoteSite == null) {
        // without the Site the import can not be performed
        log.warn("Unable to import Entity {} because the ReferencedSite {} is currently not active -> return null", remoteEntity.getId(), remoteEntity.getSite());
        return null;
    }
    // the factory is requested before any Representation is created
    final ValueFactory factory = entityhubYard.getValueFactory();
    // (1) create the locally managed Entity and copy over the remote data
    final Entity managed = loadEntity(entityhubYard.create(constructResourceId(DEFAULT_MANAGED_ENTITY_PREFIX)));
    importEntity(remoteEntity, remoteSite, managed, factory);
    // (2) create the mapping and link the managed with the remote Entity
    final Entity mapping = loadEntity(entityhubYard.create(constructResourceId(DEFAULT_MAPPING_PREFIX)));
    establishMapping(managed, remoteEntity, remoteSite, mapping);
    // (3) persist both the managed Entity and the mapping in the entityhubYard
    storeEntity(managed);
    storeEntity(mapping);
    return managed;
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) ValueFactory(org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)

Example 17 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class TopicClassificationEngine method computeEnhancements.

/**
 * Suggests topics for the textual content of the parsed ContentItem and
 * writes one topic annotation per suggestion to the metadata of the
 * ContentItem. All annotations are linked to a single text annotation
 * that is created for the whole ContentItem.
 * <p>
 * Nothing is written if the text is empty (after trimming) or if no
 * topics are suggested.
 * @param ci the ContentItem to enhance
 * @throws IllegalStateException if the ContentItem has no content part
 * with a supported mime type or if its language is not accepted by this
 * engine
 * @throws EngineException if the text can not be read from the content
 * part, or if a {@link ClassifierException}, {@link EntityhubException}
 * or {@link IllegalArgumentException} occurs while suggesting topics or
 * writing the annotations
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        // NOTE: the message fragments need explicit separators, otherwise
        // words of adjacent fragments are glued together
        throw new IllegalStateException("No ContentPart with a supported Mime Type " + "found for ContentItem " + ci.getUri() + " (supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was " + "NOT called and indicates a bug in the used EnhancementJobManager!");
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    // an empty set or a set containing the empty string accepts any language
    if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language) || acceptedLanguageSet.contains(""))) {
        throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as " + " active for this Engine (active: " + acceptedLanguageSet + ").");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(String.format("Unable to extract " + " textual content from ContentPart %s of ContentItem %s!", contentPart.getKey(), ci.getUri()), e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph metadata = ci.getMetadata();
    List<TopicSuggestion> topics;
    try {
        topics = suggestTopics(text);
        if (topics.isEmpty()) {
            // nothing to annotate
            return;
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    }
    // properties used for the performance estimates of the classifier
    IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
    IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
    IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
    LiteralFactory lf = LiteralFactory.getInstance();
    // all modifications of the metadata are done while holding the write lock
    ci.getLock().writeLock().lock();
    try {
        // Global text annotation to attach all the topic annotation to it.
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
        for (TopicSuggestion topic : topics) {
            IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
            // add link to entity
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            // add confidence information
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
            // add performance estimates of the classifier if available
            ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
            if (perf.uptodate) {
                metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
                metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
                metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
            }
            // fetch concept label from the entityhub or a referenced site if available
            Entity entity = entityhub.getEntity(topic.conceptUri);
            if (entity == null) {
                entity = referencedSiteManager.getEntity(topic.conceptUri);
            }
            if (entity != null) {
                Representation representation = entity.getRepresentation();
                // TODO: extract all languages based on some configuration instead of hardcoding English
                Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
                if (label == null) {
                    // fall back to rdfs:label if no skos:prefLabel is present
                    label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
                }
                if (label != null) {
                    metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                }
            }
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    } catch (IllegalArgumentException e) {
        throw new EngineException(e);
    } catch (EntityhubException e) {
        throw new EngineException(e);
    } finally {
        // release the lock regardless of the outcome
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) IOException(java.io.IOException) TopicSuggestion(org.apache.stanbol.enhancer.topic.api.TopicSuggestion) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ClassificationReport(org.apache.stanbol.enhancer.topic.api.ClassificationReport) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 18 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class RepresentationReader method readFrom.

/**
 * Reads the Representations contained in the body of a request.
 * <p>
 * Three kinds of requests are handled: form encoded requests (the media
 * type of the data is taken from the form parameter "encoding"), multipart
 * form data requests (only the first body part is used, the others are
 * logged and ignored) and all other requests, where the entity stream is
 * parsed directly using the media type of the request. The actual
 * parsing is delegated to
 * {@link #parseFromContent(RequestData, MediaType)}.
 * @param type not used by this implementation
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType the media type of the request. The "charset" parameter
 * is used if present, otherwise "UTF-8" is assumed
 * @param httpHeaders the request headers; used to determine the accepted
 * media type
 * @param entityStream the stream to read the data from
 * @return the value returned by
 * {@link #parseFromContent(RequestData, MediaType)} for the read content
 * @throws IOException declared by the implemented interface
 * @throws WebApplicationException with status BAD_REQUEST if the request
 * data can not be read, the media type of the content is missing or if
 * it is not supported
 */
@Override
public Map<String, Representation> readFrom(Class<Map<String, Representation>> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    log.info("Read Representations from Request Data");
    long start = System.currentTimeMillis();
    //(1) get the charset and the acceptedMediaType
    String charset = "UTF-8";
    if (mediaType.getParameters().containsKey("charset")) {
        charset = mediaType.getParameters().get("charset");
    }
    MediaType acceptedMediaType = getAcceptedMediaType(httpHeaders);
    log.info("readFrom: mediaType {} | accepted {} | charset {}", new Object[] { mediaType, acceptedMediaType, charset });
    // (2) read the Content from the request (this needs to deal with
    //    MediaType.APPLICATION_FORM_URLENCODED_TYPE and
    //    MediaType.MULTIPART_FORM_DATA_TYPE requests!)
    RequestData content;
    if (mediaType.isCompatible(MediaType.APPLICATION_FORM_URLENCODED_TYPE)) {
        try {
            content = MessageBodyReaderUtils.formForm(entityStream, charset, "encoding", Arrays.asList("entity", "content"));
        } catch (IllegalArgumentException e) {
            // log the textual form of the exception (same as for multipart
            // requests): a Throwable parsed as the only argument is not
            // used to fill the '{}' placeholder
            log.info("Bad Request: {}", e.toString());
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(e.toString()).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        if (content.getMediaType() == null) {
            String message = String.format("Missing parameter %s used to specify the media type " + "(supported values: %s)", "encoding", supportedMediaTypes);
            log.info("Bad Request: {}", message);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        if (!isSupported(content.getMediaType())) {
            String message = String.format("Unsupported Content-Type specified by parameter " + "encoding=%s (supported: %s)", content.getMediaType().toString(), supportedMediaTypes);
            log.info("Bad Request: {}", message);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
    } else if (mediaType.isCompatible(MediaType.MULTIPART_FORM_DATA_TYPE)) {
        log.info("read from MimeMultipart");
        List<RequestData> contents;
        try {
            contents = MessageBodyReaderUtils.fromMultipart(entityStream, mediaType);
        } catch (IllegalArgumentException e) {
            log.info("Bad Request: {}", e.toString());
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(e.toString()).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        if (contents.isEmpty()) {
            String message = "Request does not contain any Mime BodyParts.";
            log.info("Bad Request: {}", message);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        } else if (contents.size() > 1) {
            //print warnings about ignored parts
            log.warn("{} Request contains more than one Parts: others than " + "the first will be ignored", MediaType.MULTIPART_FORM_DATA_TYPE);
            for (int i = 1; i < contents.size(); i++) {
                RequestData ignored = contents.get(i);
                log.warn("  ignore Content {}: Name {}| MediaType {}", new Object[] { i + 1, ignored.getName(), ignored.getMediaType() });
            }
        }
        // only the first body part is processed
        content = contents.get(0);
        if (content.getMediaType() == null) {
            String message = String.format("MediaType not specified for mime body part for file %s. " + "The media type must be one of the supported values: %s", content.getName(), supportedMediaTypes);
            log.info("Bad Request: {}", message);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        if (!isSupported(content.getMediaType())) {
            String message = String.format("Unsupported Content-Type %s specified for mime body part " + "for file %s (supported: %s)", content.getMediaType(), content.getName(), supportedMediaTypes);
            log.info("Bad Request: {}", message);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
    } else {
        // the request body directly contains the data
        content = new RequestData(mediaType, null, entityStream);
    }
    long readingCompleted = System.currentTimeMillis();
    log.info("   ... reading request data {}ms", readingCompleted - start);
    Map<String, Representation> parsed = parseFromContent(content, acceptedMediaType);
    long parsingCompleted = System.currentTimeMillis();
    log.info("   ... parsing data {}ms", parsingCompleted - readingCompleted);
    return parsed;
}
Also used : WebApplicationException(javax.ws.rs.WebApplicationException) RequestData(org.apache.stanbol.entityhub.jersey.utils.MessageBodyReaderUtils.RequestData) MediaType(javax.ws.rs.core.MediaType) List(java.util.List) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation)

Example 19 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class RepresentationReader method parseFromContent.

/**
 * Parses the Representations contained in the parsed request data.
 * <p>
 * The data are parsed as RDF into a graph and one Representation is
 * created for every IRI that is used as subject of a triple. Subjects
 * that are not IRIs are ignored.
 * @param content the request data (media type and entity stream)
 * @param acceptedMediaType the media type set as "Accept" header of
 * error responses
 * @return the parsed Representations with the IRI of the Representation
 * as key
 * @throws UnsupportedOperationException if the media type of the content
 * is compatible with JSON, as parsing JSON is not yet implemented
 * @throws WebApplicationException with status BAD_REQUEST if the media
 * type is not supported or the data can not be parsed; with status
 * INTERNAL_SERVER_ERROR if no parser is available for a supported type
 */
public Map<String, Representation> parseFromContent(RequestData content, MediaType acceptedMediaType) {
    // (3) Parse the Representation(s) from the entity stream
    if (content.getMediaType().isCompatible(MediaType.APPLICATION_JSON_TYPE)) {
        //parse from json
        throw new UnsupportedOperationException("Parsing of JSON not yet implemented :(");
    } else if (isSupported(content.getMediaType())) {
        //from RDF serialisation
        RdfValueFactory valueFactory = RdfValueFactory.getInstance();
        Map<String, Representation> representations = new HashMap<String, Representation>();
        Set<BlankNodeOrIRI> processed = new HashSet<BlankNodeOrIRI>();
        Graph graph = new IndexedGraph();
        try {
            parser.parse(graph, content.getEntityStream(), content.getMediaType().toString());
        } catch (UnsupportedParsingFormatException e) {
            //throw an internal server error, because we check in
            //isReadable(..) for supported types and still we get here an
            //unsupported format -> therefore it looks like a configuration
            //error of the server (e.g. a missing Bundle providing the parser)
            String message = "Unable to create the Parser for the supported format " + content.getMediaType() + " (" + e + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.INTERNAL_SERVER_ERROR).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        } catch (RuntimeException e) {
            //NOTE: Clerezza seems not to provide specific exceptions on
            //      parsing errors. Hence the catch for all RuntimeException
            String message = "Unable to parse the provided RDF data (format: " + content.getMediaType() + ", message: " + e.getMessage() + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        for (Triple triple : graph) {
            BlankNodeOrIRI resource = triple.getSubject();
            // 'processed' ensures that every subject is handled only once
            if (resource instanceof IRI && processed.add(resource)) {
                //build a new representation
                representations.put(((IRI) resource).getUnicodeString(), valueFactory.createRdfRepresentation((IRI) resource, graph));
            }
        }
        return representations;
    } else {
        //unsupported media type
        String message = String.format("Parsed Content-Type '%s' is not one of the supported %s", content.getMediaType(), supportedMediaTypes);
        log.info("Bad Request: {}", message);
        throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashSet(java.util.HashSet) Set(java.util.Set) WebApplicationException(javax.ws.rs.WebApplicationException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Iterator(java.util.Iterator) RdfValueFactory(org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) HashMap(java.util.HashMap) Map(java.util.Map) MultivaluedMap(javax.ws.rs.core.MultivaluedMap) UnsupportedParsingFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedParsingFormatException)

Example 20 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class ResultListWriter method writeTo.

/**
 * Serializes the parsed result list by using the first registered
 * {@link ModelWriter} that is available for the requested media type and
 * the native type of the list elements.
 * <p>
 * A full wildcard media type is replaced by
 * {@link ModelWriter#DEFAULT_MEDIA_TYPE} and a missing charset by
 * {@link ModelWriter#DEFAULT_CHARSET}. The media type finally used for
 * the serialization is set as "Content-Type" header.
 * @param list the result list to serialize
 * @param type not used by this implementation
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType the requested media type
 * @param httpHeaders the response headers. The "Content-Type" header is
 * set by this method
 * @param entityStream the stream the result list is written to
 * @throws IOException declared by the implemented interface
 * @throws WebApplicationException if no ModelWriter able to serialize the
 * list to the requested media type is available
 */
@Override
public void writeTo(QueryResultList<?> list, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
    //check for wildcard
    if (mediaType.isWildcardType() && mediaType.isWildcardSubtype()) {
        mediaType = ModelWriter.DEFAULT_MEDIA_TYPE;
    }
    String charset = mediaType.getParameters().get("charset");
    if (charset == null) {
        charset = ModelWriter.DEFAULT_CHARSET;
        mediaType = mediaType.withCharset(charset);
        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, mediaType.toString());
    }
    Class<? extends Representation> nativeType;
    if (list.isEmpty()) {
        //for empty lists
        //the type does not matter
        nativeType = null;
    } else if (Representation.class.isAssignableFrom(list.getType())) {
        // the first element is used to determine the native type
        nativeType = ((Representation) list.iterator().next()).getClass();
    } else if (Entity.class.isAssignableFrom(list.getType())) {
        nativeType = ((Entity) list.iterator().next()).getRepresentation().getClass();
    } else {
        //only a list of string ids
        nativeType = null;
    }
    Iterator<ServiceReference> refs = writerRegistry.getModelWriters(getMatchType(mediaType), nativeType).iterator();
    ModelWriter writer = null;
    MediaType selectedMediaType = null;
    // use the first writer that is available and provides a media type
    while ((writer == null || selectedMediaType == null) && refs.hasNext()) {
        writer = writerRegistry.getService(refs.next());
        if (writer != null) {
            if (mediaType.isWildcardType() || mediaType.isWildcardSubtype()) {
                selectedMediaType = writer.getBestMediaType(mediaType);
            } else {
                selectedMediaType = mediaType;
            }
        }
    }
    // NOTE: this check needs to be done before the selected media type is
    // dereferenced, otherwise a missing writer results in a
    // NullPointerException instead of the WebApplicationException
    if (writer == null || selectedMediaType == null) {
        throw new WebApplicationException("Unable to serialize ResultList with " + list.getType() + " (nativeType: " + nativeType + ") to " + mediaType);
    }
    selectedMediaType = selectedMediaType.withCharset(charset);
    // report the media type actually used for the serialization (the
    // requested one may still contain a wildcard)
    httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, selectedMediaType.toString());
    log.debug("serialize ResultList of {} with ModelWriter {}", nativeType, writer.getClass().getName());
    writer.write(list, entityStream, selectedMediaType);
}
Also used : Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) WebApplicationException(javax.ws.rs.WebApplicationException) MediaType(javax.ws.rs.core.MediaType) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) ModelWriter(org.apache.stanbol.entityhub.web.ModelWriter) ServiceReference(org.osgi.framework.ServiceReference)

Aggregations

Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)198 Test (org.junit.Test)117 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)32 HashSet (java.util.HashSet)31 Yard (org.apache.stanbol.entityhub.servicesapi.yard.Yard)25 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)16 YardException (org.apache.stanbol.entityhub.servicesapi.yard.YardException)15 ValueFactory (org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)14 Reference (org.apache.stanbol.entityhub.servicesapi.model.Reference)12 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)12 ArrayList (java.util.ArrayList)11 RdfRepresentation (org.apache.stanbol.entityhub.model.sesame.RdfRepresentation)10 IOException (java.io.IOException)9 IRI (org.apache.clerezza.commons.rdf.IRI)9 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)8 Graph (org.apache.clerezza.commons.rdf.Graph)8 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)8 RdfRepresentation (org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation)8 RdfValueFactory (org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory)8 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)8