use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class EntityhubImpl method importEntity.
/**
 * Imports the parsed remote Entity into the Entityhub.
 * <p>
 * A new locally managed Entity and a new mapping Entity are created in the
 * Entityhub yard, initialised from the remote Entity and finally both stored.
 *
 * @param remoteEntity the Entity to import
 * @return the locally managed Entity created and stored within the Entityhub,
 * or <code>null</code> if the parsed Entity is <code>null</code> or the site
 * manager does not know the site of the parsed Entity
 * @throws YardException if thrown by one of the called Entityhub operations
 */
protected Entity importEntity(Entity remoteEntity) throws YardException {
    if (remoteEntity == null) {
        return null;
    }
    Site remoteSite = siteManager.getSite(remoteEntity.getSite());
    if (remoteSite == null) {
        log.warn("Unable to import Entity {} because the ReferencedSite {} is currently not active -> return null", remoteEntity.getId(), remoteEntity.getSite());
        return null;
    }
    ValueFactory factory = entityhubYard.getValueFactory();

    // (1) create the locally managed Entity and copy over the remote data
    Entity managed = loadEntity(entityhubYard.create(constructResourceId(DEFAULT_MANAGED_ENTITY_PREFIX)));
    importEntity(remoteEntity, remoteSite, managed, factory);

    // (2) create the mapping that links the managed Entity to the remote one
    Entity mapping = loadEntity(entityhubYard.create(constructResourceId(DEFAULT_MAPPING_PREFIX)));
    establishMapping(managed, remoteEntity, remoteSite, mapping);

    // (3) persist both: first the managed Entity, then the mapping
    storeEntity(managed);
    storeEntity(mapping);
    return managed;
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class TopicClassificationEngine method computeEnhancements.
/**
 * Suggests topics for the textual content of the parsed ContentItem and
 * writes one topic annotation per suggestion to the metadata of the
 * ContentItem. All topic annotations are linked (dc:relation) to a single
 * text annotation created for this call.
 *
 * @param ci the ContentItem to enhance
 * @throws IllegalStateException if the ContentItem has no content part with
 * a supported mime type or if its language is not accepted by this engine
 * @throws InvalidContentException if the text can not be read from the
 * content part
 * @throws EngineException if the classification, the lookup of performance
 * estimates or the lookup of the concept Entity fails
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with a supported Mime Type found for ContentItem " + ci.getUri() + " (supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was NOT called and indicates a bug in the used EnhancementJobManager!");
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    // an empty set or the empty string entry accept any language
    if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language) || acceptedLanguageSet.contains(""))) {
        throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as active for this Engine (active: " + acceptedLanguageSet + ").");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(String.format("Unable to extract textual content from ContentPart %s of ContentItem %s!", contentPart.getKey(), ci.getUri()), e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph metadata = ci.getMetadata();
    // the classification itself is done before the write lock is acquired
    List<TopicSuggestion> topics;
    try {
        topics = suggestTopics(text);
        if (topics.isEmpty()) {
            return;
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    }
    IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
    IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
    IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
    LiteralFactory lf = LiteralFactory.getInstance();
    // NOTE(review): the performance and Entity lookups below run while the
    // write lock is held - confirm that this is acceptable for slow sites
    ci.getLock().writeLock().lock();
    try {
        // Global text annotation to attach all the topic annotation to it.
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
        for (TopicSuggestion topic : topics) {
            IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
            // add link to entity
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            // add confidence information
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
            // add performance estimates of the classifier if available
            ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
            if (perf.uptodate) {
                metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
                metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
                metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
            }
            // fetch concept label from the entityhub or a referenced site if available
            Entity entity = entityhub.getEntity(topic.conceptUri);
            if (entity == null) {
                entity = referencedSiteManager.getEntity(topic.conceptUri);
            }
            if (entity != null) {
                Representation representation = entity.getRepresentation();
                // TODO: extract all languages based on some configuration instead of hardcoding English
                // skos:prefLabel is preferred; rdfs:label is only used as fallback
                Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
                if (label == null) {
                    label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
                }
                if (label != null) {
                    metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                }
            }
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    } catch (IllegalArgumentException e) {
        throw new EngineException(e);
    } catch (EntityhubException e) {
        throw new EngineException(e);
    } finally {
        // always release the lock, also if one of the lookups failed
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class RepresentationReader method readFrom.
/**
 * Reads the Representations contained in the request body.
 * <p>
 * Form encoded requests, multipart requests (only the first body part is
 * used, further parts are logged and ignored) and requests that directly
 * contain the data are handled. The extracted content is then parsed by
 * {@link #parseFromContent(RequestData, MediaType)}.
 *
 * @param type not used by this implementation
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType the media type of the request; its "charset" parameter
 * (default "UTF-8") is used to decode form encoded requests
 * @param httpHeaders the request headers used to determine the accepted
 * media type
 * @param entityStream the stream to read the request data from
 * @return the parsed Representations as returned by
 * {@link #parseFromContent(RequestData, MediaType)}
 * @throws IOException declared by the implemented interface
 * @throws WebApplicationException with status 400 if the request data can
 * not be extracted, or if the media type of the content is missing or not
 * supported
 */
@Override
public Map<String, Representation> readFrom(Class<Map<String, Representation>> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    log.info("Read Representations from Request Data");
    long start = System.currentTimeMillis();
    //(1) get the charset and the acceptedMediaType
    String charset = "UTF-8";
    if (mediaType.getParameters().containsKey("charset")) {
        charset = mediaType.getParameters().get("charset");
    }
    MediaType acceptedMediaType = getAcceptedMediaType(httpHeaders);
    log.info("readFrom: mediaType {} | accepted {} | charset {}", new Object[] { mediaType, acceptedMediaType, charset });
    // (2) read the Content from the request (this needs to deal with
    // MediaType.APPLICATION_FORM_URLENCODED_TYPE and
    // MediaType.MULTIPART_FORM_DATA_TYPE requests!
    RequestData content;
    if (mediaType.isCompatible(MediaType.APPLICATION_FORM_URLENCODED_TYPE)) {
        try {
            content = MessageBodyReaderUtils.formForm(entityStream, charset, "encoding", Arrays.asList("entity", "content"));
        } catch (IllegalArgumentException e) {
            // log the textual form (as done for multipart requests) so that
            // the placeholder of the log message is actually filled
            throw createBadRequest(e.toString(), acceptedMediaType);
        }
        if (content.getMediaType() == null) {
            throw createBadRequest(String.format("Missing parameter %s used to specify the media type (supported values: %s)", "encoding", supportedMediaTypes), acceptedMediaType);
        }
        if (!isSupported(content.getMediaType())) {
            throw createBadRequest(String.format("Unsupported Content-Type specified by parameter " + "encoding=%s (supported: %s)", content.getMediaType().toString(), supportedMediaTypes), acceptedMediaType);
        }
    } else if (mediaType.isCompatible(MediaType.MULTIPART_FORM_DATA_TYPE)) {
        log.info("read from MimeMultipart");
        List<RequestData> contents;
        try {
            contents = MessageBodyReaderUtils.fromMultipart(entityStream, mediaType);
        } catch (IllegalArgumentException e) {
            throw createBadRequest(e.toString(), acceptedMediaType);
        }
        if (contents.isEmpty()) {
            throw createBadRequest("Request does not contain any Mime BodyParts.", acceptedMediaType);
        } else if (contents.size() > 1) {
            //print warnings about ignored parts
            log.warn("{} Request contains more than one Parts: others than " + "the first will be ignored", MediaType.MULTIPART_FORM_DATA_TYPE);
            for (int i = 1; i < contents.size(); i++) {
                RequestData ignored = contents.get(i);
                log.warn(" ignore Content {}: Name {}| MediaType {}", new Object[] { i + 1, ignored.getName(), ignored.getMediaType() });
            }
        }
        content = contents.get(0);
        if (content.getMediaType() == null) {
            throw createBadRequest(String.format("MediaType not specified for mime body part for file %s. " + "The media type must be one of the supported values: %s", content.getName(), supportedMediaTypes), acceptedMediaType);
        }
        if (!isSupported(content.getMediaType())) {
            throw createBadRequest(String.format("Unsupported Content-Type %s specified for mime body part " + "for file %s (supported: %s)", content.getMediaType(), content.getName(), supportedMediaTypes), acceptedMediaType);
        }
    } else {
        // the request body directly contains the data
        content = new RequestData(mediaType, null, entityStream);
    }
    long readingCompleted = System.currentTimeMillis();
    log.info(" ... reading request data {}ms", readingCompleted - start);
    Map<String, Representation> parsed = parseFromContent(content, acceptedMediaType);
    long parsingCompleted = System.currentTimeMillis();
    log.info(" ... parsing data {}ms", parsingCompleted - readingCompleted);
    return parsed;
}

/**
 * Logs the parsed message on INFO level and creates the exception used to
 * answer the request with the status 400 (Bad Request).
 *
 * @param message the message used for logging and as entity of the response
 * @param acceptedMediaType the value set for the Accept header of the response
 * @return the exception to be thrown by the caller
 */
private WebApplicationException createBadRequest(String message, MediaType acceptedMediaType) {
    log.info("Bad Request: {}", message);
    return new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class RepresentationReader method parseFromContent.
/**
 * Parses the Representations from the parsed request content.
 * <p>
 * Content with a supported media type is parsed as RDF. One Representation
 * is created for every IRI used as subject of a parsed triple; triples with
 * other subjects do not result in a Representation.
 *
 * @param content the content (media type and stream) to parse
 * @param acceptedMediaType the value set for the Accept header of error
 * responses
 * @return the parsed Representations with their IRI (unicode string) as key
 * @throws UnsupportedOperationException for JSON content (not implemented)
 * @throws WebApplicationException with status 400 if the media type is not
 * supported or the data can not be parsed; with status 500 if no parser is
 * available for a supported media type
 */
public Map<String, Representation> parseFromContent(RequestData content, MediaType acceptedMediaType) {
    // (3) Parse the Representation(s) from the entity stream
    if (content.getMediaType().isCompatible(MediaType.APPLICATION_JSON_TYPE)) {
        //parse from json
        throw new UnsupportedOperationException("Parsing of JSON not yet implemented :(");
    } else if (isSupported(content.getMediaType())) {
        //from RDF serialisation
        RdfValueFactory valueFactory = RdfValueFactory.getInstance();
        Map<String, Representation> representations = new HashMap<String, Representation>();
        Set<BlankNodeOrIRI> processed = new HashSet<BlankNodeOrIRI>();
        Graph graph = new IndexedGraph();
        try {
            parser.parse(graph, content.getEntityStream(), content.getMediaType().toString());
        } catch (UnsupportedParsingFormatException e) {
            //throw an internal server Error, because the media type was
            //already checked to be supported and still the parser reports an
            //unsupported format -> therefore it looks like a configuration
            //error of the server (e.g. a missing Bundle)
            String message = "Unable to create the Parser for the supported format " + content.getMediaType() + " (" + e + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.INTERNAL_SERVER_ERROR).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        } catch (RuntimeException e) {
            //NOTE: Clerezza seems not to provide specific exceptions on
            // parsing errors. Hence the catch for all RuntimeException
            String message = "Unable to parse the provided RDF data (format: " + content.getMediaType() + ", message: " + e.getMessage() + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        for (Iterator<Triple> st = graph.iterator(); st.hasNext(); ) {
            BlankNodeOrIRI resource = st.next().getSubject();
            // 'processed' ensures that only a single Representation is
            // created for a subject used by several triples
            if (resource instanceof IRI && processed.add(resource)) {
                //build a new representation
                representations.put(((IRI) resource).getUnicodeString(), valueFactory.createRdfRepresentation((IRI) resource, graph));
            }
        }
        return representations;
    } else {
        //unsupported media type
        String message = String.format("Parsed Content-Type '%s' is not one of the supported %s", content.getMediaType(), supportedMediaTypes);
        log.info("Bad Request: {}", message);
        throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
    }
}
use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.
the class ResultListWriter method writeTo.
/**
 * Serialises the parsed result list by using a ModelWriter obtained from the
 * writer registry for the requested media type and the native type of the
 * results.
 *
 * @param list the result list to serialise
 * @param type not used by this implementation
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType the requested media type; a full wildcard is replaced by
 * the default media type of the ModelWriter
 * @param httpHeaders the response headers; the Content-Type header is set to
 * the media type (including the charset) used for the serialisation
 * @param entityStream the stream to write the serialised list to
 * @throws IOException declared by the implemented interface
 * @throws WebApplicationException if no ModelWriter able to serialise the
 * list to the requested media type is available
 */
@Override
public void writeTo(QueryResultList<?> list, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
    //check for wildcard
    if (mediaType.isWildcardType() && mediaType.isWildcardSubtype()) {
        mediaType = ModelWriter.DEFAULT_MEDIA_TYPE;
    }
    String charset = mediaType.getParameters().get("charset");
    if (charset == null) {
        charset = ModelWriter.DEFAULT_CHARSET;
        mediaType = mediaType.withCharset(charset);
        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, mediaType.toString());
    }
    Class<? extends Representation> nativeType;
    if (list.isEmpty()) {
        //for empty lists the type does not matter
        nativeType = null;
    } else if (Representation.class.isAssignableFrom(list.getType())) {
        nativeType = ((Representation) list.iterator().next()).getClass();
    } else if (Entity.class.isAssignableFrom(list.getType())) {
        nativeType = ((Entity) list.iterator().next()).getRepresentation().getClass();
    } else {
        //only a list of string ids
        nativeType = null;
    }
    Iterator<ServiceReference> refs = writerRegistry.getModelWriters(getMatchType(mediaType), nativeType).iterator();
    ModelWriter writer = null;
    MediaType selectedMediaType = null;
    // use the first available writer that is able to provide a media type
    while ((writer == null || selectedMediaType == null) && refs.hasNext()) {
        writer = writerRegistry.getService(refs.next());
        if (writer != null) {
            if (mediaType.isWildcardType() || mediaType.isWildcardSubtype()) {
                selectedMediaType = writer.getBestMediaType(mediaType);
            } else {
                selectedMediaType = mediaType;
            }
        }
    }
    // this check needs to be done BEFORE the selected media type is used.
    // Otherwise a missing writer results in a NullPointerException
    if (writer == null || selectedMediaType == null) {
        throw new WebApplicationException("Unable to serialize ResultList with " + list.getType() + " (nativeType: " + nativeType + ") to " + mediaType);
    }
    selectedMediaType = selectedMediaType.withCharset(charset);
    // report the media type actually used for the serialisation (the
    // requested one might still contain a wildcard)
    httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, selectedMediaType.toString());
    log.debug("serialize ResultList of {} with ModelWriter {}", nativeType, writer.getClass().getName());
    writer.write(list, entityStream, selectedMediaType);
}
Aggregations