use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.
the class TopicClassificationEngine method computeEnhancements.
/**
 * Suggests topics for the textual content of the passed ContentItem and
 * writes them as topic annotations to the metadata of the ContentItem.
 * <p>
 * The method returns without adding any metadata if the extracted text is
 * blank or if no topics are suggested. Otherwise a single text annotation
 * is created and one entity enhancement per suggested topic is linked to
 * it. Each enhancement carries the concept reference, the confidence, the
 * performance estimates of the classifier (only if those are up to date)
 * and - if the concept can be resolved via the entityhub or the referenced
 * site manager - an English label.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if topic suggestion, the lookup of performance
 * estimates or the lookup of the concept entity fails
 * @throws IllegalStateException if the ContentItem has no content part
 * with a supported mime type or if its language is not accepted by this
 * engine
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with a supported Mime Type found for ContentItem "
                + ci.getUri() + " (supported: '" + SUPPORTED_MIMETYPES
                + "') -> this indicates that canEnhance was NOT called and indicates a bug in the used "
                + "EnhancementJobManager!");
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    // an empty set or an entry for the empty string both accept any language
    if (!(acceptedLanguageSet.isEmpty() || acceptedLanguageSet.contains(language)
            || acceptedLanguageSet.contains(""))) {
        throw new IllegalStateException("The language '" + language
                + "' of the ContentItem is not configured as active for this Engine (active: "
                + acceptedLanguageSet + ").");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(String.format(
            "Unable to extract textual content from ContentPart %s of ContentItem %s!",
            contentPart.getKey(), ci.getUri()), e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph metadata = ci.getMetadata();
    List<TopicSuggestion> topics;
    try {
        topics = suggestTopics(text);
        if (topics.isEmpty()) {
            // nothing to annotate
            return;
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    }
    IRI precision = new IRI(NamespaceEnum.fise + "classifier/precision");
    IRI recall = new IRI(NamespaceEnum.fise + "classifier/recall");
    IRI f1 = new IRI(NamespaceEnum.fise + "classifier/f1");
    LiteralFactory lf = LiteralFactory.getInstance();
    // all changes to the metadata are done while holding the write lock
    ci.getLock().writeLock().lock();
    try {
        // Global text annotation to attach all the topic annotation to it.
        IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
        metadata.add(new TripleImpl(textAnnotation, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
        for (TopicSuggestion topic : topics) {
            IRI enhancement = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION, textAnnotation));
            // add link to entity
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE, new IRI(topic.conceptUri)));
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            // add confidence information
            metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(topic.score))));
            // add performance estimates of the classifier if available
            ClassificationReport perf = getPerformanceEstimates(topic.conceptUri);
            if (perf.uptodate) {
                metadata.add(new TripleImpl(enhancement, precision, lf.createTypedLiteral(Double.valueOf(perf.precision))));
                metadata.add(new TripleImpl(enhancement, recall, lf.createTypedLiteral(Double.valueOf(perf.recall))));
                metadata.add(new TripleImpl(enhancement, f1, lf.createTypedLiteral(Double.valueOf(perf.f1))));
            }
            // fetch concept label from the entityhub or a referenced site if available
            Entity entity = entityhub.getEntity(topic.conceptUri);
            if (entity == null) {
                entity = referencedSiteManager.getEntity(topic.conceptUri);
            }
            if (entity != null) {
                Representation representation = entity.getRepresentation();
                // TODO: extract all languages based on some configuration instead of hardcoding English
                Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", "en", "en-US", "en-GB");
                if (label == null) {
                    // fall back to rdfs:label if there is no skos:prefLabel
                    label = representation.getFirst(NamespaceEnum.rdfs + "label", "en", "en-US", "en-GB");
                }
                if (label != null) {
                    metadata.add(new TripleImpl(enhancement, org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                }
            }
        }
    } catch (ClassifierException e) {
        throw new EngineException(e);
    } catch (IllegalArgumentException e) {
        throw new EngineException(e);
    } catch (EntityhubException e) {
        throw new EngineException(e);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.
the class AbstractBackend method listSubjects.
/**
 * Lists references to all resources that have the passed value for the
 * passed property.
 * <p>
 * The kind of constraint used for the query depends on the passed value:
 * a reference constraint for URIs, a text constraint for {@link Text}
 * values (language specific if the text defines a language) and a value
 * constraint for everything else. Results are requested in pages of
 * {@code DEFAULT_MAX_SELECT}; further pages are only requested while the
 * last page was full and the number of collected references does not
 * exceed {@code DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT}.
 *
 * @param property the property (its string representation is used as field)
 * @param object the value to search for
 * @return the references to the matching resources (may be empty)
 * @throws IllegalStateException if the query fails with an
 * {@link EntityhubException}
 */
@Override
public Collection<Object> listSubjects(Object property, Object object) {
    final FieldQuery fieldQuery = createQuery();
    if (this.isURI(object)) {
        fieldQuery.setConstraint(property.toString(), new ReferenceConstraint(object.toString()));
    } else if (object instanceof Text) {
        final Text literal = (Text) object;
        final String lang = literal.getLanguage();
        final TextConstraint textConstraint = lang == null
                ? new TextConstraint(literal.getText(), PatternType.none, true)
                : new TextConstraint(literal.getText(), PatternType.none, true, lang);
        fieldQuery.setConstraint(property.toString(), textConstraint);
    } else {
        final Set<DataTypeEnum> primaryTypes = DataTypeEnum.getPrimaryDataTypes(object.getClass());
        if (primaryTypes != null && !primaryTypes.isEmpty()) {
            // restrict the value constraint to the data types of the value
            final Collection<String> typeUris = new ArrayList<String>(primaryTypes.size());
            for (DataTypeEnum primaryType : primaryTypes) {
                typeUris.add(primaryType.getUri());
            }
            fieldQuery.setConstraint(property.toString(), new ValueConstraint(object, typeUris));
        } else {
            fieldQuery.setConstraint(property.toString(), new ValueConstraint(object));
        }
    }
    fieldQuery.setLimit(Integer.valueOf(DEFAULT_MAX_SELECT));

    QueryResultList<String> page;
    try {
        page = query(fieldQuery);
    } catch (EntityhubException e) {
        throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
    }
    if (page.isEmpty()) {
        return Collections.emptySet();
    }
    if (page.size() == 1) {
        // a single result is assumed to be a likely case
        return Collections.singleton((Object) getValueFactory().createReference(page.iterator().next()));
    }

    final Collection<Object> collected = new HashSet<Object>(page.size());
    int offset = 0;
    while (true) {
        for (String id : page) {
            collected.add(getValueFactory().createReference(id));
        }
        // stop if the last page was not full or enough references are collected
        if (page.size() < DEFAULT_MAX_SELECT
                || collected.size() > DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT) {
            break;
        }
        offset += page.size();
        fieldQuery.setOffset(offset);
        try {
            page = query(fieldQuery);
        } catch (EntityhubException e) {
            throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
        }
    }
    return collected;
}
use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.
the class TrackingDereferencerBase method dereference.
/**
 * Dereferences the entity with the passed URI by using the tracked service
 * and copies its data to the passed graph.
 * <p>
 * If the context defines neither a field mapper, nor an LDPath program,
 * nor languages, all data of the representation are copied. Otherwise the
 * field mapper / language filter and the LDPath program are applied
 * (in that order) for those that are present.
 *
 * @param uri the URI of the entity to dereference
 * @param graph the graph to store the dereferenced data
 * @param writeLock the write lock for the graph
 * @param dc the dereference context; cast to
 * {@link EntityhubDereferenceContext} without a type check
 * @return {@code true} if a representation was found for the URI,
 * {@code false} otherwise
 * @throws DereferenceException if the service is not available or if
 * retrieving the representation fails with an {@link EntityhubException}
 */
@Override
public final boolean dereference(IRI uri, Graph graph, Lock writeLock, DereferenceContext dc) throws DereferenceException {
    T service = getService();
    if (service == null) {
        // NOTE(review): serviceClass.getClass() looks like it yields the
        // runtime class of the field (e.g. "Class") rather than the name of
        // the tracked service type - confirm against the field declaration
        // and consider serviceClass.getSimpleName() instead.
        throw new DereferenceException(uri, serviceClass.getClass().getSimpleName() + " service is currently not available");
    }
    EntityhubDereferenceContext derefContext = (EntityhubDereferenceContext) dc;
    Representation rep;
    try {
        rep = getRepresentation(service, uri.getUnicodeString(), derefContext.isOfflineMode());
    } catch (EntityhubException e) {
        throw new DereferenceException(uri, e);
    }
    //we need the languages as strings
    final Set<String> langs = derefContext.getLanguages();
    final FieldMapper fieldMapper = derefContext.getFieldMapper();
    final Program<Object> ldpathProgram = derefContext.getProgram();
    if (rep == null) {
        //no data available for the URI
        return false;
    }
    final boolean hasLanguages = langs != null && !langs.isEmpty();
    if (fieldMapper == null && ldpathProgram == null && !hasLanguages) {
        //no filters configured -> copy everything
        copyAll(uri, rep, graph, writeLock);
    } else {
        //we need to apply some filters while dereferencing
        if (fieldMapper != null || hasLanguages) {
            //this considers specified fields and included languages
            copyMapped(uri, rep, fieldMapper, langs, graph, writeLock);
        }
        if (ldpathProgram != null) {
            //this executes LDPath statements
            copyLdPath(uri, getRdfBackend(service), ldpathProgram, langs, graph, writeLock);
        }
    }
    return true;
}
use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.
the class TrackingDereferencerBase method copyLdPath.
/**
 * Executes the passed LDPath program with the passed URI as context and
 * writes the results to the passed graph.
 * <p>
 * Results are first collected in an intermediate graph and only copied to
 * the target graph (while holding the write lock) after all field mappings
 * were executed, so that no partial results are written on an error.
 *
 * @param uri the context
 * @param rdfBackend the RdfBackend the LDPath program is executed on
 * @param ldpathProgram the {@link Program} provided by the dereference context
 * @param langs the set of languages to dereference; if {@code null} or
 * empty no language filter is applied
 * @param graph the graph to store the results
 * @param writeLock the write lock for the graph
 * @throws DereferenceException on any {@link RuntimeException} thrown while
 * evaluating a field mapping and on any {@link EntityhubException} while
 * executing the LDPath program
 */
private void copyLdPath(IRI uri, RDFBackend<Object> rdfBackend, Program<Object> ldpathProgram, Set<String> langs, Graph graph, Lock writeLock) throws DereferenceException {
    //the LDPath context needs to be a RdfReference
    RdfReference ldpathContext = valueFactory.createReference(uri);
    //intermediate graph (we do not want partial results on an error)
    Graph buffer = new SimpleGraph();
    RdfRepresentation collected = valueFactory.createRdfRepresentation(uri, buffer);
    try {
        for (org.apache.marmotta.ldpath.model.fields.FieldMapping<?, Object> fieldMapping : ldpathProgram.getFields()) {
            Collection<?> fieldValues;
            try {
                fieldValues = fieldMapping.getValues(rdfBackend, ldpathContext);
            } catch (RuntimeException e) {
                throw new DereferenceException(uri, e);
            }
            if (fieldValues == null || fieldValues.isEmpty()) {
                //nothing selected by this field mapping
                continue;
            }
            String field = fieldMapping.getFieldName();
            if (langs != null && !langs.isEmpty()) {
                //only keep non-text values and texts in one of the languages
                for (Object fieldValue : fieldValues) {
                    boolean filteredOut = fieldValue instanceof Text
                            && !langs.contains(((Text) fieldValue).getLanguage());
                    if (!filteredOut) {
                        collected.add(field, fieldValue);
                    }
                }
            } else {
                //no language filter -> add all values at once
                collected.add(field, fieldValues);
            }
        }
    } catch (EntityhubException e) {
        throw new DereferenceException(uri, e);
    }
    if (log.isTraceEnabled()) {
        log.trace("dereferenced via LDPath {}", ModelUtils.getRepresentationInfo(collected));
    }
    if (buffer.isEmpty()) {
        return;
    }
    //copy the buffered results to the target graph
    writeLock.lock();
    try {
        graph.addAll(buffer);
    } finally {
        writeLock.unlock();
    }
}
use of org.apache.stanbol.entityhub.servicesapi.EntityhubException in project stanbol by apache.
the class EntityhubRootResource method getMapping.
/**
 * Looks up the mapping with the passed id.
 * <p>
 * If no id is passed, the HTML documentation of the service is returned
 * for requests accepting {@code text/html}; all other requests are
 * answered with {@code 400 Bad Request}.
 *
 * @param reference the id (URI) of the mapping
 * @param headers the HTTP headers of the request; used to determine the
 * accepted media type
 * @return the mapping, {@code 404 Not Found} if no mapping exists for the
 * id, or {@code 500 Internal Server Error} if the lookup fails with an
 * {@link EntityhubException}
 */
@GET
@Path("mapping")
@Produces({ APPLICATION_JSON, RDF_XML, N3, TURTLE, X_TURTLE, RDF_JSON, N_TRIPLE, TEXT_HTML })
public Response getMapping(@QueryParam("id") String reference, @Context HttpHeaders headers) throws WebApplicationException {
    log.debug("get mapping for request > id : {} > accept: {}", reference, headers.getAcceptableMediaTypes());
    Set<String> supported = new HashSet<String>(JerseyUtils.REPRESENTATION_SUPPORTED_MEDIA_TYPES);
    supported.add(TEXT_HTML);
    MediaType acceptedMediaType = getAcceptableMediaType(headers, supported, APPLICATION_JSON_TYPE);
    if (reference == null || reference.isEmpty()) {
        //if HTML -> print the documentation of the RESTful service
        if (TEXT_HTML_TYPE.isCompatible(acceptedMediaType)) {
            ResponseBuilder rb = Response.ok(new Viewable("mapping", this));
            rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML + "; charset=utf-8");
            //addCORSOrigin(servletContext, rb, headers);
            //return the documentation; without this the request would fall
            //through to the mapping lookup with a missing id
            return rb.build();
        } else {
            return Response.status(Status.BAD_REQUEST).entity("The mapping id (URI) is missing.\n").header(HttpHeaders.ACCEPT, acceptedMediaType).build();
        }
    }
    //Entityhub entityhub = ContextHelper.getServiceFromContext(Entityhub.class, servletContext);
    Entity mapping;
    try {
        mapping = entityhub.getMappingById(reference);
    } catch (EntityhubException e) {
        log.error("error while getting the mapping for {}", reference, e);
        return Response.status(Status.INTERNAL_SERVER_ERROR).build();
    }
    if (mapping == null) {
        return Response.status(Status.NOT_FOUND).entity("No mapping found for '" + reference + "'.\n").header(HttpHeaders.ACCEPT, acceptedMediaType).build();
    } else {
        ResponseBuilder rb = Response.ok(mapping);
        rb.header(HttpHeaders.CONTENT_TYPE, acceptedMediaType + "; charset=utf-8");
        //addCORSOrigin(servletContext, rb, headers);
        return rb.build();
    }
}
Aggregations