Example use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache,
taken from the class ExecutionPlanHelper, method getExecutable.
/**
 * Evaluates the parsed {@link ImmutableGraph execution plan} against the set of
 * {@link ExecutionPlan#EXECUTION_NODE ep:ExecutionNode}s that were already
 * executed and determines which nodes may be executed next. A node is
 * executable if all nodes it depends on (via ep:dependsOn) are contained in
 * the executed set.
 * @param executionPlan the execution plan
 * @param executed the already executed {@link ExecutionPlan#EXECUTION_NODE node}s
 * or an empty set to determine the nodes to start the execution.
 * @return the set of nodes that can be executed next or an empty set if
 * there are no more nodes to execute.
 */
public static Set<BlankNodeOrIRI> getExecutable(Graph executionPlan, Set<BlankNodeOrIRI> executed) {
    Set<BlankNodeOrIRI> executable = new HashSet<BlankNodeOrIRI>();
    Iterator<Triple> nodeIt = executionPlan.filter(null, RDF_TYPE, EXECUTION_NODE);
    while (nodeIt.hasNext()) {
        BlankNodeOrIRI candidate = nodeIt.next().getSubject();
        if (executed.contains(candidate)) {
            continue; // already processed - nothing to decide
        }
        // the candidate is executable only if every ep:dependsOn target was executed
        boolean allDependenciesExecuted = true;
        Iterator<Triple> dependencies = executionPlan.filter(candidate, DEPENDS_ON, null);
        while (allDependenciesExecuted && dependencies.hasNext()) {
            allDependenciesExecuted = executed.contains(dependencies.next().getObject());
        }
        if (allDependenciesExecuted) {
            executable.add(candidate);
        }
    }
    return executable;
}
Example use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache,
taken from the class ExecutionMetadata, method parseFrom.
/**
 * Parses the {@link ExecutionMetadata} for the given content item from the
 * parsed execution metadata graph.
 * @param executionMetadata the graph holding the execution metadata
 * @param contentItemUri the URI of the content item
 * @return the parsed {@link ExecutionMetadata} or <code>null</code> if the
 * graph does not contain a chain execution for the parsed content item.
 */
public static ExecutionMetadata parseFrom(Graph executionMetadata, IRI contentItemUri) {
    BlankNodeOrIRI chainExecutionNode = ExecutionMetadataHelper.getChainExecution(executionMetadata, contentItemUri);
    // no chain execution present -> no metadata to expose
    return chainExecutionNode == null ? null : new ExecutionMetadata(executionMetadata, contentItemUri, chainExecutionNode);
}
Example use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache,
taken from the class EnhancementEngineHelper, method getLanguageAnnotations.
/**
 * Getter for the resources of fise:TextAnnotations that do have a value
 * of the dc:language property. The returned list is sorted by 'fise:confidence'
 * in decreasing order. Annotations with missing confidence are ranked last.<p>
 * NOTE that the returned list will likely contain annotations for the same language
 * if multiple language identification engines are used in the same {@link Chain}.
 * @param graph the graph with the enhancements.
 * Typically {@link ContentItem#getMetadata()}
 * @return the sorted list of language annotations or an empty list if none.
 * @throws IllegalArgumentException if <code>null</code> is parsed as graph
 */
public static List<BlankNodeOrIRI> getLanguageAnnotations(Graph graph) {
    if (graph == null) {
        throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
    }
    // I do not use SPARQL, because I do not want to instantiate a QueryEngine
    final Map<BlankNodeOrIRI, Double> confidences = new HashMap<BlankNodeOrIRI, Double>();
    List<BlankNodeOrIRI> langAnnotations = new ArrayList<BlankNodeOrIRI>();
    Iterator<Triple> textAnnotations = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    while (textAnnotations.hasNext()) {
        BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
        String language = getString(graph, textAnnotation, DC_LANGUAGE);
        if (language != null) {
            // confidence stays null if the property is missing or not numeric
            Double confidence = null;
            try {
                confidence = get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
            } catch (InvalidLiteralTypeException e) {
                // STANBOL-1417: not a double value
                try {
                    //try with float
                    Float fconf = get(graph, textAnnotation, ENHANCER_CONFIDENCE, Float.class, lf);
                    if (fconf != null) {
                        confidence = Double.valueOf(fconf.doubleValue());
                    }
                } catch (InvalidLiteralTypeException e1) {
                    // log the float failure (e1) so the cause of the final attempt
                    // is not swallowed (previously the stale double failure 'e'
                    // was logged instead and 'e1' was lost)
                    log.warn("Unable to parse confidence for language annotation " + textAnnotation, e1);
                }
            }
            confidences.put(textAnnotation, confidence);
            langAnnotations.add(textAnnotation);
        }
    }
    if (langAnnotations.size() > 1) {
        Collections.sort(langAnnotations, new Comparator<BlankNodeOrIRI>() {
            @Override
            public int compare(BlankNodeOrIRI o1, BlankNodeOrIRI o2) {
                Double c1 = confidences.get(o1);
                Double c2 = confidences.get(o2);
                // decreasing order (values without confidence last)
                if (c1 == null) {
                    return c2 == null ? 0 : 1;
                } else if (c2 == null) {
                    return -1;
                } else {
                    return c2.compareTo(c1);
                }
            }
        });
    }
    return langAnnotations;
}
Example use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache,
taken from the class ExecutionPlanHelper, method calculateExecutionPlan.
/**
 * Creates an execution plan based on the
 * {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING} of the parsed
 * EnhancementEngines. NOTE that the parsed list is modified as it is sorted by
 * using the {@link EnhancementEngineHelper#EXECUTION_ORDER_COMPARATOR}.<p>
 * A second parameter with the set of optional engines can be used to define
 * what {@link ExecutionPlan#EXECUTION_NODE} in the execution plan should be
 * marked as {@link ExecutionPlan#OPTIONAL}.
 * @param chainName the name of the Chain to build the execution plan for
 * @param availableEngines the list of engines
 * @param optional the names of optional engines.
 * @param missing the names of missing engines
 * @param enhProps chain scoped enhancement properties. The key of the outer
 * map are the name of the engine or <code>null</code> for the chain. The
 * inner map uses the property as key and the value(s) as value. Multiple
 * values can be parsed as {@link Collection}. Single values will be
 * converted to RDF {@link TypedLiteral}s by using the {@link LiteralFactory}.
 * For types not supported by the LiteralFactory the <code>toString()</code>
 * method will be used. <code>null</code> can be parsed if no enhancement
 * properties are present.
 * @return the execution plan
 * @since 0.12.1
 */
public static ImmutableGraph calculateExecutionPlan(String chainName, List<EnhancementEngine> availableEngines, Set<String> optional, Set<String> missing, Map<String, Map<String, Object>> enhProps) {
if (chainName == null || chainName.isEmpty()) {
throw new IllegalArgumentException("The parsed ChainName MUST NOT be empty!");
}
// NOTE: sorts the caller's list in place (documented above)
Collections.sort(availableEngines, EXECUTION_ORDER_COMPARATOR);
//now we have all required and possible also optional engines
// -> build the execution plan
Graph ep = new IndexedGraph();
// the root ep:ExecutionPlan node; chain scoped properties are keyed by null
BlankNodeOrIRI epNode = createExecutionPlan(ep, chainName, enhProps != null ? enhProps.get(null) : null);
// prevOrder/prev/current track the dependency groups: engines sharing the
// same execution order form one group; every node of a group depends on
// all nodes of the previous group
Integer prevOrder = null;
Set<BlankNodeOrIRI> prev = null;
Set<BlankNodeOrIRI> current = new HashSet<BlankNodeOrIRI>();
// missing engines are written first; required missing ones become
// dependencies of the first group so execution fails early on them
for (String name : missing) {
boolean optionalMissing = optional.contains(name);
BlankNodeOrIRI node = writeExecutionNode(ep, epNode, name, optionalMissing, null, enhProps == null ? null : enhProps.get(name));
if (!optionalMissing) {
current.add(node);
}
// else add missing optional engines without any dependsOn restrictions
}
for (EnhancementEngine engine : availableEngines) {
String name = engine.getName();
Integer order = getEngineOrder(engine);
// a new execution order value closes the current group: it becomes the
// dependency set ('prev') for all nodes written from here on
if (prevOrder == null || !prevOrder.equals(order)) {
prev = current;
current = new HashSet<BlankNodeOrIRI>();
prevOrder = order;
}
try {
BlankNodeOrIRI executionNode = writeExecutionNode(ep, epNode, name, optional.contains(name), prev, enhProps == null ? null : enhProps.get(name));
current.add(executionNode);
} catch (RuntimeException e) {
//add the engine and class to ease debugging in such cases
log.error("Exception while writing ExecutionNode for Enhancement Eninge: " + engine + "(class: " + engine.getClass() + ")", e);
//rethrow it
throw e;
}
}
return ep.getImmutableGraph();
}
Example use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache,
taken from the class EntityDereferenceEngine, method computeEnhancements.
/**
 * Dereferences all entities referenced by the enhancement metadata of the
 * parsed {@link ContentItem}: (1) builds a {@link DereferenceContext} from the
 * request scoped enhancement properties, (2) collects the URIs of referenced
 * entities from the metadata (under the read lock) and (3) dereferences them -
 * either in parallel via the dereferencer's {@link ExecutorService} or
 * sequentially in the calling thread as a fallback.
 * @param ci the content item to enhance
 * @throws EngineException on interruption or unexpected dereferencing errors
 */
@Override
public final void computeEnhancements(ContentItem ci) throws EngineException {
if (offline && !dereferencer.supportsOfflineMode()) {
//entity dereferencer does no longer support offline mode
return;
}
log.debug("> dereference Entities for ContentItem {}", ci.getUri());
long start = System.nanoTime();
Map<String, Object> enhancemntProps = EnhancementEngineHelper.getEnhancementProperties(this, ci);
final DereferenceContext derefContext;
final Graph metadata = ci.getMetadata();
Set<IRI> referencedEntities = new HashSet<IRI>();
// read lock: we only read the metadata while collecting entity references
ci.getLock().readLock().lock();
try {
//(1) Create the DereferenceContext
if (filterContentLanguages) {
//parse the languages detected for the content
Set<String> contentLanguages = new HashSet<String>();
for (BlankNodeOrIRI langAnno : EnhancementEngineHelper.getLanguageAnnotations(metadata)) {
contentLanguages.add(EnhancementEngineHelper.getString(metadata, langAnno, DC_LANGUAGE));
}
enhancemntProps.put(DereferenceContext.INTERNAL_CONTENT_LANGUAGES, contentLanguages);
}
//create the dereference context and handle possible configuration exceptions
try {
derefContext = contextFactory.createContext(this, enhancemntProps);
derefContext.setOfflineMode(offline);
} catch (DereferenceConfigurationException e) {
// invalid request scoped configuration -> report the offending property
StringBuilder message = new StringBuilder("Unsupported Derefernece Configuarion ");
if (e.getProperty() != null) {
message.append("for property '").append(e.getProperty()).append("' ");
}
message.append(" parsed via the EnhancementProperties of this request!");
throw new EnhancementPropertyException(this, ci, e.getProperty(), message.toString(), e);
}
//parse the referenced entities from the graph
//(2) read all Entities we need to dereference from the parsed contentItem
// 'checked' avoids re-testing a URI referenced by multiple properties
Set<IRI> checked = new HashSet<IRI>();
//since STANBOL-1334 the list of properties that refer to entities can be configured
for (IRI referenceProperty : derefContext.getEntityReferences()) {
Iterator<Triple> entityReferences = metadata.filter(null, referenceProperty, null);
while (entityReferences.hasNext()) {
Triple triple = entityReferences.next();
RDFTerm entityReference = triple.getObject();
if (//only URIs
(entityReference instanceof IRI) && //do not check a URI twice
checked.add((IRI) entityReference) && //fallback mode
chekcFallbackMode((IRI) entityReference, metadata) && checkURI((IRI) entityReference)) {
//URI prefixes and patterns
boolean added = referencedEntities.add((IRI) entityReference);
if (added && log.isTraceEnabled()) {
log.trace(" ... schedule Entity {} (referenced-by: {})", entityReference, referenceProperty);
}
} else if (log.isTraceEnabled()) {
log.trace(" ... ignore Entity {} (referenced-by: {})", entityReference, referenceProperty);
}
}
}
} finally {
ci.getLock().readLock().unlock();
}
long schedule = System.nanoTime();
// write lock is handed to the dereference jobs; they acquire it themselves
// when adding dereferenced data to the metadata
final Lock writeLock = ci.getLock().writeLock();
log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
//(2) dereference the Entities
ExecutorService executor = dereferencer.getExecutor();
Set<IRI> failedEntities = new HashSet<IRI>();
int dereferencedCount = 0;
List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
if (executor != null && !executor.isShutdown()) {
//schedule all entities to dereference
for (final IRI entity : referencedEntities) {
DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock, derefContext);
dereferenceJob.setFuture(executor.submit(dereferenceJob));
dereferenceJobs.add(dereferenceJob);
}
//wait for all entities to be dereferenced
for (DereferenceJob dereferenceJob : dereferenceJobs) {
try {
if (dereferenceJob.await()) {
dereferencedCount++;
}
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
throw new EngineException(this, ci, "Interupted while waiting for dereferencing Entities", e);
} catch (ExecutionException e) {
if (e.getCause() instanceof DereferenceException) {
// expected failure of a single entity -> record and continue
failedEntities.add(dereferenceJob.entity);
log.debug(" ... error while dereferencing " + dereferenceJob.entity + "!", e);
} else {
//unknown error
throw new EngineException(this, ci, "Unchecked Error while " + "dereferencing Entity " + dereferenceJob.entity + "!", e);
}
}
}
} else {
//dereference using the current thread
for (IRI entity : referencedEntities) {
try {
log.trace(" ... dereference {}", entity);
if (dereferencer.dereference(entity, metadata, writeLock, derefContext)) {
dereferencedCount++;
log.trace(" + success");
} else {
log.trace(" - not found");
}
} catch (DereferenceException e) {
// single-entity failure -> record and continue with the rest
log.debug(" ... error while dereferencing " + entity + "!", e);
failedEntities.add(entity);
}
}
}
long end = System.nanoTime();
// nanoseconds -> milliseconds with two decimal digits of precision
float sheduleDuration = ((schedule - start) / 10000) / 100f;
float dereferenceDuration = ((end - schedule) / 10000) / 100f;
float duration = ((end - start) / 10000) / 100f;
if (!failedEntities.isEmpty()) {
log.warn(" - unable to dereference {} of {} for ContentItem {}", new Object[] { failedEntities.size(), referencedEntities.size(), ci.getUri() });
}
// dereferencedCount > 0 also guards the per-entity division below
if (log.isDebugEnabled() && dereferencedCount > 0) {
log.debug(" - dereferenced {} of {} Entities in {}ms | schedule:{}ms | " + " dereference: {}ms ({}ms/entity)", new Object[] { dereferencedCount, referencedEntities.size(), duration, sheduleDuration, dereferenceDuration, dereferenceDuration / dereferencedCount });
}
}
Aggregations of example snippets end here.