Use of org.apache.clerezza.commons.rdf.Triple in the Apache Stanbol project: class ContentItemResource, method initOccurrences.
private void initOccurrences() {
Graph graph = contentItem.getMetadata();
LiteralFactory lf = LiteralFactory.getInstance();
Map<IRI, Collection<BlankNodeOrIRI>> suggestionMap = new HashMap<IRI, Collection<BlankNodeOrIRI>>();
// 1) get Entity Annotations
Map<BlankNodeOrIRI, Map<EAProps, Object>> entitySuggestionMap = new HashMap<BlankNodeOrIRI, Map<EAProps, Object>>();
Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
while (entityAnnotations.hasNext()) {
BlankNodeOrIRI entityAnnotation = entityAnnotations.next().getSubject();
// to avoid multiple lookups (e.g. if one entityAnnotation links to+
// several TextAnnotations) we cache the data in an intermediate Map
Map<EAProps, Object> eaData = new EnumMap<EAProps, Object>(EAProps.class);
eaData.put(EAProps.entity, getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE));
eaData.put(EAProps.label, getString(graph, entityAnnotation, ENHANCER_ENTITY_LABEL));
eaData.put(EAProps.confidence, EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf));
entitySuggestionMap.put(entityAnnotation, eaData);
Iterator<IRI> textAnnotations = getReferences(graph, entityAnnotation, DC_RELATION);
while (textAnnotations.hasNext()) {
IRI textAnnotation = textAnnotations.next();
Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
if (suggestions == null) {
suggestions = new ArrayList<BlankNodeOrIRI>();
suggestionMap.put(textAnnotation, suggestions);
}
suggestions.add(entityAnnotation);
}
}
// 2) get the TextAnnotations
Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
while (textAnnotations.hasNext()) {
BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
// we need to process those to show multiple mentions
// if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) {
// // this is not the most specific occurrence of this name: skip
// continue;
// }
String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
// TextAnnotations without fise:selected-text are no longer ignored
// if(text == null){
// //ignore text annotations without text
// continue;
// }
Integer start = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class, lf);
Integer end = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class, lf);
Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
Iterator<IRI> types = getReferences(graph, textAnnotation, DC_TYPE);
if (!types.hasNext()) {
// create an iterator over null in case no types are present
types = Collections.singleton((IRI) null).iterator();
}
while (types.hasNext()) {
IRI type = types.next();
Map<EntityExtractionSummary, EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
if (occurrenceMap == null) {
occurrenceMap = new TreeMap<EntityExtractionSummary, EntityExtractionSummary>();
extractionsByTypeMap.put(type, occurrenceMap);
}
// in case of a language annotation use the detected language as label
if (DC_LINGUISTIC_SYSTEM.equals(type)) {
text = EnhancementEngineHelper.getString(graph, textAnnotation, DC_LANGUAGE);
}
EntityExtractionSummary entity = new EntityExtractionSummary(text, type, start, end, confidence, defaultThumbnails);
Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
if (suggestions != null) {
for (BlankNodeOrIRI entityAnnotation : suggestions) {
Map<EAProps, Object> eaData = entitySuggestionMap.get(entityAnnotation);
entity.addSuggestion((IRI) eaData.get(EAProps.entity), (String) eaData.get(EAProps.label), (Double) eaData.get(EAProps.confidence), graph);
}
}
EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
if (existingSummary == null) {
// new extraction summary
occurrenceMap.put(entity, entity);
} else {
// extraction summary with this text and suggestions already
// present ... only add a mention to the existing
existingSummary.addMention(new Mention(text, start, end, confidence));
}
}
}
}
Use of org.apache.clerezza.commons.rdf.Triple in the Apache Stanbol project: class EnhancementEngineHelper, method extractEnhancementProperties.
/**
* Extracts all EnhancementProperties from the parsed Node and adds them to
* the parsed map
* @param properties The Map to add the extracted properties. extracted values
* are appended to existing values.
* @param graph the RDF graph containing the data
* @param node the node to extract the properties from
* @param level the name of the level (only used for logging)
*/
private static void extractEnhancementProperties(Map<String, Object> properties, Graph graph, BlankNodeOrIRI node, String level) {
log.debug(" - extract {} properties from {}", level, node);
Iterator<Triple> props = graph.filter(node, null, null);
while (props.hasNext()) {
Triple t = props.next();
String propUri = t.getPredicate().getUnicodeString();
if (propUri.startsWith(EHPROP_NS)) {
String prop = propUri.substring(EHPROP_NS_LENGTH);
RDFTerm resource = t.getObject();
Object value = extractEnhancementPropertyValue(resource);
if (value != null && !prop.isEmpty()) {
Object current = properties.get(prop);
if (log.isDebugEnabled()) {
if (current != null) {
log.debug(" ... append {} property '{}' to {}='{}'", new Object[] { level, value, prop, current });
} else {
log.debug(" ... add {} property {}='{}'", new Object[] { level, prop, value });
}
}
if (current instanceof Collection<?>) {
((Collection) current).add(value);
} else if (current != null) {
Collection<Object> col = new ArrayList<Object>(4);
col.add(current);
col.add(value);
properties.put(prop, col);
} else {
properties.put(prop, value);
}
}
}
}
}
Use of org.apache.clerezza.commons.rdf.Triple in the Apache Stanbol project: class EnhancementEngineHelper, method set.
/**
* Replaces all current values of the property for the resource
* with the parsed values
* @param graph the graph
* @param resource the resource
* @param property the property
* @param value the value. In case it is an instance of {@link RDFTerm} it
* is directly added to the graph. Otherwise the parsed {@link LiteralFactory}
* is used to create a {@link TypedLiteral} for the parsed value.
* @param literalFactory the {@link LiteralFactory} used in case the parsed
* value is not an {@link RDFTerm}
*/
public static void set(Graph graph, BlankNodeOrIRI resource, IRI property, Collection<?> values, LiteralFactory literalFactory) {
Iterator<Triple> currentValues = graph.filter(resource, property, null);
while (currentValues.hasNext()) {
currentValues.next();
currentValues.remove();
}
if (values != null) {
for (Object value : values) {
if (value instanceof RDFTerm) {
graph.add(new TripleImpl(resource, property, (RDFTerm) value));
} else if (value != null) {
graph.add(new TripleImpl(resource, property, literalFactory.createTypedLiteral(value)));
}
}
}
}
Use of org.apache.clerezza.commons.rdf.Triple in the Apache Stanbol project: class EnhancementEngineHelper, method getLanguageAnnotations.
/**
* Getter for the Resources of fise:TextAnnotations that do have a value
* of the dc:language property. The returned list is sorted by 'fise:confidence'.
* Annotations with missing confidence are ranked last.<p>
* NOTE that the returned list will likely contain annotations for the same language
* if multiple language identification are used in the same {@link Chain}.
* @param graph the graph with the enhancement.
* Typically {@link ContentItem#getMetadata()}
* @return the sorted list of language annotations or an empty list if none.
* @throws IllegalArgumentException if <code>null</code> is parsed as graph
*/
public static List<BlankNodeOrIRI> getLanguageAnnotations(Graph graph) {
if (graph == null) {
throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
}
// I do not use SPARQL, because I do not want to instantiate a QueryEngine
final Map<BlankNodeOrIRI, Double> confidences = new HashMap<BlankNodeOrIRI, Double>();
List<BlankNodeOrIRI> langAnnotations = new ArrayList<BlankNodeOrIRI>();
Iterator<Triple> textAnnoataions = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
while (textAnnoataions.hasNext()) {
BlankNodeOrIRI textAnnotation = textAnnoataions.next().getSubject();
String language = getString(graph, textAnnotation, DC_LANGUAGE);
if (language != null) {
Double confidence = null;
try {
confidence = get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
} catch (InvalidLiteralTypeException e) {
// STANBOL-1417: not a double value
try {
// try with float
Float fconf = get(graph, textAnnotation, ENHANCER_CONFIDENCE, Float.class, lf);
if (fconf != null) {
confidence = Double.valueOf(fconf.doubleValue());
}
} catch (InvalidLiteralTypeException e1) {
log.warn("Unable to parse confidence for language annotation " + textAnnotation, e);
}
}
confidences.put(textAnnotation, confidence);
langAnnotations.add(textAnnotation);
}
}
if (langAnnotations.size() > 1) {
Collections.sort(langAnnotations, new Comparator<BlankNodeOrIRI>() {
@Override
public int compare(BlankNodeOrIRI o1, BlankNodeOrIRI o2) {
Double c1 = confidences.get(o1);
Double c2 = confidences.get(o2);
// decrising order (values without confidence last)
if (c1 == null) {
return c2 == null ? 0 : 1;
} else if (c2 == null) {
return -1;
} else {
return c2.compareTo(c1);
}
}
});
}
return langAnnotations;
}
Use of org.apache.clerezza.commons.rdf.Triple in the Apache Stanbol project: class ExecutionPlanHelper, method getExecutable.
/**
* Evaluates the parsed {@link ImmutableGraph execution plan} and the set of already executed
* {@link ExecutionPlan#EXECUTION_NODE ep:ExecutionNode}s to find the next
* nodes that can be executed.
* @param executionPlan the execution plan
* @param executed the already executed {@link ExecutionPlan#EXECUTION_NODE node}s
* or an empty set to determine the nodes to start the execution.
* @return the set of nodes that can be executed next or an empty set if
* there are no more nodes to execute.
*/
public static Set<BlankNodeOrIRI> getExecutable(Graph executionPlan, Set<BlankNodeOrIRI> executed) {
Set<BlankNodeOrIRI> executeable = new HashSet<BlankNodeOrIRI>();
for (Iterator<Triple> nodes = executionPlan.filter(null, RDF_TYPE, EXECUTION_NODE); nodes.hasNext(); ) {
BlankNodeOrIRI node = nodes.next().getSubject();
if (!executed.contains(node)) {
Iterator<Triple> dependsIt = executionPlan.filter(node, DEPENDS_ON, null);
boolean dependendExecuted = true;
while (dependsIt.hasNext() && dependendExecuted) {
dependendExecuted = executed.contains(dependsIt.next().getObject());
}
if (dependendExecuted) {
executeable.add(node);
}
}
}
return executeable;
}
Aggregations