Search in sources :

Example 81 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ContentItemResource method initOccurrences.

/**
 * Fills {@code extractionsByTypeMap} with one {@link EntityExtractionSummary}
 * per distinct extraction found in the metadata of {@code contentItem}.
 * <p>
 * Step 1 collects the data of all resources typed as
 * {@code ENHANCER_ENTITYANNOTATION} and indexes them by the resources they
 * reference via {@code DC_RELATION}. Step 2 iterates over all resources typed
 * as {@code ENHANCER_TEXTANNOTATION}, creates a summary for each of their
 * {@code DC_TYPE} values (or a single summary with a {@code null} type if
 * there is none) and attaches the suggestions collected in step 1. If an
 * equal summary is already registered for the type, only a {@link Mention}
 * is added to the existing one.
 */
private void initOccurrences() {
    Graph graph = contentItem.getMetadata();
    LiteralFactory lf = LiteralFactory.getInstance();
    Map<IRI, Collection<BlankNodeOrIRI>> suggestionMap = new HashMap<IRI, Collection<BlankNodeOrIRI>>();
    // 1) get Entity Annotations
    Map<BlankNodeOrIRI, Map<EAProps, Object>> entitySuggestionMap = new HashMap<BlankNodeOrIRI, Map<EAProps, Object>>();
    Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
    while (entityAnnotations.hasNext()) {
        BlankNodeOrIRI entityAnnotation = entityAnnotations.next().getSubject();
        // to avoid multiple lookups (e.g. if one entityAnnotation links to
        // several TextAnnotations) we cache the data in an intermediate Map
        Map<EAProps, Object> eaData = new EnumMap<EAProps, Object>(EAProps.class);
        eaData.put(EAProps.entity, getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE));
        eaData.put(EAProps.label, getString(graph, entityAnnotation, ENHANCER_ENTITY_LABEL));
        eaData.put(EAProps.confidence, EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf));
        entitySuggestionMap.put(entityAnnotation, eaData);
        // register this entity annotation as a suggestion for every
        // annotation it references via DC_RELATION
        Iterator<IRI> textAnnotations = getReferences(graph, entityAnnotation, DC_RELATION);
        while (textAnnotations.hasNext()) {
            IRI textAnnotation = textAnnotations.next();
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions == null) {
                suggestions = new ArrayList<BlankNodeOrIRI>();
                suggestionMap.put(textAnnotation, suggestions);
            }
            suggestions.add(entityAnnotation);
        }
    }
    // 2) get the TextAnnotations
    Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
    while (textAnnotations.hasNext()) {
        BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
        // NOTE: TextAnnotations that themselves have a DC_RELATION are NOT
        // skipped, because they are needed to show multiple mentions.
        String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
        // NOTE: TextAnnotations without a selected text are no longer ignored,
        // so 'text' may be null from here on.
        Integer start = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class, lf);
        Integer end = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class, lf);
        Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
        Iterator<IRI> types = getReferences(graph, textAnnotation, DC_TYPE);
        if (!types.hasNext()) {
            // create an iterator over null in case no types are present
            types = Collections.singleton((IRI) null).iterator();
        }
        while (types.hasNext()) {
            IRI type = types.next();
            Map<EntityExtractionSummary, EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
            if (occurrenceMap == null) {
                occurrenceMap = new TreeMap<EntityExtractionSummary, EntityExtractionSummary>();
                extractionsByTypeMap.put(type, occurrenceMap);
            }
            // The label is computed per type: in case of a language annotation
            // the detected language is used as label. This must not overwrite
            // 'text', otherwise all types processed afterwards for the same
            // TextAnnotation would also be labelled with the language.
            String label = text;
            if (DC_LINGUISTIC_SYSTEM.equals(type)) {
                label = EnhancementEngineHelper.getString(graph, textAnnotation, DC_LANGUAGE);
            }
            EntityExtractionSummary entity = new EntityExtractionSummary(label, type, start, end, confidence, defaultThumbnails);
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions != null) {
                for (BlankNodeOrIRI entityAnnotation : suggestions) {
                    // every key of suggestionMap values was cached in step 1
                    Map<EAProps, Object> eaData = entitySuggestionMap.get(entityAnnotation);
                    entity.addSuggestion((IRI) eaData.get(EAProps.entity), (String) eaData.get(EAProps.label), (Double) eaData.get(EAProps.confidence), graph);
                }
            }
            EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
            if (existingSummary == null) {
                // new extraction summary
                occurrenceMap.put(entity, entity);
            } else {
                // extraction summary with this text and suggestions already
                // present ... only add a mention to the existing
                existingSummary.addMention(new Mention(label, start, end, confidence));
            }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) EnumMap(java.util.EnumMap) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Collection(java.util.Collection) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Example 82 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class EnhancementEngineHelper method extractEnhancementProperties.

/**
 * Collects the enhancement properties attached to a node and merges them
 * into the parsed map.
 * <p>
 * Only triples whose predicate starts with {@code EHPROP_NS} are considered.
 * The map key is the predicate with its first {@code EHPROP_NS_LENGTH}
 * characters removed; triples with an empty key or a value that can not be
 * extracted are skipped. If the key is already present, the new value is
 * appended: an existing {@link Collection} is extended in place, a single
 * existing value is replaced by a new collection holding both values.
 * @param properties the map the extracted properties are merged into
 * @param graph the RDF graph containing the data
 * @param node the node to extract the properties from
 * @param level the name of the level (only used for logging)
 */
private static void extractEnhancementProperties(Map<String, Object> properties, Graph graph, BlankNodeOrIRI node, String level) {
    log.debug(" - extract {} properties from {}", level, node);
    for (Iterator<Triple> triples = graph.filter(node, null, null); triples.hasNext(); ) {
        Triple triple = triples.next();
        String predicate = triple.getPredicate().getUnicodeString();
        if (!predicate.startsWith(EHPROP_NS)) {
            // not an enhancement property
            continue;
        }
        String key = predicate.substring(EHPROP_NS_LENGTH);
        RDFTerm object = triple.getObject();
        Object value = extractEnhancementPropertyValue(object);
        if (value == null || key.isEmpty()) {
            continue;
        }
        Object previous = properties.get(key);
        if (log.isDebugEnabled()) {
            if (previous == null) {
                log.debug(" ... add {} property {}='{}'", new Object[] { level, key, value });
            } else {
                log.debug(" ... append {} property '{}' to {}='{}'", new Object[] { level, value, key, previous });
            }
        }
        if (previous == null) {
            // first value for this key
            properties.put(key, value);
        } else if (previous instanceof Collection<?>) {
            // already multi valued: extend the existing collection in place
            @SuppressWarnings("unchecked")
            Collection<Object> existingValues = (Collection<Object>) previous;
            existingValues.add(value);
        } else {
            // second value for this key: switch to a collection
            Collection<Object> merged = new ArrayList<Object>(4);
            merged.add(previous);
            merged.add(value);
            properties.put(key, merged);
        }
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Collection(java.util.Collection) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 83 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class EnhancementEngineHelper method set.

/**
 * Replaces all current values of the property for the resource with the
 * parsed values.
 * <p>
 * All triples matching the resource and property are removed first. If
 * <code>null</code> is parsed as values nothing is added afterwards;
 * <code>null</code> elements within the collection are skipped.
 * @param graph the graph
 * @param resource the resource
 * @param property the property
 * @param values the new values. Elements that are an instance of
 * {@link RDFTerm} are added to the graph as they are. For all other elements
 * the parsed {@link LiteralFactory} is used to create a typed literal.
 * @param literalFactory the {@link LiteralFactory} used for elements that
 * are not an {@link RDFTerm}
 */
public static void set(Graph graph, BlankNodeOrIRI resource, IRI property, Collection<?> values, LiteralFactory literalFactory) {
    // drop every existing value of the property
    for (Iterator<Triple> existing = graph.filter(resource, property, null); existing.hasNext(); ) {
        existing.next();
        existing.remove();
    }
    if (values == null) {
        return;
    }
    for (Object candidate : values) {
        if (candidate == null) {
            continue;
        }
        if (candidate instanceof RDFTerm) {
            graph.add(new TripleImpl(resource, property, (RDFTerm) candidate));
        } else {
            graph.add(new TripleImpl(resource, property, literalFactory.createTypedLiteral(candidate)));
        }
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 84 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class EnhancementEngineHelper method getLanguageAnnotations.

/**
 * Returns the resources typed as {@code ENHANCER_TEXTANNOTATION} that have a
 * value for the {@code DC_LANGUAGE} property, ordered by decreasing
 * {@code ENHANCER_CONFIDENCE}. Annotations without a readable confidence are
 * placed at the end of the list.<p>
 * The confidence is read as {@link Double} first and, if the literal has a
 * different type, as {@link Float}. If neither works a warning is logged and
 * the annotation is treated as having no confidence.<p>
 * NOTE that the returned list may contain several annotations for the same
 * language (e.g. if more than one language identification was applied).
 * @param graph the graph with the enhancements
 * @return the sorted list of language annotations or an empty list if none.
 * @throws IllegalArgumentException if <code>null</code> is parsed as graph
 */
public static List<BlankNodeOrIRI> getLanguageAnnotations(Graph graph) {
    if (graph == null) {
        throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
    }
    // SPARQL is intentionally not used here, as this would require to
    // instantiate a QueryEngine
    final Map<BlankNodeOrIRI, Double> confidenceByAnnotation = new HashMap<BlankNodeOrIRI, Double>();
    List<BlankNodeOrIRI> result = new ArrayList<BlankNodeOrIRI>();
    for (Iterator<Triple> candidates = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); candidates.hasNext(); ) {
        BlankNodeOrIRI annotation = candidates.next().getSubject();
        String language = getString(graph, annotation, DC_LANGUAGE);
        if (language == null) {
            // not a language annotation
            continue;
        }
        Double confidence = null;
        try {
            confidence = get(graph, annotation, ENHANCER_CONFIDENCE, Double.class, lf);
        } catch (InvalidLiteralTypeException notADouble) {
            // STANBOL-1417: not a double value
            try {
                // try with float
                Float floatConfidence = get(graph, annotation, ENHANCER_CONFIDENCE, Float.class, lf);
                if (floatConfidence != null) {
                    confidence = Double.valueOf(floatConfidence.doubleValue());
                }
            } catch (InvalidLiteralTypeException notAFloat) {
                // the exception of the initial (double) attempt is reported
                log.warn("Unable to parse confidence for language annotation " + annotation, notADouble);
            }
        }
        confidenceByAnnotation.put(annotation, confidence);
        result.add(annotation);
    }
    if (result.size() <= 1) {
        // nothing to sort
        return result;
    }
    // decreasing confidence; annotations without confidence last
    Comparator<BlankNodeOrIRI> byDecreasingConfidence = new Comparator<BlankNodeOrIRI>() {

        @Override
        public int compare(BlankNodeOrIRI left, BlankNodeOrIRI right) {
            Double leftConfidence = confidenceByAnnotation.get(left);
            Double rightConfidence = confidenceByAnnotation.get(right);
            if (leftConfidence != null && rightConfidence != null) {
                return rightConfidence.compareTo(leftConfidence);
            }
            if (leftConfidence == null && rightConfidence == null) {
                return 0;
            }
            return leftConfidence == null ? 1 : -1;
        }
    };
    Collections.sort(result, byDecreasingConfidence);
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InvalidLiteralTypeException(org.apache.clerezza.rdf.core.InvalidLiteralTypeException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Triple(org.apache.clerezza.commons.rdf.Triple)

Example 85 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ExecutionPlanHelper method getExecutable.

/**
 * Determines the nodes of the parsed execution plan that can be executed
 * next.
 * <p>
 * A resource typed as {@code EXECUTION_NODE} is returned if it is not
 * contained in the parsed set of executed nodes and every object of its
 * {@code DEPENDS_ON} triples is contained in that set. Nodes without any
 * {@code DEPENDS_ON} triple are therefore executable right away.
 * @param executionPlan the execution plan
 * @param executed the already executed nodes or an empty set to determine
 * the nodes to start the execution with.
 * @return the set of nodes that can be executed next or an empty set if
 * there are no more nodes to execute.
 */
public static Set<BlankNodeOrIRI> getExecutable(Graph executionPlan, Set<BlankNodeOrIRI> executed) {
    Set<BlankNodeOrIRI> ready = new HashSet<BlankNodeOrIRI>();
    Iterator<Triple> planNodes = executionPlan.filter(null, RDF_TYPE, EXECUTION_NODE);
    while (planNodes.hasNext()) {
        BlankNodeOrIRI candidate = planNodes.next().getSubject();
        if (executed.contains(candidate)) {
            // already done
            continue;
        }
        boolean dependenciesMet = true;
        Iterator<Triple> dependencies = executionPlan.filter(candidate, DEPENDS_ON, null);
        while (dependencies.hasNext()) {
            if (!executed.contains(dependencies.next().getObject())) {
                // first missing dependency is sufficient to rule the node out
                dependenciesMet = false;
                break;
            }
        }
        if (dependenciesMet) {
            ready.add(candidate);
        }
    }
    return ready;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashSet(java.util.HashSet)

Aggregations

Triple (org.apache.clerezza.commons.rdf.Triple)151 IRI (org.apache.clerezza.commons.rdf.IRI)88 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)84 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)70 Graph (org.apache.clerezza.commons.rdf.Graph)45 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)41 HashSet (java.util.HashSet)34 Literal (org.apache.clerezza.commons.rdf.Literal)30 ArrayList (java.util.ArrayList)27 Lock (java.util.concurrent.locks.Lock)21 HashMap (java.util.HashMap)20 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)19 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)19 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)12 Test (org.junit.Test)12 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)10 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)9 GraphNode (org.apache.clerezza.rdf.utils.GraphNode)8 IOException (java.io.IOException)7