Search in sources :

Example 1 with EnhancementEngineHelper.getString

Use of org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString in the Apache Stanbol project.

The method initOccurrences of the class ContentItemResource.

/**
 * Populates {@code extractionsByTypeMap} from the metadata graph of
 * {@code contentItem}.
 * <p>
 * The graph is processed in two passes:
 * <ol>
 * <li>Every resource typed {@code ENHANCER_ENTITYANNOTATION} is read once
 * (entity reference, label and confidence) and indexed by each resource it
 * references via {@code DC_RELATION}.</li>
 * <li>Every resource typed {@code ENHANCER_TEXTANNOTATION} is turned into an
 * {@code EntityExtractionSummary} for each of its {@code DC_TYPE} values (or
 * once for the {@code null} type when it has none), with the suggestions
 * collected in the first pass attached. If the map for that type already
 * holds an entry for the summary, only a {@code Mention} is added to the
 * existing entry.</li>
 * </ol>
 */
private void initOccurrences() {
    Graph graph = contentItem.getMetadata();
    LiteralFactory lf = LiteralFactory.getInstance();
    // text annotation -> entity annotations that suggest an entity for it
    Map<IRI, Collection<BlankNodeOrIRI>> suggestionMap = new HashMap<IRI, Collection<BlankNodeOrIRI>>();
    // 1) get Entity Annotations
    Map<BlankNodeOrIRI, Map<EAProps, Object>> entitySuggestionMap = new HashMap<BlankNodeOrIRI, Map<EAProps, Object>>();
    Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
    while (entityAnnotations.hasNext()) {
        BlankNodeOrIRI entityAnnotation = entityAnnotations.next().getSubject();
        // to avoid multiple lookups (e.g. if one entityAnnotation links to
        // several TextAnnotations) we cache the data in an intermediate Map
        Map<EAProps, Object> eaData = new EnumMap<EAProps, Object>(EAProps.class);
        eaData.put(EAProps.entity, getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE));
        eaData.put(EAProps.label, getString(graph, entityAnnotation, ENHANCER_ENTITY_LABEL));
        eaData.put(EAProps.confidence, EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf));
        entitySuggestionMap.put(entityAnnotation, eaData);
        Iterator<IRI> relatedAnnotations = getReferences(graph, entityAnnotation, DC_RELATION);
        while (relatedAnnotations.hasNext()) {
            IRI relatedAnnotation = relatedAnnotations.next();
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(relatedAnnotation);
            if (suggestions == null) {
                suggestions = new ArrayList<BlankNodeOrIRI>();
                suggestionMap.put(relatedAnnotation, suggestions);
            }
            suggestions.add(entityAnnotation);
        }
    }
    // 2) get the TextAnnotations
    Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
    while (textAnnotations.hasNext()) {
        BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
        // NOTE: TextAnnotations that have a dc:relation are intentionally NOT
        // skipped; we need to process those to show multiple mentions.
        // NOTE: TextAnnotations without fise:selected-text are no longer
        // ignored, so 'text' may be null here.
        String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
        Integer start = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class, lf);
        Integer end = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class, lf);
        Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
        // the suggestions do not depend on the type, so look them up once
        Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
        Iterator<IRI> types = getReferences(graph, textAnnotation, DC_TYPE);
        if (!types.hasNext()) {
            // create an iterator over null in case no types are present
            types = Collections.singleton((IRI) null).iterator();
        }
        while (types.hasNext()) {
            IRI type = types.next();
            Map<EntityExtractionSummary, EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
            if (occurrenceMap == null) {
                occurrenceMap = new TreeMap<EntityExtractionSummary, EntityExtractionSummary>();
                extractionsByTypeMap.put(type, occurrenceMap);
            }
            // in case of a language annotation use the detected language as
            // label. The label is computed per type (instead of overwriting
            // 'text') so that the language does not leak into the summaries
            // created for the remaining types of this annotation.
            String label;
            if (DC_LINGUISTIC_SYSTEM.equals(type)) {
                label = EnhancementEngineHelper.getString(graph, textAnnotation, DC_LANGUAGE);
            } else {
                label = text;
            }
            EntityExtractionSummary entity = new EntityExtractionSummary(label, type, start, end, confidence, defaultThumbnails);
            if (suggestions != null) {
                for (BlankNodeOrIRI entityAnnotation : suggestions) {
                    Map<EAProps, Object> eaData = entitySuggestionMap.get(entityAnnotation);
                    entity.addSuggestion((IRI) eaData.get(EAProps.entity), (String) eaData.get(EAProps.label), (Double) eaData.get(EAProps.confidence), graph);
                }
            }
            EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
            if (existingSummary == null) {
                // new extraction summary
                occurrenceMap.put(entity, entity);
            } else {
                // extraction summary with this text and suggestions already
                // present ... only add a mention to the existing
                existingSummary.addMention(new Mention(label, start, end, confidence));
            }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) EnumMap(java.util.EnumMap) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Collection(java.util.Collection) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)

Aggregations

Collection (java.util.Collection)1 EnumMap (java.util.EnumMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)1 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)1 EnhancementEngineHelper.getString (org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString)1