Search in sources :

Example 6 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class EnhancementEngineHelper method addContributingEngine.

/**
 * Records the parsed {@link EnhancementEngine} as dc:contributor of the
 * given enhancement and stamps the enhancement with the current date via
 * the dc:modified property.
 * @param metadata the {@link ContentItem#getMetadata()} graph to write to
 * @param enhancement the enhancement resource to annotate
 * @param engine the engine that contributed to the enhancement
 */
public static void addContributingEngine(Graph metadata, IRI enhancement, EnhancementEngine engine) {
    LiteralFactory lf = LiteralFactory.getInstance();
    // TODO: use a public dereferencing URI instead?
    String engineClassName = engine.getClass().getName();
    metadata.add(new TripleImpl(enhancement, DC_CONTRIBUTOR, lf.createTypedLiteral(engineClassName)));
    // record the time of this modification
    set(metadata, enhancement, DC_MODIFIED, new Date(), lf);
}
Also used : TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Date(java.util.Date) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 7 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class NEREngineCore method findNamedEntities.

/**
 * Runs the OpenNLP name finder over the parsed content and writes one
 * fise:TextAnnotation per detected name occurrence to the ContentItem's
 * metadata graph. For repeated mentions of the same (or a containing) name,
 * later annotations are linked to the first/most specific occurrence via
 * dc:relation.
 * @param ci the content item to enhance (MUST NOT be null)
 * @param at the AnalysedText to process; may be null if {@code text} is given
 * @param text the plain text to process; may be null if {@code at} is given
 * @param lang the language of the text, or null/empty if unknown
 * @param nameFinderModel the OpenNLP model used for entity extraction
 */
protected void findNamedEntities(final ContentItem ci, final AnalysedText at, final String text, final String lang, final TokenNameFinderModel nameFinderModel) {
    if (ci == null) {
        throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL");
    }
    // at least one of AnalysedText / raw text must be present
    if (at == null && text == null) {
        log.warn("NULL was parsed as AnalysedText AND Text for content item " + ci.getUri() + ". One of the two MUST BE present! -> call ignored");
        return;
    }
    // clerezza Language tag for the PlainLiterals written below (null = untagged)
    final Language language;
    if (lang != null && !lang.isEmpty()) {
        language = new Language(lang);
    } else {
        language = null;
    }
    if (log.isDebugEnabled()) {
        log.debug("findNamedEntities model={},  language={}, text=", new Object[] { nameFinderModel, language, StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
    }
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Graph g = ci.getMetadata();
    Map<String, List<NameOccurrence>> entityNames;
    // prefer the AnalysedText (token/sentence aware) over the raw text
    if (at != null) {
        entityNames = extractNameOccurrences(nameFinderModel, at, lang);
    } else {
        entityNames = extractNameOccurrences(nameFinderModel, text, lang);
    }
    // lock the ContentItem while writing the RDF data for found Named Entities
    ci.getLock().writeLock().lock();
    try {
        // maps a name to the TextAnnotation of its first (most specific) occurrence;
        // LinkedHashMap preserves insertion order for the containment search below
        Map<String, IRI> previousAnnotations = new LinkedHashMap<String, IRI>();
        for (Map.Entry<String, List<NameOccurrence>> nameInContext : entityNames.entrySet()) {
            String name = nameInContext.getKey();
            List<NameOccurrence> occurrences = nameInContext.getValue();
            IRI firstOccurrenceAnnotation = null;
            for (NameOccurrence occurrence : occurrences) {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(name, language)));
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.context, language)));
                if (occurrence.type != null) {
                    g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                }
                if (occurrence.confidence != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(occurrence.confidence)));
                }
                if (occurrence.start != null && occurrence.end != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occurrence.start)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occurrence.end)));
                }
                // first occurrence of this name: look for a previously annotated
                // name that contains this one and use it as subsumption target
                if (firstOccurrenceAnnotation == null) {
                    // search previous annotations for a more specific occurrence
                    for (Map.Entry<String, IRI> entry : previousAnnotations.entrySet()) {
                        if (entry.getKey().contains(name)) {
                            // we have found a most specific previous
                            // occurrence, use it as subsumption target
                            firstOccurrenceAnnotation = entry.getValue();
                            g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                            break;
                        }
                    }
                    if (firstOccurrenceAnnotation == null) {
                        // no most specific previous occurrence, I am the first,
                        // most specific occurrence to be later used as a target
                        firstOccurrenceAnnotation = textAnnotation;
                        previousAnnotations.put(name, textAnnotation);
                    }
                } else {
                    // I am referring to a most specific first occurrence of the
                    // same name
                    g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                }
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) LinkedHashMap(java.util.LinkedHashMap) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) List(java.util.List) ArrayList(java.util.ArrayList) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 8 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class IndexedGraphTest method createGraph.

/**
 * Fills {@code tc} with randomly generated triples until it holds at least
 * {@code triples} elements. Subjects, predicates and objects are drawn from
 * a single random value in [0..3): thresholds partition that range into
 * literal / bNode / IRI objects, with sub-ranges choosing the literal type
 * and whether bNodes are created or reused.
 * @param tc the collection to fill (typically the graph under test)
 * @param triples the minimum number of triples to generate
 * @param seed optional random seed for reproducible graphs; may be null
 */
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    // randoms are in the range [0..3):
    //   (0..l]  -> literal object, (l..b] -> bNode object, (b..3) -> IRI object
    double l = 1.0;
    // first third of the literal range -> typed int literal
    double i = l / 3;
    // second third of the literal range -> typed double literal
    double d = l * 2 / 3;
    // the remaining (d..l] is a text literal; split it into three equal parts
    // for plain / English / German.
    // BUGFIX: the original re-tested (random <= i) and (random <= d) inside
    // the branch that is only reached when random > d, so only German
    // literals were ever generated.
    double tPlain = d + (l - d) / 3;
    double tEnglish = d + (l - d) * 2 / 3;
    // bNode
    double b = 2.0;
    // within (l..b]: (l..nb] creates a new bNode, (nb..b] reuses an existing one
    double nb = b - (l * 2 / 3);
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        // change the subject with ~1/6 probability (always on the first pass)
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                // reuse a random existing bNode as subject
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        // change the predicate with ~1/3 probability (always on the first pass)
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) {
            // literal object
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                Literal text;
                if (random <= tPlain) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (random <= tEnglish) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) {
            // bNode object
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                // >nb <b: reuse a random existing bNode
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else {
            // IRI object
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Random(java.util.Random) Language(org.apache.clerezza.commons.rdf.Language) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 9 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class LocationEnhancementEngine method computeEnhancements.

/**
 * Looks up all TextAnnotations of dc:type dbpedia:Place in the ContentItem's
 * metadata, queries the geonames.org service for each selected name and
 * writes EntityAnnotations for the returned toponyms. For sufficiently
 * scored toponyms the geonames hierarchy (continent, country, ...) is also
 * written, linked to the suggested location via dc:relation.
 * @param ci the content item to enhance
 * @throws EngineException if the geonames service lookup fails
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    IRI contentItemId = ci.getUri();
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // get all the textAnnotations
    /*
     * this Map holds the name as key and all the text annotations of
     * dc:type dbpedia:Place that select this name as value.
     * This map is used to avoid multiple lookups for text annotations
     * selecting the same name.
     */
    Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
    Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
    while (iterator.hasNext()) {
        // the enhancement annotating an place
        BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
        // this can still be a TextAnnotation or an EntityAnnotation,
        // so we need to filter for TextAnnotations
        Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        if (graph.contains(isTextAnnotation)) {
            // now get the name
            String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
            if (name == null) {
                log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property" + ENHANCER_SELECTED_TEXT + " is not present");
            } else {
                Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
                if (placeEnhancements == null) {
                    placeEnhancements = new ArrayList<BlankNodeOrIRI>();
                    name2placeEnhancementMap.put(name, placeEnhancements);
                }
                placeEnhancements.add(placeEnhancement);
            }
        } else {
        // TODO: if we also want to process EntityAnnotations with the dc:type dbpedia:Place
        // then we need to parse the name based on the enhancer:entity-name property
        }
    }
    // Now we do have all the names we need to look up
    Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
    if (getMaxLocationEnhancements() != null) {
        requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
    }
    for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
        List<Toponym> results;
        try {
            requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
            results = geonamesService.searchToponyms(requestParams);
        } catch (Exception e) {
            /*
             * TODO: Review if it makes sense to catch here for each name, or
             * to catch the whole loop.
             * This depends on whether single requests can result in Exceptions
             * (e.g. because of encoding problems) or if usually Exceptions
             * are thrown because of general things like connection issues
             * or service unavailability.
             */
            throw new EngineException(this, ci, e);
        }
        if (results != null) {
            // results are sorted by score, so the first entry holds the maximum
            Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
            for (Toponym result : results) {
                log.debug("process result {} {}", result.getGeoNameId(), result.getName());
                Double score = getToponymScore(result, maxScore);
                log.debug("  > score {}", score);
                if (score != null) {
                    if (score < minScore) {
                        // if score is lower than the lower bound, then stop
                        break;
                    }
                } else {
                    log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
                /*
                 * NOTE: If score is not present all suggestions are
                 * added as enhancements to the metadata of the content
                 * item.
                 */
                }
                // write the enhancement!
                BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
                log.debug("  > {}  >= {}", score, minHierarchyScore);
                if (score != null && score >= minHierarchyScore) {
                    log.debug("  > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
                    // get the hierarchy
                    try {
                        Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
                        for (int level = 0; hierarchy.hasNext(); level++) {
                            Toponym hierarchyEntry = hierarchy.next();
                            // maybe add a configuration for the levels to skip
                            if (level == 0) {
                                // Mother earth -> ignore
                                continue;
                            }
                            // write it as dependent to the locationEnhancement
                            if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
                                // TODO: add additional checks based on possible
                                // configuration here!
                                log.debug("    - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
                                /*
                                 * The hierarchy service does not provide a score, because it would be 1.0
                                 * so we need to set the score to this value.
                                 * Currently it is set to the value of the suggested entry
                                 */
                                writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
                            }
                        }
                    } catch (Exception e) {
                        // hierarchy retrieval is best-effort; keep the location enhancement
                        log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
                    }
                }
            }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) EnumMap(java.util.EnumMap) SearchRequestPropertyEnum(org.apache.stanbol.enhancer.engines.geonames.impl.GeonamesAPIWrapper.SearchRequestPropertyEnum) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) ConfigurationException(org.osgi.service.cm.ConfigurationException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) Collection(java.util.Collection) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap)

Example 10 with LiteralFactory

use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.

the class CeliLemmatizerEnhancementEngine method addMorphoAnalysisEnhancement.

/**
 * Calls the CELI morphological analysis service for the parsed text and
 * writes one fise:TextAnnotation per interpretation of each returned
 * lexical entry, including selected text, character offsets, selection
 * context and the morphological features as triples.
 * @param ci the content item to enhance
 * @param text the plain text to analyze
 * @param language the language of the text (used for the service call and literals)
 * @param g the metadata graph the annotations are written to
 * @throws EngineException on I/O or SOAP failures while calling the service
 */
private void addMorphoAnalysisEnhancement(ContentItem ci, String text, String language, Graph g) throws EngineException {
    // clerezza language tag attached to all PlainLiterals written below
    final Language contentLang = new Language(language);
    final List<LexicalEntry> lexicalEntries;
    try {
        lexicalEntries = this.client.performMorfologicalAnalysis(text, language);
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
    }
    // get a write lock before writing the enhancements
    ci.getLock().writeLock().lock();
    try {
        final LiteralFactory lf = LiteralFactory.getInstance();
        for (LexicalEntry lexEntry : lexicalEntries) {
            for (CeliMorphoFeatures morphoFeatures : this.convertLexicalEntryToMorphFeatures(lexEntry, language)) {
                // one text annotation per interpretation produced by the analyzer
                IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(lexEntry.getWordForm(), contentLang)));
                // only write offsets and context when the entry carries a valid span
                boolean hasSpan = lexEntry.from >= 0 && lexEntry.to > 0;
                if (hasSpan) {
                    g.add(new TripleImpl(annotation, ENHANCER_START, lf.createTypedLiteral(lexEntry.from)));
                    g.add(new TripleImpl(annotation, ENHANCER_END, lf.createTypedLiteral(lexEntry.to)));
                    g.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, lexEntry.getWordForm(), lexEntry.from), contentLang)));
                }
                g.addAll(morphoFeatures.featuresAsTriples(annotation, contentLang));
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : CeliMorphoFeatures(org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures) IRI(org.apache.clerezza.commons.rdf.IRI) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) SOAPException(javax.xml.soap.SOAPException) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Aggregations

LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)24 IRI (org.apache.clerezza.commons.rdf.IRI)22 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)20 Graph (org.apache.clerezza.commons.rdf.Graph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)10 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)9 Language (org.apache.clerezza.commons.rdf.Language)8 Triple (org.apache.clerezza.commons.rdf.Triple)7 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)7 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)5 Literal (org.apache.clerezza.commons.rdf.Literal)4 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)4 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)4 ArrayList (java.util.ArrayList)3 Date (java.util.Date)3 Map (java.util.Map)3 SOAPException (javax.xml.soap.SOAPException)3 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)3