Search in sources :

Example 46 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class GraphMultiplexer method buildPublicKey.

/**
 * Builds the {@link OWLOntologyID} (public key) for a meta graph entry by reading its ontology IRI
 * and, where present, its version IRI from the meta graph.
 * <p>
 * Only the first statement returned for each property is considered. The object of such a statement
 * is accepted either as an {@link IRI} (its unicode string is used) or as a {@link Literal} (its
 * lexical form is used); any other kind of object leaves the corresponding IRI unset.
 * 
 * @param resource
 *            the meta graph subject that represents the ontology; must not be null
 * @return the ID built from the ontology IRI alone when no version IRI could be read, otherwise from
 *         both. If the resource has no ontology IRI statement at all, the result of decoding the
 *         resource's own unicode string via {@code OntologyUtils.decode}.
 */
protected OWLOntologyID buildPublicKey(final IRI resource) {
    // TODO desanitize?
    final Iterator<Triple> ontologyIriStatements = meta.filter(resource, HAS_ONTOLOGY_IRI_URIREF, null);
    if (!ontologyIriStatements.hasNext()) {
        // Anonymous ontology? Decode the resource itself (which is not null)
        return OntologyUtils.decode(resource.getUnicodeString());
    }

    org.semanticweb.owlapi.model.IRI ontologyIri = null;
    final RDFTerm ontologyIriTerm = ontologyIriStatements.next().getObject();
    if (ontologyIriTerm instanceof IRI) {
        ontologyIri = org.semanticweb.owlapi.model.IRI.create(((IRI) ontologyIriTerm).getUnicodeString());
    } else if (ontologyIriTerm instanceof Literal) {
        ontologyIri = org.semanticweb.owlapi.model.IRI.create(((Literal) ontologyIriTerm).getLexicalForm());
    }

    org.semanticweb.owlapi.model.IRI versionIri = null;
    final Iterator<Triple> versionIriStatements = meta.filter(resource, HAS_VERSION_IRI_URIREF, null);
    if (versionIriStatements.hasNext()) {
        final RDFTerm versionIriTerm = versionIriStatements.next().getObject();
        if (versionIriTerm instanceof IRI) {
            versionIri = org.semanticweb.owlapi.model.IRI.create(((IRI) versionIriTerm).getUnicodeString());
        } else if (versionIriTerm instanceof Literal) {
            versionIri = org.semanticweb.owlapi.model.IRI.create(((Literal) versionIriTerm).getLexicalForm());
        }
    }

    return versionIri == null ? new OWLOntologyID(ontologyIri) : new OWLOntologyID(ontologyIri, versionIri);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 47 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class GraphMultiplexer method getSize.

/**
 * Looks up the size recorded in the meta graph (via the {@code SIZE_IN_TRIPLES_URIREF} property)
 * for the ontology identified by the given public key.
 * <p>
 * Only the first statement returned by the meta graph is inspected.
 * 
 * @param publicKey
 *            the public key of the ontology; it is resolved to a meta graph subject through
 *            {@code buildResource}, which rejects null and anonymous keys with an
 *            {@link IllegalArgumentException}
 * @return the integer parsed from the literal's lexical form; {@code -1} if that lexical form is not
 *         a valid integer (a warning is logged); {@code 0} if there is no size statement or its
 *         object is not a {@link Literal}
 */
@Override
public int getSize(OWLOntologyID publicKey) {
    final IRI subject = buildResource(publicKey);
    final Iterator<Triple> sizeStatements = meta.filter(subject, SIZE_IN_TRIPLES_URIREF, null);
    if (!sizeStatements.hasNext()) {
        return 0;
    }
    final RDFTerm object = sizeStatements.next().getObject();
    if (!(object instanceof Literal)) {
        return 0;
    }
    final String lexicalForm = ((Literal) object).getLexicalForm();
    try {
        return Integer.parseInt(lexicalForm);
    } catch (NumberFormatException ex) {
        // parseInt signals every malformed (or null) input with NumberFormatException, so there is
        // no need to catch anything broader and risk hiding unrelated failures.
        log.warn("Not a valid integer value {} for size of {}", lexicalForm, publicKey);
        return -1;
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 48 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class GraphMultiplexer method buildResource.

/**
 * Resolves an {@link OWLOntologyID} to the {@link IRI} used as its identifier in the meta graph.
 * <p>
 * The meta graph is searched for a subject whose ontology IRI equals the one of the public key and
 * whose version IRI either equals the one of the public key or, if the key is unversioned, is
 * absent. If no such entry exists, a new IRI is created from {@code OntologyUtils.encode(publicKey)}.
 * <p>
 * Note that the returned IRI does NOT necessarily correspond to the IRI that identifies the stored
 * graph. In order to obtain that, check the objects of any MAPS_TO_GRAPH assertions.
 * 
 * @param publicKey
 *            the public key to resolve; must be neither null nor anonymous (i.e. it must have an
 *            ontology IRI)
 * @return the matching subject found in the meta graph, or a newly encoded IRI if there is none
 * @throws IllegalArgumentException
 *             if {@code publicKey} is null or has no ontology IRI
 */
protected IRI buildResource(final OWLOntologyID publicKey) {
    if (publicKey == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on a null public key!");
    }
    // The IRI is of the form ontologyIRI[:::versionIRI] (TODO use something less conventional?)
    // XXX should versionIRI also include the version IRI set by owners? Currently not
    // Remember not to sanitize logical identifiers.
    final org.semanticweb.owlapi.model.IRI ontologyIri = publicKey.getOntologyIRI();
    final org.semanticweb.owlapi.model.IRI versionIri = publicKey.getVersionIRI();
    if (ontologyIri == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on an anonymous public key!");
    }
    log.debug("Searching for a meta graph entry for public key:");
    log.debug(" -- {}", publicKey);

    // The meta graph is queried with typed literals built from the logical identifiers.
    final LiteralFactory literalFactory = LiteralFactory.getInstance();
    final Literal ontologyIriLiteral = literalFactory.createTypedLiteral(new IRI(ontologyIri.toString()));
    final Literal versionIriLiteral;
    if (versionIri == null) {
        versionIriLiteral = null;
    } else {
        versionIriLiteral = literalFactory.createTypedLiteral(new IRI(versionIri.toString()));
    }

    IRI found = null;
    final Iterator<Triple> candidates = meta.filter(null, HAS_ONTOLOGY_IRI_URIREF, ontologyIriLiteral);
    // Stop scanning as soon as a candidate has been accepted.
    while (found == null && candidates.hasNext()) {
        final RDFTerm subject = candidates.next().getSubject();
        log.debug(" -- Ontology IRI match found. Scanning");
        log.debug(" -- RDFTerm : {}", subject);
        if (!(subject instanceof IRI)) {
            log.debug(" ---- (uncomparable: skipping...)");
            continue;
        }
        final IRI candidate = (IRI) subject;
        if (versionIriLiteral == null) {
            // Must find unversioned resource
            if (meta.filter(candidate, HAS_VERSION_IRI_URIREF, null).hasNext()) {
                log.debug(" ---- Unexpected version IRI found. Skipping.");
            } else {
                log.debug(" ---- Unversioned match!");
                found = candidate;
            }
        } else if (meta.contains(new TripleImpl(candidate, HAS_VERSION_IRI_URIREF, versionIriLiteral))) {
            // Must find matching versionIRI
            log.debug(" ---- Version IRI match!");
            found = candidate;
        } else {
            // There could be another with the right versionIRI.
            log.debug(" ---- Expected version IRI match not found.");
        }
    }
    log.debug("Matching IRI in graph : {}", found);
    return found != null ? found : new IRI(OntologyUtils.encode(publicKey));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 49 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class TypedLiteralAtom method adapt.

/**
 * Adapts a typed literal rule atom to a {@link ClerezzaSparqlObject} wrapping a
 * {@link LiteralExpression}.
 * <p>
 * The value of the atom is converted to a typed {@link Literal} through the {@link LiteralFactory}:
 * a {@link StringAtom} contributes its {@code toString()} value, a {@link NumberAtom} its number
 * value.
 * 
 * @param ruleAtom
 *            the atom to adapt; it is cast to
 *            {@code org.apache.stanbol.rules.manager.atoms.TypedLiteralAtom} without a prior check
 * @return the adapted object, cast (unchecked) to the type expected by the caller
 * @throws RuleAtomCallExeption
 *             if the value of the atom is neither a {@link StringAtom} nor a {@link NumberAtom}
 */
@SuppressWarnings("unchecked")
@Override
public <T> T adapt(RuleAtom ruleAtom) throws RuleAtomCallExeption {
    final ExpressionAtom atomValue =
            ((org.apache.stanbol.rules.manager.atoms.TypedLiteralAtom) ruleAtom).getValue();

    final Literal typedLiteral;
    if (atomValue instanceof StringAtom) {
        final String lexical = atomValue.toString();
        typedLiteral = LiteralFactory.getInstance().createTypedLiteral(lexical);
    } else if (atomValue instanceof NumberAtom) {
        final Number numeric = ((NumberAtom) atomValue).getNumberValue();
        typedLiteral = LiteralFactory.getInstance().createTypedLiteral(numeric);
    } else {
        throw new org.apache.stanbol.rules.base.api.RuleAtomCallExeption(getClass());
    }

    return (T) new ClerezzaSparqlObject(new LiteralExpression(typedLiteral));
}
Also used : StringAtom(org.apache.stanbol.rules.manager.atoms.StringAtom) LiteralExpression(org.apache.clerezza.rdf.core.sparql.query.LiteralExpression) RuleAtomCallExeption(org.apache.stanbol.rules.base.api.RuleAtomCallExeption) ExpressionAtom(org.apache.stanbol.rules.manager.atoms.ExpressionAtom) NumberAtom(org.apache.stanbol.rules.manager.atoms.NumberAtom) Literal(org.apache.clerezza.commons.rdf.Literal) ClerezzaSparqlObject(org.apache.stanbol.rules.adapters.clerezza.ClerezzaSparqlObject)

Example 50 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class EntityLinker method matchLabels.

/**
 * Matches the labels of the parsed {@link Entity} with the Tokens of
 * the texts (beginning with the currently active 
 * {@link ProcessingState#getToken() token}).<p>
 * The labels are read from the field configured as name field of the linker
 * configuration. Labels are processed in three steps: <ol>
 * <li> labels in the language of the current sentence (or labels without
 * language if the current language is unknown) are matched directly;
 * <li> labels in the main language of the document are matched only if the
 * first step processed no label or did not result in a match;
 * <li> labels in the configured default language are matched only if the
 * previous steps processed no label or did not result in a match.
 * </ol>
 * Labels with the same text are matched only once. If case-insensitive
 * matching is configured, labels that differ only in case are considered
 * the same in all three steps.
 * @param searchTokens the tokens of the text; passed as-is to
 * <code>matchLabel(..)</code> for every label that is matched
 * @param entity The entity including at least the data for the
 * configured name field.
 * @return The result of the matching.
 */
private Suggestion matchLabels(List<TokenData> searchTokens, Entity entity) {
    //language of the current sentence
    String curLang = documentLang;
    //configured default language 
    String defLang = defaultLang;
    //main language of the document (falls back to the current language)
    String mainLang;
    Collection<Literal> mainLangLabels;
    if (documentMainLang != null) {
        mainLang = documentMainLang;
        mainLangLabels = new ArrayList<Literal>();
    } else {
        // mainLang == curLang: every label that could match the main language
        // is already consumed by the current language branch below, so nothing
        // is ever added to this (immutable) collection.
        mainLang = documentLang;
        mainLangLabels = Collections.emptyList();
    }
    boolean caseSensitive = linkerConfig.isCaseSensitiveMatching();
    Iterator<Literal> labels = entity.getText(linkerConfig.getNameField());
    Suggestion match = new Suggestion(entity);
    Collection<Literal> defaultLabels = new ArrayList<Literal>();
    boolean matchedLangLabel = false;
    //avoid matching multiple labels with the exact same lexical.
    Set<String> matchedLabels = new HashSet<String>();
    while (labels.hasNext()) {
        Literal label = labels.next();
        String lang = label.getLanguage() != null ? label.getLanguage().toString() : null;
        if ((lang == null && curLang == null) || (lang != null && curLang != null && lang.equalsIgnoreCase(curLang))) {
            //Set#add returns false for labels with an already matched text
            if (matchedLabels.add(labelDedupKey(label, caseSensitive))) {
                matchLabel(searchTokens, match, label);
            }
            //also an equivalent (already matched) label counts as a label
            //found in the current language
            matchedLangLabel = true;
        } else if ((lang == null && mainLang == null) || (lang != null && mainLang != null && lang.equalsIgnoreCase(mainLang))) {
            mainLangLabels.add(label);
        } else if ((lang == null && defLang == null) || (lang != null && defLang != null && lang.startsWith(defLang))) {
            defaultLabels.add(label);
        }
    }
    //try to match main language labels if
    // * no label was found in the current language or
    // * no MATCH was found in the current language
    if (!matchedLangLabel || match.getMatch() == MATCH.NONE) {
        for (Literal mainLangLabel : mainLangLabels) {
            //use the same key as for current language labels so that
            //duplicates are also detected for case-insensitive matching
            if (matchedLabels.add(labelDedupKey(mainLangLabel, caseSensitive))) {
                matchLabel(searchTokens, match, mainLangLabel);
                matchedLangLabel = true;
            }
        }
    }
    //try to match default language labels if
    // * no label was found in the current/main language or
    // * no MATCH was found in the current/main language
    if (!matchedLangLabel || match.getMatch() == MATCH.NONE) {
        for (Literal defaultLangLabel : defaultLabels) {
            if (matchedLabels.add(labelDedupKey(defaultLangLabel, caseSensitive))) {
                matchLabel(searchTokens, match, defaultLangLabel);
            }
        }
    }
    return match;
}

/**
 * Builds the key used to detect labels with the same text: the lexical form
 * of the label, lower-cased (locale independent) unless case-sensitive
 * matching is active.
 * @param label the label
 * @param caseSensitive if case-sensitive matching is active
 * @return the key for the set of already matched label texts
 */
private static String labelDedupKey(Literal label, boolean caseSensitive) {
    String text = label.getLexicalForm();
    return caseSensitive ? text : text.toLowerCase(Locale.ROOT);
}
Also used : Literal(org.apache.clerezza.commons.rdf.Literal) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Aggregations

Literal (org.apache.clerezza.commons.rdf.Literal)71 IRI (org.apache.clerezza.commons.rdf.IRI)35 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)35 Triple (org.apache.clerezza.commons.rdf.Triple)30 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)22 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)20 ArrayList (java.util.ArrayList)16 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 Language (org.apache.clerezza.commons.rdf.Language)12 Graph (org.apache.clerezza.commons.rdf.Graph)11 Test (org.junit.Test)10 HashSet (java.util.HashSet)9 Date (java.util.Date)8 Lock (java.util.concurrent.locks.Lock)6 Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity)5 HashMap (java.util.HashMap)4 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)4 NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException)4 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)4 Collection (java.util.Collection)3