Search in sources :

Example 1 with Constraint

use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project stanbol by apache.

the class SparqlQueryUtils method addFieldConstraint.

 * Adds the WHERE clause of the SPARQL query.
 * <p>
 * If the {@link SparqlEndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1 subqueries, then this also adds the
 * LIMIT and OFFSET to an inner SELECT that only selects the id.
 * @param queryString
 *            the SPARQL query string to add the WHERE
 * @param query
 *            the query
 * @param selectedFields
 *            the selected fields
 * @param endpointType
 *            The type of the endpoint (used to write optimised queries for endpoint type specific
 *            extensions)
/**
 * Appends the graph patterns for all field constraints of the parsed query to
 * the query string and, when the sub-select state is active, wraps them in an
 * inner SELECT that only selects the root variable.
 * <p>
 * NOTE(review): this listing is an extracted excerpt. Closing braces, the
 * right-hand sides of two iterator assignments and (presumably) the
 * <code>break</code>/<code>default</code> parts of the switch are not visible,
 * so it does not compile as shown. Confirm against the original source.
 *
 * @param queryString the builder the WHERE part is appended to
 * @param query provides the constraints to iterate over and the limit/offset
 * @param selectedFields selected fields mapped to variable names; the
 *            <code>null</code> key is read to obtain the root variable name
 * @param endpointType passed on to the sub-select check, the text constraint
 *            and the ranking helpers
 */
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields, SparqlEndpointTypeEnum endpointType) {
    // we need temporary variables with unique names
    String varPrefix = "tmp";
    // single-element array so that helper methods can increment the counter
    int[] varNum = new int[] { 1 };
    // used to open brackets for the select part of the constraints
    boolean first = true;
    // determine if sub-selects are supported and if we need a sub-select
    // (more than the id is selected)
    boolean subSelectState = isSubSelectState(endpointType, selectedFields);
    log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
    // if we use a sub query to select the ids, we need to add the graph
    // pattern
    // of all selected fields outside of the sub query
    Map<String, String> tmpSelectedFields = subSelectState ? new HashMap<String, String>(selectedFields) : null;
    // indentation prefix handed to the helper methods ("intend" is the original spelling)
    String intend;
    if (subSelectState) {
        // additional indent because of sub query (3*2)
        intend = "      ";
    } else {
        // normal indent (2*2)
        intend = "    ";
    Iterator<Entry<String, Constraint>> constraintIterator = query.iterator();
    while (constraintIterator.hasNext()) {
        // NOTE(review): the right-hand side is missing in this excerpt —
        // presumably constraintIterator.next(); confirm against the original
        Entry<String, Constraint> fieldConstraint =;
        if (first) {
            // opens the group (and the inner SELECT ... WHERE when in sub-select state)
            queryString.append("  { \n");
            if (subSelectState) {
                String rootVarName = selectedFields.get(null);
                queryString.append("    SELECT ?").append(rootVarName).append(" \n");
                queryString.append("    WHERE { \n");
            first = false;
        String field = fieldConstraint.getKey();
        Constraint constraint = fieldConstraint.getValue();
        log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].", new Object[] { constraint.getType(), field, varPrefix, intend });
        // set to false below when the constraint type is not supported
        boolean added = true;
        switch(constraint.getType()) {
            case value:
                addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields, varPrefix, varNum, intend);
            case text:
                String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addTextConstraint(queryString, var, (TextConstraint) constraint, endpointType, intend);
            case range:
                var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addRangeConstriant(queryString, var, (RangeConstraint) constraint, intend);
                // NOTE(review): presumably the default branch of the switch; neither the
                // label nor any break statements are visible in this excerpt
                log.warn("Constraint Type '{}' not supported in SPARQL! Constriant {} " + "will be not included in the query!", fieldConstraint.getValue().getType(), fieldConstraint.getValue());
                added = false;
        if (added) {
            // terminate the pattern written for this constraint
            queryString.append(" . \n");
    // rank the graph selected by the query
    if (subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    if (!first) {
        // at least one constraint was processed, so the opened group(s) need to be closed
        if (subSelectState) {
            queryString.append("    } \n");
            // re-add all selected fields to be added as selects because in
            // the sub-query we only select the ID!
            selectedFields = tmpSelectedFields;
            // ranking needs also to be added to the sub-query (to correctly
            // process LIMIT and OFFSET
            addRankingOrder(endpointType, queryString, selectedFields.get(null), "    ");
            // add LIMIT and OFFSET to the sub-query!
            // TODO: add link to the email
            queryString.append("    ");
            addLimit(query.getLimit(), queryString);
            queryString.append("    ");
            addOffset(query, queryString);
            queryString.append("    ");
        queryString.append("  } \n");
    // if no subqueries are used we need now to add the ranking constraints
    if (!subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    // root variable
    // NOTE(review): the comment above looks truncated in this excerpt; the loop
    // below writes an OPTIONAL pattern per remaining non-null field key
    while (selectedFields.size() > 1) {
        // if this is the only left element
        // we are done
        Iterator<String> it = selectedFields.keySet().iterator();
        // we need to get a non null value from the map
        String actField;
        do {
            // the outer while ensures an non null value so we need not to
            // use hasNext
            // NOTE(review): the right-hand side is missing in this excerpt —
            // presumably it.next(); confirm against the original
            actField =;
        } while (actField == null);
        queryString.append("  OPTIONAL { ");
        // NOTE the following Method removes the written mapping from the
        // Map
        addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum, "");
        queryString.append(". } \n");
Also used : Entry(java.util.Map.Entry) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 2 with Constraint

use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project stanbol by apache.

the class FieldQueryReader method parseSimilarityConstraint.

/**
 * Builds a {@link SimilarityConstraint} from the "context" value and the
 * optional "addFields" array of the parsed JSON object.
 * <p>
 * NOTE(review): extracted excerpt — closing braces are missing and the body of
 * the innermost <code>if</code> is not visible (presumably it adds the field to
 * <code>fields</code>); confirm against the original source.
 *
 * @param jConstraint the JSON object describing the constraint
 * @param nsPrefixService used to expand each "addFields" entry to its full name
 * @return the constraint created from the context and the (possibly
 *         <code>null</code>) list of additional fields
 * @throws IllegalArgumentException if the "context" value is <code>null</code>
 * @throws JSONException declared by the signature; the visible candidate is
 *         <code>jConstraint.toString(4)</code> — TODO confirm
 */
private static Constraint parseSimilarityConstraint(JSONObject jConstraint, NamespacePrefixService nsPrefixService) throws JSONException {
    // NOTE(review): verify whether optString can actually return null for a
    // missing key; if it returns an empty string the check below never fires
    String context = jConstraint.optString("context");
    if (context == null) {
        throw new IllegalArgumentException("SimilarityConstraints MUST define a \"context\": \n " + jConstraint.toString(4));
    JSONArray addFields = jConstraint.optJSONArray("addFields");
    // stays null when no (or an empty) "addFields" array is present
    final List<String> fields;
    if (addFields != null && addFields.length() > 0) {
        fields = new ArrayList<String>(addFields.length());
        for (int i = 0; i < addFields.length(); i++) {
            String field = addFields.optString(i);
            // expand a possibly prefixed name to the full name
            field = field != null ? nsPrefixService.getFullName(field) : null;
            // NOTE(review): body of this if is missing in the excerpt
            if (field != null && !field.isEmpty()) {
    } else {
        fields = null;
    return new SimilarityConstraint(context, fields);
Also used : SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) JSONArray(org.codehaus.jettison.json.JSONArray) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 3 with Constraint

use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project stanbol by apache.

the class FieldQueryReader method parseRangeConstraint.

 * @param jConstraint
 * @return
 * @throws JSONException
/**
 * Builds a {@link RangeConstraint} from the "inclusive", "upperBound" and
 * "lowerBound" values of the parsed JSON object. If datatypes are defined, the
 * bounds are converted to the first datatype before the constraint is created.
 * <p>
 * NOTE(review): extracted excerpt — closing braces and several expressions
 * (marked below) are missing, so it does not compile as shown.
 *
 * @param jConstraint the JSON object describing the constraint
 * @param nsPrefixService passed on to the datatype parsing
 * @return the range constraint
 * @throws IllegalArgumentException if a bound can not be converted to the
 *         datatype or if neither an upper nor a lower bound is defined
 * @throws JSONException declared by the signature, e.g. for
 *         <code>getBoolean("inclusive")</code> — TODO confirm
 */
private static Constraint parseRangeConstraint(JSONObject jConstraint, NamespacePrefixService nsPrefixService) throws JSONException {
    Constraint constraint;
    boolean inclusive;
    if (jConstraint.has("inclusive")) {
        inclusive = jConstraint.getBoolean("inclusive");
    } else {
        log.debug("RangeConstraint does not define the field 'inclusive'. Use false as default!");
        inclusive = false;
    Object upperBound = jConstraint.opt("upperBound");
    Object lowerBound = jConstraint.opt("lowerBound");
    Collection<String> datatypes = parseDatatypeProperty(jConstraint, nsPrefixService);
    if (datatypes != null && !datatypes.isEmpty()) {
        Iterator<String> it = datatypes.iterator();
        // NOTE(review): right-hand side missing in this excerpt — presumably
        // it.next(), i.e. the first datatype is the one used
        String datatype =;
        if (datatypes.size() > 1) {
            // write warning in case of multiple values
            log.warn("Multiple datatypes are not supported by RangeConstriants!");
            log.warn("  used: {}", datatype);
            while (it.hasNext()) {
                // NOTE(review): second argument missing in this excerpt —
                // presumably it.next()
                log.warn("  ignored: {}",;
        // collects conversion error messages; null as long as no conversion failed
        StringBuilder convertingError = null;
        if (upperBound != null) {
            Object convertedUpperBound = converterFactory.convert(upperBound, datatype, valueFactory);
            if (convertedUpperBound == null) {
                log.warn("Unable to convert upper bound {} to data type {}", upperBound, datatype);
                convertingError = new StringBuilder();
                convertingError.append("Unable to convert the parsed upper bound value ").append(upperBound).append(" to data type ").append(datatype);
            } else {
                // set the converted upper bound
                upperBound = convertedUpperBound;
        if (lowerBound != null) {
            Object convertedLowerBound = converterFactory.convert(lowerBound, datatype, valueFactory);
            if (convertedLowerBound == null) {
                log.warn("Unable to convert lower bound {} to data type {}", lowerBound, datatype);
                if (convertingError == null) {
                    convertingError = new StringBuilder();
                // NOTE(review): the body of this else is not visible — presumably
                // it appends a separator between the two error messages
                } else {
                convertingError.append("Unable to convert the parsed value ").append(lowerBound).append(" to data type ").append(datatype);
            } else {
                // set the converted lower bound
                lowerBound = convertedLowerBound;
        if (convertingError != null) {
            // if there was an error throw an exception
            convertingError.append("Parsed Constraint: \n");
            throw new IllegalArgumentException(convertingError.toString());
    // at least one of the two bounds is required
    if (upperBound == null && lowerBound == null) {
        log.warn("Range Constraint does not define an 'upperBound' nor an 'lowerBound'! " + "At least one of the two MUST BE parsed for a valid RangeConstraint.");
        StringBuilder message = new StringBuilder();
        message.append("Range Constraint does not define an 'upperBound' nor an 'lowerBound'!");
        message.append(" At least one of the two MUST BE parsed for a valid RangeConstraint.\n");
        message.append("Parsed Constraint: \n");
        throw new IllegalArgumentException(message.toString());
    } else {
        constraint = new RangeConstraint(lowerBound, upperBound, inclusive);
    return constraint;
Also used : RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) JSONObject(org.codehaus.jettison.json.JSONObject)

Example 4 with Constraint

use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project stanbol by apache.

the class FieldMappingUtils method parseFieldMapping.

 * Parses fieldMappings from a String formatted as follows
 * <code><pre>
 *    fieldPattern &gt; mapping_1 mapping_2 ... mapping_n
 * </pre></code>
 * Parsing is done as follows:
 * <ul>
 * <li> The elements of the parsed string are split by spaces. Leading and
 *      trailing spaces are ignored.
 * <li> the <code>fieldPattern</code> supports {@link PatternType#wildcard}.
 *      '*' and '?' within this part are interpreted accordingly
 * <li> Each mapping can have an optional Filter. The filter section starts with
 *      <code>" | "</code> and ends with the next space.<br>
 *      Currently two types of Filters are supported.<br>
 *      <b>Language Filter:</b> Syntax:<code>@=&lt;lang-1&gt;,&lt;lang-2&gt;,
 *      ... ,&lt;lang-n&gt;</code>. <br>The default language can be activated by
 *      using an empty String (e.g. <code> "@=en,,de"</code>) or null
 *      (e.g.<code>"@=en,null,de</code>).<br>
 *      <b>Data Type Filter:</b> Syntax:<code>d=&lt;type-1&gt;,&lt;type-2&gt;,
 *      ... ,&lt;type-n&gt;</code>. Types can be specified by the full URI
 *      however the preferred way is to use the prefix and the local name
 *      (e.g. to allow all kinds of floating point values one could use a
 *      filter like <code>"d=xsd:decimal,xsd:float,xsd:double"</code>).
 * <li> If the field should be mapped to one or more other fields, then the
 *      second element of the field MUST BE equal to <code>'&gt;'</code>
 * <li> If the second element equals '&gt;', then all further elements are
 *      interpreted as mapping targets for field names that match the
 *      FieldPattern defined in the first element.
 * </ul>
 * Examples:
 * <ul>
 * <li> To copy all fields define the Mapping<br>
 *      <code><pre>*</pre></code>
 * <li> This pattern copies all fields of the foaf namespace<br>
 *      <code><pre>http://xmlns.com/foaf/0.1/*</pre></code>
 * <li> The following Pattern uses the values of the foaf:name field as
 *      entityhub symbol label<br>
 *      <code><pre> &gt;</pre></code>
 * </ul>
 * Notes:
 * <ul>
 * <li> The combination of patterns for the source field and the definition of
 *      mappings is possible, but typically results in situations where all
 *      field names matched by the defined pattern are copied as values of the
 *      mapped field.
 * </ul>
 * TODO: Add Support for {@link Constraint}s on the field values.
 * @param mapping The mapping
 * @param nps Optionally a namespace prefix service used to convert
 * '{prefix}:{localname}' configurations to full URIs
 * @return the parsed {@link FieldMapping} or <code>null</code> if the parsed
 *    String can not be parsed.
/**
 * Parses a single field mapping line into a {@link FieldMapping}.
 * <p>
 * Returns <code>null</code> for a <code>null</code> or empty line, for a line
 * starting with '#', when the namespace prefix of the field pattern is unknown
 * and when creating the {@link FieldMapping} throws a RuntimeException. A
 * leading '!' is stripped and marks the mapping as "ignore"; a filter parsed
 * for such a mapping is dropped with a warning.
 * <p>
 * NOTE(review): extracted excerpt — closing braces are missing, so it does not
 * compile as shown.
 *
 * @param mapping the mapping line
 * @param nps namespace prefix service passed to the URI and mapping parsing
 * @return the parsed mapping or <code>null</code> (see above)
 */
public static FieldMapping parseFieldMapping(String mapping, NamespacePrefixService nps) {
    if (mapping == null) {
        return null;
    if (mapping.isEmpty()) {
        return null;
    if (mapping.charAt(0) == '#') {
        // comment
        return null;
    // a leading '!' marks an "ignore" mapping; it is removed before parsing
    // NOTE(review): a line consisting only of "!" leaves an empty string, so
    // the charAt(0) below would throw — confirm against the original source
    final boolean ignore = mapping.charAt(0) == '!';
    if (ignore) {
        mapping = mapping.substring(1);
    // needed by the split(" ") used to get the parts.
    if (mapping.charAt(0) == '|') {
        // that's because the Apache Felix Webconsole likes to call trim and
        // users do like to ignore the (silly) requirement of leading spaces ...
        mapping = ' ' + mapping;
    // TODO: maybe we should not use the spaces here
    String[] parts = mapping.split(" ");
    List<String> mappedTo = Collections.emptyList();
    String fieldPattern;
    // an empty first part and the plain "*" wildcard are used as they are;
    // everything else is resolved via the namespace prefix service
    if (!parts[0].isEmpty() && !parts[0].equals("*")) {
        try {
            fieldPattern = NamespaceMappingUtils.getConfiguredUri(nps, parts[0]);
        } catch (IllegalArgumentException e) {
            log.warn("Unable to parse fieldMapping because of unknown namespace prefix", e);
            return null;
    } else {
        fieldPattern = parts[0];
    Constraint filter = null;
    for (int i = 1; i < parts.length; i++) {
        // "|" introduces a filter defined by the following part
        if ("|".equals(parts[i]) && parts.length > i + 1) {
            filter = parseConstraint(parts[i + 1]);
        // ">" introduces the mapping targets defined by the following parts
        if (">".equals(parts[i]) && parts.length > i + 1) {
            mappedTo = parseMappings(parts, i + 1, nps);
    if (ignore && filter != null) {
        log.warn("Filters are not supported for '!<fieldPatter>' type field mappings! Filter {} ignored", filter);
        filter = null;
    try {
        return new FieldMapping(fieldPattern, filter, mappedTo.toArray(new String[mappedTo.size()]));
    } catch (RuntimeException e) {
        log.warn(String.format("Unable to parse FieldMapping from Line '%s'", mapping), e);
        return null;
Also used : Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) FieldMapping(org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)

Example 5 with Constraint

use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project stanbol by apache.

the class NamedEntityTaggingEngine method computeEntityRecommentations.

 * Computes the Enhancements
 * @param site
 *            The {@link SiteException} id or <code>null</code> to use the {@link Entityhub}
 * @param literalFactory
 *            the {@link LiteralFactory} used to create RDF Literals
 * @param contentItemId
 *            the id of the contentItem
 * @param textAnnotation
 *            the text annotation to enhance
 * @param subsumedAnnotations
 *            other text annotations for the same entity
 * @param language
 *            the language of the analysed text or <code>null</code> if not available.
 * @return the suggestions for the parsed {@link NamedEntity}
 * @throws EntityhubException
 *             On any Error while looking up Entities via the Entityhub
/**
 * Looks up entities for the label of the parsed named entity and returns them
 * as suggestions.
 * <p>
 * A text constraint on the name field is built from the lower-cased name; for
 * person, organisation and place types a type constraint is added when one is
 * configured, and an empty list is returned when that type is disabled. The
 * result scores are normalised using the Levenshtein similarity of the labels.
 * <p>
 * NOTE(review): extracted excerpt — closing braces and several expressions
 * (marked below) are missing, so it does not compile as shown.
 *
 * @param site the site to query, or <code>null</code> to query the Entityhub
 * @param namedEntity the named entity whose name and type are used for the query
 * @param subsumedAnnotations not referenced in the visible part of the method
 * @param language the language used for the label constraint and for filtering
 *            labels, or <code>null</code>
 * @return at most <code>numSuggestions</code> suggestions; an empty list if
 *         the entity type is disabled or the query returned no results
 * @throws EntityhubException declared by the signature; presumably thrown by
 *         the <code>findEntities</code> lookups — TODO confirm
 */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    // First get the required properties for the parsed textAnnotation
    // ... and check the values
    log.debug("Process {}", namedEntity);
    // if site is NULL use
    // the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    // replace spaces with plus to create an AND search for all words in the
    // name!
    // NOTE(review): no such replacement is visible in this excerpt
    Constraint labelConstraint;
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    query.setConstraint(nameField, labelConstraint);
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
        // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
        // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
        // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
    // request more results than suggestions are needed (at least 20)
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    // NOTE(review): an "else" between the two trace calls is presumably missing
    // in this excerpt; as shown the second call would dereference a null site
    if (null == site)
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    // NOTE(review): the argument of site.findEntities( is missing in this
    // excerpt — presumably "query);"
    QueryResultList<Entity> results = // if site is NULL
    site == null ? entityhub.findEntities(query) : // use the Entityhub
    site.findEntities(// else the referenced site
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score
    for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext(); ) {
        // NOTE(review): constructor argument missing in this excerpt —
        // presumably guesses.next()
        Suggestion match = new Suggestion(;
        Representation rep = match.getEntity().getRepresentation();
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        // the first score seen is used as the maximum (see the sort assumption above)
        if (maxScore == null) {
            maxScore = score;
        Iterator<Text> labels = rep.getText(nameField);
        // stop as soon as an exact match (similarity 1.0) was found
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            // NOTE(review): right-hand side missing in this excerpt —
            // presumably labels.next()
            Text label =;
            if (// if the content language is unknown ->
            language == null || // accept all labels
            label.getLanguage() == // accept labels with no
            null || // and labels in the same language as the content
            (language != null && label.getLanguage().startsWith(language))) {
                double actMatch = levenshtein(casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                // NOTE(review): body missing in this excerpt — presumably it
                // stores the better similarity and the matched label on "match"
                if (actMatch > match.getLevenshtein()) {
        if (match.getMatchedLabel() != null) {
            if (match.getLevenshtein() == 1.0) {
                if (maxExactScore == null) {
                    maxExactScore = score;
                // normalise exact matches against the best exact score
                match.setScore(score.doubleValue() / maxExactScore.doubleValue());
            } else {
                // normalise partial matches against the best match and the
                // Levenshtein similarity with the label
                match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
        } else {
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
    // now sort the results
    // NOTE(review): neither the sort call nor the statement adding "match" to
    // "matches" is visible in this excerpt
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) FieldQueryFactory(org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)


Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)15 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)15 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)12 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)11 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)11 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)10 JSONArray (org.codehaus.jettison.json.JSONArray)6 JSONObject (org.codehaus.jettison.json.JSONObject)5 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)4 ArrayList (java.util.ArrayList)2 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)2 FieldQueryFactory (org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory)2 MODE (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Entry (java.util.Map.Entry)1 WebApplicationException ( SolrQuery (org.apache.solr.client.solrj.SolrQuery)1 FieldQueryImpl (org.apache.stanbol.entityhub.core.query.FieldQueryImpl)1 LDPathFieldQueryImpl (org.apache.stanbol.entityhub.ldpath.query.LDPathFieldQueryImpl)1