Use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project Stanbol by Apache.
Class SparqlQueryUtils, method addFieldConstraint.
/**
 * Adds the WHERE clause of the SPARQL query for all {@link Constraint}s of
 * the parsed {@link SparqlFieldQuery}.
 * <p>
 * If the {@link SparqlEndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1
 * subqueries, this also adds LIMIT and OFFSET to an inner SELECT that only
 * selects the id, so paging applies to entities rather than to result rows.
 *
 * @param queryString
 *            the SPARQL query string to append the WHERE clause to
 * @param query
 *            the query holding the field constraints
 * @param selectedFields
 *            mapping from selected field names to the SPARQL variable names
 *            used for them; the mapping for the <code>null</code> key is the
 *            root (id) variable
 * @param endpointType
 *            the type of the endpoint (used to write optimised queries for
 *            endpoint type specific extensions)
 */
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields, SparqlEndpointTypeEnum endpointType) {
    // we need temporary variables with unique names
    // NOTE: varNum is an int[1] so the called helper methods can increment
    // the shared counter (poor-man's pass-by-reference)
    String varPrefix = "tmp";
    int[] varNum = new int[] { 1 };
    // used to open the brackets for the constraint part on the first constraint
    boolean first = true;
    // determine if sub-selects are supported and if we need a sub-select
    // (more than the id is selected)
    boolean subSelectState = isSubSelectState(endpointType, selectedFields);
    log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
    // if we use a sub query to select the ids, we need to add the graph
    // pattern of all selected fields outside of the sub query; keep a copy
    // because the add*GraphPattern helpers consume entries from the map
    Map<String, String> tmpSelectedFields = subSelectState ? new HashMap<String, String>(selectedFields) : null;
    String intend;
    if (subSelectState) {
        // additional intend because of sub query (3*2)
        intend = " ";
    } else {
        // normal intend (2*2)
        intend = " ";
    }
    Iterator<Entry<String, Constraint>> constraintIterator = query.iterator();
    while (constraintIterator.hasNext()) {
        Entry<String, Constraint> fieldConstraint = constraintIterator.next();
        if (first) {
            // open the graph pattern group; for sub-select capable endpoints
            // also open the inner SELECT that only selects the root variable
            queryString.append(" { \n");
            if (subSelectState) {
                String rootVarName = selectedFields.get(null);
                queryString.append(" SELECT ?").append(rootVarName).append(" \n");
                queryString.append(" WHERE { \n");
            }
            first = false;
        }
        String field = fieldConstraint.getKey();
        Constraint constraint = fieldConstraint.getValue();
        log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].", new Object[] { constraint.getType(), field, varPrefix, intend });
        // tracks whether a graph pattern was written for this constraint
        // (unsupported constraint types write nothing and must not get a '.')
        boolean added = true;
        switch(constraint.getType()) {
            case value:
                addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields, varPrefix, varNum, intend);
                break;
            case text:
                String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addTextConstraint(queryString, var, (TextConstraint) constraint, endpointType, intend);
                break;
            case range:
                // NOTE: 'var' is declared in the 'text' case; Java switch
                // cases share one scope, so it is reused here
                var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addRangeConstriant(queryString, var, (RangeConstraint) constraint, intend);
                break;
            default:
                log.warn("Constraint Type '{}' not supported in SPARQL! Constriant {} " + "will be not included in the query!", fieldConstraint.getValue().getType(), fieldConstraint.getValue());
                added = false;
                break;
        }
        if (added) {
            queryString.append(" . \n");
        }
    }
    // rank the graph selected by the query
    if (subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    // 'first' is still true if the query had no constraints at all; in that
    // case no bracket was opened and none must be closed
    if (!first) {
        if (subSelectState) {
            queryString.append(" } \n");
            // re-add all selected fields to be added as selects because in
            // the sub-query we only select the ID!
            selectedFields = tmpSelectedFields;
            // ranking needs also to be added to the sub-query (to correctly
            // process LIMIT and OFFSET)
            addRankingOrder(endpointType, queryString, selectedFields.get(null), " ");
            // add LIMIT and OFFSET to the sub-query!
            // TODO: add link to the email
            queryString.append(" ");
            addLimit(query.getLimit(), queryString);
            queryString.append(" ");
            addOffset(query, queryString);
            queryString.append(" ");
        }
        queryString.append(" } \n");
    }
    // if no subqueries are used we need now to add the ranking constraints
    if (!subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    // append OPTIONAL graph patterns for all remaining selected fields; the
    // null key maps to the root variable and is skipped, so we stop when it
    // is the only entry left
    while (selectedFields.size() > 1) {
        // if this is the only left element
        // we are done
        Iterator<String> it = selectedFields.keySet().iterator();
        // we need to get a non null value from the map
        String actField;
        do {
            // the outer while ensures an non null value so we need not to
            // use hasNext
            actField = it.next();
        } while (actField == null);
        queryString.append(" OPTIONAL { ");
        // NOTE the following method removes the written mapping from the map,
        // which is what makes the outer while loop terminate
        addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum, "");
        queryString.append(". } \n");
    }
}
Use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project Stanbol by Apache.
Class FieldQueryReader, method parseSimilarityConstraint.
/**
 * Parses a {@link SimilarityConstraint} from its JSON representation.
 * <p>
 * A non-empty "context" value is required. The optional "addFields" array
 * may list additional fields; '{prefix}:{localname}' entries are resolved
 * to full URIs via the parsed {@link NamespacePrefixService} and entries
 * that can not be resolved are ignored.
 *
 * @param jConstraint the JSON object holding the constraint definition
 * @param nsPrefixService used to resolve namespace prefixes in "addFields"
 * @return the parsed {@link SimilarityConstraint}
 * @throws JSONException on any error while reading the JSON object
 * @throws IllegalArgumentException if no (non-empty) "context" is defined
 */
private static Constraint parseSimilarityConstraint(JSONObject jConstraint, NamespacePrefixService nsPrefixService) throws JSONException {
    // BUGFIX: JSONObject#optString(String) returns "" (never null) for a
    // missing key, so the former 'context == null' check could never detect
    // a missing "context". Request null as the default and also reject the
    // empty string to actually enforce the documented contract.
    String context = jConstraint.optString("context", null);
    if (context == null || context.isEmpty()) {
        throw new IllegalArgumentException("SimilarityConstraints MUST define a \"context\": \n " + jConstraint.toString(4));
    }
    JSONArray addFields = jConstraint.optJSONArray("addFields");
    final List<String> fields;
    if (addFields != null && addFields.length() > 0) {
        fields = new ArrayList<String>(addFields.length());
        for (int i = 0; i < addFields.length(); i++) {
            String field = addFields.optString(i);
            // resolve prefix:localName to the full URI; unresolvable or
            // empty entries are silently skipped
            field = field != null ? nsPrefixService.getFullName(field) : null;
            if (field != null && !field.isEmpty()) {
                fields.add(field);
            }
        }
    } else {
        fields = null;
    }
    return new SimilarityConstraint(context, fields);
}
Use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project Stanbol by Apache.
Class FieldQueryReader, method parseRangeConstraint.
/**
 * Parses a {@link RangeConstraint} from its JSON representation.
 * <p>
 * Supported fields are "lowerBound" and "upperBound" (at least one of the
 * two is required), the optional boolean "inclusive" (defaults to
 * <code>false</code>) and an optional datatype used to convert the parsed
 * bound values. If a datatype is given but a bound can not be converted to
 * it, an {@link IllegalArgumentException} is thrown.
 *
 * @param jConstraint the JSON object holding the constraint definition
 * @param nsPrefixService used to resolve namespace prefixes of the datatype
 * @return the parsed {@link RangeConstraint}
 * @throws JSONException on any error while reading the JSON object
 */
private static Constraint parseRangeConstraint(JSONObject jConstraint, NamespacePrefixService nsPrefixService) throws JSONException {
    final boolean inclusive;
    if (jConstraint.has("inclusive")) {
        inclusive = jConstraint.getBoolean("inclusive");
    } else {
        log.debug("RangeConstraint does not define the field 'inclusive'. Use false as default!");
        inclusive = false;
    }
    Object upperBound = jConstraint.opt("upperBound");
    Object lowerBound = jConstraint.opt("lowerBound");
    Collection<String> datatypes = parseDatatypeProperty(jConstraint, nsPrefixService);
    if (datatypes != null && !datatypes.isEmpty()) {
        // only a single datatype is supported: use the first one and warn
        // about any additional values
        Iterator<String> dtIterator = datatypes.iterator();
        String datatype = dtIterator.next();
        if (datatypes.size() > 1) {
            log.warn("Multiple datatypes are not supported by RangeConstriants!");
            log.warn(" used: {}", datatype);
            while (dtIterator.hasNext()) {
                log.warn(" ignored: {}", dtIterator.next());
            }
        }
        // collect conversion failures so both bounds can be reported in a
        // single exception
        StringBuilder conversionErrors = null;
        if (upperBound != null) {
            Object converted = converterFactory.convert(upperBound, datatype, valueFactory);
            if (converted != null) {
                // use the converted upper bound
                upperBound = converted;
            } else {
                log.warn("Unable to convert upper bound {} to data type {}", upperBound, datatype);
                conversionErrors = new StringBuilder();
                conversionErrors.append("Unable to convert the parsed upper bound value ").append(upperBound).append(" to data type ").append(datatype);
            }
        }
        if (lowerBound != null) {
            Object converted = converterFactory.convert(lowerBound, datatype, valueFactory);
            if (converted != null) {
                // use the converted lower bound
                lowerBound = converted;
            } else {
                log.warn("Unable to convert lower bound {} to data type {}", lowerBound, datatype);
                if (conversionErrors == null) {
                    conversionErrors = new StringBuilder();
                } else {
                    conversionErrors.append('\n');
                }
                conversionErrors.append("Unable to convert the parsed value ").append(lowerBound).append(" to data type ").append(datatype);
            }
        }
        if (conversionErrors != null) {
            // at least one bound could not be converted -> reject the constraint
            conversionErrors.append("Parsed Constraint: \n");
            conversionErrors.append(jConstraint.toString(4));
            throw new IllegalArgumentException(conversionErrors.toString());
        }
    }
    if (upperBound == null && lowerBound == null) {
        log.warn("Range Constraint does not define an 'upperBound' nor an 'lowerBound'! " + "At least one of the two MUST BE parsed for a valid RangeConstraint.");
        StringBuilder message = new StringBuilder();
        message.append("Range Constraint does not define an 'upperBound' nor an 'lowerBound'!");
        message.append(" At least one of the two MUST BE parsed for a valid RangeConstraint.\n");
        message.append("Parsed Constraint: \n");
        message.append(jConstraint.toString(4));
        throw new IllegalArgumentException(message.toString());
    }
    return new RangeConstraint(lowerBound, upperBound, inclusive);
}
Use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project Stanbol by Apache.
Class FieldMappingUtils, method parseFieldMapping.
/**
 * Parses a {@link FieldMapping} from a single configuration line of the form
 * <code><pre>
 *     fieldPattern &gt; mapping_1 mapping_2 ... mapping_n
 * </pre></code>
 * The line is split by single spaces. The <code>fieldPattern</code> supports
 * {@link PatternType#wildcard} ('*' and '?'). A mapping may carry an optional
 * filter section that starts with <code>" | "</code> and ends at the next
 * space; supported filters are language filters
 * (<code>@=&lt;lang-1&gt;,&lt;lang-2&gt;,...</code>, where an empty entry or
 * <code>null</code> activates the default language) and data type filters
 * (<code>d=&lt;type-1&gt;,&lt;type-2&gt;,...</code>, types given as full URIs
 * or preferably as <code>prefix:localName</code>). If the second element is
 * <code>'&gt;'</code>, all following elements are interpreted as mapping
 * targets for field names matching the pattern. Lines starting with
 * <code>'#'</code> are comments; a leading <code>'!'</code> marks an ignore
 * mapping (filters are not supported there).
 * <p>
 * TODO: Add Support for {@link Constraint}s on the field values.
 *
 * @param mapping the mapping line to parse
 * @param nps optionally a namespace prefix service used to convert
 *            '{prefix}:{localname}' configurations to full URIs
 * @return the parsed {@link FieldMapping} or <code>null</code> if the parsed
 *         String can not be parsed (or is a comment / empty)
 */
public static FieldMapping parseFieldMapping(String mapping, NamespacePrefixService nps) {
    // nothing to parse for null or empty lines
    if (mapping == null || mapping.isEmpty()) {
        return null;
    }
    if (mapping.charAt(0) == '#') {
        // lines starting with '#' are comments
        return null;
    }
    final boolean ignore = mapping.charAt(0) == '!';
    if (ignore) {
        mapping = mapping.substring(1);
    }
    if (mapping.charAt(0) == '|') {
        // re-add the leading space the split(" ") below relies on; the
        // Apache Felix Webconsole likes to call trim and users do like to
        // ignore the (silly) requirement of leading spaces ...
        mapping = ' ' + mapping;
    }
    // TODO: maybe we should not use the spaces here
    final String[] parts = mapping.split(" ");
    List<String> mappedTo = Collections.emptyList();
    final String fieldPattern;
    if (parts[0].isEmpty() || parts[0].equals("*")) {
        // the wildcard / empty pattern is used as-is
        fieldPattern = parts[0];
    } else {
        try {
            fieldPattern = NamespaceMappingUtils.getConfiguredUri(nps, parts[0]);
        } catch (IllegalArgumentException e) {
            log.warn("Unable to parse fieldMapping because of unknown namespace prefix", e);
            return null;
        }
    }
    // scan the remaining elements for the filter ('|') and the mapping
    // target ('>') markers; each marker consumes the element following it
    Constraint filter = null;
    for (int idx = 1; idx < parts.length; idx++) {
        final boolean hasFollowing = parts.length > idx + 1;
        if (hasFollowing && "|".equals(parts[idx])) {
            filter = parseConstraint(parts[idx + 1]);
        }
        if (hasFollowing && ">".equals(parts[idx])) {
            mappedTo = parseMappings(parts, idx + 1, nps);
        }
    }
    if (ignore && filter != null) {
        log.warn("Filters are not supported for '!<fieldPatter>' type field mappings! Filter {} ignored", filter);
        filter = null;
    }
    try {
        return new FieldMapping(fieldPattern, filter, mappedTo.toArray(new String[mappedTo.size()]));
    } catch (RuntimeException e) {
        log.warn(String.format("Unable to parse FieldMapping from Line '%s'", mapping), e);
        return null;
    }
}
Use of org.apache.stanbol.entityhub.servicesapi.query.Constraint in project Stanbol by Apache.
Class NamedEntityTaggingEngine, method computeEntityRecommentations.
/**
 * Computes the entity suggestions (enhancements) for the parsed
 * {@link NamedEntity} by querying the parsed {@link Site} (or the
 * {@link Entityhub} if no site is given) for entities whose label matches
 * the named entity's name, and ranking the results by a combination of the
 * query score and the Levenshtein similarity of the matched label.
 *
 * @param site
 *            the {@link Site} to query or <code>null</code> to use the
 *            {@link Entityhub}
 * @param namedEntity
 *            the named entity to compute suggestions for
 * @param subsumedAnnotations
 *            other text annotations for the same entity (not used by the
 *            lookup itself within this method)
 * @param language
 *            the language of the analysed text or <code>null</code> if not
 *            available; used to restrict label matching
 * @return the suggestions for the parsed {@link NamedEntity}, at most
 *         {@code numSuggestions}, sorted best-first
 * @throws EntityhubException
 *             On any Error while looking up Entities via the Entityhub
 */
protected final List<Suggestion> computeEntityRecommentations(Site site, NamedEntity namedEntity, List<IRI> subsumedAnnotations, String language) throws EntityhubException {
    // First get the required properties for the parsed textAnnotation
    // ... and check the values
    log.debug("Process {}", namedEntity);
    // if site is NULL use the Entityhub
    FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
    log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
    FieldQuery query = queryFactory.createFieldQuery();
    // NOTE(review): original comment claimed spaces are replaced with plus
    // to create an AND search, but no such replacement is visible here —
    // the TextConstraint presumably handles tokenisation; confirm.
    Constraint labelConstraint;
    // TODO: make case sensitivity configurable
    boolean casesensitive = false;
    // normalise the label to lower case for case-insensitive matching
    String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
    if (language != null) {
        // search labels in the language and without language
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
    } else {
        labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
    }
    query.setConstraint(nameField, labelConstraint);
    // add an rdf:type constraint depending on the kind of the named entity
    // and the person/organisation/place configuration of this engine
    if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
        if (personState) {
            if (personType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(personType));
            }
            // else no type constraint
        } else {
            // ignore people
            return Collections.emptyList();
        }
    } else if (DBPEDIA_ORGANISATION.equals(namedEntity.getType())) {
        if (orgState) {
            if (orgType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(orgType));
            }
            // else no type constraint
        } else {
            // ignore organisations
            return Collections.emptyList();
        }
    } else if (OntologicalClasses.DBPEDIA_PLACE.equals(namedEntity.getType())) {
        if (this.placeState) {
            if (this.placeType != null) {
                query.setConstraint(RDF_TYPE.getUnicodeString(), new ReferenceConstraint(placeType));
            }
            // else no type constraint
        } else {
            // ignore places
            return Collections.emptyList();
        }
    }
    // request more results than suggestions so the re-ranking below has
    // some slack to work with
    query.setLimit(Math.max(20, this.numSuggestions * 3));
    log.trace("A query has been created of type [{}] and the following settings:\n{}", query.getClass().toString(), query.toString());
    if (null == site)
        log.trace("A query will be sent to the entity-hub of type [{}].", entityhub.getClass());
    else
        log.trace("A query will be sent to a site [id :: {}][type :: {}].", site.getId(), site.getClass());
    // if site is NULL use the Entityhub, else the referenced site
    QueryResultList<Entity> results = site == null ? entityhub.findEntities(query) : site.findEntities(query);
    log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
    if (results.isEmpty()) {
        // no results nothing to do
        return Collections.emptyList();
    }
    // we need to normalise the confidence values from [0..1]
    // * levenshtein distance as absolute (1.0 for exact match)
    // * Solr scores * levenshtein to rank entities relative to each other
    Float maxScore = null;
    Float maxExactScore = null;
    List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
    // assumes entities are sorted by score, so the first seen score is the
    // maximum used for normalisation
    for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext(); ) {
        Suggestion match = new Suggestion(guesses.next());
        Representation rep = match.getEntity().getRepresentation();
        // NOTE(review): getFirst may return null if the representation has
        // no resultScore; score.doubleValue() below would then NPE — confirm
        // that query results always carry a score.
        Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
        if (maxScore == null) {
            maxScore = score;
        }
        // find the best-matching label (stop early on an exact match)
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && match.getLevenshtein() < 1.0) {
            Text label = labels.next();
            // accept the label if the content language is unknown, if the
            // label has no language, or if the label language matches the
            // content language
            if (language == null || label.getLanguage() == null || (language != null && label.getLanguage().startsWith(language))) {
                double actMatch = levenshtein(casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                if (actMatch > match.getLevenshtein()) {
                    match.setLevenshtein(actMatch);
                    match.setMatchedLabel(label);
                }
            }
        }
        if (match.getMatchedLabel() != null) {
            if (match.getLevenshtein() == 1.0) {
                if (maxExactScore == null) {
                    maxExactScore = score;
                }
                // normalise exact matches against the best exact score
                match.setScore(score.doubleValue() / maxExactScore.doubleValue());
            } else {
                // normalise partial matches against the best match and the
                // Levenshtein similarity with the label
                match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
            }
            matches.add(match);
        } else {
            // entity had no acceptable label -> skip it
            log.debug("No value of {} for Entity {}!", nameField, match.getEntity().getId());
        }
    }
    // now sort the results and cut the list down to numSuggestions
    Collections.sort(matches);
    return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
Aggregations