use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.
the class SparqlQueryUtils method createFullTextQueryString.
/**
 * Creates {@code ("Creates" AND "Text") OR ("Query" AND "String")} like queries based on the parsed
 * texts, as used by {@link SparqlEndpointTypeEnum#LARQ LARQ} and
 * {@link SparqlEndpointTypeEnum#Virtuoso VIRTUOSO} SPARQL endpoints to speed up full text queries.
 * <p>
 * Every text is split on runs of non-word characters (regex {@code \W+}). Only tokens containing at
 * least one letter or digit are used; each of them is written as a quoted term. The terms of one text
 * are combined with {@code AND} (and enclosed in brackets if there is more than one), the groups of the
 * different texts are combined with {@code OR}.
 * <p>
 * Texts that are {@code null}, empty or that do not contain any usable token are skipped completely, so
 * the result never contains a dangling {@code OR} or an empty {@code ()} group.
 *
 * @param constraints
 *            the texts as returned by {@link TextConstraint#getTexts()}
 * @return the full text query string; an empty string if none of the texts contributes a term
 */
protected static String createFullTextQueryString(Collection<String> constraints) {
    StringBuilder textQuery = new StringBuilder();
    for (String constraintText : constraints) {
        if (constraintText == null || constraintText.isEmpty()) {
            continue;
        }
        // Build the AND group of this text separately, so that nothing is written to the
        // query (neither " OR " nor brackets) if the text turns out to contain no usable word.
        StringBuilder group = new StringBuilder();
        int wordCount = 0;
        // TODO: maybe we should use a word tokenizer here
        for (String word : constraintText.split("\\W+")) {
            // split(..) may return empty tokens (leading separator) and tokens that only
            // consist of '_' (a word character that is neither letter nor digit)
            boolean hasAlphaNumeric = false;
            for (int i = 0; i < word.length() && !hasAlphaNumeric; i++) {
                hasAlphaNumeric = Character.isLetterOrDigit(word.charAt(i));
            }
            if (!hasAlphaNumeric) {
                continue;
            }
            if (wordCount > 0) {
                group.append(" AND ");
            }
            // no escaping needed: tokens produced by the "\W+" split only contain [a-zA-Z0-9_]
            group.append('"').append(word).append('"');
            wordCount++;
        }
        if (wordCount == 0) {
            continue;
        }
        if (textQuery.length() > 0) {
            textQuery.append(" OR ");
        }
        if (wordCount > 1) {
            // brackets are only required if several terms are combined with AND
            textQuery.append('(').append(group.toString()).append(')');
        } else {
            textQuery.append(group.toString());
        }
    }
    return textQuery.toString();
}
use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.
the class SparqlQueryUtils method addFieldConstraint.
/**
 * Writes the WHERE clause of the SPARQL query to the parsed builder.
 * <p>
 * For every constraint of the query the matching graph pattern / filter is appended. If the method is in
 * the "sub-select state" (as reported by {@code isSubSelectState(..)}), the constraints are written into
 * an inner {@code SELECT} that only selects the root variable; ranking order, LIMIT and OFFSET are then
 * appended to that inner select. Finally all selected fields that are still contained in the selected
 * fields map are added as {@code OPTIONAL} graph patterns.
 *
 * @param queryString
 *            the SPARQL query string the WHERE clause is appended to
 * @param query
 *            the query providing the constraints, the limit and the offset
 * @param selectedFields
 *            the selected fields (field -> variable name); the root variable is stored under the
 *            {@code null} key. Written field mappings are removed from the map that is processed.
 * @param endpointType
 *            the type of the endpoint (used to write optimised queries for endpoint type specific
 *            extensions)
 */
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields, SparqlEndpointTypeEnum endpointType) {
    // prefix and counter used to create temporary variables with unique names
    final String varPrefix = "tmp";
    final int[] varNum = new int[] { 1 };
    // determine if sub-selects are supported and needed (more than the id is selected)
    final boolean subSelectState = isSubSelectState(endpointType, selectedFields);
    log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
    // with a sub query only the id is selected by the inner select; keep a copy of all
    // selected fields so that their graph patterns can be added outside of the sub query
    final Map<String, String> outerSelectedFields = subSelectState ? new HashMap<String, String>(selectedFields) : null;
    // indentation prefix handed to the constraint writers (one value per mode)
    final String indent = subSelectState ? " " : " ";

    // becomes true as soon as the opening bracket(s) for the constraints are written
    boolean whereOpened = false;
    for (Iterator<Entry<String, Constraint>> constraints = query.iterator(); constraints.hasNext();) {
        Entry<String, Constraint> entry = constraints.next();
        if (!whereOpened) {
            queryString.append(" { \n");
            if (subSelectState) {
                String rootVar = selectedFields.get(null);
                queryString.append(" SELECT ?").append(rootVar).append(" \n");
                queryString.append(" WHERE { \n");
            }
            whereOpened = true;
        }
        String field = entry.getKey();
        Constraint constraint = entry.getValue();
        log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].", new Object[] { constraint.getType(), field, varPrefix, indent });
        // false if nothing was written for this constraint (no statement terminator needed)
        boolean written;
        switch (constraint.getType()) {
            case value:
                addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields, varPrefix, varNum, indent);
                written = true;
                break;
            case text: {
                String textVar = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, indent);
                addTextConstraint(queryString, textVar, (TextConstraint) constraint, endpointType, indent);
                written = true;
                break;
            }
            case range: {
                String rangeVar = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, indent);
                addRangeConstriant(queryString, rangeVar, (RangeConstraint) constraint, indent);
                written = true;
                break;
            }
            default:
                log.warn("Constraint Type '{}' not supported in SPARQL! Constriant {} will be not included in the query!", constraint.getType(), constraint);
                written = false;
                break;
        }
        if (written) {
            queryString.append(" . \n");
        }
    }

    // in the sub-select state the ranking constraints belong into the sub query
    if (subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }

    // the map whose remaining fields are written as OPTIONAL patterns below
    Map<String, String> remainingFields = selectedFields;
    if (whereOpened) {
        if (subSelectState) {
            queryString.append(" } \n");
            // the sub query only selects the id: switch to the copy that still contains all
            // selected fields so that they are added as selects afterwards
            remainingFields = outerSelectedFields;
            // the ranking order is also needed in the sub query to correctly process
            // LIMIT and OFFSET
            addRankingOrder(endpointType, queryString, remainingFields.get(null), " ");
            // LIMIT and OFFSET are added to the sub query
            // TODO: add link to the email
            queryString.append(" ");
            addLimit(query.getLimit(), queryString);
            queryString.append(" ");
            addOffset(query, queryString);
            queryString.append(" ");
        }
        queryString.append(" } \n");
    }

    // without a sub query the ranking constraints are added after the constraints
    if (!subSelectState) {
        addRankingConstraints(endpointType, queryString, remainingFields.get(null));
    }

    // write all remaining selected fields; stop when only a single mapping is left
    while (remainingFields.size() > 1) {
        // size > 1 guarantees that a non-null key exists, so hasNext() need not be checked
        Iterator<String> keys = remainingFields.keySet().iterator();
        String actField = keys.next();
        while (actField == null) {
            actField = keys.next();
        }
        queryString.append(" OPTIONAL { ");
        // NOTE: addFieldGraphPattern removes the written mapping from the map, which is
        // what makes this loop terminate
        addFieldGraphPattern(queryString, actField, remainingFields, varPrefix, varNum, "");
        queryString.append(". } \n");
    }
}
use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.
the class SparqlQueryUtils method main.
/**
 * Manual smoke test: builds a sample field query and prints the SPARQL queries generated for it.
 * <p>
 * The sample query uses a {@link TextConstraint} containing a quote character and a
 * {@link SimilarityConstraint}, with limit and offset set to 5. To try other constraint types, add
 * further {@code query.setConstraint(..)} / {@code query.addSelectedField(..)} calls before the
 * queries are printed.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    // text containing a quote character
    query.setConstraint("urn:field3", new TextConstraint("\"quote", PatternType.none, true, "en", null));
    query.setConstraint("urn:similarity", new SimilarityConstraint(Collections.singleton("This is a test"), DataTypeEnum.Text));
    query.setLimit(5);
    query.setOffset(5);

    // SELECT query for each endpoint type, each followed by an empty line
    SparqlEndpointTypeEnum[] selectEndpoints = new SparqlEndpointTypeEnum[] {
        SparqlEndpointTypeEnum.LARQ, SparqlEndpointTypeEnum.Virtuoso, SparqlEndpointTypeEnum.Standard };
    for (SparqlEndpointTypeEnum endpoint : selectEndpoints) {
        System.out.println(createSparqlSelectQuery(query, true, 0, endpoint));
        System.out.println();
    }
    // finally the CONSTRUCT variant for Virtuoso
    System.out.println(createSparqlConstructQuery(query, 0, SparqlEndpointTypeEnum.Virtuoso));
}
use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.
the class SparqlQueryUtilsTest method testMultiWordTextConstraints.
/**
 * Tests word level matching for {@link TextConstraint}s (STANBOL-1277).
 * <p>
 * Queries for a TextConstraint with {text1} or {text2} in the languages {lang1} or {lang2} are
 * expected to look like:
 *
 * <pre>
 * select ?entity, ?label where {
 *   ?entity rdfs:label ?label
 *   FILTER((regex(str(?label),"\\b{text1}\\b","i") || regex(str(?label),"\\b{text2}\\b","i"))
 *     &amp;&amp; ((lang(?label) = "{lang1}") || (lang(?label) = "{lang2}"))) .
 * }
 * </pre>
 */
@Test
public void testMultiWordTextConstraints() {
    // pattern type NONE: both texts are matched as whole words
    SparqlFieldQuery plainQuery = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    TextConstraint plainConstraint = new TextConstraint(Arrays.asList("Global", "Toy"), PatternType.none, false, "en", null);
    plainQuery.setConstraint("urn:field4", plainConstraint);
    String plainSparql = SparqlQueryUtils.createSparqlSelectQuery(plainQuery, true, 0, SparqlEndpointTypeEnum.Standard);
    String expectedPlainFilter = "regex(str(?tmp1),\"\\\\bGlobal\\\\b\",\"i\") || regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")";
    Assert.assertTrue(plainSparql.contains(expectedPlainFilter));

    // pattern type WILDCARD: the '?' wildcard is expected as '.' in the regex
    SparqlFieldQuery wildcardQuery = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    TextConstraint wildcardConstraint = new TextConstraint(Arrays.asList("Glo?al", "Toy"), PatternType.wildcard, false, "en", null);
    wildcardQuery.setConstraint("urn:field4", wildcardConstraint);
    String wildcardSparql = SparqlQueryUtils.createSparqlSelectQuery(wildcardQuery, true, 0, SparqlEndpointTypeEnum.Standard);
    String expectedWildcardFilter = "regex(str(?tmp1),\"\\\\bGlo.al\\\\b\",\"i\") || regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")";
    Assert.assertTrue(wildcardSparql.contains(expectedWildcardFilter));
}
use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.
the class YardTest method testFindTextWildcards.
/**
 * Tests wildcard text searches: a prefix search using {@code *} on the English texts and a single
 * character wildcard using {@code ?} on the German texts. In both cases the pattern is the text of
 * {@code textValue1} with its last character replaced by the wildcard.
 */
@Test
public void testFindTextWildcards() {
    // init the test data
    FieldQueryTestData data = getFieldQueryTestData();

    // prefix search with '*' in "en": expects r1en and r2en
    FieldQuery prefixQuery = getYard().getQueryFactory().createFieldQuery();
    String prefixText = data.textValue1.getText();
    String prefixPattern = prefixText.substring(0, prefixText.length() - 1) + "*";
    prefixQuery.setConstraint(data.textField, new TextConstraint(prefixPattern, PatternType.wildcard, false, "en"));
    prefixQuery.addSelectedField(data.refField);
    prefixQuery.addSelectedField(data.textField);
    validateQueryResults(prefixQuery, getYard().find(prefixQuery),
        Arrays.asList(data.r1en.getId(), data.r2en.getId()),
        Arrays.asList(data.refField, data.textField));

    // single character wildcard '?' in "de": expects r1de and r2de
    FieldQuery singleCharQuery = getYard().getQueryFactory().createFieldQuery();
    String singleCharText = data.textValue1.getText();
    String singleCharPattern = singleCharText.substring(0, singleCharText.length() - 1) + "?";
    singleCharQuery.setConstraint(data.textField, new TextConstraint(singleCharPattern, PatternType.wildcard, false, "de"));
    singleCharQuery.addSelectedField(data.refField);
    singleCharQuery.addSelectedField(data.textField);
    validateQueryResults(singleCharQuery, getYard().find(singleCharQuery),
        Arrays.asList(data.r1de.getId(), data.r2de.getId()),
        Arrays.asList(data.refField, data.textField));
}
Aggregations