Search in sources :

Example 11 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method createFullTextQueryString.

/**
 * Creates {@code ("Creates" AND "Text") OR ("Query" AND "String")} like queries based on the parsed
 * TextConstraint as used by {@link SparqlEndpointTypeEnum#LARQ LARQ} and
 * {@link SparqlEndpointTypeEnum#Virtuoso VIRTUOSO} SPARQL endpoints to speed up full text queries.
 * <p>
 * Every non-empty constraint text is split on runs of non-word characters ({@code \W+}). The resulting
 * words that contain at least one letter or digit are quoted and joined with {@code AND}; the groups of
 * the individual constraint texts are joined with {@code OR}. Constraint texts that do not contribute a
 * single usable word (e.g. {@code ":-]"}) are skipped completely, so that neither a dangling
 * {@code OR} nor an empty pair of brackets ends up in the query. Brackets are only written around
 * groups with more than one word.
 * 
 * @param constraints
 *            the texts as returned by {@link TextConstraint#getTexts()}. <code>null</code> and empty
 *            elements are ignored.
 * @return the full text query string; an empty string if no usable word was found
 */
protected static String createFullTextQueryString(Collection<String> constraints) {
    StringBuilder textQuery = new StringBuilder();
    for (String constraintText : constraints) {
        if (constraintText == null || constraintText.isEmpty()) {
            continue;
        }
        // Build the AND group of this constraint in a scratch buffer first: only
        // after tokenizing we know whether (and how many) words are usable, which
        // decides about the " OR " separator and the surrounding brackets.
        StringBuilder andGroup = new StringBuilder();
        int usableWords = 0;
        // TODO: maybe we should use a word tokenizer here
        // NOTE: "\W" is ASCII-only by default, so non-ASCII letters act as separators.
        for (String word : constraintText.split("\\W+")) {
            // tokens can be empty (text starts with a separator) or consist of
            // underscores only - such tokens are not usable for a full text search
            boolean hasAlphaNumeric = false;
            for (int i = 0; i < word.length() && !hasAlphaNumeric; i++) {
                hasAlphaNumeric = Character.isLetterOrDigit(word.charAt(i));
            }
            if (!hasAlphaNumeric) {
                continue;
            }
            if (usableWords > 0) {
                andGroup.append(" AND ");
            }
            // escapes are not needed because the "\W" tokenizer only leaves
            // word characters within the tokens
            andGroup.append('"').append(word).append('"');
            usableWords++;
        }
        if (usableWords == 0) {
            // nothing to search for -> do not write an operator for this text
            continue;
        }
        if (textQuery.length() > 0) {
            textQuery.append(" OR ");
        }
        if (usableWords > 1) {
            textQuery.append('(').append(andGroup).append(')');
        } else {
            textQuery.append(andGroup);
        }
    }
    return textQuery.toString();
}
Also used : ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 12 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method addFieldConstraint.

/**
 * Adds the WHERE clause of the SPARQL query.
 * <p>
 * If the {@link SparqlEndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1 subqueries, then this also adds
 * the LIMIT and OFFSET to an inner SELECT that only selects the id.
 * <p>
 * The method iterates over all field constraints of the parsed query and writes one graph pattern per
 * {@code value}, {@code text} or {@code range} constraint. Constraints of any other type are logged with
 * level WARN and left out of the query. After the constraints, the ranking constraints are added and all
 * selected fields that are still present in the <code>selectedFields</code> map are written as
 * {@code OPTIONAL} graph patterns.
 * 
 * @param queryString
 *            the SPARQL query string to add the WHERE
 * @param query
 *            the query
 * @param selectedFields
 *            the selected fields. The <code>null</code> key is used to look up the name of the root
 *            variable. NOTE(review): entries appear to be removed from this map by
 *            <code>addFieldGraphPattern</code> (see the note in the OPTIONAL loop) - confirm before
 *            reusing the map after this call.
 * @param endpointType
 *            The type of the endpoint (used to write optimised queries for endpoint type specific
 *            extensions
 */
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields, SparqlEndpointTypeEnum endpointType) {
    // we need temporary variables with unique names
    String varPrefix = "tmp";
    // single element array so that called helpers can increase the counter
    int[] varNum = new int[] { 1 };
    // used to open brackets for the select part of the constraints
    // (stays true if the query does not define any constraint)
    boolean first = true;
    // determine if sub-selects are supported and if we need a sub-select
    // (more than the id is selected)
    boolean subSelectState = isSubSelectState(endpointType, selectedFields);
    log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
    // if we use a sub query to select the ids, we need to add the graph
    // pattern of all selected fields outside of the sub query. Because of
    // that a copy of the selected fields is kept and restored after the
    // sub query was written.
    Map<String, String> tmpSelectedFields = subSelectState ? new HashMap<String, String>(selectedFields) : null;
    // indentation used for the graph patterns of the constraints
    String intend;
    if (subSelectState) {
        // additional indent because of sub query (3*2)
        intend = "      ";
    } else {
        // normal indent (2*2)
        intend = "    ";
    }
    Iterator<Entry<String, Constraint>> constraintIterator = query.iterator();
    while (constraintIterator.hasNext()) {
        Entry<String, Constraint> fieldConstraint = constraintIterator.next();
        if (first) {
            // first constraint: open the group (and the sub query if used)
            queryString.append("  { \n");
            if (subSelectState) {
                // the sub query only selects the root variable (the id)
                String rootVarName = selectedFields.get(null);
                queryString.append("    SELECT ?").append(rootVarName).append(" \n");
                queryString.append("    WHERE { \n");
            }
            first = false;
        }
        String field = fieldConstraint.getKey();
        Constraint constraint = fieldConstraint.getValue();
        log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].", new Object[] { constraint.getType(), field, varPrefix, intend });
        // set to false if nothing was written for the current constraint
        boolean added = true;
        switch(constraint.getType()) {
            case value:
                addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields, varPrefix, varNum, intend);
                break;
            case text:
                // 'var' is declared in the scope of the switch and reused by
                // the 'range' case below
                String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addTextConstraint(queryString, var, (TextConstraint) constraint, endpointType, intend);
                break;
            case range:
                var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addRangeConstriant(queryString, var, (RangeConstraint) constraint, intend);
                break;
            default:
                log.warn("Constraint Type '{}' not supported in SPARQL! Constriant {} " + "will be not included in the query!", fieldConstraint.getValue().getType(), fieldConstraint.getValue());
                added = false;
                break;
        }
        if (added) {
            // terminate the pattern written for this constraint
            queryString.append(" . \n");
        }
    }
    // rank the graph selected by the query
    // (with a sub query the ranking constraints are written inside of it)
    if (subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    if (!first) {
        // at least one constraint was processed -> close the opened brackets
        if (subSelectState) {
            queryString.append("    } \n");
            // re-add all selected fields to be added as selects because in
            // the sub-query we only select the ID!
            selectedFields = tmpSelectedFields;
            // ranking needs also to be added to the sub-query (to correctly
            // process LIMIT and OFFSET
            addRankingOrder(endpointType, queryString, selectedFields.get(null), "    ");
            // add LIMIT and OFFSET to the sub-query!
            // TODO: add link to the email
            queryString.append("    ");
            addLimit(query.getLimit(), queryString);
            queryString.append("    ");
            addOffset(query, queryString);
            queryString.append("    ");
        }
        queryString.append("  } \n");
    }
    // if no subqueries are used we need now to add the ranking constraints
    if (!subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    // write all remaining selected fields as OPTIONAL graph patterns. The
    // loop ends as soon as only a single mapping is left - presumably the
    // one of the root variable (null key); verify against the callers
    while (selectedFields.size() > 1) {
        // if this is the only left element
        // we are done
        Iterator<String> it = selectedFields.keySet().iterator();
        // we need to get a non null value from the map
        String actField;
        do {
            // the outer while ensures a non null value so we need not to
            // use hasNext
            actField = it.next();
        } while (actField == null);
        queryString.append("  OPTIONAL { ");
        // NOTE the following Method removes the written mapping from the
        // Map (this is what lets the outer while loop terminate)
        addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum, "");
        queryString.append(". } \n");
    }
}
Also used : Entry(java.util.Map.Entry) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 13 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method main.

/**
 * Manual smoke test that builds a sample {@link SparqlFieldQuery} and prints the generated SPARQL
 * SELECT queries for the LARQ, Virtuoso and Standard endpoint types, followed by the CONSTRUCT query
 * for Virtuoso.
 * 
 * @param args
 *            not used
 */
public static void main(String[] args) {
    SparqlFieldQuery query = SparqlFieldQueryFactory.getInstance().createFieldQuery();

    // --- active constraints ---
    // tests escaping of REGEX
    TextConstraint quotedText = new TextConstraint("\"quote", PatternType.none, true, "en", null);
    query.setConstraint("urn:field3", quotedText);
    SimilarityConstraint similarity = new SimilarityConstraint(Collections.singleton("This is a test"), DataTypeEnum.Text);
    query.setConstraint("urn:similarity", similarity);

    // --- alternative constraints for manual experiments (disabled) ---
    // query.setConstraint("urn:field1", new ReferenceConstraint("urn:testReference"));
    // query.setConstraint("urn:field1", new ReferenceConstraint(
    //     Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3"),MODE.any));
    // query.setConstraint(SpecialFieldEnum.references.getUri(), new ReferenceConstraint(
    //     Arrays.asList("urn:testReference","urn:testReference1","urn:testReference3")));
    // query.setConstraint("urn:field1a", new ValueConstraint(null, Arrays.asList(
    //     DataTypeEnum.Float.getUri())));
    // query.addSelectedField("urn:field1a");
    // query.setConstraint("urn:field1b", new ValueConstraint(9, Arrays.asList(
    //     DataTypeEnum.Float.getUri())));
    // query.setConstraint("urn:field1b", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    //     DataTypeEnum.Float.getUri()),MODE.any));
    // query.setConstraint("urn:field1c", new ValueConstraint(null, Arrays.asList(
    //     DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.addSelectedField("urn:field1c");
    // query.setConstraint("urn:field1d", new ValueConstraint(9, Arrays.asList(
    //     DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field1d", new ValueConstraint(Arrays.asList(9,10,11), Arrays.asList(
    //     DataTypeEnum.Float.getUri(),DataTypeEnum.Double.getUri(),DataTypeEnum.Decimal.getUri())));
    // query.setConstraint("urn:field2", new TextConstraint("test value"));
    // query.setConstraint("urn:field3", new TextConstraint(Arrays.asList(
    //     "text value","anothertest","some more values"),true));
    // query.setConstraint(SpecialFieldEnum.fullText.getUri(), new TextConstraint(Arrays.asList(
    //     "text value","anothertest","some more values"),true));
    // query.setConstraint("urn:field2a", new TextConstraint(":-]"));
    // query.setConstraint("urn:field4", new TextConstraint("multi language text", "en", "de", null));
    // query.setConstraint("urn:field5", new TextConstraint("wildcar*",PatternType.wildcard,false,"en","de"));
    // query.addSelectedField("urn:field5");
    // query.setConstraint("urn:field6", new TextConstraint("^regex",PatternType.REGEX,true));
    // query.setConstraint("urn:field7", new TextConstraint("par*",PatternType.WildCard,false,"en","de",null));
    // query.setConstraint("urn:field8", new TextConstraint(null,"en","de",null));
    // query.setConstraint("urn:field9", new RangeConstraint((int)5, (int)10, true));
    // query.setConstraint("urn:field10", new RangeConstraint((int)5, (int)10, false));
    // query.setConstraint("urn:field11", new RangeConstraint(null, (int)10, true));
    // query.setConstraint("urn:field12", new RangeConstraint((int)5, null, true));
    // query.setConstraint("urn:field12", new RangeConstraint(new Date(), null, true));
    // query.addSelectedField("urn:field2a");
    // query.addSelectedField("urn:field3");

    query.setLimit(5);
    query.setOffset(5);

    // print the SELECT query for every endpoint type, each followed by an empty line
    SparqlEndpointTypeEnum[] selectEndpoints = new SparqlEndpointTypeEnum[] {
        SparqlEndpointTypeEnum.LARQ, SparqlEndpointTypeEnum.Virtuoso, SparqlEndpointTypeEnum.Standard };
    for (SparqlEndpointTypeEnum endpoint : selectEndpoints) {
        System.out.println(createSparqlSelectQuery(query, true, 0, endpoint));
        System.out.println();
    }
    // finally the CONSTRUCT variant (Virtuoso only)
    System.out.println(createSparqlConstructQuery(query, 0, SparqlEndpointTypeEnum.Virtuoso));
}
Also used : SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 14 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtilsTest method testMultiWordTextConstraints.

/**
 * Tests word level matching for {@link TextConstraint}s (STANBOL-1277): every text of the constraint
 * is expected to be wrapped in word boundaries ({@code \b}) within the generated regex filter.
 */
@Test
public void testMultiWordTextConstraints() {
    // Expected shape of a query for a TextConstraint with {text1} or {text2}
    // in the languages {lang1} or {lang2}:
    //
    //    select ?entity, ?label where {
    //        ?entity rdfs:label ?label
    //        FILTER((regex(str(?label),"\\b{text1}\\b","i") || regex(str(?label),"\\b{text2}\\b","i"))
    //            && ((lang(?label) = "{lang1}") || (lang(?label) = "{lang2}"))) .
    //    }

    // scenario 1: pattern type NONE
    TextConstraint plainWords = new TextConstraint(Arrays.asList("Global", "Toy"), PatternType.none, false, "en", null);
    SparqlFieldQuery plainQuery = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    plainQuery.setConstraint("urn:field4", plainWords);
    String plainSparql = SparqlQueryUtils.createSparqlSelectQuery(plainQuery, true, 0, SparqlEndpointTypeEnum.Standard);
    String expectedPlainFilter = "regex(str(?tmp1),\"\\\\bGlobal\\\\b\",\"i\") " + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")";
    Assert.assertTrue(plainSparql.contains(expectedPlainFilter));

    // scenario 2: pattern type WILDCARD ('?' is expected to become '.')
    TextConstraint wildcardWords = new TextConstraint(Arrays.asList("Glo?al", "Toy"), PatternType.wildcard, false, "en", null);
    SparqlFieldQuery wildcardQuery = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    wildcardQuery.setConstraint("urn:field4", wildcardWords);
    String wildcardSparql = SparqlQueryUtils.createSparqlSelectQuery(wildcardQuery, true, 0, SparqlEndpointTypeEnum.Standard);
    String expectedWildcardFilter = "regex(str(?tmp1),\"\\\\bGlo.al\\\\b\",\"i\") " + "|| regex(str(?tmp1),\"\\\\bToy\\\\b\",\"i\")";
    Assert.assertTrue(wildcardSparql.contains(expectedWildcardFilter));
}
Also used : SparqlFieldQuery(org.apache.stanbol.entityhub.query.sparql.SparqlFieldQuery) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) Test(org.junit.Test)

Example 15 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class YardTest method testFindTextWildcards.

/**
 * Tests wildcard text queries: a prefix search using <code>*</code> on the English labels and a
 * single character wildcard using <code>?</code> on the German labels.
 */
@Test
public void testFindTextWildcards() {
    // init the test data
    FieldQueryTestData data = getFieldQueryTestData();

    // scenario 1: prefix search - last character replaced by '*', language "en"
    FieldQuery prefixQuery = getYard().getQueryFactory().createFieldQuery();
    String prefixSource = data.textValue1.getText();
    String prefixPattern = prefixSource.substring(0, prefixSource.length() - 1) + "*";
    TextConstraint prefixConstraint = new TextConstraint(prefixPattern, PatternType.wildcard, false, "en");
    prefixQuery.setConstraint(data.textField, prefixConstraint);
    prefixQuery.addSelectedField(data.refField);
    prefixQuery.addSelectedField(data.textField);
    // expected to select r1en and r2en
    validateQueryResults(prefixQuery, getYard().find(prefixQuery), Arrays.asList(data.r1en.getId(), data.r2en.getId()), Arrays.asList(data.refField, data.textField));

    // scenario 2: single character wildcard - last character replaced by '?', language "de"
    FieldQuery singleCharQuery = getYard().getQueryFactory().createFieldQuery();
    String singleCharSource = data.textValue1.getText();
    String singleCharPattern = singleCharSource.substring(0, singleCharSource.length() - 1) + "?";
    TextConstraint singleCharConstraint = new TextConstraint(singleCharPattern, PatternType.wildcard, false, "de");
    singleCharQuery.setConstraint(data.textField, singleCharConstraint);
    singleCharQuery.addSelectedField(data.refField);
    singleCharQuery.addSelectedField(data.textField);
    // expected to select r1de and r2de
    validateQueryResults(singleCharQuery, getYard().find(singleCharQuery), Arrays.asList(data.r1de.getId(), data.r2de.getId()), Arrays.asList(data.refField, data.textField));
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) Test(org.junit.Test)

Aggregations

TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) 31, FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) 17, ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) 10, Test (org.junit.Test) 10, ArrayList (java.util.ArrayList) 8, SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) 8, Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint) 7, ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) 7, RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) 5, HashSet (java.util.HashSet) 3, DataTypeEnum (org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum) 3, Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity) 3, Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation) 3, Text (org.apache.stanbol.entityhub.servicesapi.model.Text) 3, FieldQueryFactory (org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory) 3, ReconcileValue (org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue) 2, SparqlFieldQuery (org.apache.stanbol.entityhub.query.sparql.SparqlFieldQuery) 2, FieldMapping (org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping) 2, Yard (org.apache.stanbol.entityhub.servicesapi.yard.Yard) 2, YardTest (org.apache.stanbol.entityhub.test.yard.YardTest) 2