Search in sources :

Example 1 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class EntitySearcherUtils method createFieldQuery.

/**
 * Validates the arguments as they are passed to
 * {@link EntitySearcher#lookup(String, Set, List, String...)} and builds the
 * {@link FieldQuery} for them.
 * <p>
 * The search field is always part of the selected fields. The created query
 * is limited to 20 results and carries a single {@link TextConstraint} on
 * the search field.
 *
 * @param factory
 *            the factory used to create the (initially empty) query
 * @param field
 *            the field to search in; MUST NOT be <code>null</code> nor empty
 * @param includeFields
 *            the fields to select in addition to the search field, or
 *            <code>null</code> to select the search field only
 * @param search
 *            the search strings; MUST NOT be <code>null</code> nor empty
 * @param languages
 *            the languages handed over to the {@link TextConstraint}
 * @return the field query
 * @throws IllegalArgumentException
 *             if the field or the list of search strings is
 *             <code>null</code> or empty
 */
public static final FieldQuery createFieldQuery(FieldQueryFactory factory, String field, Set<String> includeFields, List<String> search, String... languages) {
    final boolean fieldMissing = field == null || field.isEmpty();
    if (fieldMissing) {
        throw new IllegalArgumentException("The parsed search field MUST NOT be NULL nor empty");
    }
    final boolean searchMissing = search == null || search.isEmpty();
    if (searchMissing) {
        throw new IllegalArgumentException("The parsed list of search strings MUST NOT be NULL nor empty");
    }
    final FieldQuery fieldQuery = factory.createFieldQuery();
    // the search field goes first, unless the caller already listed it
    // among the fields to include
    if (includeFields == null || !includeFields.contains(field)) {
        fieldQuery.addSelectedField(field);
    }
    if (includeFields != null) {
        for (String included : includeFields) {
            fieldQuery.addSelectedField(included);
        }
    }
    // TODO make the maximum number of results configurable
    fieldQuery.setLimit(20);
    fieldQuery.setConstraint(field, new TextConstraint(search, languages));
    return fieldQuery;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 2 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class AbstractBackend method listSubjects.

/**
 * Lists the subjects that have the parsed object as value of the parsed
 * property.
 * <p>
 * The kind of constraint depends on the object: a reference constraint for
 * URIs, a case sensitive text constraint (restricted to the language of the
 * text, if it has one) for {@link Text} values and a value constraint for
 * everything else. Results are requested in pages of
 * <code>DEFAULT_MAX_SELECT</code>. Further pages are only requested while
 * the last page was full and the number of collected subjects does not
 * exceed <code>DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT</code>.
 *
 * @param property
 *            the property; its string representation is used as field
 * @param object
 *            the value to search for
 * @return the references to the matching subjects; never <code>null</code>
 * @throws IllegalStateException
 *             if one of the queries fails with an {@link EntityhubException}
 */
@Override
public Collection<Object> listSubjects(Object property, Object object) {
    final FieldQuery query = createQuery();
    if (this.isURI(object)) {
        query.setConstraint(property.toString(), new ReferenceConstraint(object.toString()));
    } else if (object instanceof Text) {
        final Text text = (Text) object;
        final String language = text.getLanguage();
        // only restrict the language if the text actually defines one
        final TextConstraint textConstraint = language == null
                ? new TextConstraint(text.getText(), PatternType.none, true)
                : new TextConstraint(text.getText(), PatternType.none, true, language);
        query.setConstraint(property.toString(), textConstraint);
    } else {
        final Set<DataTypeEnum> dataTypes = DataTypeEnum.getPrimaryDataTypes(object.getClass());
        if (dataTypes != null && !dataTypes.isEmpty()) {
            final Collection<String> typeUris = new ArrayList<String>(dataTypes.size());
            for (DataTypeEnum dataType : dataTypes) {
                typeUris.add(dataType.getUri());
            }
            query.setConstraint(property.toString(), new ValueConstraint(object, typeUris));
        } else {
            query.setConstraint(property.toString(), new ValueConstraint(object));
        }
    }
    query.setLimit(Integer.valueOf(DEFAULT_MAX_SELECT));
    QueryResultList<String> page;
    try {
        page = query(query);
    } catch (EntityhubException e) {
        throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
    }
    if (page.isEmpty()) {
        return Collections.emptySet();
    }
    if (page.size() == 1) {
        // a single result is assumed to be a likely case
        return Collections.singleton((Object) getValueFactory().createReference(page.iterator().next()));
    }
    final Collection<Object> subjects = new HashSet<Object>(page.size());
    int offset = 0;
    for (;;) {
        for (String id : page) {
            subjects.add(getValueFactory().createReference(id));
        }
        // stop on a partially filled page or as soon as an other page
        // might exceed the maximum number of results
        if (page.size() < DEFAULT_MAX_SELECT || subjects.size() > DEFAULT_MAX_RESULTS - DEFAULT_MAX_SELECT) {
            break;
        }
        offset += page.size();
        query.setOffset(offset);
        try {
            page = query(query);
        } catch (EntityhubException e) {
            throw new IllegalStateException("Unable to query for resources with value '" + object + "' on property '" + property + "'!", e);
        }
    }
    return subjects;
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) DataTypeEnum(org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Example 3 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method createFullTextQueryString.

/**
 * Creates <code>("Creates" AND "Text") OR ("Query" AND "String")</code> like
 * queries based on the parsed TextConstraint as used by
 * {@link SparqlEndpointTypeEnum#LARQ LARQ} and
 * {@link SparqlEndpointTypeEnum#Virtuoso VIRTUOSO} SPARQL endpoints to speed
 * up full text queries.
 * <p>
 * Every constraint text is split into words. A word is a maximal run of
 * Unicode letters, combining marks, decimal digits and underscores, so that
 * non-ASCII words such as "M&uuml;ller" stay in one piece. Words that contain
 * neither a letter nor a digit are ignored. The words of one text are
 * combined with <code>AND</code> (and put in brackets if there are at least
 * two of them); the texts are combined with <code>OR</code>. Texts that are
 * <code>null</code>, empty or that do not contain any usable word do not
 * contribute to the query at all, so the result never starts or ends with a
 * dangling <code>OR</code>.
 * 
 * @param constraints
 *            the texts as returned by {@link TextConstraint#getTexts()};
 *            MUST NOT be <code>null</code>, but may contain
 *            <code>null</code> and empty elements
 * @return the full text query string; empty if none of the parsed texts
 *         contains a usable word
 */
protected static String createFullTextQueryString(Collection<String> constraints) {
    StringBuilder textQuery = new StringBuilder();
    for (String constraintText : constraints) {
        if (constraintText == null || constraintText.isEmpty()) {
            continue;
        }
        // Build the AND group of this text separately, so that " OR " and
        // the brackets depend on the words that are actually written and
        // not on the raw result of the split.
        StringBuilder andGroup = new StringBuilder();
        int wordCount = 0;
        // NOTE: "\\W" only knows ASCII word characters and would therefore
        // cut words at every non-ASCII letter. The Unicode categories used
        // here accept exactly the same ASCII characters.
        // TODO: maybe we should use a word tokenizer here
        for (String word : constraintText.split("[^\\p{L}\\p{M}\\p{Nd}_]+")) {
            boolean hasAlphaNumeric = false;
            for (int i = 0; i < word.length() && !hasAlphaNumeric;) {
                int codePoint = word.codePointAt(i);
                hasAlphaNumeric = Character.isLetterOrDigit(codePoint);
                i += Character.charCount(codePoint);
            }
            if (!hasAlphaNumeric) {
                // empty leading token or a token consisting of '_' only
                continue;
            }
            if (wordCount > 0) {
                andGroup.append(" AND ");
            }
            // escapes are not needed, because words can only contain
            // letters, marks, digits and underscores
            andGroup.append('"').append(word).append('"');
            wordCount++;
        }
        if (wordCount == 0) {
            // nothing to search for in this text
            continue;
        }
        if (textQuery.length() > 0) {
            textQuery.append(" OR ");
        }
        if (wordCount > 1) {
            textQuery.append('(').append(andGroup).append(')');
        } else {
            textQuery.append(andGroup);
        }
    }
    return textQuery.toString();
}
Also used : ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 4 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method addFieldConstraint.

/**
 * Adds the WHERE clause of the SPARQL query.
 * <p>
 * If the {@link SparqlEndpointTypeEnum SPARQL endpoint} supports SPARQL 1.1 subqueries, then this also adds the
 * LIMIT and OFFSET to an inner SELECT that only selects the id.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>It iterates over the constraints of the query and writes one graph pattern for each
 * <code>value</code>, <code>text</code> and <code>range</code> constraint. Constraints of any other type are
 * logged as a warning and left out of the query. In the sub-select state these patterns are wrapped in an inner
 * <code>SELECT ?root WHERE { ... }</code>, followed by the ranking order, LIMIT and OFFSET.</li>
 * <li>It adds the ranking constraints (inside the sub-select if one is used, otherwise after the constraint
 * block).</li>
 * <li>It writes one <code>OPTIONAL { ... }</code> graph pattern per remaining selected field until only a single
 * mapping is left in the map of selected fields.</li>
 * </ol>
 * NOTE(review): according to the comments in this method the helper <code>addFieldGraphPattern</code> removes
 * the mappings it writes from the parsed map. If that is correct, the map parsed by the caller is modified when
 * no sub-select is used (in the sub-select state a copy taken up front is used for the last step) - to be
 * confirmed against the helper.
 * 
 * @param queryString
 *            the SPARQL query string to add the WHERE
 * @param query
 *            the query providing the constraints as well as LIMIT and OFFSET
 * @param selectedFields
 *            the selected fields. The entry with the <code>null</code> key provides the name of the root
 *            variable
 * @param endpointType
 *            The type of the endpoint (used to write optimised queries for endpoint type specific
 *            extensions)
 */
private static void addFieldConstraint(final StringBuilder queryString, SparqlFieldQuery query, Map<String, String> selectedFields, SparqlEndpointTypeEnum endpointType) {
    // we need temporary variables with unique names; the counter is wrapped
    // in an array so that the helper methods called below can share it
    String varPrefix = "tmp";
    int[] varNum = new int[] { 1 };
    // used to open brackets for the select part of the constraints
    boolean first = true;
    // determine if sub-selects are supported and if we need a sub-select
    // (more than the id is selected)
    boolean subSelectState = isSubSelectState(endpointType, selectedFields);
    log.trace("add field constraints is in a sub-select-state [{}].", (subSelectState ? "yes" : "no"));
    // if we use a sub query to select the ids, we need to add the graph
    // pattern of all selected fields outside of the sub query. Because of
    // that a copy of the selected fields is kept for later use
    Map<String, String> tmpSelectedFields = subSelectState ? new HashMap<String, String>(selectedFields) : null;
    String intend;
    if (subSelectState) {
        // additional indent because of the sub query (3*2)
        intend = "      ";
    } else {
        // normal indent (2*2)
        intend = "    ";
    }
    Iterator<Entry<String, Constraint>> constraintIterator = query.iterator();
    while (constraintIterator.hasNext()) {
        Entry<String, Constraint> fieldConstraint = constraintIterator.next();
        if (first) {
            // open the group (and the sub-select) lazily, so that nothing
            // is written for queries without any constraint
            queryString.append("  { \n");
            if (subSelectState) {
                // the null key holds the name of the root variable
                String rootVarName = selectedFields.get(null);
                queryString.append("    SELECT ?").append(rootVarName).append(" \n");
                queryString.append("    WHERE { \n");
            }
            first = false;
        }
        String field = fieldConstraint.getKey();
        Constraint constraint = fieldConstraint.getValue();
        log.trace("adding a constraint [type :: {}][field :: {}][prefix :: {}][intent :: {}].", new Object[] { constraint.getType(), field, varPrefix, intend });
        // set to false if nothing was written for the constraint, so that
        // no statement terminator gets appended
        boolean added = true;
        switch(constraint.getType()) {
            case value:
                addValueConstraint(queryString, field, (ValueConstraint) constraint, selectedFields, varPrefix, varNum, intend);
                break;
            case text:
                // NOTE: 'var' is declared here but is also assigned and
                // used by the 'range' case below
                String var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addTextConstraint(queryString, var, (TextConstraint) constraint, endpointType, intend);
                break;
            case range:
                var = addFieldGraphPattern(queryString, field, selectedFields, varPrefix, varNum, intend);
                addRangeConstriant(queryString, var, (RangeConstraint) constraint, intend);
                break;
            default:
                log.warn("Constraint Type '{}' not supported in SPARQL! Constriant {} " + "will be not included in the query!", fieldConstraint.getValue().getType(), fieldConstraint.getValue());
                added = false;
                break;
        }
        if (added) {
            // terminate the graph pattern written for this constraint
            queryString.append(" . \n");
        }
    }
    // rank the graph selected by the query (within the sub-select)
    if (subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    if (!first) {
        // at least one constraint was processed, so the brackets opened
        // in the loop above need to be closed
        if (subSelectState) {
            queryString.append("    } \n");
            // re-add all selected fields to be added as selects because in
            // the sub-query we only select the ID!
            selectedFields = tmpSelectedFields;
            // ranking needs also to be added to the sub-query (to correctly
            // process LIMIT and OFFSET)
            addRankingOrder(endpointType, queryString, selectedFields.get(null), "    ");
            // add LIMIT and OFFSET to the sub-query!
            // TODO: add link to the email
            queryString.append("    ");
            addLimit(query.getLimit(), queryString);
            queryString.append("    ");
            addOffset(query, queryString);
            queryString.append("    ");
        }
        queryString.append("  } \n");
    }
    // if no subqueries are used we need now to add the ranking constraints
    if (!subSelectState) {
        addRankingConstraints(endpointType, queryString, selectedFields.get(null));
    }
    // add an OPTIONAL graph pattern for every selected field that is still
    // in the map. The loop ends as soon as only a single mapping (the one
    // for the root variable) is left
    while (selectedFields.size() > 1) {
        Iterator<String> it = selectedFields.keySet().iterator();
        // we need to get a non null key from the map (the null key is
        // the one of the root variable)
        String actField;
        do {
            // the outer while ensures a non null value so we need not to
            // use hasNext
            actField = it.next();
        } while (actField == null);
        queryString.append("  OPTIONAL { ");
        // NOTE the following Method removes the written mapping from the
        // Map. This is what lets the outer while loop terminate
        addFieldGraphPattern(queryString, actField, selectedFields, varPrefix, varNum, "");
        queryString.append(". } \n");
    }
}
Also used : Entry(java.util.Map.Entry) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)

Example 5 with TextConstraint

use of org.apache.stanbol.entityhub.servicesapi.query.TextConstraint in project stanbol by apache.

the class SparqlQueryUtils method main.

/**
 * Manual test driver: builds a small sample query and prints the SPARQL
 * generated for it to {@link System#out}.
 * <p>
 * The SELECT query is printed for the LARQ, Virtuoso and Standard endpoint
 * types (each followed by an empty line) and finally the CONSTRUCT query
 * for Virtuoso.
 * <p>
 * To try other features, register additional constraints
 * (ReferenceConstraint, ValueConstraint, RangeConstraint, further
 * TextConstraints with wildcard/regex patterns or several languages) or
 * selected fields on the query before it is printed.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    final SparqlFieldQuery sample = SparqlFieldQueryFactory.getInstance().createFieldQuery();
    // a text with a quote to test the escaping of REGEX; the null language
    // additionally includes texts without a language
    sample.setConstraint("urn:field3", new TextConstraint("\"quote", PatternType.none, true, "en", null));
    sample.setConstraint("urn:similarity", new SimilarityConstraint(Collections.singleton("This is a test"), DataTypeEnum.Text));
    sample.setLimit(5);
    sample.setOffset(5);
    final SparqlEndpointTypeEnum[] selectTargets = new SparqlEndpointTypeEnum[] {
            SparqlEndpointTypeEnum.LARQ,
            SparqlEndpointTypeEnum.Virtuoso,
            SparqlEndpointTypeEnum.Standard };
    for (SparqlEndpointTypeEnum target : selectTargets) {
        System.out.println(createSparqlSelectQuery(sample, true, 0, target));
        System.out.println();
    }
    System.out.println(createSparqlConstructQuery(sample, 0, SparqlEndpointTypeEnum.Virtuoso));
}
Also used : SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)

Aggregations

TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)31 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)17 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)10 Test (org.junit.Test)10 ArrayList (java.util.ArrayList)8 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)8 Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)7 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)7 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)5 HashSet (java.util.HashSet)3 DataTypeEnum (org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum)3 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)3 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)3 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)3 FieldQueryFactory (org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory)3 ReconcileValue (org.apache.stanbol.entityhub.jersey.grefine.ReconcileValue)2 SparqlFieldQuery (org.apache.stanbol.entityhub.query.sparql.SparqlFieldQuery)2 FieldMapping (org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping)2 Yard (org.apache.stanbol.entityhub.servicesapi.yard.Yard)2 YardTest (org.apache.stanbol.entityhub.test.yard.YardTest)2