Search in sources :

Example 1 with IndexField

use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.

the class SolrQueryFactory method initTextConstraint.

/**
 * Configures the parsed {@link IndexConstraint} based on the
 * {@link TextConstraint} it wraps.
 * <p>
 * The method sets the boost, the phrase query state (the proximity ranking
 * state of the text constraint or the default if none is defined), converts
 * all texts to index values, sets the index field (path, text data type and
 * languages) and finally registers the field constraint that corresponds to
 * the pattern type. Pattern types other than <code>none</code>,
 * <code>wildcard</code> and <code>regex</code> mark the constraint as invalid.
 *
 * @param indexConstraint the index constraint to initialise. Its constraint
 * is cast to {@link TextConstraint}.
 */
private void initTextConstraint(IndexConstraint indexConstraint) {
    final TextConstraint constraint = (TextConstraint) indexConstraint.getConstraint();
    final ConstraintValue value = new ConstraintValue();
    //boost first
    addBoost(value, constraint);
    //phrase query state: fall back to the default if the constraint defines none
    //TODO: maybe make the default configureable for the SolrYard
    if (constraint.isProximityRanking() == null) {
        value.setProperty(QueryConst.PHRASE_QUERY_STATE, QueryConst.DEFAULT_PHRASE_QUERY_STATE);
    } else {
        value.setProperty(QueryConst.PHRASE_QUERY_STATE, constraint.isProximityRanking());
    }
    //one index value per text
    for (String text : constraint.getTexts()) {
        value.getValues().add(
            indexValueFactory.createIndexValue(valueFactory.createText(text)));
    }
    //the index field covers the DataType, the Languages and the Field
    final IndexField textField = new IndexField(
        indexConstraint.getPath(),
        IndexDataTypeEnum.TXT.getIndexType(),
        constraint.getLanguages());
    indexConstraint.setIndexFieldConstraints(textField);
    //map the pattern type to the constraint type (null == not supported)
    final IndexConstraintTypeEnum constraintType;
    switch (constraint.getPatternType()) {
        case none:
            constraintType = IndexConstraintTypeEnum.EQ;
            break;
        case wildcard:
            constraintType = IndexConstraintTypeEnum.WILDCARD;
            break;
        case regex:
            constraintType = IndexConstraintTypeEnum.REGEX;
            break;
        default:
            constraintType = null;
            break;
    }
    if (constraintType == null) {
        indexConstraint.setInvalid(String.format("PatterType %s not supported for Solr Index Queries!", constraint.getPatternType()));
    } else {
        //add the value for the constraint
        indexConstraint.setFieldConstraint(constraintType, value);
    }
}
Also used : TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 2 with IndexField

use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.

the class SolrQueryFactory method parseFieldQuery.

/**
 * Converts the field query to a SolrQuery. In addition changes the parsed
 * FieldQuery (e.g. removing unsupported features, setting defaults for
 * missing parameters).
 * <p>
 * Processing rules visible in this method:
 * <ul>
 * <li>The first similarity constraint switches the query to the
 * more-like-this request handler and configures it. All further similarity
 * constraints are ignored (a warning is logged) and are NOT written back to
 * the field query.</li>
 * <li>All other constraints are converted to {@link IndexConstraint}s.
 * Invalid ones are logged and dropped, valid ones are encoded and combined
 * as <code>(c1) AND (c2) ...</code>.</li>
 * <li>The constraints of the parsed field query are replaced by the
 * constraints that were actually processed.</li>
 * <li>The encoded constraints are set as filter query if the more-like-this
 * request handler is used, otherwise as main query.</li>
 * </ul>
 *
 * @param fieldQuery the field query (will be modified to reflect the query
 * as executed)
 * @param select the SELECT mode
 * @return the SolrQuery
 */
public SolrQuery parseFieldQuery(FieldQuery fieldQuery, SELECT select) {
    SolrQuery query = initSolrQuery(fieldQuery);
    setSelected(query, fieldQuery, select);
    StringBuilder queryString = new StringBuilder();
    Map<String, Constraint> processedFieldConstraints = new HashMap<String, Constraint>();
    boolean firstConstraint = true;
    boolean similarityConstraintPresent = false;
    for (Entry<String, Constraint> fieldConstraint : fieldQuery) {
        if (fieldConstraint.getValue().getType() == ConstraintType.similarity) {
            // TODO: make the FieldQuery itself ensure that there is no more
            // than one similarity constraint per query (instead of only
            // logging and ignoring additional ones here)
            List<String> fields = new ArrayList<String>();
            fields.add(fieldConstraint.getKey());
            SimilarityConstraint simConstraint = (SimilarityConstraint) fieldConstraint.getValue();
            final IndexValue contextValue = indexValueFactory.createIndexValue(simConstraint.getContext());
            fields.addAll(simConstraint.getAdditionalFields());
            if (!similarityConstraintPresent) {
                //similarity constraint present
                similarityConstraintPresent = true;
                //add the constraint to the query
                query.setRequestHandler(MLT_QUERY_TYPE);
                query.set(MATCH_INCLUDE, false);
                query.set(MIN_DOC_FREQ, 1);
                query.set(MIN_TERM_FREQ, 1);
                query.set(INTERESTING_TERMS, "details");
                //testing
                query.set("mlt.boost", true);
                //the context type is the same for all fields -> resolve it once
                IndexDataTypeEnum mapedIndexTypeEnum = IndexDataTypeEnum.forDataTyoe(simConstraint.getContextType());
                List<String> indexFields = new ArrayList<String>();
                for (String field : fields) {
                    //we need to get the actual fields in the index for the
                    //logical fields parsed with the constraint
                    IndexField indexField = new IndexField(Collections.singletonList(field), mapedIndexTypeEnum == null ? null : mapedIndexTypeEnum.getIndexType(), simConstraint.getLanguages());
                    indexFields.addAll(fieldMapper.getQueryFieldNames(indexField));
                }
                //NOTE: the array MUST be sized by the number of index fields
                //and not by the number of logical fields. A too big array
                //would be padded with null values by toArray(..)
                query.set(SIMILARITY_FIELDS, indexFields.toArray(new String[indexFields.size()]));
                query.set(STREAM_BODY, contextValue.getValue());
                processedFieldConstraints.put(fieldConstraint.getKey(), fieldConstraint.getValue());
            } else {
                //similarity constraint already present -> ignore further
                //NOTE: users are informed about that by NOT including further
                //      similarity constraints in the query included in the
                //      response
                log.warn("The parsed FieldQuery contains multiple Similarity constraints." + "However only a single one can be supported per query. Because of " + "this all further Similarity constraints will be ignored!");
                log.warn("Ignore SimilarityConstraint:");
                log.warn(" > Field      : {}", fieldConstraint.getKey());
                log.warn(" > Context    : {}", simConstraint.getContext());
                log.warn(" > Add Fields : {}", simConstraint.getAdditionalFields());
            }
        } else {
            IndexConstraint indexConstraint = createIndexConstraint(fieldConstraint);
            if (indexConstraint.isInvalid()) {
                //invalid constraints are dropped from the query
                log.warn("Unable to create IndexConstraint for Constraint {} (type: {}) and Field {} (Reosens: {})", new Object[] { fieldConstraint.getValue(), fieldConstraint.getValue().getType(), fieldConstraint.getKey(), indexConstraint.getInvalidMessages() });
            } else {
                //wrap every constraint in brackets and combine them with AND
                if (firstConstraint) {
                    queryString.append('(');
                    firstConstraint = false;
                } else {
                    queryString.append(") AND (");
                }
                indexConstraint.encode(queryString);
                //set the constraint (may be changed because of some unsupported features)
                //if the index constraint does not provide a changed version
                //assume no change and add the parsed one
                Constraint processed = indexConstraint.getFieldQueryConstraint() == null
                        ? fieldConstraint.getValue()
                        : indexConstraint.getFieldQueryConstraint();
                processedFieldConstraints.put(fieldConstraint.getKey(), processed);
            }
        }
    }
    if (!firstConstraint) {
        //close the bracket of the last encoded constraint
        queryString.append(')');
    }
    //set the constraints as processed to the parsed query
    fieldQuery.removeAllConstraints();
    for (Entry<String, Constraint> constraint : processedFieldConstraints.entrySet()) {
        fieldQuery.setConstraint(constraint.getKey(), constraint.getValue());
    }
    if (queryString.length() > 0) {
        String qs = queryString.toString();
        log.debug("QueryString: {}", qs);
        if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
            //for more-like-this requests the constraints are used as filter
            query.set(CommonParams.FQ, qs);
        } else {
            query.setQuery(qs);
        }
    }
    log.debug("Solr Query: {}", query);
    return query;
}
Also used : SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) HashMap(java.util.HashMap) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) SolrQuery(org.apache.solr.client.solrj.SolrQuery) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 3 with IndexField

use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.

the class SolrQueryFactory method initValueConstraint.

/**
 * Configures the parsed {@link IndexConstraint} based on the
 * {@link ValueConstraint} (or {@link ReferenceConstraint}) it wraps.
 * <p>
 * Only a single data type is supported per constraint: the first supported
 * one of the parsed data types is used. If no (supported) data type is
 * parsed, the type is derived from the first value. Constraints without
 * values (<code>null</code> or empty) can not be expressed as Solr query and
 * are marked as invalid.
 *
 * @param indexConstraint the index constraint to initialise. Its constraint
 * is cast to {@link ValueConstraint}.
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
    ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
    if (valueConstraint.getValues() == null) {
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for " + "the parsed datatypes %s is present - can not be supported by a Solr query!", valueConstraint.getDataTypes()));
        return;
    }
    // first process the parsed dataTypes to get the supported types
    List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
    // NOTE: only used to log the parsed data types that are ignored
    List<String> acceptedDataTypes = new ArrayList<String>();
    if (valueConstraint.getDataTypes() != null) {
        for (String dataType : valueConstraint.getDataTypes()) {
            IndexDataTypeEnum indexDataTypeEnumEntry = IndexDataTypeEnum.forUri(dataType);
            if (indexDataTypeEnumEntry != null) {
                indexDataTypes.add(indexDataTypeEnumEntry.getIndexType());
                acceptedDataTypes.add(dataType);
            } else {
                // TODO: Add possibility to add warnings to indexConstraints
                log.warn("A Datatype parsed for a ValueConstraint is not " + "supported and will be ignored (dataTypeUri={})", dataType);
            }
        }
    }
    //we support only a single dataType ...
    //  ... therefore remove additional data types from the ValueConstraint
    IndexDataType indexDataType = null;
    if (!indexDataTypes.isEmpty()) {
        indexDataType = indexDataTypes.get(0);
        if (indexDataTypes.size() > 1) {
            log.warn("Only a single DataType is supported for ValueConstraints!");
            while (acceptedDataTypes.size() > 1) {
                String ignored = acceptedDataTypes.remove(acceptedDataTypes.size() - 1);
                log.warn("  > ignore parsed dataType {}", ignored);
            }
        }
    }
    //else empty we will initialise based on the first parsed value!
    ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
    //init the boost
    addBoost(constraintValue, valueConstraint);
    for (Object value : valueConstraint.getValues()) {
        IndexValue indexValue;
        if (indexDataType == null) {
            // get the dataType based on the type of the value
            try {
                indexValue = indexValueFactory.createIndexValue(value);
            } catch (NoConverterException e) {
                // if not found use the toString() and string as type
                log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
            }
            //initialise the IndexDataType for this query based on the first parsed value
            indexDataType = indexValue.getType();
        } else {
            indexValue = new IndexValue(value.toString(), indexDataType);
        }
        //add the constraint
        constraintValue.getValues().add(indexValue);
    }
    if (constraintValue.getValues().isEmpty()) {
        //No value was parsed: the language lookup for TXT fields and the
        //creation of the updated ValueConstraint below both require at
        //least a single value (the data type may still be null here).
        //Handle this like a missing value instead of failing with a
        //NoSuchElementException/NullPointerException.
        indexConstraint.setInvalid(String.format("ValueConstraint with an empty list of values can not be supported by a Solr query (parsed datatypes: %s)!", valueConstraint.getDataTypes()));
        return;
    }
    IndexField indexField;
    if (IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType)) {
        //NOTE: in case of TEXT we need also to add the language to create a valid
        //query!
        // * We take the language of the first parsed element
        indexField = new IndexField(indexConstraint.getPath(), indexDataType, constraintValue.getValues().iterator().next().getLanguage());
    } else {
        indexField = new IndexField(indexConstraint.getPath(), indexDataType);
    }
    //set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
    indexConstraint.setIndexFieldConstraints(indexField);
    //set the VALUE
    //TODO: We need to somehow pass the MODE so that the encoder knows how
    //      to encode the values
    indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
    //update this constraint!
    if (valueConstraint instanceof ReferenceConstraint) {
        //reference constraints are written back unchanged
        indexConstraint.setFieldQueryConstraint(valueConstraint);
    } else {
        //write back a constraint that only contains the data type actually used
        indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(), Arrays.asList(indexDataType.getId())));
    }
}
Also used : IndexDataType(org.apache.stanbol.entityhub.yard.solr.model.IndexDataType) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 4 with IndexField

use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.

the class SolrFieldMapper method parseIndexField.

/**
 * This method does the dirty work of parsing the different parts of the field in the SolrDocument to the
 * logical field as used by the semantic indexing API. This method assumes the following encoding
 * <code><pre>
 *   .        ... path separator
 *   _        ... special field indicator
 *   __       ... escaped special field
 *   !        ... merger - collected values of other fields.
 *                Such fields do not have an mapping to logical IndexFields.
 *                All mergers are created by copyField configurations within the
 *                Solr Schema configuration
 *   @        ... '@' indicates a field in a given language
 *     _@.&lt;field&gt;: A value for a field with no language defined
 *     _@en.&lt;field&gt;: A value for a field in English
 *     _!@.&lt;field&gt;: Contains all labels regardless of language
 *     _!@en.&lt;field&gt;: Contains all labels of languages that start with "en"
 *   &lt;prefix&gt; ... indicates an dataType that used this prefix
 *     _str.&lt;field&gt;: A string field (containing no language)
 *     _ref.&lt;field&gt;: A reference (similar to xsd:anyURI)
 *     _bool.&lt;field&gt;: A boolean value
 * 
 * NOTE: Prefixes/Suffixes can be used to define a hierarchy of data types
 * e.g. use Prefixes for dataTypes:
 *   _n   ... any kind of numeric value
 *   _ni  ... any kind of integer value (BigInteger)
 *   _nib ... a byte
 *   _nii ... a integer
 *   _nil ... a long
 *   _nd  ... a decimal value
 *   _ndf ... float
 *   _ndd ... double
 *   _s   ... any kind of string value
 *   _si  ... an string based ID
 *   _sr  ... a reference
 * e.g. use Suffixes for semantic meanings
 *   ._ct ... a tag
 *   ._cr ... a category using a reference to an entity ID (xsd:anyURI)
 *   ._ci ... a categorisation using an local id (e.g 2 letter country codes)
 * 
 *  one can now create Solr copyField commands to support searches spanning
 *  over multiple types
 *  _!n  ... search for any kind of numbers
 *  _!ni ... search for any kind of integers
 *  _!s  ... search in all kind of string values
 *  _!sc ... search for all categories of this document
 * 
 * </pre></code>
 * 
 * @param prefix the prefix of the field name or <code>null</code>/empty if
 * none. A prefix starting with the language indicator is interpreted as
 * language (only the indicator represents the default language), any other
 * prefix as data type prefix.
 * @param suffix the suffix of the field name (may be <code>null</code>).
 * Used together with the data type prefix to look up the index data type.
 * @param pathElements the elements of the path as used in the index
 * @return the logical index field or <code>null</code> if no index data type
 * is registered for the prefix/suffix combination
 * @throws IllegalStateException if a path element is a (not escaped)
 * special field or can not be mapped to its full name
 */
private IndexField parseIndexField(final String prefix, final String suffix, final String[] pathElements) {
    // -> note that the prefix might also be used for the data type!
    final boolean hasPrefix = prefix != null && !prefix.isEmpty();
    // a prefix starting with the language indicator is a language prefix
    final boolean isLanguage = hasPrefix && prefix.charAt(0) == SolrConst.LANG_INDICATOR;
    // only the indicator -> it is a language prefix, but for the default language (null)
    final String language = isLanguage && prefix.length() > 1 ? prefix.substring(1) : null;
    // any other (non empty) prefix is used as data type prefix
    final String dataTypePrefix = hasPrefix && !isLanguage ? prefix : null;
    // now parse the indexDataType!
    IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forPrefixSuffix(dataTypePrefix, suffix);
    if (dataTypeEnumEntry == null) {
        log.warn(String.format("No IndexDataType registered for prefix: %s and suffix: %s -> unable to process path %s", dataTypePrefix, suffix, Arrays.toString(pathElements)));
        // we might also throw an exception at this point
        return null;
    }
    // parse the path
    List<String> path = new ArrayList<String>(pathElements.length);
    for (String pathElement : pathElements) {
        // NOTE: check the length before accessing chars so that empty or
        // single char elements end up in the IllegalStateExceptions below
        // instead of causing a StringIndexOutOfBoundsException
        if (!pathElement.isEmpty() && pathElement.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
            if (pathElement.length() > 1 && pathElement.charAt(1) == SolrConst.SPECIAL_FIELD_PREFIX) {
                // escaped special field -> remove the escape char
                path.add(getFullFieldName(pathElement.substring(1)));
            } else {
                throw new IllegalStateException(String.format("Found special field \"%s\" within the path \"%s\" -> Special fields are only allowed as prefix and suffix!", pathElement, Arrays.toString(pathElements)));
            }
        } else {
            // empty elements can not be mapped to a full name
            String fullName = pathElement.isEmpty() ? null : getFullFieldName(pathElement);
            if (fullName == null) {
                throw new IllegalStateException(String.format("Unable to map PathElement %s to it's full Name (path=%s)!", pathElement, Arrays.toString(pathElements)));
            } else {
                path.add(fullName);
            }
        }
    }
    if (isLanguage) {
        return new IndexField(path, dataTypeEnumEntry.getIndexType(), language);
    } else {
        return new IndexField(path, dataTypeEnumEntry.getIndexType());
    }
}
Also used : IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) ArrayList(java.util.ArrayList) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 5 with IndexField

use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.

the class SolrFieldMapper method getField.

/**
 * Getter for the logical {@link IndexField} of the parsed name of a field
 * in the index. Successfully parsed fields are added to the field mappings
 * so that the name needs to be parsed only once.
 *
 * @param fieldName the name of the field in the index
 * @return the logical index field, the score field for the name of the
 * score field or <code>null</code> if the field has no logical index field
 * (document ID field, special fields, fields with an unknown data type)
 * @throws IllegalArgumentException if the parsed field name is
 * <code>null</code> or empty
 */
@Override
public IndexField getField(String fieldName) {
    if (fieldName == null || fieldName.isEmpty()) {
        throw new IllegalArgumentException("The parsed field name MUST NOT be NULL nor empty!");
    }
    IndexField field = fieldMappings.get(fieldName);
    if (field != null) {
        // already parsed
        return field;
    }
    if (getDocumentIdField().equals(fieldName) || fieldName.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
        // the document ID field and special fields do not represent
        // an logical IndexField and should be ignored
        return null;
    } else if (SolrConst.SCORE_FIELD.equals(fieldName)) {
        return scoreField;
    }
    // parse the prefix and suffix
    // NOTE: String.split(..) expects a regex -> quote the separator so that
    // it is always interpreted as literal
    String[] tokens = fieldName.split(java.util.regex.Pattern.quote(Character.toString(SolrConst.PATH_SEPERATOR)));
    int numTokens = tokens.length;
    int pathElements = numTokens;
    String prefix = null;
    String suffix = null;
    if (numTokens >= 2) {
        // two or more tokens -> the first one is the prefix
        prefix = tokens[0];
        pathElements--;
    }
    if (numTokens >= 3) {
        // three or more tokens -> the last one is the suffix
        // (the first char of the suffix is removed)
        suffix = tokens[numTokens - 1].substring(1);
        pathElements--;
    }
    // parse the path (all tokens between the prefix and the suffix)
    String[] path = new String[pathElements];
    System.arraycopy(tokens, prefix == null ? 0 : 1, path, 0, pathElements);
    // process the parsed data
    field = parseIndexField(prefix, suffix, path);
    if (field != null) {
        fieldMappings.put(fieldName, field);
    }
    return field;
}
Also used : IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Aggregations

IndexField (org.apache.stanbol.entityhub.yard.solr.model.IndexField)7 IndexDataTypeEnum (org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum)4 ArrayList (java.util.ArrayList)3 IndexValue (org.apache.stanbol.entityhub.yard.solr.model.IndexValue)3 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)2 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)2 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)2 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)2 IndexDataType (org.apache.stanbol.entityhub.yard.solr.model.IndexDataType)2 HashMap (java.util.HashMap)1 SolrQuery (org.apache.solr.client.solrj.SolrQuery)1 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)1 Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)1 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)1 NoConverterException (org.apache.stanbol.entityhub.yard.solr.model.NoConverterException)1