Search in sources :

Example 1 with IndexDataTypeEnum

use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

the class SolrFieldMapper method getFieldNames.

/**
 * Returns the names of the index fields the parsed logical {@link IndexField} maps to.
 * <p>
 * Mappings already present in {@code indexFieldMappings} are returned as stored;
 * newly computed mappings are added to {@code indexFieldMappings} before they are
 * returned.
 *
 * @param indexField the logical index field; MUST NOT be {@code null}
 * @return the field names for the parsed index field
 * @throws IllegalArgumentException if the parsed index field is {@code null}
 * @throws IllegalStateException if the index field refers to an unsupported special
 *         field or if no {@link IndexDataTypeEnum} entry is found for its data type
 */
@Override
public List<String> getFieldNames(IndexField indexField) throws IllegalArgumentException {
    if (indexField == null) {
        throw new IllegalArgumentException("The parsed IndexField name MUST NOT be NULL!");
    }
    List<String> fieldNames = indexFieldMappings.get(indexField);
    if (fieldNames == null) {
        // check for special field;
        SpecialFieldEnum specialField = indexField.getSpecialField();
        if (specialField != null) {
            switch(specialField) {
                case fullText:
                    fieldNames = Collections.singletonList(getFullTextSearchField());
                    break;
                case references:
                    fieldNames = Collections.singletonList(getReferredDocumentField());
                    break;
                default:
                    // NOTE: closing quote after the URI and the blank before "JIRA" are required
                    // to keep the message readable
                    throw new IllegalStateException("Unsupported Special Field '" + specialField.getUri()
                            + "'! Please report this to the Stanbol Developer Mailing list or create an according "
                            + "JIRA issue at https://issues.apache.org/jira/browse/STANBOL!");
            }
        } else {
            // typically only 1 or 2 values
            fieldNames = new ArrayList<String>(2);
            IndexDataTypeEnum dataTypeConfig = IndexDataTypeEnum.forIndexType(indexField.getDataType());
            if (dataTypeConfig == null) {
                throw new IllegalStateException(String.format("No Config found for the parsed IndexDataType %s", indexField.getDataType()));
            }
            // Four things need to be done
            // 1) Encode the Path
            String pathName = encodePathName(indexField);
            // 2) Encode the DataType
            fieldNames.addAll(encodeDataType(pathName, dataTypeConfig));
            // 3) Encode the Languages
            if (indexField.hasLanguage()) {
                fieldNames.addAll(encodeLanguages(pathName, indexField.getLanguages()));
            }
            // 4) Add the language merger field (only if the data type is a
            //    language type)
            if (dataTypeConfig.isLanguageType()) {
                fieldNames.add(SolrConst.LANG_MERGER_FIELD + pathName);
            }
        }
        // cache the mappings
        indexFieldMappings.put(indexField, fieldNames);
    }
    return fieldNames;
}
Also used : SpecialFieldEnum(org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum)

Example 2 with IndexDataTypeEnum

use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

the class SolrQueryFactory method parseFieldQuery.

/**
 * Converts the field query to a SolrQuery. In addition changes the parsed
 * FieldQuery (e.g. removing unsupported features, setting defaults for
 * missing parameters).
 * <p>
 * Only the first similarity constraint is applied: it switches the request
 * handler to {@code MLT_QUERY_TYPE} and sets the similarity fields and the
 * context. Further similarity constraints are logged and dropped. All other
 * constraints are encoded into the query string and combined with
 * <code>AND</code>; constraints for which only an invalid IndexConstraint
 * can be created are logged and dropped.
 *
 * @param fieldQuery the field query (will be modified to reflect the query
 * as executed)
 * @param select the SELECT mode
 * @return the SolrQuery
 */
public SolrQuery parseFieldQuery(FieldQuery fieldQuery, SELECT select) {
    SolrQuery query = initSolrQuery(fieldQuery);
    setSelected(query, fieldQuery, select);
    StringBuilder queryString = new StringBuilder();
    Map<String, Constraint> processedFieldConstraints = new HashMap<String, Constraint>();
    boolean firstConstraint = true;
    boolean similarityConstraintPresent = false;
    for (Entry<String, Constraint> fieldConstraint : fieldQuery) {
        if (fieldConstraint.getValue().getType() == ConstraintType.similarity) {
            // TODO: make the FieldQuery ensure that there is no more than one
            // similarity constraint per query (instead of only logging it here)
            List<String> fields = new ArrayList<String>();
            fields.add(fieldConstraint.getKey());
            SimilarityConstraint simConstraint = (SimilarityConstraint) fieldConstraint.getValue();
            final IndexValue contextValue = indexValueFactory.createIndexValue(simConstraint.getContext());
            fields.addAll(simConstraint.getAdditionalFields());
            if (!similarityConstraintPresent) {
                // similarity constraint present
                similarityConstraintPresent = true;
                // add the constraint to the query
                query.setRequestHandler(MLT_QUERY_TYPE);
                query.set(MATCH_INCLUDE, false);
                query.set(MIN_DOC_FREQ, 1);
                query.set(MIN_TERM_FREQ, 1);
                query.set(INTERESTING_TERMS, "details");
                // testing
                query.set("mlt.boost", true);
                // the data type depends only on the constraint, not on the field
                IndexDataTypeEnum mapedIndexTypeEnum = IndexDataTypeEnum.forDataTyoe(simConstraint.getContextType());
                List<String> indexFields = new ArrayList<String>();
                for (String field : fields) {
                    // we need to get the actual fields in the index for the
                    // logical fields parsed with the constraint
                    IndexField indexField = new IndexField(Collections.singletonList(field), mapedIndexTypeEnum == null ? null : mapedIndexTypeEnum.getIndexType(), simConstraint.getLanguages());
                    indexFields.addAll(fieldMapper.getQueryFieldNames(indexField));
                }
                // size the array by the list that is converted: an array larger
                // than the list would contain null entries
                query.set(SIMILARITY_FIELDS, indexFields.toArray(new String[indexFields.size()]));
                query.set(STREAM_BODY, contextValue.getValue());
                processedFieldConstraints.put(fieldConstraint.getKey(), fieldConstraint.getValue());
            } else {
                // similarity constraint already present -> ignore further
                // NOTE: users are informed about that by NOT including further
                // similarity constraints in the query included in the
                // response
                log.warn("The parsed FieldQuery contains multiple Similarity constraints." + "However only a single one can be supported per query. Because of " + "this all further Similarity constraints will be ignored!");
                log.warn("Ignore SimilarityConstraint:");
                log.warn(" > Field      : {}", fieldConstraint.getKey());
                log.warn(" > Context    : {}", simConstraint.getContext());
                log.warn(" > Add Fields : {}", simConstraint.getAdditionalFields());
            }
        } else {
            IndexConstraint indexConstraint = createIndexConstraint(fieldConstraint);
            if (indexConstraint.isInvalid()) {
                log.warn("Unable to create IndexConstraint for Constraint {} (type: {}) and Field {} (Reosens: {})", new Object[] { fieldConstraint.getValue(), fieldConstraint.getValue().getType(), fieldConstraint.getKey(), indexConstraint.getInvalidMessages() });
            } else {
                if (firstConstraint) {
                    queryString.append('(');
                    firstConstraint = false;
                } else {
                    queryString.append(") AND (");
                }
                indexConstraint.encode(queryString);
                // set the constraint (may be changed because of some unsupported features):
                // if the index constraint does not provide a changed version assume no
                // change and add the parsed one
                Constraint processed = indexConstraint.getFieldQueryConstraint();
                processedFieldConstraints.put(fieldConstraint.getKey(), processed == null ? fieldConstraint.getValue() : processed);
            }
        }
    }
    if (!firstConstraint) {
        // close the parenthesis opened for the last encoded constraint
        queryString.append(')');
    }
    // set the constraints as processed to the parsed query
    fieldQuery.removeAllConstraints();
    for (Entry<String, Constraint> constraint : processedFieldConstraints.entrySet()) {
        fieldQuery.setConstraint(constraint.getKey(), constraint.getValue());
    }
    if (queryString.length() > 0) {
        String qs = queryString.toString();
        log.debug("QueryString: {}", qs);
        // for MLT_QUERY_TYPE requests the constraints are set as filter query
        if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
            query.set(CommonParams.FQ, qs);
        } else {
            query.setQuery(qs);
        }
    }
    log.debug("Solr Query: {}", query);
    return query;
}
Also used : SimilarityConstraint(org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) RangeConstraint(org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) TextConstraint(org.apache.stanbol.entityhub.servicesapi.query.TextConstraint) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) SolrQuery(org.apache.solr.client.solrj.SolrQuery) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 3 with IndexDataTypeEnum

use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

the class SolrQueryFactory method initValueConstraint.

/**
 * Initialises the parsed {@link IndexConstraint} based on the
 * {@link ValueConstraint} it holds.
 * <p>
 * A constraint without values is marked as invalid. Otherwise the first
 * supported data type of the constraint is used for all values; if none is
 * given, the data type is derived from the first value. Finally the field,
 * the {@code EQ} constraint and the (possibly adapted) field query
 * constraint are set on the index constraint.
 *
 * @param indexConstraint the index constraint holding the ValueConstraint
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
    final ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
    if (valueConstraint.getValues() == null) {
        // nothing to compare against -> can not be expressed
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for " + "the parsed datatypes %s is present - can not be supported by a Solr query!", valueConstraint.getDataTypes()));
        return;
    }
    // determine the supported types among the parsed data types
    final List<IndexDataType> supportedTypes = new ArrayList<IndexDataType>();
    final List<String> acceptedUris = new ArrayList<String>();
    if (valueConstraint.getDataTypes() != null) {
        for (String dataTypeUri : valueConstraint.getDataTypes()) {
            final IndexDataTypeEnum entry = IndexDataTypeEnum.forUri(dataTypeUri);
            if (entry == null) {
                // TODO: Add possibility to add warnings to indexConstraints
                log.warn("A Datatype parsed for a ValueConstraint is not " + "supported and will be ignored (dataTypeUri={})", dataTypeUri);
                continue;
            }
            supportedTypes.add(entry.getIndexType());
            acceptedUris.add(dataTypeUri);
        }
    }
    // only a single data type is supported: keep the first one and report
    // all additional ones (last to first) as ignored.
    // If no type is available it is initialised based on the first value.
    IndexDataType indexDataType = supportedTypes.isEmpty() ? null : supportedTypes.get(0);
    if (supportedTypes.size() > 1) {
        log.warn("Only a single DataType is supported for ValueConstraints!");
        for (int last = acceptedUris.size() - 1; last >= 1; last--) {
            String ignored = acceptedUris.remove(last);
            log.warn("  > ignore parsed dataType {}", ignored);
        }
    }
    final ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
    // init the boost
    addBoost(constraintValue, valueConstraint);
    for (Object value : valueConstraint.getValues()) {
        IndexValue indexValue;
        if (indexDataType != null) {
            indexValue = new IndexValue(value.toString(), indexDataType);
        } else {
            // no data type known so far: derive it from the type of the value
            try {
                indexValue = indexValueFactory.createIndexValue(value);
            } catch (NoConverterException e) {
                // no converter: fall back to toString() with the string type
                log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
            }
            // the first value defines the IndexDataType used for this query
            indexDataType = indexValue.getType();
        }
        constraintValue.getValues().add(indexValue);
    }
    // NOTE: in case of TEXT the language is needed to create a valid query.
    // The language of the first element is used.
    final boolean isText = IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType);
    final IndexField indexField = isText
            ? new IndexField(indexConstraint.getPath(), indexDataType, constraintValue.getValues().iterator().next().getLanguage())
            : new IndexField(indexConstraint.getPath(), indexDataType);
    // set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
    indexConstraint.setIndexFieldConstraints(indexField);
    // set the VALUE
    // TODO: We need to somehow pass the MODE so that the encoder knows how
    // to encode the values
    indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
    // update the constraint of the field query: ReferenceConstraints are kept
    // as parsed, other ValueConstraints are replaced by one using the single
    // data type that was actually used
    if (!(valueConstraint instanceof ReferenceConstraint)) {
        indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(), Arrays.asList(indexDataType.getId())));
    } else {
        indexConstraint.setFieldQueryConstraint(valueConstraint);
    }
}
Also used : IndexDataType(org.apache.stanbol.entityhub.yard.solr.model.IndexDataType) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 4 with IndexDataTypeEnum

use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

the class SolrFieldMapper method parseIndexField.

/**
 * This method does the dirty work of parsing the different parts of the field in the SolrDocument to the
 * logical field as used by the semantic indexing API. This method assumes the following encoding
 * <code><pre>
 *   .        ... path separator
 *   _        ... special field indicator
 *   __       ... escaped special field
 *   !        ... merger - collected values of other fields.
 *                Such fields do not have an mapping to logical IndexFields.
 *                All mergers are created by copyField configurations within the
 *                Solr Schema configuration
 *   {@literal @}        ... '@' indicates a field in a given language
 *     _@.&lt;field&gt;: A value for a field with no language defined
 *     _@en.&lt;field&gt;: A value for a field in English
 *     _!@.&lt;field&gt;: Contains all labels regardless of language
 *     _!@en.&lt;field&gt;: Contains all labels of languages that start with "en"
 *   &lt;prefix&gt; ... indicates an dataType that used this prefix
 *     _str.&lt;field&gt;: A string field (containing no language)
 *     _ref.&lt;field&gt;: A reference (similar to xsd:anyURI)
 *     _bool.&lt;field&gt;: A boolean value
 *
 * NOTE: Prefixes/Suffixes can be used to define a hierarchy of data types
 * e.g. use Prefixes for dataTypes:
 *   _n   ... any kind of numeric value
 *   _ni  ... any kind of integer value (BigInteger)
 *   _nib ... a byte
 *   _nii ... a integer
 *   _nil ... a long
 *   _nd  ... a decimal value
 *   _ndf ... float
 *   _ndd ... double
 *   _s   ... any kind of string value
 *   _si  ... an string based ID
 *   _sr  ... a reference
 * e.g. use Suffixes for semantic meanings
 *   ._ct ... a tag
 *   ._cr ... a category using a reference to an entity ID (xsd:anyURI)
 *   ._ci ... a categorisation using an local id (e.g 2 letter country codes)
 *
 *  one can now create Solr copyField commands to support searches spanning
 *  over multiple types
 *  _!n  ... search for any kind of numbers
 *  _!ni ... search for any kind of integers
 *  _!s  ... search in all kind of string values
 *  _!sc ... search for all categories of this document
 *
 * </pre></code>
 *
 * @param prefix the prefix of the field name. A prefix starting with the language indicator
 *            is interpreted as language, any other non-empty prefix as data type prefix.
 *            May be <code>null</code> or empty
 * @param suffix the suffix of the field name, used together with the data type prefix to
 *            look up the data type
 * @param pathElements the elements of the path
 * @return the index field or <code>null</code> if no data type is registered for the
 *         prefix and suffix
 * @throws IllegalStateException if a path element is a special field or can not be mapped
 *             to its full name
 */
private IndexField parseIndexField(final String prefix, final String suffix, final String[] pathElements) {
    // -> note that the prefix might also be used for the data type!
    final boolean hasPrefix = prefix != null && !prefix.isEmpty();
    final boolean isLanguage = hasPrefix && prefix.charAt(0) == SolrConst.LANG_INDICATOR;
    final String language;
    final String dataTypePrefix;
    if (isLanguage) {
        // it is a language prefix -> no data type prefix.
        // A prefix consisting only of the indicator refers to the default language
        dataTypePrefix = null;
        language = prefix.length() > 1 ? prefix.substring(1) : null;
    } else {
        // no language prefix: a non-empty prefix is the data type prefix
        language = null;
        dataTypePrefix = hasPrefix ? prefix : null;
    }
    // now parse the indexDataType!
    IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forPrefixSuffix(dataTypePrefix, suffix);
    if (dataTypeEnumEntry == null) {
        log.warn(String.format("No IndexDataType registered for prefix: %s and suffix: %s -> unable to process path %s", dataTypePrefix, suffix, Arrays.toString(pathElements)));
        // we might also throw an exception at this point
        return null;
    }
    // parse the path
    List<String> path = new ArrayList<String>(pathElements.length);
    for (String pathElement : pathElements) {
        if (pathElement.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
            // The length check ensures that an element consisting only of the
            // special field prefix is reported by the IllegalStateException
            // below instead of failing with a StringIndexOutOfBoundsException
            if (pathElement.length() > 1 && pathElement.charAt(1) == SolrConst.SPECIAL_FIELD_PREFIX) {
                // escaped special field prefix -> remove the escaping
                path.add(getFullFieldName(pathElement.substring(1)));
            } else {
                throw new IllegalStateException(String.format("Found special field \"%s\" within the path \"%s\" -> Special fields are only allowed as prefix and suffix!", pathElement, Arrays.toString(pathElements)));
            }
        } else {
            String fullName = getFullFieldName(pathElement);
            if (fullName == null) {
                throw new IllegalStateException(String.format("Unable to map PathElement %s to it's full Name (path=%s)!", pathElement, Arrays.toString(pathElements)));
            } else {
                path.add(fullName);
            }
        }
    }
    if (isLanguage) {
        return new IndexField(path, dataTypeEnumEntry.getIndexType(), language);
    } else {
        return new IndexField(path, dataTypeEnumEntry.getIndexType());
    }
}
Also used : IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) ArrayList(java.util.ArrayList) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 5 with IndexDataTypeEnum

use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

the class SolrYard method createRepresentation.

/**
 * Builds the {@link Representation} for the parsed SolrDocument.
 * <p>
 * Only document fields that map to an index field with a path of exactly
 * one element are converted. Values that are <code>null</code>, that have
 * no data type configuration or that are converted to <code>null</code>
 * are skipped (the latter two cases are logged).
 *
 * @param doc
 *            The Solr Document to convert
 * @param fields
 *            if NOT NULL only this fields are added to the Representation
 * @return the Representation
 * @throws IllegalArgumentException
 *             if the field mapper or the parsed document is <code>null</code>
 * @throws IllegalStateException
 *             if the document has no value for the document id field
 */
protected final Representation createRepresentation(SolrDocument doc, Set<String> fields) {
    if (fieldMapper == null) {
        throw new IllegalArgumentException("The parsed FieldMapper MUST NOT be NULL!");
    }
    if (doc == null) {
        throw new IllegalArgumentException("The parsed SolrDocument MUST NOT be NULL!");
    }
    final Object id = doc.getFirstValue(fieldMapper.getDocumentIdField());
    if (id == null) {
        throw new IllegalStateException(String.format("The parsed Solr Document does not contain a value for the %s Field!", fieldMapper.getDocumentIdField()));
    }
    final Representation rep = getValueFactory().createRepresentation(id.toString());
    for (String fieldName : doc.getFieldNames()) {
        final IndexField indexField = fieldMapper.getField(fieldName);
        if (indexField == null) {
            // no logical field for this document field -> ignore
            continue;
        }
        if (indexField.getPath().size() != 1) {
            log.warn(String.format("Unable to prozess Index Field %s (for IndexDocument Field: %s)", indexField, fieldName));
            continue;
        }
        // use the first language of the field (if any)
        final String lang;
        if (indexField.getLanguages().isEmpty()) {
            lang = null;
        } else {
            lang = indexField.getLanguages().iterator().next();
        }
        if (fields != null && !fields.contains(indexField.getPath().get(0))) {
            // field is not selected
            continue;
        }
        for (Object value : doc.getFieldValues(fieldName)) {
            if (value == null) {
                // index value == null -> ignore
                continue;
            }
            final IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forIndexType(indexField.getDataType());
            if (dataTypeEnumEntry == null) {
                log.warn(String.format("No DataType Configuration found for Index Data Type %s!", indexField.getDataType()));
                continue;
            }
            final Object javaValue = indexValueFactory.createValue(dataTypeEnumEntry.getJavaType(), indexField.getDataType(), value, lang);
            if (javaValue == null) {
                log.warn(String.format("java value=null for index value %s", value));
            } else {
                rep.add(indexField.getPath().iterator().next(), javaValue);
            }
        }
        // end for all values
    }
    // end for all fields
    return rep;
}
Also used : IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Aggregations

IndexDataTypeEnum (org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum)5 IndexField (org.apache.stanbol.entityhub.yard.solr.model.IndexField)4 ArrayList (java.util.ArrayList)3 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)2 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)2 IndexValue (org.apache.stanbol.entityhub.yard.solr.model.IndexValue)2 HashMap (java.util.HashMap)1 SolrQuery (org.apache.solr.client.solrj.SolrQuery)1 SpecialFieldEnum (org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum)1 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)1 Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)1 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)1 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)1 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)1 IndexDataType (org.apache.stanbol.entityhub.yard.solr.model.IndexDataType)1 NoConverterException (org.apache.stanbol.entityhub.yard.solr.model.NoConverterException)1