use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.
the class SolrQueryFactory method initTextConstraint.
/**
* @param indexConstraint
* @param textConstraint
*/
private void initTextConstraint(IndexConstraint indexConstraint) {
    TextConstraint textConstraint = (TextConstraint) indexConstraint.getConstraint();
    ConstraintValue constraintValue = new ConstraintValue();
    //init the boost
    addBoost(constraintValue, textConstraint);
    //init the Phrase Query based on the ProximityRanking state
    if (textConstraint.isProximityRanking() != null) {
        constraintValue.setProperty(QueryConst.PHRASE_QUERY_STATE, textConstraint.isProximityRanking());
    } else {
        //TODO: maybe make the default configurable for the SolrYard
        constraintValue.setProperty(QueryConst.PHRASE_QUERY_STATE, QueryConst.DEFAULT_PHRASE_QUERY_STATE);
    }
    for (String text : textConstraint.getTexts()) {
        constraintValue.getValues().add(indexValueFactory.createIndexValue(valueFactory.createText(text)));
    }
    //use an index field for the DataType, Languages and the Field
    indexConstraint.setIndexFieldConstraints(new IndexField(indexConstraint.getPath(), IndexDataTypeEnum.TXT.getIndexType(), textConstraint.getLanguages()));
    //add the value for the constraint
    switch (textConstraint.getPatternType()) {
        case none:
            indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
            break;
        case wildcard:
            indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.WILDCARD, constraintValue);
            break;
        case regex:
            indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.REGEX, constraintValue);
            break;
        default:
            indexConstraint.setInvalid(String.format("PatternType %s not supported for Solr Index Queries!", textConstraint.getPatternType()));
    }
}
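The switch above is the complete mapping from the TextConstraint pattern type to the constraint type used for encoding. A compact restatement as a hypothetical helper (not part of the Stanbol sources; PatternType is the enum returned by textConstraint.getPatternType()) could look like this:

private static IndexConstraintTypeEnum toConstraintType(PatternType patternType) {
    switch (patternType) {
        case none:     return IndexConstraintTypeEnum.EQ;       // exact / phrase match
        case wildcard: return IndexConstraintTypeEnum.WILDCARD; // '*' and '?' patterns
        case regex:    return IndexConstraintTypeEnum.REGEX;    // regular expression queries
        default:       return null; // caller marks the IndexConstraint as invalid
    }
}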
use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.
the class SolrQueryFactory method parseFieldQuery.
/**
* Converts the field query to a SolrQuery. In addition, it changes the parsed
* FieldQuery (e.g. by removing unsupported features or setting defaults for
* missing parameters).
* @param fieldQuery the field query (will be modified to reflect the query
* as executed)
* @param select the SELECT mode
* @return the SolrQuery
*/
public SolrQuery parseFieldQuery(FieldQuery fieldQuery, SELECT select) {
    SolrQuery query = initSolrQuery(fieldQuery);
    setSelected(query, fieldQuery, select);
    StringBuilder queryString = new StringBuilder();
    Map<String, Constraint> processedFieldConstraints = new HashMap<String, Constraint>();
    boolean firstConstraint = true;
    boolean similarityConstraintPresent = false;
    for (Entry<String, Constraint> fieldConstraint : fieldQuery) {
        if (fieldConstraint.getValue().getType() == ConstraintType.similarity) {
            // TODO: make the FieldQuery ensure that there is no more than one
            //       similarity constraint per query
            List<String> fields = new ArrayList<String>();
            fields.add(fieldConstraint.getKey());
            SimilarityConstraint simConstraint = (SimilarityConstraint) fieldConstraint.getValue();
            final IndexValue contextValue = indexValueFactory.createIndexValue(simConstraint.getContext());
            fields.addAll(simConstraint.getAdditionalFields());
            if (!similarityConstraintPresent) {
                //first similarity constraint found
                similarityConstraintPresent = true;
                //add the constraint to the query
                query.setRequestHandler(MLT_QUERY_TYPE);
                query.set(MATCH_INCLUDE, false);
                query.set(MIN_DOC_FREQ, 1);
                query.set(MIN_TERM_FREQ, 1);
                query.set(INTERESTING_TERMS, "details");
                //testing
                query.set("mlt.boost", true);
                List<String> indexFields = new ArrayList<String>();
                for (String field : fields) {
                    //we need to get the actual fields in the index for the
                    //logical fields parsed with the constraint
                    IndexDataTypeEnum mappedIndexTypeEnum = IndexDataTypeEnum.forDataTyoe(simConstraint.getContextType());
                    IndexField indexField = new IndexField(Collections.singletonList(field),
                        mappedIndexTypeEnum == null ? null : mappedIndexTypeEnum.getIndexType(),
                        simConstraint.getLanguages());
                    indexFields.addAll(fieldMapper.getQueryFieldNames(indexField));
                }
                query.set(SIMILARITY_FIELDS, indexFields.toArray(new String[indexFields.size()]));
                query.set(STREAM_BODY, contextValue.getValue());
                processedFieldConstraints.put(fieldConstraint.getKey(), fieldConstraint.getValue());
            } else {
                //similarity constraint already present -> ignore all further ones
                //NOTE: users are informed about this by NOT including the further
                //      similarity constraints in the query included in the response
                log.warn("The parsed FieldQuery contains multiple Similarity constraints. "
                    + "However only a single one can be supported per query. Because of "
                    + "this all further Similarity constraints will be ignored!");
                log.warn("Ignore SimilarityConstraint:");
                log.warn("  > Field      : {}", fieldConstraint.getKey());
                log.warn("  > Context    : {}", simConstraint.getContext());
                log.warn("  > Add Fields : {}", simConstraint.getAdditionalFields());
            }
        } else {
            IndexConstraint indexConstraint = createIndexConstraint(fieldConstraint);
            if (indexConstraint.isInvalid()) {
                log.warn("Unable to create IndexConstraint for Constraint {} (type: {}) and Field {} (Reasons: {})",
                    new Object[] { fieldConstraint.getValue(), fieldConstraint.getValue().getType(),
                        fieldConstraint.getKey(), indexConstraint.getInvalidMessages() });
            } else {
                if (firstConstraint) {
                    queryString.append('(');
                    firstConstraint = false;
                } else {
                    queryString.append(") AND (");
                }
                indexConstraint.encode(queryString);
                //set the constraint (may be changed because of some unsupported features)
                processedFieldConstraints.put(fieldConstraint.getKey(),
                    indexConstraint.getFieldQueryConstraint() == null
                        ? fieldConstraint.getValue() //if null assume no change and add the parsed one
                        : indexConstraint.getFieldQueryConstraint()); //otherwise add the changed version
            }
        }
    }
    if (!firstConstraint) {
        queryString.append(')');
    }
    //set the constraints as processed to the parsed query
    fieldQuery.removeAllConstraints();
    for (Entry<String, Constraint> constraint : processedFieldConstraints.entrySet()) {
        fieldQuery.setConstraint(constraint.getKey(), constraint.getValue());
    }
    if (queryString.length() > 0) {
        String qs = queryString.toString();
        log.debug("QueryString: {}", qs);
        if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
            query.set(CommonParams.FQ, qs);
        } else {
            query.setQuery(qs);
        }
    }
    log.debug("Solr Query: {}", query);
    return query;
}
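For orientation, the following is a minimal SolrJ sketch (not from the Stanbol sources) of the MoreLikeThis style query that parseFieldQuery assembles when a SimilarityConstraint is present. The handler path and the mlt.* / stream.body parameter names are assumptions based on the standard Solr MoreLikeThis parameters; the actual values are defined by the MLT_QUERY_TYPE, MATCH_INCLUDE, MIN_DOC_FREQ, MIN_TERM_FREQ, INTERESTING_TERMS, SIMILARITY_FIELDS and STREAM_BODY constants of SolrQueryFactory.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.CommonParams;

public class MltQuerySketch {
    public static SolrQuery build(String contextText, String encodedConstraints, String... similarityFieldNames) {
        SolrQuery query = new SolrQuery();
        query.setRequestHandler("/mlt");              // assumed value of MLT_QUERY_TYPE
        query.set("mlt.match.include", false);        // assumed value of MATCH_INCLUDE
        query.set("mlt.mindf", 1);                    // assumed value of MIN_DOC_FREQ
        query.set("mlt.mintf", 1);                    // assumed value of MIN_TERM_FREQ
        query.set("mlt.interestingTerms", "details"); // assumed value of INTERESTING_TERMS
        query.set("mlt.boost", true);
        query.set("mlt.fl", similarityFieldNames);    // assumed value of SIMILARITY_FIELDS
        query.set("stream.body", contextText);        // assumed value of STREAM_BODY
        // all remaining (non similarity) constraints end up in the filter query, as above
        query.set(CommonParams.FQ, encodedConstraints);
        return query;
    }
}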
use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.
the class SolrQueryFactory method initValueConstraint.
/**
* @param indexConstraint
* @param refConstraint
*/
private void initValueConstraint(IndexConstraint indexConstraint) {
    ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
    if (valueConstraint.getValues() == null) {
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - one that only checks if any value for "
            + "the parsed datatypes %s is present - can not be supported by a Solr query!",
            valueConstraint.getDataTypes()));
    } else {
        // first process the parsed dataTypes to get the supported types
        List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
        List<String> acceptedDataTypes = new ArrayList<String>();
        if (valueConstraint.getDataTypes() != null) {
            for (String dataType : valueConstraint.getDataTypes()) {
                IndexDataTypeEnum indexDataTypeEnumEntry = IndexDataTypeEnum.forUri(dataType);
                if (indexDataTypeEnumEntry != null) {
                    indexDataTypes.add(indexDataTypeEnumEntry.getIndexType());
                    acceptedDataTypes.add(dataType);
                } else {
                    // TODO: Add possibility to add warnings to indexConstraints
                    log.warn("A Datatype parsed for a ValueConstraint is not "
                        + "supported and will be ignored (dataTypeUri={})", dataType);
                }
            }
        }
        //we support only a single dataType ...
        // ... therefore remove additional data types from the ValueConstraint
        IndexDataType indexDataType = null;
        if (!indexDataTypes.isEmpty()) {
            indexDataType = indexDataTypes.get(0);
            if (indexDataTypes.size() > 1) {
                log.warn("Only a single DataType is supported for ValueConstraints!");
                while (acceptedDataTypes.size() > 1) {
                    String ignored = acceptedDataTypes.remove(acceptedDataTypes.size() - 1);
                    log.warn("  > ignore parsed dataType {}", ignored);
                }
            }
        }
        //else empty - we will initialise based on the first parsed value!
        ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
        //init the boost
        addBoost(constraintValue, valueConstraint);
        for (Object value : valueConstraint.getValues()) {
            IndexValue indexValue;
            if (indexDataType == null) {
                // get the dataType based on the type of the value
                try {
                    indexValue = indexValueFactory.createIndexValue(value);
                } catch (NoConverterException e) {
                    // if not found use the toString() and string as type
                    log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by "
                        + "using the IndexDataType {}",
                        new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                    indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
                }
                //initialise the IndexDataType for this query based on the first parsed value
                indexDataType = indexValue.getType();
            } else {
                indexValue = new IndexValue(value.toString(), indexDataType);
            }
            //add the constraint
            constraintValue.getValues().add(indexValue);
        }
        //indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.DATATYPE, indexDataType);
        IndexField indexField;
        if (IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType)) {
            //NOTE: in case of TEXT we also need to add the language to create a valid query!
            //      We take the language of the first parsed element.
            indexField = new IndexField(indexConstraint.getPath(), indexDataType,
                constraintValue.getValues().iterator().next().getLanguage());
        } else {
            indexField = new IndexField(indexConstraint.getPath(), indexDataType);
        }
        //set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
        indexConstraint.setIndexFieldConstraints(indexField);
        //set the VALUE
        //TODO: We need to somehow pass the MODE so that the encoder knows how
        //      to encode the values
        indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
        //update this constraint!
        if (valueConstraint instanceof ReferenceConstraint) {
            indexConstraint.setFieldQueryConstraint(valueConstraint);
        } else {
            indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(),
                Arrays.asList(indexDataType.getId())));
        }
    }
}
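The data type handling above keeps only a single data type per ValueConstraint. A hypothetical helper (not part of SolrQueryFactory) restating that narrowing:

static String selectSingleDataType(List<String> parsedDataTypeUris) {
    String selected = null;
    for (String dataTypeUri : parsedDataTypeUris) {
        if (IndexDataTypeEnum.forUri(dataTypeUri) == null) {
            continue; // unsupported data type URIs are ignored (with a warning in the real code)
        }
        if (selected == null) {
            selected = dataTypeUri; // the first supported data type wins
        }
        // further supported data types are dropped, as only one is supported per ValueConstraint
    }
    return selected; // null if no supported data type was parsed
}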
use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.
the class SolrFieldMapper method parseIndexField.
/**
* This method does the dirty work of parsing the different parts of the field in the SolrDocument to the
* logical field as used by the semantic indexing API. This method assumes the following encoding
* <code><pre>
*   .  ... path separator
*   _  ... special field indicator
*   __ ... escaped special field
*   !  ... merger - collects the values of other fields.
*            Such fields do not have a mapping to logical IndexFields.
*            All mergers are created by copyField configurations within the
*            Solr Schema configuration
*   @  ... '@' indicates a field in a given language
*            _@.<field>   : A value for a field with no language defined
*            _@en.<field> : A value for a field in English
*            _!@.<field>  : Contains all labels regardless of language
*            _!@en.<field>: Contains all labels of languages that start with "en"
*   <prefix> ... indicates a dataType that uses this prefix
*            _str.<field> : A string field (containing no language)
*            _ref.<field> : A reference (similar to xsd:anyURI)
*            _bool.<field>: A boolean value
*
* NOTE: Prefixes/Suffixes can be used to define a hierarchy of data types
*   e.g. use Prefixes for dataTypes:
*     _n   ... any kind of numeric value
*     _ni  ... any kind of integer value (BigInteger)
*     _nib ... a byte
*     _nii ... an integer
*     _nil ... a long
*     _nd  ... a decimal value
*     _ndf ... float
*     _ndd ... double
*     _s   ... any kind of string value
*     _si  ... a string based ID
*     _sr  ... a reference
*   e.g. use Suffixes for semantic meanings
*     ._ct ... a tag
*     ._cr ... a category using a reference to an entity ID (xsd:anyURI)
*     ._ci ... a categorisation using a local id (e.g. 2 letter country codes)
*
*   one can now create Solr copyField commands to support searches spanning
*   multiple types
*     _!n  ... search for any kind of numbers
*     _!ni ... search for any kind of integers
*     _!s  ... search in all kinds of string values
*     _!sc ... search for all categories of this document
* </pre></code>
*
* @param prefix the prefix part of the Solr field name (a language or dataType prefix)
* @param suffix the suffix part of the Solr field name
* @param pathElements the path elements of the Solr field name
* @return the logical IndexField, or null if no IndexDataType is registered for the
*         parsed prefix/suffix combination
*/
private IndexField parseIndexField(final String prefix, final String suffix, final String[] pathElements) {
    final String language;
    boolean isLanguage = false;
    final String dataTypePrefix;
    // -> note that the prefix might also be used for the data type!
    if (prefix != null && !prefix.isEmpty()) {
        if (prefix.charAt(0) == SolrConst.LANG_INDICATOR) {
            isLanguage = true;
            // it is a language prefix!
            // set dataTypePrefix to null
            dataTypePrefix = null;
            if (prefix.length() > 1) {
                language = prefix.substring(1);
            } else {
                // it is a language prefix, but for the default language!
                language = null;
            }
        } else {
            // it is no language prefix
            language = null;
            isLanguage = false;
            dataTypePrefix = prefix;
        }
    } else {
        // no prefix at all
        // set no-language and the dataType prefix to null
        isLanguage = false;
        language = null;
        dataTypePrefix = null;
    }
    // now parse the indexDataType!
    IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forPrefixSuffix(dataTypePrefix, suffix);
    if (dataTypeEnumEntry == null) {
        log.warn(String.format("No IndexDataType registered for prefix: %s and suffix: %s -> unable to process path %s",
            dataTypePrefix, suffix, Arrays.toString(pathElements)));
        // we might also throw an exception at this point
        return null;
    }
    // parse the path
    List<String> path = new ArrayList<String>(pathElements.length);
    for (String pathElement : pathElements) {
        if (pathElement.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
            if (pathElement.charAt(1) == SolrConst.SPECIAL_FIELD_PREFIX) {
                // an escaped special field -> strip the escape character and
                // treat the rest as a normal path element
                path.add(getFullFieldName(pathElement.substring(1)));
            } else {
                throw new IllegalStateException(String.format("Found special field \"%s\" within the path \"%s\" -> "
                    + "Special fields are only allowed as prefix and suffix!",
                    pathElement, Arrays.toString(pathElements)));
            }
        } else {
            String fullName = getFullFieldName(pathElement);
            if (fullName == null) {
                throw new IllegalStateException(String.format("Unable to map PathElement %s to its full Name (path=%s)!",
                    pathElement, Arrays.toString(pathElements)));
            } else {
                path.add(fullName);
            }
        }
    }
    if (isLanguage) {
        return new IndexField(path, dataTypeEnumEntry.getIndexType(), language);
    } else {
        return new IndexField(path, dataTypeEnumEntry.getIndexType());
    }
}
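The nested if/else at the start of the method only distinguishes a language prefix from a dataType prefix. As a compact restatement (hypothetical helper, not in the Stanbol sources; the language indicator is passed in rather than read from SolrConst):

static String[] classifyPrefix(String prefix, char langIndicator) {
    String language = null;
    String dataTypePrefix = null;
    if (prefix != null && !prefix.isEmpty()) {
        if (prefix.charAt(0) == langIndicator) {
            // the indicator alone -> default language (null), e.g. "@en" -> "en"
            language = prefix.length() > 1 ? prefix.substring(1) : null;
        } else {
            dataTypePrefix = prefix;
        }
    }
    return new String[] { language, dataTypePrefix }; // [language, dataTypePrefix]
}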
use of org.apache.stanbol.entityhub.yard.solr.model.IndexField in project stanbol by apache.
the class SolrFieldMapper method getField.
@Override
public IndexField getField(String fieldName) {
    if (fieldName == null || fieldName.isEmpty()) {
        throw new IllegalArgumentException("The parsed field name MUST NOT be NULL nor empty!");
    }
    IndexField field = fieldMappings.get(fieldName);
    if (field == null) {
        if (getDocumentIdField().equals(fieldName) || fieldName.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
            // does not map to a logical IndexField and should be ignored
            return null;
        } else if (SolrConst.SCORE_FIELD.equals(fieldName)) {
            return scoreField;
        }
        // parse the prefix and suffix
        String[] tokens = fieldName.split(Character.toString(SolrConst.PATH_SEPERATOR));
        int numTokens = tokens.length;
        int pathElements = numTokens;
        String prefix = null;
        String suffix = null;
        if (tokens.length >= 2) {
            prefix = tokens[0];
            pathElements--;
        }
        if (tokens.length >= 3) {
            suffix = tokens[numTokens - 1].substring(1);
            pathElements--;
        }
        // parse the path
        String[] path = new String[pathElements];
        System.arraycopy(tokens, prefix == null ? 0 : 1, path, 0, pathElements);
        tokens = null;
        // process the parsed data
        field = parseIndexField(prefix, suffix, path);
        if (field != null) {
            fieldMappings.put(fieldName, field);
        }
    }
    return field;
}
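To illustrate the token bookkeeping, a hedged walk-through with hypothetical tokens (real tokens come from splitting the field name on SolrConst.PATH_SEPERATOR, whose value is not shown in this snippet):

// a field name with a prefix, one path element and a suffix splits into three tokens
String[] tokens = { "@en", "urn:example:field", "_ct" };
// tokens.length == 3 -> prefix = tokens[0] = "@en",
//                       suffix = tokens[2].substring(1) = "ct" (special-field marker stripped),
//                       path   = { "urn:example:field" }
// which getField() then hands to parseIndexField("@en", "ct", path)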