Use of org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum in project stanbol by apache.

The class SolrFieldMapper, method getFieldNames:
@Override
public List<String> getFieldNames(IndexField indexField) throws IllegalArgumentException {
    if (indexField == null) {
        throw new IllegalArgumentException("The parsed IndexField name MUST NOT be NULL!");
    }
    List<String> fieldNames = indexFieldMappings.get(indexField);
    if (fieldNames == null) {
        // check for special field
        SpecialFieldEnum specialField = indexField.getSpecialField();
        if (specialField != null) {
            switch (specialField) {
                case fullText:
                    fieldNames = Collections.singletonList(getFullTextSearchField());
                    break;
                case references:
                    fieldNames = Collections.singletonList(getReferredDocumentField());
                    break;
                default:
throw new IllegalStateException("Unsupported Special Field '" + specialField.getUri() + "! Please report this to the " + "Stanbol Developer Mailing list or create an according" + "JIRA issue at https://issues.apache.org/jira/browse/STANBOL!");
            }
        } else {
            // typically only 1 or 2 values
            fieldNames = new ArrayList<String>(2);
            IndexDataTypeEnum dataTypeConfig = IndexDataTypeEnum.forIndexType(indexField.getDataType());
            if (dataTypeConfig == null) {
                throw new IllegalStateException(String.format(
                        "No Config found for the parsed IndexDataType %s", indexField.getDataType()));
            }
            // Three things need to be done
            // 1) Encode the Path
            String pathName = encodePathName(indexField);
            // 2) Encode the DataType
            fieldNames.addAll(encodeDataType(pathName, dataTypeConfig));
            // 3) Encode the Languages
            if (indexField.hasLanguage()) {
                fieldNames.addAll(encodeLanguages(pathName, indexField.getLanguages()));
            }
            // 4) add the merger field (collecting the values of all natural language texts)
            if (dataTypeConfig.isLanguageType()) {
                fieldNames.add(SolrConst.LANG_MERGER_FIELD + pathName);
            }
        }
        // cache the mappings
        indexFieldMappings.put(indexField, fieldNames);
    }
    return fieldNames;
}
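
Below is a minimal, hypothetical sketch of how a caller might drive this mapping. It assumes a configured SolrFieldMapper instance (referred to as fieldMapper in the final comment); the rdfs:label URI, the "en" language and the package path of IndexField are illustrative assumptions, not taken from the code above.

import java.util.Collections;

import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.model.IndexField; // package path assumed

public class GetFieldNamesSketch {

    public static void main(String[] args) {
        // a logical field: the rdfs:label path typed as natural language text in English
        IndexField labelField = new IndexField(
                Collections.singletonList("http://www.w3.org/2000/01/rdf-schema#label"),
                IndexDataTypeEnum.TXT.getIndexType(), "en");
        // the same datatype lookup getFieldNames performs before encoding the field
        IndexDataTypeEnum dataTypeConfig = IndexDataTypeEnum.forIndexType(labelField.getDataType());
        System.out.println("language type: " + (dataTypeConfig != null && dataTypeConfig.isLanguageType()));
        // with a configured SolrFieldMapper one would now resolve the stored field names:
        // List<String> names = fieldMapper.getFieldNames(labelField);
    }
}
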
The class SolrQueryFactory, method parseFieldQuery:
/**
 * Converts the field query to a SolrQuery. In addition it changes the parsed
 * FieldQuery (e.g. removing unsupported features, setting defaults for
 * missing parameters).
 * @param fieldQuery the field query (will be modified to reflect the query
 * as executed)
 * @param select the SELECT mode
 * @return the SolrQuery
 */
public SolrQuery parseFieldQuery(FieldQuery fieldQuery, SELECT select) {
    SolrQuery query = initSolrQuery(fieldQuery);
    setSelected(query, fieldQuery, select);
    StringBuilder queryString = new StringBuilder();
    Map<String, Constraint> processedFieldConstraints = new HashMap<String, Constraint>();
    boolean firstConstraint = true;
    boolean similarityConstraintPresent = false;
    for (Entry<String, Constraint> fieldConstraint : fieldQuery) {
        if (fieldConstraint.getValue().getType() == ConstraintType.similarity) {
            // TODO: the FieldQuery implementation should ensure that there is no more
            // than one similarity constraint per query (for now this is only logged)
            List<String> fields = new ArrayList<String>();
            fields.add(fieldConstraint.getKey());
            SimilarityConstraint simConstraint = (SimilarityConstraint) fieldConstraint.getValue();
            final IndexValue contextValue = indexValueFactory.createIndexValue(simConstraint.getContext());
            fields.addAll(simConstraint.getAdditionalFields());
            if (!similarityConstraintPresent) {
                // similarity constraint present
                similarityConstraintPresent = true;
                // add the constraint to the query
                query.setRequestHandler(MLT_QUERY_TYPE);
                query.set(MATCH_INCLUDE, false);
                query.set(MIN_DOC_FREQ, 1);
                query.set(MIN_TERM_FREQ, 1);
                query.set(INTERESTING_TERMS, "details");
                // testing
                query.set("mlt.boost", true);
                List<String> indexFields = new ArrayList<String>();
                for (String field : fields) {
                    // we need to get the actual fields in the index for the
                    // logical fields parsed with the constraint
                    IndexDataTypeEnum mapedIndexTypeEnum = IndexDataTypeEnum.forDataTyoe(simConstraint.getContextType());
                    IndexField indexField = new IndexField(Collections.singletonList(field),
                            mapedIndexTypeEnum == null ? null : mapedIndexTypeEnum.getIndexType(),
                            simConstraint.getLanguages());
                    indexFields.addAll(fieldMapper.getQueryFieldNames(indexField));
                }
                query.set(SIMILARITY_FIELDS, indexFields.toArray(new String[fields.size()]));
                query.set(STREAM_BODY, contextValue.getValue());
                processedFieldConstraints.put(fieldConstraint.getKey(), fieldConstraint.getValue());
            } else {
                // similarity constraint already present -> ignore further ones
                // NOTE: users are informed about this by NOT including further
                // similarity constraints in the query included in the response
                log.warn("The parsed FieldQuery contains multiple Similarity constraints. "
                        + "However only a single one can be supported per query. Because of "
                        + "this all further Similarity constraints will be ignored!");
log.warn("Ignore SimilarityConstraint:");
log.warn(" > Field : {}", fieldConstraint.getKey());
log.warn(" > Context : {}", simConstraint.getContext());
log.warn(" > Add Fields : {}", simConstraint.getAdditionalFields());
}
} else {
IndexConstraint indexConstraint = createIndexConstraint(fieldConstraint);
if (indexConstraint.isInvalid()) {
log.warn("Unable to create IndexConstraint for Constraint {} (type: {}) and Field {} (Reosens: {})", new Object[] { fieldConstraint.getValue(), fieldConstraint.getValue().getType(), fieldConstraint.getKey(), indexConstraint.getInvalidMessages() });
            } else {
                if (firstConstraint) {
                    queryString.append('(');
                    firstConstraint = false;
                } else {
                    queryString.append(") AND (");
                }
                indexConstraint.encode(queryString);
                // set the constraint (may be changed because of some unsupported features)
                processedFieldConstraints.put(fieldConstraint.getKey(),
                        // if null assume no change and add the parsed one, else add the changed version
                        indexConstraint.getFieldQueryConstraint() == null
                                ? fieldConstraint.getValue()
                                : indexConstraint.getFieldQueryConstraint());
            }
        }
    }
    if (!firstConstraint) {
        queryString.append(')');
    }
    // set the constraints as processed to the parsed query
    fieldQuery.removeAllConstraints();
    for (Entry<String, Constraint> constraint : processedFieldConstraints.entrySet()) {
        fieldQuery.setConstraint(constraint.getKey(), constraint.getValue());
    }
    if (queryString.length() > 0) {
        String qs = queryString.toString();
        log.debug("QueryString: {}", qs);
        if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
            query.set(CommonParams.FQ, qs);
        } else {
            query.setQuery(qs);
        }
    }
    log.debug("Solr Query: {}", query);
    return query;
}
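
The way the non-similarity constraints are joined into the Lucene query string above (each encoded constraint wrapped in parentheses and connected with AND) is easy to miss inside the larger method. The following standalone sketch reproduces just that pattern; the two constraint strings are made-up stand-ins for the output of IndexConstraint.encode(..).

import java.util.Arrays;

public class ConstraintJoinSketch {

    public static void main(String[] args) {
        StringBuilder queryString = new StringBuilder();
        boolean firstConstraint = true;
        for (String encoded : Arrays.asList("field_a:foo", "field_b:[1 TO 5]")) {
            if (firstConstraint) {
                queryString.append('(');
                firstConstraint = false;
            } else {
                queryString.append(") AND (");
            }
            queryString.append(encoded);
        }
        if (!firstConstraint) {
            queryString.append(')');
        }
        // prints: (field_a:foo) AND (field_b:[1 TO 5])
        System.out.println(queryString);
    }
}
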
The class SolrQueryFactory, method initValueConstraint:
/**
 * Initialises the parsed {@link IndexConstraint} based on the {@link ValueConstraint}
 * set as its constraint.
 * @param indexConstraint the index constraint to initialise
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
    ValueConstraint valueConstraint = (ValueConstraint) indexConstraint.getConstraint();
    if (valueConstraint.getValues() == null) {
        indexConstraint.setInvalid(String.format(
                "ValueConstraint without a value - one that only checks if any value for "
                + "the parsed datatypes %s is present - can not be supported by a Solr query!",
                valueConstraint.getDataTypes()));
    } else {
        // first process the parsed dataTypes to get the supported types
        List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
        List<String> acceptedDataTypes = new ArrayList<String>();
        if (valueConstraint.getDataTypes() != null) {
            for (String dataType : valueConstraint.getDataTypes()) {
                IndexDataTypeEnum indexDataTypeEnumEntry = IndexDataTypeEnum.forUri(dataType);
                if (indexDataTypeEnumEntry != null) {
                    indexDataTypes.add(indexDataTypeEnumEntry.getIndexType());
                    acceptedDataTypes.add(dataType);
                } else {
                    // TODO: Add possibility to add warnings to indexConstraints
                    log.warn("A Datatype parsed for a ValueConstraint is not "
                            + "supported and will be ignored (dataTypeUri={})", dataType);
                }
            }
        }
        // we support only a single dataType ...
        // ... therefore remove additional data types from the ValueConstraint
        IndexDataType indexDataType = null;
        if (!indexDataTypes.isEmpty()) {
            indexDataType = indexDataTypes.get(0);
            if (indexDataTypes.size() > 1) {
                log.warn("Only a single DataType is supported for ValueConstraints!");
                while (acceptedDataTypes.size() > 1) {
                    String ignored = acceptedDataTypes.remove(acceptedDataTypes.size() - 1);
                    log.warn(" > ignore parsed dataType {}", ignored);
                }
            }
        }
        // if no dataType was parsed the IndexDataType will be initialised based on the first parsed value
        ConstraintValue constraintValue = new ConstraintValue(valueConstraint.getMode());
        // init the boost
        addBoost(constraintValue, valueConstraint);
        for (Object value : valueConstraint.getValues()) {
            IndexValue indexValue;
            if (indexDataType == null) {
                // get the dataType based on the type of the value
                try {
                    indexValue = indexValueFactory.createIndexValue(value);
                } catch (NoConverterException e) {
                    // if no converter is found use toString() and the string datatype
log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
}
// initialise the IndexDataType for this query based on the first parsed value
indexDataType = indexValue.getType();
} else {
indexValue = new IndexValue(value.toString(), indexDataType);
}
// add the constraint
constraintValue.getValues().add(indexValue);
}
// indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.DATATYPE, indexDataType);
IndexField indexField;
if (IndexDataTypeEnum.TXT.getIndexType().equals(indexDataType)) {
            // NOTE: in case of TEXT we also need to add the language to create a valid query!
            // We take the language of the first parsed element.
            indexField = new IndexField(indexConstraint.getPath(), indexDataType,
                    constraintValue.getValues().iterator().next().getLanguage());
        } else {
            indexField = new IndexField(indexConstraint.getPath(), indexDataType);
        }
        // set FIELD, DATATYPE and LANGUAGE constraint by using the indexField
        indexConstraint.setIndexFieldConstraints(indexField);
        // set the VALUE
        // TODO: We need to somehow pass the MODE so that the encoder knows how
        // to encode the values
        indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
        // update this constraint!
        if (valueConstraint instanceof ReferenceConstraint) {
            indexConstraint.setFieldQueryConstraint(valueConstraint);
        } else {
            indexConstraint.setFieldQueryConstraint(new ValueConstraint(valueConstraint.getValues(),
                    Arrays.asList(indexDataType.getId())));
        }
    }
}
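
A standalone sketch of the datatype filtering step at the beginning of this method: every parsed datatype URI is looked up via IndexDataTypeEnum.forUri(..) and dropped with a warning when no mapping is registered. The URIs and the package path of IndexDataType are illustrative assumptions; whether a given URI resolves to an entry depends on the actual IndexDataTypeEnum configuration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType; // package path assumed

public class DataTypeFilterSketch {

    public static void main(String[] args) {
        List<IndexDataType> indexDataTypes = new ArrayList<IndexDataType>();
        for (String dataTypeUri : Arrays.asList(
                "http://www.w3.org/2001/XMLSchema#string",  // illustrative URI
                "urn:example:unsupported-datatype")) {       // deliberately unknown
            IndexDataTypeEnum entry = IndexDataTypeEnum.forUri(dataTypeUri);
            if (entry != null) {
                indexDataTypes.add(entry.getIndexType());
            } else {
                System.out.println("ignoring unsupported datatype " + dataTypeUri);
            }
        }
        System.out.println("accepted index datatypes: " + indexDataTypes);
    }
}
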
The class SolrFieldMapper, method parseIndexField:
/**
 * This method does the dirty work of parsing the different parts of the field in the SolrDocument to the
 * logical field as used by the semantic indexing API. This method assumes the following encoding:
 * <code><pre>
 *  .  ... path separator
 *  _  ... special field indicator
 *  __ ... escaped special field
 *  !  ... merger - collected values of other fields.
 *         Such fields do not have a mapping to logical IndexFields.
 *         All mergers are created by copyField configurations within the
 *         Solr Schema configuration
 *  @  ... '@' indicates a field in a given language
 *         _@.<field>: A value for a field with no language defined
 *         _@en.<field>: A value for a field in English
 *         _!@.<field>: Contains all labels regardless of language
 *         _!@en.<field>: Contains all labels of languages that start with "en"
 *  <prefix> ... indicates a dataType that uses this prefix
 *         _str.<field>: A string field (containing no language)
 *         _ref.<field>: A reference (similar to xsd:anyURI)
 *         _bool.<field>: A boolean value
 *
 *  NOTE: Prefixes/Suffixes can be used to define a hierarchy of data types,
 *  e.g. use prefixes for dataTypes:
 *      _n   ... any kind of numeric value
 *      _ni  ... any kind of integer value (BigInteger)
 *      _nib ... a byte
 *      _nii ... an integer
 *      _nil ... a long
 *      _nd  ... a decimal value
 *      _ndf ... a float
 *      _ndd ... a double
 *      _s   ... any kind of string value
 *      _si  ... a string based ID
 *      _sr  ... a reference
 *  e.g. use suffixes for semantic meanings:
 *      ._ct ... a tag
 *      ._cr ... a category using a reference to an entity ID (xsd:anyURI)
 *      ._ci ... a categorisation using a local id (e.g. 2 letter country codes)
 *
 *  One can now create Solr copyField commands to support searches spanning
 *  multiple types:
 *      _!n  ... search for any kind of numbers
 *      _!ni ... search for any kind of integers
 *      _!s  ... search in all kinds of string values
 *      _!sc ... search for all categories of this document
 * </pre></code>
 *
 * @param prefix the data type or language prefix of the Solr field name
 * @param suffix the data type suffix of the Solr field name
 * @param pathElements the path elements of the Solr field name
 * @return the logical IndexField, or <code>null</code> if no IndexDataType is registered
 *         for the parsed prefix/suffix combination
 */
private IndexField parseIndexField(final String prefix, final String suffix, final String[] pathElements) {
    final String language;
    boolean isLanguage = false;
    final String dataTypePrefix;
    // -> note that the prefix might also be used for the data type!
    if (prefix != null && !prefix.isEmpty()) {
        if (prefix.charAt(0) == SolrConst.LANG_INDICATOR) {
            isLanguage = true;
            // it is a language prefix!
            // set dataTypePrefix to null
            dataTypePrefix = null;
            if (prefix.length() > 1) {
                language = prefix.substring(1);
            } else {
                // it is a language prefix, but for the default language!
                language = null;
            }
        } else {
            // it is no language prefix
            language = null;
            isLanguage = false;
            dataTypePrefix = prefix;
        }
    } else {
        // no prefix at all
        // set no-language and the dataType prefix to null
        isLanguage = false;
        language = null;
        dataTypePrefix = null;
    }
    // now parse the indexDataType!
    IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forPrefixSuffix(dataTypePrefix, suffix);
    if (dataTypeEnumEntry == null) {
        log.warn(String.format("No IndexDataType registered for prefix: %s and suffix: %s -> unable to process path %s",
                dataTypePrefix, suffix, Arrays.toString(pathElements)));
        // we might also throw an exception at this point
        return null;
    }
    // parse the path
    List<String> path = new ArrayList<String>(pathElements.length);
    for (String pathElement : pathElements) {
        if (pathElement.charAt(0) == SolrConst.SPECIAL_FIELD_PREFIX) {
            if (pathElement.charAt(1) == SolrConst.SPECIAL_FIELD_PREFIX) {
                path.add(getFullFieldName(pathElement.substring(1)));
            } else {
                throw new IllegalStateException(String.format(
                        "Found special field \"%s\" within the path \"%s\" -> Special fields are only allowed as prefix and suffix!",
                        pathElement, Arrays.toString(pathElements)));
            }
        } else {
            String fullName = getFullFieldName(pathElement);
            if (fullName == null) {
throw new IllegalStateException(String.format("Unable to map PathElement %s to it's full Name (path=%s)!", pathElement, Arrays.toString(pathElements)));
            } else {
                path.add(fullName);
            }
        }
    }
    if (isLanguage) {
        return new IndexField(path, dataTypeEnumEntry.getIndexType(), language);
    } else {
        return new IndexField(path, dataTypeEnumEntry.getIndexType());
    }
}
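
To make the encoding documented above more tangible, the sketch below walks one made-up stored field name through the pieces parseIndexField receives. The example field name and the described split into prefix, suffix and path elements are assumptions for illustration; the concrete prefix and suffix strings registered per datatype live in IndexDataTypeEnum, so the lookup is written defensively.

import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;

public class PrefixSuffixLookupSketch {

    public static void main(String[] args) {
        // A stored field such as "_@en.rdfs:label" (language prefix "@en", no datatype
        // prefix, no suffix) would reach parseIndexField roughly as:
        //   prefix       -> "@en"  (language "en", dataTypePrefix becomes null)
        //   suffix       -> null
        //   pathElements -> {"rdfs:label"}
        // The remaining prefix/suffix pair is then resolved to an IndexDataType:
        IndexDataTypeEnum entry = IndexDataTypeEnum.forPrefixSuffix(null, null);
        if (entry != null) {
            // for language prefixed fields this is expected to resolve to the natural language text type
            System.out.println("resolved datatype: " + entry.getIndexType());
        } else {
            System.out.println("no datatype registered for this prefix/suffix combination");
        }
    }
}
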
The class SolrYard, method createRepresentation:
/**
 * Creates the Representation for the parsed SolrDocument!
 *
 * @param doc
 *            the Solr Document to convert
 * @param fields
 *            if NOT NULL only these fields are added to the Representation
 * @return the Representation
 */
protected final Representation createRepresentation(SolrDocument doc, Set<String> fields) {
    if (fieldMapper == null) {
        throw new IllegalArgumentException("The parsed FieldMapper MUST NOT be NULL!");
    }
    if (doc == null) {
        throw new IllegalArgumentException("The parsed SolrDocument MUST NOT be NULL!");
    }
    Object id = doc.getFirstValue(fieldMapper.getDocumentIdField());
    if (id == null) {
        throw new IllegalStateException(String.format(
                "The parsed Solr Document does not contain a value for the %s Field!",
                fieldMapper.getDocumentIdField()));
    }
    Representation rep = getValueFactory().createRepresentation(id.toString());
    for (String fieldName : doc.getFieldNames()) {
        IndexField indexField = fieldMapper.getField(fieldName);
        if (indexField != null && indexField.getPath().size() == 1) {
            String lang = indexField.getLanguages().isEmpty() ? null
                    : indexField.getLanguages().iterator().next();
            if (fields == null || fields.contains(indexField.getPath().get(0))) {
                for (Object value : doc.getFieldValues(fieldName)) {
                    if (value != null) {
                        IndexDataTypeEnum dataTypeEnumEntry = IndexDataTypeEnum.forIndexType(indexField.getDataType());
                        if (dataTypeEnumEntry != null) {
                            Object javaValue = indexValueFactory.createValue(dataTypeEnumEntry.getJavaType(),
                                    indexField.getDataType(), value, lang);
                            if (javaValue != null) {
                                rep.add(indexField.getPath().iterator().next(), javaValue);
                            } else {
                                log.warn(String.format("java value=null for index value %s", value));
                            }
                        } else {
                            log.warn(String.format("No DataType Configuration found for Index Data Type %s!",
                                    indexField.getDataType()));
                        }
                    } // else index value == null -> ignore
                } // end for all values
            }
        } else {
            if (indexField != null) {
log.warn(String.format("Unable to prozess Index Field %s (for IndexDocument Field: %s)", indexField, fieldName));
            }
        }
    } // end for all fields
    return rep;
}
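
The datatype-driven conversion in the inner loop is the part of this method that actually uses IndexDataTypeEnum. The helper below isolates it as a hedged sketch: the package paths of IndexDataType and IndexValueFactory are assumed, the method and class names are made up, and a null return stands in for the "no DataType Configuration found" branch logged above.

import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType;      // package path assumed
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;  // package path assumed

public class StoredValueConversionSketch {

    /**
     * Converts a raw value read from a SolrDocument into the Java object added to the
     * Representation, mirroring the inner branch of createRepresentation above.
     */
    static Object toJavaValue(IndexValueFactory valueFactory, IndexDataType dataType,
                              Object solrValue, String language) {
        IndexDataTypeEnum entry = IndexDataTypeEnum.forIndexType(dataType);
        if (entry == null) {
            return null; // no DataType configuration registered for this datatype
        }
        // the enum entry knows which Java class the stored value should be converted to
        return valueFactory.createValue(entry.getJavaType(), dataType, solrValue, language);
    }
}
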