use of org.apache.stanbol.entityhub.yard.solr.model.NoConverterException in project stanbol by apache.
the class SolrQueryFactory method initValueConstraint.
/**
 * Translates the {@link ValueConstraint} held by the parsed index constraint
 * into the field, data type, language and value constraints of that index
 * constraint.
 * <p>
 * A constraint without values is marked as invalid. Otherwise only the first
 * supported data type is used; unsupported and additional data types are
 * logged and ignored. If no supported data type is parsed, the type of the
 * first value determines the data type used for all values.
 *
 * @param indexConstraint
 *            the index constraint to initialise; its constraint is cast to
 *            {@link ValueConstraint}
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
    final ValueConstraint constraint = (ValueConstraint) indexConstraint.getConstraint();
    if (constraint.getValues() == null) {
        // nothing to query for -> mark as invalid and stop here
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for the parsed datatypes %s is present - can not be supported by a Solr query!", constraint.getDataTypes()));
        return;
    }

    // (1) collect the supported data types out of the parsed ones
    List<IndexDataType> supportedTypes = new ArrayList<IndexDataType>();
    List<String> supportedTypeUris = new ArrayList<String>();
    if (constraint.getDataTypes() != null) {
        for (String typeUri : constraint.getDataTypes()) {
            IndexDataTypeEnum enumEntry = IndexDataTypeEnum.forUri(typeUri);
            if (enumEntry == null) {
                // TODO: Add possibility to add warnings to indexConstraints
                log.warn("A Datatype parsed for a ValueConstraint is not supported and will be ignored (dataTypeUri={})", typeUri);
                continue;
            }
            supportedTypes.add(enumEntry.getIndexType());
            supportedTypeUris.add(typeUri);
        }
    }

    // (2) only a single data type is supported: keep the first one and
    //     report all others (last one first) as ignored
    IndexDataType queryType = supportedTypes.isEmpty() ? null : supportedTypes.get(0);
    if (supportedTypes.size() > 1) {
        log.warn("Only a single DataType is supported for ValueConstraints!");
        for (int idx = supportedTypeUris.size() - 1; idx >= 1; idx--) {
            log.warn(" > ignore parsed dataType {}", supportedTypeUris.remove(idx));
        }
    }

    // (3) convert the values; if no data type is known yet it is taken from
    //     the first converted value
    ConstraintValue constraintValue = new ConstraintValue(constraint.getMode());
    addBoost(constraintValue, constraint);
    for (Object rawValue : constraint.getValues()) {
        IndexValue converted;
        if (queryType != null) {
            converted = new IndexValue(rawValue.toString(), queryType);
        } else {
            try {
                converted = indexValueFactory.createIndexValue(rawValue);
            } catch (NoConverterException e) {
                // no converter available: fall back to toString() with the string type
                log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { rawValue, rawValue.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                converted = new IndexValue(rawValue.toString(), IndexDataTypeEnum.STR.getIndexType());
            }
            // this type is now used for all remaining values of the query
            queryType = converted.getType();
        }
        constraintValue.getValues().add(converted);
    }

    // (4) build the index field. For TEXT the language is also required to
    //     create a valid query; the language of the first value is used.
    IndexField indexField = IndexDataTypeEnum.TXT.getIndexType().equals(queryType)
            ? new IndexField(indexConstraint.getPath(), queryType, constraintValue.getValues().iterator().next().getLanguage())
            : new IndexField(indexConstraint.getPath(), queryType);

    // sets the FIELD, DATATYPE and LANGUAGE constraints
    indexConstraint.setIndexFieldConstraints(indexField);
    // sets the VALUE
    // TODO: We need to somehow pass the MODE so that the encoder knows how
    //       to encode the values
    indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);

    // (5) update the field query constraint: reference constraints are kept
    //     as parsed, all others are replaced by one using the selected type
    if (!(constraint instanceof ReferenceConstraint)) {
        indexConstraint.setFieldQueryConstraint(new ValueConstraint(constraint.getValues(), Arrays.asList(queryType.getId())));
    } else {
        indexConstraint.setFieldQueryConstraint(constraint);
    }
}
use of org.apache.stanbol.entityhub.yard.solr.model.NoConverterException in project stanbol by apache.
the class SolrYard method createSolrInputDocument.
/**
 * Internally used to create Solr input documents for parsed representations.
 * <p>
 * This method supports boosting of fields. The boost is calculated by combining
 * <ol>
 * <li>the boost for the whole representation - by calling {@link #getDocumentBoost(Representation)}
 * <li>the boost of each field - by using the configured {@link #fieldBoostMap}
 * </ol>
 * Values that can not be converted or processed are logged and skipped; they
 * do not abort the creation of the document.
 *
 * @param representation
 *            the representation
 * @return the Solr document for indexing
 */
protected final SolrInputDocument createSolrInputDocument(Representation representation) {
    SolrYardConfig config = (SolrYardConfig) getConfig();
    SolrInputDocument inputDocument = new SolrInputDocument();
    // in a multi yard index layout the id of this yard is added as the
    // domain of the document; otherwise nothing needs to be done
    if (config.isMultiYardIndexLayout()) {
        inputDocument.addField(fieldMapper.getDocumentDomainField(), config.getId());
    }
    inputDocument.addField(fieldMapper.getDocumentIdField(), representation.getId());
    // first process the document boost. It is set on the document and, for
    // fields with a configured field boost, also multiplied into the boost
    // of the field (see baseBoost below)
    Float documentBoost = getDocumentBoost(representation);
    if (documentBoost != null) {
        inputDocument.setDocumentBoost(documentBoost);
    }
    for (Iterator<String> fields = representation.getFieldNames(); fields.hasNext(); ) {
        // TODO: maybe add some functionality to prevent indexing of the
        // field configured as documentBoostFieldName!
        // But this would also prevent the possibility to intentionally
        // override the boost.
        String field = fields.next();
        /*
         * With STANBOL-1027 the calculation of the boost has changed to
         * consider multiple values for Representation#get(field).
         */
        // the boost without considering the number of values per solr field
        float baseBoost;
        Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
        // used to keep track of the solr fields we need to boost
        // (solr field name -> single element array holding the number of values)
        final Map<String, int[]> fieldsToBoost;
        if (fieldBoost != null) {
            baseBoost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
            fieldsToBoost = new HashMap<String, int[]>();
        } else {
            // not used if no field boost is configured
            baseBoost = -1;
            fieldsToBoost = null;
        }
        for (Iterator<Object> values = representation.get(field); values.hasNext(); ) {
            // now we need to get the indexField for the value
            Object next = values.next();
            // Resolved null-safe up front: the handlers below are reached for
            // NULL values and must not fail themselves while building the
            // log message.
            final Class<?> nextType = next == null ? null : next.getClass();
            IndexValue value;
            try {
                value = indexValueFactory.createIndexValue(next);
                for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
                    // In step (1) of boosting just keep track of the field
                    if (fieldBoost != null) {
                        // we need to boost in (2)
                        int[] numValues = fieldsToBoost.get(fieldName);
                        if (numValues == null) {
                            numValues = new int[] { 1 };
                            fieldsToBoost.put(fieldName, numValues);
                            // the first time add the value with the baseBoost
                            // as this will be the correct boost for single value fields
                            inputDocument.addField(fieldName, value.getValue(), baseBoost);
                        } else {
                            numValues[0]++;
                            // for multi valued fields the correct boost is set in (2)
                            // so we can add here without a boost
                            inputDocument.addField(fieldName, value.getValue());
                        }
                    } else {
                        // add the values without boost
                        inputDocument.addField(fieldName, value.getValue());
                    }
                }
            } catch (NoConverterException e) {
                log.warn(String.format("Unable to convert value %s (type:%s) for field %s!", next, nextType, field), e);
            } catch (IllegalArgumentException e) {
                // usually because the Object is NULL or empty
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Illegal Value %s (type:%s) for field %s!", next, nextType, field), e);
                }
            } catch (RuntimeException e) {
                log.warn(String.format("Unable to process value %s (type:%s) for field %s!", next, nextType, field), e);
            }
        }
        if (fieldBoost != null) {
            // we need still to do part (2) of setting the correct boost
            for (Entry<String, int[]> entry : fieldsToBoost.entrySet()) {
                if (entry.getValue()[0] > 1) {
                    // adapt the boost only for multi valued fields
                    SolrInputField solrField = inputDocument.getField(entry.getKey());
                    // the correct boost is baseBoost (representing entity boost with field
                    // boost) multiplied with the sqrt(fieldValues). The 2nd part aims to
                    // compensate the Solr lengthNorm (1/sqrt(fieldTokens))
                    // see STANBOL-1027 for details
                    solrField.setBoost(baseBoost * (float) Math.sqrt(entry.getValue()[0]));
                }
            }
        }
    }
    return inputDocument;
}
Aggregations