use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.
the class SolrYard method createSolrInputDocument.
/**
 * Internally used to create Solr input documents for parsed representations.
 * <p>
 * This method supports boosting of fields. The boost is calculated by combining
 * <ol>
 * <li>the boost for the whole representation - by calling {@link #getDocumentBoost(Representation)}
 * <li>the boost of each field - by using the configured {@link #fieldBoostMap}
 * </ol>
 * For Solr fields with a configured boost that receive more than one value the
 * combined boost is additionally multiplied by {@code sqrt(numberOfValues)}
 * (see STANBOL-1027).
 * <p>
 * Values that can not be converted or added are logged and skipped; they do
 * not abort the creation of the document and are not counted as values of the
 * Solr field.
 *
 * @param representation
 *            the representation
 * @return the Solr document for indexing
 */
protected final SolrInputDocument createSolrInputDocument(Representation representation) {
    SolrYardConfig config = (SolrYardConfig) getConfig();
    SolrInputDocument inputDocument = new SolrInputDocument();
    // domain for all added documents!
    if (config.isMultiYardIndexLayout()) {
        inputDocument.addField(fieldMapper.getDocumentDomainField(), config.getId());
    }
    // else we need to do nothing
    inputDocument.addField(fieldMapper.getDocumentIdField(), representation.getId());
    // first process the document boost (only set when one is available)
    Float documentBoost = getDocumentBoost(representation);
    if (documentBoost != null) {
        inputDocument.setDocumentBoost(documentBoost);
    }
    for (Iterator<String> fields = representation.getFieldNames(); fields.hasNext();) {
        // TODO: maybe add some functionality to prevent indexing of the
        // field configured as documentBoostFieldName!
        // But this would also prevent the possibility to intentionally
        // override the boost.
        String field = fields.next();
        /*
         * With STANBOL-1027 the calculation of the boost has changed to
         * consider multiple values for Representation#get(field).
         */
        // the boost without considering the number of values per solr field
        float baseBoost;
        Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
        // used to keep track of the solr fields we need to boost
        // (solr field name -> number of values added so far)
        final Map<String, int[]> fieldsToBoost;
        if (fieldBoost != null) {
            // combine the field boost with the document boost (if present)
            baseBoost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
            fieldsToBoost = new HashMap<String, int[]>();
        } else {
            baseBoost = -1;
            fieldsToBoost = null;
        }
        for (Iterator<Object> values = representation.get(field); values.hasNext();) {
            // now we need to get the indexField for the value
            Object next = values.next();
            // resolved up front and null-safe, so that logging a problem with
            // a NULL value can not itself fail with a NullPointerException
            final Class<?> nextType = next == null ? null : next.getClass();
            try {
                IndexValue value = indexValueFactory.createIndexValue(next);
                for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
                    // In step (1) of boosting just keep track of the field
                    if (fieldBoost != null) {
                        // we need to boost in (2)
                        int[] numValues = fieldsToBoost.get(fieldName);
                        if (numValues == null) {
                            // the first time add the value with the baseBoost
                            // as this will be the correct boost for single value fields
                            inputDocument.addField(fieldName, value.getValue(), baseBoost);
                            // register the field only after the value was added, so that
                            // failed adds are neither counted nor looked up in step (2)
                            fieldsToBoost.put(fieldName, new int[] { 1 });
                        } else {
                            // for multi valued fields the correct boost is set in (2)
                            // so we can add here without a boost
                            inputDocument.addField(fieldName, value.getValue());
                            numValues[0]++;
                        }
                    } else {
                        // add the values without boost
                        inputDocument.addField(fieldName, value.getValue());
                    }
                }
            } catch (NoConverterException e) {
                log.warn(String.format("Unable to convert value %s (type:%s) for field %s!", next, nextType, field), e);
            } catch (IllegalArgumentException e) {
                // usually because the Object is NULL or empty
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Illegal Value %s (type:%s) for field %s!", next, nextType, field), e);
                }
            } catch (RuntimeException e) {
                log.warn(String.format("Unable to process value %s (type:%s) for field %s!", next, nextType, field), e);
            }
        }
        if (fieldBoost != null) {
            // we need still to do part (2) of setting the correct boost
            for (Entry<String, int[]> entry : fieldsToBoost.entrySet()) {
                if (entry.getValue()[0] > 1) {
                    // adapt the boost only for multi valued fields
                    SolrInputField solrField = inputDocument.getField(entry.getKey());
                    // the correct boost is baseBoost (representing entity boost with field
                    // boost) multiplied with the sqrt(fieldValues). The 2nd part aims to
                    // compensate the Solr lengthNorm (1/sqrt(fieldTokens))
                    // see STANBOL-1027 for details
                    solrField.setBoost(baseBoost * (float) Math.sqrt(entry.getValue()[0]));
                }
            }
        }
    }
    return inputDocument;
}
use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.
the class AssignmentEncoder method encode.
/**
 * Encodes the parsed constraint value as assignment ({@code EQ}) constraints.
 * <p>
 * A {@code null} value results in the bare {@code EQ} default. Otherwise every
 * query term of every index value is prefixed with {@code EQ}, quoted if the
 * term requires it and suffixed with {@code ^boost} if the value defines a
 * boost. In {@code MODE.any} the terms of each index value are added
 * separately; in {@code MODE.all} all terms are added with a single call.
 * For modes other than {@code MODE.all} an additional phrase query is added
 * if more than one phrase term was collected and the
 * {@link QueryConst#PHRASE_QUERY_STATE} property of the value is {@code true}.
 *
 * @param constraint
 *            the constraint parts the encoded values are added to
 * @param value
 *            the value to encode, may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, ConstraintValue value) {
    if (value == null) {
        // nothing parsed: encode just the default and stop
        constraint.addEncoded(POS, EQ);
        return;
    }
    // encoded terms waiting to be added, depending on the MODE
    final Set<String> encodedTerms = new HashSet<String>();
    // plain text terms that may later be combined to a phrase query
    final Collection<String> phraseCandidates = new ArrayList<String>();
    for (IndexValue current : value) {
        final QueryTerm[] terms = QueryUtils.encodeQueryValue(current, true);
        if (terms == null) {
            encodedTerms.add(EQ);
        } else {
            for (QueryTerm term : terms) {
                final int capacity = term.term.length() + (term.needsQuotes ? 3 : 1);
                final StringBuilder encoded = new StringBuilder(capacity);
                encoded.append(EQ);
                if (term.needsQuotes) {
                    encoded.append('"');
                    encoded.append(term.term);
                    encoded.append('"');
                } else {
                    encoded.append(term.term);
                }
                if (value.getBoost() != null) {
                    encoded.append("^");
                    encoded.append(value.getBoost());
                }
                encodedTerms.add(encoded.toString());
                // only text terms without wildcards qualify for phrase queries
                if (term.isText && !term.hasWildcard) {
                    phraseCandidates.add(term.term);
                }
            }
        }
        if (value.getMode() == MODE.any) {
            // in any mode the values need to be added separately;
            // addEncoded copies the added values so we can clear and reuse
            constraint.addEncoded(POS, encodedTerms);
            encodedTerms.clear();
        }
    }
    if (value.getMode() == MODE.all) {
        // in all mode all values are added in a single call
        // NOTE: for ALL mode phrase queries do not make sense, as
        // they would weaken the selection criteria
        constraint.addEncoded(POS, encodedTerms);
        return;
    }
    if (phraseCandidates.size() <= 1) {
        // with less than two terms we can not build a phrase query
        return;
    }
    final Boolean phraseQueryState = (Boolean) value.getProperty(QueryConst.PHRASE_QUERY_STATE);
    if (phraseQueryState == null || !phraseQueryState.booleanValue()) {
        // phrase query deactivated
        return;
    }
    final StringBuilder phraseQuery = encodePhraseQuery(phraseCandidates);
    phraseQuery.insert(0, EQ);
    if (value.getBoost() != null) {
        phraseQuery.append("^");
        phraseQuery.append(value.getBoost());
    }
    constraint.addEncoded(POS, phraseQuery.toString());
}
use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.
the class GtEncoder method encode.
/**
 * Encodes the exclusive lower bound of a range constraint as
 * <code>"{" + bound + " "</code>.
 * <p>
 * The bound is taken from the parsed value: an {@link IndexValue} is used as
 * is, for a {@link ConstraintValue} its first value is used and any other
 * non-null object is converted by the {@code indexValueFactory}. If no
 * non-empty bound is available {@code DEFAULT} is encoded instead.
 *
 * @param constraint
 *            the constraint parts the encoded bound is added to
 * @param value
 *            the lower bound, may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    // stays null (= use the default) unless a bound can be resolved below
    IndexValue lowerBound = null;
    if (value instanceof IndexValue) {
        lowerBound = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            // only the first value is used as bound
            lowerBound = constraintValue.getValues().iterator().next();
        }
    } else if (value != null) {
        lowerBound = indexValueFactory.createIndexValue(value);
    }
    boolean hasBound = lowerBound != null
            && lowerBound.getValue() != null
            && !lowerBound.getValue().isEmpty();
    String gtConstraint = String.format("{%s ", hasBound ? lowerBound.getValue() : DEFAULT);
    constraint.addEncoded(POS, gtConstraint);
}
use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.
the class GeEncoder method encode.
/**
 * Encodes the inclusive lower bound of a range constraint as
 * <code>"[" + bound + " "</code>.
 * <p>
 * The bound is taken from the parsed value: an {@link IndexValue} is used as
 * is, for a {@link ConstraintValue} its first value is used and any other
 * non-null object is converted by the {@code indexValueFactory}. If no
 * non-empty bound is available {@code DEFAULT} is encoded instead.
 *
 * @param constraint
 *            the constraint parts the encoded bound is added to
 * @param value
 *            the lower bound, may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    // stays null (= use the default) unless a bound can be resolved below
    IndexValue lowerBound = null;
    if (value instanceof IndexValue) {
        lowerBound = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            // only the first value is used as bound
            lowerBound = constraintValue.getValues().iterator().next();
        }
    } else if (value != null) {
        lowerBound = indexValueFactory.createIndexValue(value);
    }
    boolean hasBound = lowerBound != null
            && lowerBound.getValue() != null
            && !lowerBound.getValue().isEmpty();
    String geConstraint = String.format("[%s ", hasBound ? lowerBound.getValue() : DEFAULT);
    constraint.addEncoded(POS, geConstraint);
}
use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.
the class LtEncoder method encode.
/**
 * Encodes the exclusive upper bound of a range constraint as
 * <code>"TO " + bound + "}"</code>, followed by {@code ^boost} if the parsed
 * value is a {@link ConstraintValue} that defines a boost.
 * <p>
 * The bound is taken from the parsed value: an {@link IndexValue} is used as
 * is, for a {@link ConstraintValue} its first value is used and any other
 * non-null object is converted by the {@code indexValueFactory}. If no
 * non-empty bound is available {@code DEFAULT} is encoded instead.
 *
 * @param constraint
 *            the constraint parts the encoded bound is added to
 * @param value
 *            the upper bound, may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    Double boost = null;
    // stays null (= use the default) unless a bound can be resolved below
    IndexValue upperBound = null;
    if (value instanceof IndexValue) {
        upperBound = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            // only the first value is used as bound
            upperBound = constraintValue.getValues().iterator().next();
        }
        // a boost is only available for constraint values
        boost = constraintValue.getBoost();
    } else if (value != null) {
        upperBound = indexValueFactory.createIndexValue(value);
    }
    StringBuilder encoded = new StringBuilder("TO ");
    if (upperBound == null || upperBound.getValue() == null || upperBound.getValue().isEmpty()) {
        encoded.append(DEFAULT);
    } else {
        encoded.append(upperBound.getValue());
    }
    encoded.append('}');
    if (boost != null) {
        encoded.append("^");
        encoded.append(boost);
    }
    constraint.addEncoded(POS, encoded.toString());
}
Aggregations