Search in sources :

Example 6 with IndexValue

use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.

the class SolrYard method createSolrInputDocument.

/**
 * Internally used to create Solr input documents for parsed representations.
 * <p>
 * This method supports boosting of fields. The boost is calculated by combining
 * <ol>
 * <li>the boost for the whole representation - by calling {@link #getDocumentBoost(Representation)}
 * <li>the boost of each field - by using the configured {@link #fieldBoostMap}
 * </ol>
 * For boosted fields that end up with more than one value in the same Solr
 * field, the combined boost is additionally multiplied with the square root
 * of the number of values (see STANBOL-1027).
 * <p>
 * Values that can not be converted or processed are logged and skipped; they
 * do not abort the creation of the document.
 *
 * @param representation
 *            the representation
 * @return the Solr document for indexing
 */
protected final SolrInputDocument createSolrInputDocument(Representation representation) {
    SolrYardConfig config = (SolrYardConfig) getConfig();
    SolrInputDocument inputDocument = new SolrInputDocument();
    // domain for all added documents!
    if (config.isMultiYardIndexLayout()) {
        inputDocument.addField(fieldMapper.getDocumentDomainField(), config.getId());
    }
    // else we need to do nothing
    inputDocument.addField(fieldMapper.getDocumentIdField(), representation.getId());
    // first process the document boost (only set if one is available)
    Float documentBoost = getDocumentBoost(representation);
    if (documentBoost != null) {
        inputDocument.setDocumentBoost(documentBoost);
    }
    for (Iterator<String> fields = representation.getFieldNames(); fields.hasNext(); ) {
        // TODO: maybe add some functionality to prevent indexing of the
        // field configured as documentBoostFieldName!
        // But this would also prevent the possibility to intentionally
        // override the boost.
        String field = fields.next();
        /*
         * With STANBOL-1027 the calculation of the boost has changed to
         * consider multiple values for Representation#get(field).
         */
        // the boost without considering the number of values per Solr field
        float baseBoost;
        Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
        // used to keep track of the Solr fields we need to boost
        // (Solr field name -> single element array holding the value count)
        final Map<String, int[]> fieldsToBoost;
        if (fieldBoost != null) {
            baseBoost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
            fieldsToBoost = new HashMap<String, int[]>();
        } else {
            baseBoost = -1;
            fieldsToBoost = null;
        }
        for (Iterator<Object> values = representation.get(field); values.hasNext(); ) {
            // now we need to get the indexField for the value
            Object next = values.next();
            // Resolved null-safe: the handlers below are reached for NULL values
            // and must not fail with a NullPointerException while logging.
            Class<?> nextType = next == null ? null : next.getClass();
            IndexValue value;
            try {
                value = indexValueFactory.createIndexValue(next);
                for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
                    // In step (1) of boosting just keep track of the field
                    if (fieldBoost != null) {
                        // we need to boost in (2)
                        int[] numValues = fieldsToBoost.get(fieldName);
                        if (numValues == null) {
                            numValues = new int[] { 1 };
                            fieldsToBoost.put(fieldName, numValues);
                            // the first time add the value with the baseBoost
                            // as this will be the correct boost for single value fields
                            inputDocument.addField(fieldName, value.getValue(), baseBoost);
                        } else {
                            numValues[0]++;
                            // for multi valued fields the correct boost is set in (2)
                            // so we can add here without a boost
                            inputDocument.addField(fieldName, value.getValue());
                        }
                    } else {
                        // add the values without boost
                        inputDocument.addField(fieldName, value.getValue());
                    }
                }
            } catch (NoConverterException e) {
                log.warn(String.format("Unable to convert value %s (type:%s) for field %s!", next, nextType, field), e);
            } catch (IllegalArgumentException e) {
                // usually because the Object is NULL or empty
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Illegal Value %s (type:%s) for field %s!", next, nextType, field), e);
                }
            } catch (RuntimeException e) {
                log.warn(String.format("Unable to process value %s (type:%s) for field %s!", next, nextType, field), e);
            }
        }
        if (fieldBoost != null) {
            // we still need to do part (2) of setting the correct boost
            for (Entry<String, int[]> entry : fieldsToBoost.entrySet()) {
                if (entry.getValue()[0] > 1) {
                    // adapt the boost only for multi valued fields
                    SolrInputField solrField = inputDocument.getField(entry.getKey());
                    // the correct boost is baseBoost (representing entity boost with field
                    // boost) multiplied with the sqrt(fieldValues). The 2nd part aims to
                    // compensate the Solr lengthNorm (1/sqrt(fieldTokens))
                    // see STANBOL-1027 for details
                    solrField.setBoost(baseBoost * (float) Math.sqrt(entry.getValue()[0]));
                }
            }
        }
    }
    return inputDocument;
}
Also used : SolrInputField(org.apache.solr.common.SolrInputField) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) SolrInputDocument(org.apache.solr.common.SolrInputDocument)

Example 7 with IndexValue

use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.

the class AssignmentEncoder method encode.

/**
 * Encodes the assignment part of a constraint.
 * <p>
 * Without a value only the default assignment is added. Otherwise every
 * index value is encoded to its query terms (quoted where required and
 * suffixed with the boost if one is set). In {@code any} mode the terms of
 * each index value are added separately, in {@code all} mode all terms are
 * added with a single call. For modes other than {@code all} an additional
 * phrase query is added when more than one phrase term was collected and
 * the phrase query state property is enabled.
 *
 * @param constraint the encoded constraint parts the results are added to
 * @param value the constraint value to encode; may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, ConstraintValue value) {
    if (value == null) {
        // nothing parsed: add the default and we are done
        constraint.addEncoded(POS, EQ);
        return;
    }
    Set<String> encodedTerms = new HashSet<String>();
    Collection<String> phraseCandidates = new ArrayList<String>();
    for (IndexValue current : value) {
        QueryTerm[] queryTerms = QueryUtils.encodeQueryValue(current, true);
        if (qtsMissing(queryTerms)) {
            encodedTerms.add(EQ);
        } else {
            for (QueryTerm queryTerm : queryTerms) {
                int capacity = queryTerm.term.length() + (queryTerm.needsQuotes ? 3 : 1);
                StringBuilder encoded = new StringBuilder(capacity);
                encoded.append(EQ);
                if (queryTerm.needsQuotes) {
                    encoded.append('"');
                    encoded.append(queryTerm.term);
                    encoded.append('"');
                } else {
                    encoded.append(queryTerm.term);
                }
                if (value.getBoost() != null) {
                    encoded.append("^");
                    encoded.append(value.getBoost());
                }
                encodedTerms.add(encoded.toString());
                // only plain text terms without wildcards qualify for phrase queries
                if (queryTerm.isText && !queryTerm.hasWildcard) {
                    phraseCandidates.add(queryTerm.term);
                }
            }
        }
        if (value.getMode() == MODE.any) {
            // in any mode the values of each index value are added separately;
            // addEncoded copies the added values so the set can be reused
            constraint.addEncoded(POS, encodedTerms);
            encodedTerms.clear();
        }
    }
    if (value.getMode() == MODE.all) {
        // in all mode everything is added in a single call. Phrase queries
        // do not make sense here as they would weaken the selection criteria
        constraint.addEncoded(POS, encodedTerms);
        return;
    }
    if (phraseCandidates.size() <= 1) {
        // a phrase query needs at least two terms
        return;
    }
    Boolean phraseQueryState = (Boolean) value.getProperty(QueryConst.PHRASE_QUERY_STATE);
    if (phraseQueryState == null || !phraseQueryState.booleanValue()) {
        // phrase query deactivated
        return;
    }
    StringBuilder phraseQuery = encodePhraseQuery(phraseCandidates);
    phraseQuery.insert(0, EQ);
    if (value.getBoost() != null) {
        phraseQuery.append("^");
        phraseQuery.append(value.getBoost());
    }
    constraint.addEncoded(POS, phraseQuery.toString());
}

/** Tells whether no query terms were produced for an index value. */
private static boolean qtsMissing(QueryTerm[] queryTerms) {
    return queryTerms == null;
}
Also used : ArrayList(java.util.ArrayList) QueryTerm(org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.QueryTerm) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) HashSet(java.util.HashSet)

Example 8 with IndexValue

use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.

the class GtEncoder method encode.

/**
 * Encodes the exclusive lower bound (<code>{value </code>) of a range constraint.
 * <p>
 * The bound is taken from the parsed {@link IndexValue}, from the first
 * value of a parsed {@link ConstraintValue}, or created from any other
 * non-null object by the index value factory. If no non-empty value is
 * available, {@code DEFAULT} is used instead.
 *
 * @param constraint the encoded constraint parts the result is added to
 * @param value the lower bound; may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    // stays null if the default needs to be used
    IndexValue indexValue = null;
    if (value instanceof IndexValue) {
        indexValue = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        // only the first value of the constraint is considered
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            indexValue = constraintValue.getValues().iterator().next();
        }
    } else if (value != null) {
        indexValue = indexValueFactory.createIndexValue(value);
    }
    boolean hasBound = indexValue != null
            && indexValue.getValue() != null
            && !indexValue.getValue().isEmpty();
    String gtConstraint = String.format("{%s ", hasBound ? indexValue.getValue() : DEFAULT);
    constraint.addEncoded(POS, gtConstraint);
}
Also used : IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ConstraintValue(org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue)

Example 9 with IndexValue

use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.

the class GeEncoder method encode.

/**
 * Encodes the inclusive lower bound (<code>[value </code>) of a range constraint.
 * <p>
 * The bound is taken from the parsed {@link IndexValue}, from the first
 * value of a parsed {@link ConstraintValue}, or created from any other
 * non-null object by the index value factory. If no non-empty value is
 * available, {@code DEFAULT} is used instead.
 *
 * @param constraint the encoded constraint parts the result is added to
 * @param value the lower bound; may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    // stays null if the default needs to be used
    IndexValue indexValue = null;
    if (value instanceof IndexValue) {
        indexValue = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        // only the first value of the constraint is considered
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            indexValue = constraintValue.getValues().iterator().next();
        }
    } else if (value != null) {
        indexValue = indexValueFactory.createIndexValue(value);
    }
    boolean hasBound = indexValue != null
            && indexValue.getValue() != null
            && !indexValue.getValue().isEmpty();
    String geConstraint = String.format("[%s ", hasBound ? indexValue.getValue() : DEFAULT);
    constraint.addEncoded(POS, geConstraint);
}
Also used : IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ConstraintValue(org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue)

Example 10 with IndexValue

use of org.apache.stanbol.entityhub.yard.solr.model.IndexValue in project stanbol by apache.

the class LtEncoder method encode.

/**
 * Encodes the exclusive upper bound (<code>TO value}</code>) of a range constraint.
 * <p>
 * The bound is taken from the parsed {@link IndexValue}, from the first
 * value of a parsed {@link ConstraintValue}, or created from any other
 * non-null object by the index value factory. If no non-empty value is
 * available, {@code DEFAULT} is used instead. The boost of a parsed
 * {@link ConstraintValue} - if present - is appended as {@code ^boost}.
 *
 * @param constraint the encoded constraint parts the result is added to
 * @param value the upper bound; may be {@code null}
 */
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
    // only constraint values can carry a boost
    Double boost = null;
    // stays null if the default needs to be used
    IndexValue indexValue = null;
    if (value instanceof IndexValue) {
        indexValue = (IndexValue) value;
    } else if (value instanceof ConstraintValue) {
        ConstraintValue constraintValue = (ConstraintValue) value;
        // only the first value of the constraint is considered
        if (constraintValue.getValues() != null && !constraintValue.getValues().isEmpty()) {
            indexValue = constraintValue.getValues().iterator().next();
        }
        boost = constraintValue.getBoost();
    } else if (value != null) {
        indexValue = indexValueFactory.createIndexValue(value);
    }
    boolean hasBound = indexValue != null
            && indexValue.getValue() != null
            && !indexValue.getValue().isEmpty();
    StringBuilder ltConstraint = new StringBuilder("TO ");
    if (hasBound) {
        ltConstraint.append(indexValue.getValue());
    } else {
        ltConstraint.append(DEFAULT);
    }
    ltConstraint.append('}');
    if (boost != null) {
        ltConstraint.append("^");
        ltConstraint.append(boost);
    }
    constraint.addEncoded(POS, ltConstraint.toString());
}
Also used : IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ConstraintValue(org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue)

Aggregations

IndexValue (org.apache.stanbol.entityhub.yard.solr.model.IndexValue)11 ArrayList (java.util.ArrayList)4 ConstraintValue (org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue)4 HashSet (java.util.HashSet)3 IndexField (org.apache.stanbol.entityhub.yard.solr.model.IndexField)3 RangeConstraint (org.apache.stanbol.entityhub.servicesapi.query.RangeConstraint)2 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)2 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)2 IndexDataTypeEnum (org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum)2 IndexDataType (org.apache.stanbol.entityhub.yard.solr.model.IndexDataType)2 NoConverterException (org.apache.stanbol.entityhub.yard.solr.model.NoConverterException)2 QueryTerm (org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.QueryTerm)2 HashMap (java.util.HashMap)1 SolrQuery (org.apache.solr.client.solrj.SolrQuery)1 SolrInputDocument (org.apache.solr.common.SolrInputDocument)1 SolrInputField (org.apache.solr.common.SolrInputField)1 Constraint (org.apache.stanbol.entityhub.servicesapi.query.Constraint)1 SimilarityConstraint (org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint)1 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)1