Search in sources :

Example 1 with NoConverterException

use of org.apache.stanbol.entityhub.yard.solr.model.NoConverterException in project stanbol by apache.

the class SolrQueryFactory method initValueConstraint.

/**
 * Initialises the parsed {@link IndexConstraint} from the {@link ValueConstraint} it wraps.
 * <p>
 * Processing steps:
 * <ol>
 * <li>A constraint without values is marked as invalid and nothing else is done.
 * <li>The datatype URIs of the constraint are resolved via
 * {@link IndexDataTypeEnum#forUri}; URIs that can not be resolved are logged and ignored.
 * Only the first resolved datatype is used.
 * <li>If no datatype was resolved, the datatype is taken from the first value as converted
 * by the {@code indexValueFactory}; values without a converter are indexed by using their
 * {@code toString()} and the {@link IndexDataTypeEnum#STR} type.
 * <li>The field (including the language of the first value for
 * {@link IndexDataTypeEnum#TXT}), the {@link IndexConstraintTypeEnum#EQ} value and the
 * field query constraint are set on the index constraint.
 * </ol>
 *
 * @param indexConstraint
 *            the index constraint to initialise; its constraint is cast to
 *            {@link ValueConstraint}
 */
private void initValueConstraint(IndexConstraint indexConstraint) {
    ValueConstraint constraint = (ValueConstraint) indexConstraint.getConstraint();
    // guard: a constraint that only checks for the presence of any value is not supported
    if (constraint.getValues() == null) {
        indexConstraint.setInvalid(String.format("ValueConstraint without a value - that check only any value for " + "the parsed datatypes %s is present - can not be supported by a Solr query!", constraint.getDataTypes()));
        return;
    }
    // (1) resolve the parsed datatype URIs to the index datatypes known to the index
    List<IndexDataType> supportedTypes = new ArrayList<IndexDataType>();
    List<String> supportedTypeUris = new ArrayList<String>();
    if (constraint.getDataTypes() != null) {
        for (String typeUri : constraint.getDataTypes()) {
            IndexDataTypeEnum typeEntry = IndexDataTypeEnum.forUri(typeUri);
            if (typeEntry == null) {
                // TODO: Add possibility to add warnings to indexConstraints
                log.warn("A Datatype parsed for a ValueConstraint is not " + "supported and will be ignored (dataTypeUri={})", typeUri);
                continue;
            }
            supportedTypes.add(typeEntry.getIndexType());
            supportedTypeUris.add(typeUri);
        }
    }
    // (2) only a single datatype is supported: keep the first one and report
    //     all additional ones (last one first) as ignored
    IndexDataType queryType = supportedTypes.isEmpty() ? null : supportedTypes.get(0);
    if (supportedTypes.size() > 1) {
        log.warn("Only a single DataType is supported for ValueConstraints!");
        for (int last = supportedTypeUris.size() - 1; last >= 1; last--) {
            String ignored = supportedTypeUris.remove(last);
            log.warn("  > ignore parsed dataType {}", ignored);
        }
    }
    // (3) convert the values; if no datatype was resolved above, the datatype of the
    //     first converted value is used for all following values
    ConstraintValue constraintValue = new ConstraintValue(constraint.getMode());
    addBoost(constraintValue, constraint);
    for (Object value : constraint.getValues()) {
        IndexValue indexValue;
        if (queryType != null) {
            indexValue = new IndexValue(value.toString(), queryType);
        } else {
            try {
                indexValue = indexValueFactory.createIndexValue(value);
            } catch (NoConverterException e) {
                // no converter available: fall back to the string representation
                log.warn("Unable to create IndexValue for value {} (type: {}). Create IndexValue manually by using the first parsed IndexDataType {}", new Object[] { value, value.getClass(), IndexDataTypeEnum.STR.getIndexType() });
                indexValue = new IndexValue(value.toString(), IndexDataTypeEnum.STR.getIndexType());
            }
            queryType = indexValue.getType();
        }
        constraintValue.getValues().add(indexValue);
    }
    // (4) FIELD, DATATYPE and LANGUAGE are set via the index field. Text fields
    //     additionally need a language: the one of the first value is used
    IndexField indexField = IndexDataTypeEnum.TXT.getIndexType().equals(queryType)
            ? new IndexField(indexConstraint.getPath(), queryType, constraintValue.getValues().iterator().next().getLanguage())
            : new IndexField(indexConstraint.getPath(), queryType);
    indexConstraint.setIndexFieldConstraints(indexField);
    // (5) set the VALUE
    // TODO: We need to somehow pass the MODE so that the encoder knows how
    //       to encode the values
    indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.EQ, constraintValue);
    // (6) update the field query constraint: reference constraints are kept as parsed,
    //     other constraints are replaced by one restricted to the datatype actually used
    ValueConstraint effectiveConstraint = constraint instanceof ReferenceConstraint
            ? constraint
            : new ValueConstraint(constraint.getValues(), Arrays.asList(queryType.getId()));
    indexConstraint.setFieldQueryConstraint(effectiveConstraint);
}
Also used : IndexDataType(org.apache.stanbol.entityhub.yard.solr.model.IndexDataType) ValueConstraint(org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint) ArrayList(java.util.ArrayList) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) ReferenceConstraint(org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) IndexDataTypeEnum(org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum) IndexField(org.apache.stanbol.entityhub.yard.solr.model.IndexField)

Example 2 with NoConverterException

use of org.apache.stanbol.entityhub.yard.solr.model.NoConverterException in project stanbol by apache.

the class SolrYard method createSolrInputDocument.

/**
 * Internally used to create Solr input documents for parsed representations.
 * <p>
 * This method supports boosting of fields. The boost is calculated by combining
 * <ol>
 * <li>the boost for the whole representation - by calling {@link #getDocumentBoost(Representation)}
 * <li>the boost of each field - by using the configured {@link #fieldBoostMap}
 * </ol>
 * Values that can not be converted or processed are logged and skipped; they do not
 * abort the creation of the document. This includes <code>null</code> values.
 *
 * @param representation
 *            the representation
 * @return the Solr document for indexing
 */
protected final SolrInputDocument createSolrInputDocument(Representation representation) {
    SolrYardConfig config = (SolrYardConfig) getConfig();
    SolrInputDocument inputDocument = new SolrInputDocument();
    // domain for all added documents!
    if (config.isMultiYardIndexLayout()) {
        inputDocument.addField(fieldMapper.getDocumentDomainField(), config.getId());
    }
    // else we need to do nothing
    inputDocument.addField(fieldMapper.getDocumentIdField(), representation.getId());
    // first process the document boost
    Float documentBoost = getDocumentBoost(representation);
    // NOTE(review): the original note at this position is truncated; it appears to
    // concern how field boosts relate to the document boost - confirm against
    // the Solr boost semantics before changing this.
    if (documentBoost != null) {
        inputDocument.setDocumentBoost(documentBoost);
    }
    for (Iterator<String> fields = representation.getFieldNames(); fields.hasNext(); ) {
        // TODO: maybe add some functionality to prevent indexing of the
        // field configured as documentBoostFieldName!
        // But this would also prevent the possibility to intentionally
        // override the boost.
        String field = fields.next();
        /*
         * With STANBOL-1027 the calculation of the boost has changed to
         * consider multiple values for Representation#get(field).
         */
        // the boost without considering the number of values per solr field
        float baseBoost;
        Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
        // used to keep track of the solr fields we need to boost (solr field name ->
        // single element array holding the number of values added for that field)
        final Map<String, int[]> fieldsToBoost;
        if (fieldBoost != null) {
            baseBoost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
            fieldsToBoost = new HashMap<String, int[]>();
        } else {
            // not used as no boost is configured for this field
            baseBoost = -1;
            fieldsToBoost = null;
        }
        // NOTE(review): the original comment at this position is truncated
        // ("... does already exactly that (in an more efficient way)").
        for (Iterator<Object> values = representation.get(field); values.hasNext(); ) {
            // now we need to get the indexField for the value
            Object next = values.next();
            // Resolve the type null-safe: the handlers below are also reached for null
            // values and calling next.getClass() there would raise a
            // NullPointerException that aborts the creation of the whole document.
            Class<?> nextType = next == null ? null : next.getClass();
            IndexValue value;
            try {
                value = indexValueFactory.createIndexValue(next);
                for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
                    // In step (1) of boosting just keep track of the field
                    if (fieldBoost != null) {
                        // we need to boost in (2)
                        int[] numValues = fieldsToBoost.get(fieldName);
                        if (numValues == null) {
                            numValues = new int[] { 1 };
                            fieldsToBoost.put(fieldName, numValues);
                            // the first time add the value with the baseBoost
                            // as this will be the correct boost for single value fields
                            inputDocument.addField(fieldName, value.getValue(), baseBoost);
                        } else {
                            numValues[0]++;
                            // for multi valued fields the correct boost is set in (2)
                            // so we can add here without a boost
                            inputDocument.addField(fieldName, value.getValue());
                        }
                    } else {
                        // add the values without boost
                        inputDocument.addField(fieldName, value.getValue());
                    }
                }
            } catch (NoConverterException e) {
                log.warn(String.format("Unable to convert value %s (type:%s) for field %s!", next, nextType, field), e);
            } catch (IllegalArgumentException e) {
                // usually because the Object is NULL or empty
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Illegal Value %s (type:%s) for field %s!", next, nextType, field), e);
                }
            } catch (RuntimeException e) {
                log.warn(String.format("Unable to process value %s (type:%s) for field %s!", next, nextType, field), e);
            }
        }
        if (fieldBoost != null) {
            // we need still to do part (2) of setting the correct boost
            for (Entry<String, int[]> entry : fieldsToBoost.entrySet()) {
                if (entry.getValue()[0] > 1) {
                    // adapt the boost only for multi valued fields
                    SolrInputField solrField = inputDocument.getField(entry.getKey());
                    // the correct boost is baseBoost (representing entity boost with field
                    // boost) multiplied with the sqrt(fieldValues). The 2nd part aims to
                    // compensate the Solr lengthNorm (1/sqrt(fieldTokens))
                    // see STANBOL-1027 for details
                    solrField.setBoost(baseBoost * (float) Math.sqrt(entry.getValue()[0]));
                }
            }
        }
    }
    return inputDocument;
}
Also used : SolrInputField(org.apache.solr.common.SolrInputField) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) SolrInputDocument(org.apache.solr.common.SolrInputDocument)

Aggregations

IndexValue (org.apache.stanbol.entityhub.yard.solr.model.IndexValue)2 NoConverterException (org.apache.stanbol.entityhub.yard.solr.model.NoConverterException)2 ArrayList (java.util.ArrayList)1 SolrInputDocument (org.apache.solr.common.SolrInputDocument)1 SolrInputField (org.apache.solr.common.SolrInputField)1 ReferenceConstraint (org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint)1 ValueConstraint (org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint)1 IndexDataTypeEnum (org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum)1 IndexDataType (org.apache.stanbol.entityhub.yard.solr.model.IndexDataType)1 IndexField (org.apache.stanbol.entityhub.yard.solr.model.IndexField)1