Search in sources :

Example 46 with SolrInputField

use of org.apache.solr.common.SolrInputField in project lucene-solr by apache.

the class AllValuesOrNoneFieldMutatingUpdateProcessor method mutate.

/**
 * Mutates the values of a field on an all-or-nothing basis.
 * <p>
 * Every value is passed through {@code mutateValue}. If any value yields
 * {@code SKIP_FIELD_VALUE_LIST_SINGLETON}, the source field is returned untouched.
 * Values that yield {@code DELETE_VALUE_SINGLETON} are dropped; every other result
 * is added to a new field carrying the same name.
 *
 * @param srcField the field whose values should be mutated
 * @return {@code srcField} itself when it has no values or when a value is not
 *         mutable; {@code null} when no values remain after mutation; otherwise a
 *         new field holding the mutated values
 */
protected final SolrInputField mutate(final SolrInputField srcField) {
    final Collection<Object> sourceValues = srcField.getValues();
    if (sourceValues == null || sourceValues.isEmpty()) {
        return srcField;
    }

    final String fieldName = srcField.getName();
    final SolrInputField mutated = new SolrInputField(fieldName);
    // Debug messages are buffered and only emitted once we know no value forced a skip.
    List<String> bufferedMessages = null;

    for (final Object original : sourceValues) {
        final Object replacement = mutateValue(original);

        if (replacement == SKIP_FIELD_VALUE_LIST_SINGLETON) {
            log.debug("field '{}' {} value '{}' is not mutable, so no values will be mutated", new Object[] { fieldName, original.getClass().getSimpleName(), original });
            return srcField;
        }

        final boolean dropValue = (replacement == DELETE_VALUE_SINGLETON);

        if (log.isDebugEnabled()) {
            if (bufferedMessages == null) {
                bufferedMessages = new ArrayList<>();
            }
            final String message = dropValue
                ? String.format(Locale.ROOT, "removing value from field '%s': %s '%s'", fieldName, original.getClass().getSimpleName(), original)
                : String.format(Locale.ROOT, "replace value from field '%s': %s '%s' with %s '%s'", fieldName, original.getClass().getSimpleName(), original, replacement.getClass().getSimpleName(), replacement);
            bufferedMessages.add(message);
        }

        if (!dropValue) {
            mutated.addValue(replacement);
        }
    }

    if (bufferedMessages != null && log.isDebugEnabled()) {
        for (final String buffered : bufferedMessages) {
            log.debug(buffered);
        }
    }

    if (mutated.getValueCount() == 0) {
        return null;
    }
    return mutated;
}
Also used : SolrInputField(org.apache.solr.common.SolrInputField)

Example 47 with SolrInputField

use of org.apache.solr.common.SolrInputField in project lucene-solr by apache.

the class AtomicUpdateDocumentMerger method doRemove.

/**
 * Removes one or more values from the field in {@code toDoc} that has the same
 * name as {@code sif}.
 * <p>
 * Each value to remove is first converted with the schema field type's
 * {@code toNativeType} and then removed from the existing field's value
 * collection; the resulting collection is written back with {@code setField}.
 * Nothing happens when the document has no such field, when the schema lookup
 * yields no field, or when the existing field holds no values.
 *
 * @param toDoc    the document to remove the value(s) from
 * @param sif      the field naming the target field in {@code toDoc}
 * @param fieldVal a single value, or a {@link Collection} of values, to remove
 */
protected void doRemove(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) {
    final String name = sif.getName();
    SolrInputField existingField = toDoc.get(name);
    if (existingField == null) {
        return;
    }
    SchemaField sf = schema.getField(name);
    if (sf == null) {
        return;
    }
    final Collection<Object> original = existingField.getValues();
    if (original == null) {
        // A field without a value exposes no collection; there is nothing to remove
        // and dereferencing it would throw a NullPointerException.
        return;
    }
    if (fieldVal instanceof Collection) {
        for (Object object : (Collection<?>) fieldVal) {
            original.remove(sf.getType().toNativeType(object));
        }
    } else {
        original.remove(sf.getType().toNativeType(fieldVal));
    }
    toDoc.setField(name, original);
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) SolrInputField(org.apache.solr.common.SolrInputField) Collection(java.util.Collection)

Example 48 with SolrInputField

use of org.apache.solr.common.SolrInputField in project lucene-solr by apache.

the class CloneFieldUpdateProcessorFactory method getInstance.

/**
 * Creates a processor that, for each added document, copies the values of every
 * field accepted by the source selector into a destination field and then
 * delegates to the next processor.
 * <p>
 * When {@code pattern} is set, the destination name is obtained by applying the
 * pattern to the source field name with {@code dest} as replacement; source fields
 * the pattern does not match are skipped.
 */
@Override
public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
    final FieldNameSelector srcSelector = getSourceSelector();
    return new UpdateRequestProcessor(next) {

        @Override
        public void processAdd(AddUpdateCommand cmd) throws IOException {
            final SolrInputDocument doc = cmd.getSolrInputDocument();
            // A regex replacement as destination can produce several output fields,
            // so collect them by resolved name.
            final Map<String, SolrInputField> collectedDestFields = new HashMap<>();

            for (final String srcName : doc.getFieldNames()) {
                if (!srcSelector.shouldMutate(srcName)) {
                    continue;
                }
                final Collection<Object> srcValues = doc.getFieldValues(srcName);
                if (srcValues == null || srcValues.isEmpty()) {
                    continue;
                }

                String destName = dest;
                if (pattern != null) {
                    final Matcher srcMatcher = pattern.matcher(srcName);
                    if (!srcMatcher.find()) {
                        log.debug("CloneFieldUpdateProcessor.srcSelector.shouldMutate(\"{}\") returned true, " + "but replacement pattern did not match, field skipped.", srcName);
                        continue;
                    }
                    destName = srcMatcher.replaceAll(dest);
                }

                // Reuse the document's field if present, else one collected earlier,
                // else start a fresh field.
                final SolrInputField destField;
                if (doc.containsKey(destName)) {
                    destField = doc.getField(destName);
                } else {
                    final SolrInputField alreadyCollected = collectedDestFields.get(destName);
                    destField = (alreadyCollected != null) ? alreadyCollected : new SolrInputField(destName);
                }

                for (final Object value : srcValues) {
                    destField.addValue(value);
                }
                // Defer adding to the document until the field-name iteration is over,
                // to avoid concurrent modification.
                collectedDestFields.put(destName, destField);
            }

            for (final Map.Entry<String, SolrInputField> collected : collectedDestFields.entrySet()) {
                doc.put(collected.getKey(), collected.getValue());
            }
            super.processAdd(cmd);
        }
    };
}
Also used : FieldNameSelector(org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector) SolrInputDocument(org.apache.solr.common.SolrInputDocument) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) SolrInputField(org.apache.solr.common.SolrInputField) AddUpdateCommand(org.apache.solr.update.AddUpdateCommand)

Example 49 with SolrInputField

use of org.apache.solr.common.SolrInputField in project mycore by MyCoRe-Org.

the class MCRSolrFileIndexHandler method getSolrParams.

/**
 * Builds request parameters from the Solr document created for a file.
 * <p>
 * Every field of the document becomes a parameter named {@code "literal." + fieldName};
 * fields with more than one value are set as a string array, all others as the
 * string form of their single value.
 *
 * @param file  the file to create the document for
 * @param attrs the file's attributes
 * @return the parameters derived from the document's fields
 * @throws IOException declared by this method; presumably raised while building
 *                     the document for {@code file}
 */
private ModifiableSolrParams getSolrParams(Path file, BasicFileAttributes attrs) throws IOException {
    final ModifiableSolrParams literalParams = new ModifiableSolrParams();
    final SolrInputDocument pathDocument = MCRSolrPathDocumentFactory.getInstance().getDocument(file, attrs);
    for (final SolrInputField docField : pathDocument) {
        final String paramName = "literal." + docField.getName();
        if (docField.getValueCount() <= 1) {
            literalParams.set(paramName, docField.getValue().toString());
            continue;
        }
        final String[] multiValues = getValues(docField.getValues());
        literalParams.set(paramName, multiValues);
    }
    return literalParams;
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrInputField(org.apache.solr.common.SolrInputField) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams)

Example 50 with SolrInputField

use of org.apache.solr.common.SolrInputField in project stanbol by apache.

the class SolrYard method createSolrInputDocument.

/**
 * Internally used to create Solr input documents for parsed representations.
 * <p>
 * This method supports boosting of fields. The boost is calculated by combining
 * <ol>
 * <li>the boost for the whole representation - by calling {@link #getDocumentBoost(Representation)}
 * <li>the boost of each field - by using the configured {@link #fieldBoostMap}
 * </ol>
 * For boosted fields that end up with more than one value, the boost is
 * additionally multiplied with the square root of the number of values.
 * <p>
 * Values that cannot be converted or processed are logged and skipped; this
 * includes {@code null} values.
 *
 * @param representation
 *            the representation
 * @return the Solr document for indexing
 */
protected final SolrInputDocument createSolrInputDocument(Representation representation) {
    SolrYardConfig config = (SolrYardConfig) getConfig();
    SolrInputDocument inputDocument = new SolrInputDocument();
    // with the multi yard index layout the id of this yard is added as the
    // domain for all added documents!
    if (config.isMultiYardIndexLayout()) {
        inputDocument.addField(fieldMapper.getDocumentDomainField(), config.getId());
    }
    // else we need to do nothing
    inputDocument.addField(fieldMapper.getDocumentIdField(), representation.getId());
    // first process the document boost
    Float documentBoost = getDocumentBoost(representation);
    if (documentBoost != null) {
        inputDocument.setDocumentBoost(documentBoost);
    }
    for (Iterator<String> fields = representation.getFieldNames(); fields.hasNext(); ) {
        // TODO: maybe add some functionality to prevent indexing of the
        // field configured as documentBoostFieldName!
        // But this would also prevent the possibility to intentionally
        // override the boost.
        String field = fields.next();
        /*
             * With STANBOL-1027 the calculation of the boost has changed to
             * consider multiple values for Representation#get(field).
             */
        // the boost without considering the number of values per solr field
        float baseBoost;
        Float fieldBoost = fieldBoostMap == null ? null : fieldBoostMap.get(field);
        // used to keep track of the fields we need to boost (solr field name -> number of values)
        final Map<String, int[]> fieldsToBoost;
        if (fieldBoost != null) {
            baseBoost = documentBoost != null ? fieldBoost * documentBoost : fieldBoost;
            fieldsToBoost = new HashMap<String, int[]>();
        } else {
            baseBoost = -1;
            fieldsToBoost = null;
        }
        for (Iterator<Object> values = representation.get(field); values.hasNext(); ) {
            // now we need to get the indexField for the value
            Object next = values.next();
            // Resolve the type null-safely: the handlers below log it, and a null
            // value must be reported rather than cause a NullPointerException
            // inside the catch block.
            final Class<?> nextType = next == null ? null : next.getClass();
            IndexValue value;
            try {
                value = indexValueFactory.createIndexValue(next);
                for (String fieldName : fieldMapper.getFieldNames(Arrays.asList(field), value)) {
                    // In step (1) of boosting just keep track of the field
                    if (fieldBoost != null) {
                        // we need to boost in (2)
                        int[] numValues = fieldsToBoost.get(fieldName);
                        if (numValues == null) {
                            numValues = new int[] { 1 };
                            fieldsToBoost.put(fieldName, numValues);
                            // the first time add the document with the baseBoost
                            // as this will be the correct boost for single value fields
                            inputDocument.addField(fieldName, value.getValue(), baseBoost);
                        } else {
                            numValues[0]++;
                            // for multi valued fields the correct boost is set in (2)
                            // so we can add here without a boost
                            inputDocument.addField(fieldName, value.getValue());
                        }
                    } else {
                        // add the values without boost
                        inputDocument.addField(fieldName, value.getValue());
                    }
                }
            } catch (NoConverterException e) {
                log.warn(String.format("Unable to convert value %s (type:%s) for field %s!", next, nextType, field), e);
            } catch (IllegalArgumentException e) {
                // usually because the Object is NULL or empty
                if (log.isDebugEnabled()) {
                    log.debug(String.format("Illegal Value %s (type:%s) for field %s!", next, nextType, field), e);
                }
            } catch (RuntimeException e) {
                log.warn(String.format("Unable to process value %s (type:%s) for field %s!", next, nextType, field), e);
            }
        }
        if (fieldBoost != null) {
            // we need still to do part (2) of setting the correct boost
            for (Entry<String, int[]> entry : fieldsToBoost.entrySet()) {
                if (entry.getValue()[0] > 1) {
                    // adapt the boost only for multi valued fields
                    SolrInputField solrField = inputDocument.getField(entry.getKey());
                    // the correct boost is baseBoost (representing entity boost with field
                    // boost) multiplied with the sqrt(fieldValues). The 2nd part aims to
                    // compensate the Solr lengthNorm (1/sqrt(fieldTokens))
                    // see STANBOL-1027 for details
                    solrField.setBoost(baseBoost * (float) Math.sqrt(entry.getValue()[0]));
                }
            }
        }
    }
    return inputDocument;
}
Also used : SolrInputField(org.apache.solr.common.SolrInputField) IndexValue(org.apache.stanbol.entityhub.yard.solr.model.IndexValue) NoConverterException(org.apache.stanbol.entityhub.yard.solr.model.NoConverterException) SolrInputDocument(org.apache.solr.common.SolrInputDocument)

Aggregations

SolrInputField (org.apache.solr.common.SolrInputField): 50; SolrInputDocument (org.apache.solr.common.SolrInputDocument): 29; AddUpdateCommand (org.apache.solr.update.AddUpdateCommand): 14; SolrQueryRequest (org.apache.solr.request.SolrQueryRequest): 12; ContentStreamBase (org.apache.solr.common.util.ContentStreamBase): 11; JsonLoader (org.apache.solr.handler.loader.JsonLoader): 11; SolrQueryResponse (org.apache.solr.response.SolrQueryResponse): 11; BufferingRequestProcessor (org.apache.solr.update.processor.BufferingRequestProcessor): 11; Test (org.junit.Test): 8; SolrException (org.apache.solr.common.SolrException): 7; SchemaField (org.apache.solr.schema.SchemaField): 7; Collection (java.util.Collection): 5; ArrayList (java.util.ArrayList): 4; Map (java.util.Map): 4; HashSet (java.util.HashSet): 3; Matcher (java.util.regex.Matcher): 3; NamedList (org.apache.solr.common.util.NamedList): 3; IndexSchema (org.apache.solr.schema.IndexSchema): 3; DeleteUpdateCommand (org.apache.solr.update.DeleteUpdateCommand): 3; IOException (java.io.IOException): 2