Search in sources :

Example 11 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

The transform method of the class GroupedEndResultTransformer.

/**
   * {@inheritDoc}
   */
@Override
public void transform(Map<String, ?> result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) {
    /*
     * Converts the merged distributed grouping results into the "grouped" section of
     * the Solr response. Each entry in {@code result} is either a {@link TopGroups}
     * (field grouping) or a {@link QueryCommandResult} (query grouping); any other
     * value type is silently skipped, matching the merge code that produced it.
     */
    NamedList<Object> commands = new SimpleOrderedMap<>();
    for (Map.Entry<String, ?> entry : result.entrySet()) {
        Object value = entry.getValue();
        if (value instanceof TopGroups) {
            @SuppressWarnings("unchecked") TopGroups<BytesRef> topGroups = (TopGroups<BytesRef>) value;
            NamedList<Object> command = new SimpleOrderedMap<>();
            command.add("matches", rb.totalHitCount);
            // "ngroups" is only present when group.ngroups was requested and merged.
            Integer totalGroupCount = rb.mergedGroupCounts.get(entry.getKey());
            if (totalGroupCount != null) {
                command.add("ngroups", totalGroupCount);
            }
            // Parameterize the list instead of using the raw NamedList type.
            List<NamedList<Object>> groups = new ArrayList<>();
            SchemaField groupField = searcher.getSchema().getField(entry.getKey());
            FieldType groupFieldType = groupField.getType();
            for (GroupDocs<BytesRef> group : topGroups.groups) {
                SimpleOrderedMap<Object> groupResult = new SimpleOrderedMap<>();
                if (group.groupValue != null) {
                    // Round-trip the indexed term through the field type so the
                    // response carries the external (readable) representation.
                    groupResult.add("groupValue", groupFieldType.toObject(groupField.createField(group.groupValue.utf8ToString())));
                } else {
                    // null group holds documents with no value in the group field.
                    groupResult.add("groupValue", null);
                }
                SolrDocumentList docList = new SolrDocumentList();
                docList.setNumFound(group.totalHits);
                if (!Float.isNaN(group.maxScore)) {
                    docList.setMaxScore(group.maxScore);
                }
                docList.setStart(rb.getGroupingSpec().getWithinGroupOffset());
                for (ScoreDoc scoreDoc : group.scoreDocs) {
                    docList.add(solrDocumentSource.retrieve(scoreDoc));
                }
                groupResult.add("doclist", docList);
                groups.add(groupResult);
            }
            command.add("groups", groups);
            commands.add(entry.getKey(), command);
        } else if (value instanceof QueryCommandResult) {
            QueryCommandResult queryCommandResult = (QueryCommandResult) value;
            NamedList<Object> command = new SimpleOrderedMap<>();
            command.add("matches", queryCommandResult.getMatches());
            SolrDocumentList docList = new SolrDocumentList();
            docList.setNumFound(queryCommandResult.getTopDocs().totalHits);
            if (!Float.isNaN(queryCommandResult.getTopDocs().getMaxScore())) {
                docList.setMaxScore(queryCommandResult.getTopDocs().getMaxScore());
            }
            docList.setStart(rb.getGroupingSpec().getWithinGroupOffset());
            for (ScoreDoc scoreDoc : queryCommandResult.getTopDocs().scoreDocs) {
                docList.add(solrDocumentSource.retrieve(scoreDoc));
            }
            command.add("doclist", docList);
            commands.add(entry.getKey(), command);
        }
    }
    rb.rsp.add("grouped", commands);
}
Also used : NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) QueryCommandResult(org.apache.solr.search.grouping.distributed.command.QueryCommandResult) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) FieldType(org.apache.solr.schema.FieldType) ScoreDoc(org.apache.lucene.search.ScoreDoc) SchemaField(org.apache.solr.schema.SchemaField) TopGroups(org.apache.lucene.search.grouping.TopGroups) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Example 12 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

The parse method of the class CloudMLTQParser.

public Query parse() {
    /*
     * Builds a MoreLikeThis query for the document identified by the local "v"
     * param, fetched via real-time get (works in SolrCloud where the doc may not
     * yet be visible to the searcher). Field boosts from "qf" are re-applied to
     * the MLT clauses, and the source document itself is excluded from results.
     */
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
    }
    String[] qf = localParams.getParams("qf");
    Map<String, Float> boostFields = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    // NOTE(review): mindf defaults to 0 here, unlike SimpleMLTQParser which uses
    // MoreLikeThis.DEFAULT_MIN_DOC_FREQ — presumably intentional for cloud mode.
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    // Primitive boolean: getBool(name, default) never returns null.
    boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
    mlt.setBoost(boost);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    String[] fieldNames;
    if (qf != null) {
        ArrayList<String> fields = new ArrayList<>();
        for (String fieldName : qf) {
            if (!StringUtils.isEmpty(fieldName)) {
                String[] strings = splitList.split(fieldName);
                for (String string : strings) {
                    if (!StringUtils.isEmpty(string)) {
                        fields.add(string);
                    }
                }
            }
        }
        // Parse field names and boosts from the fields
        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
        fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
        ArrayList<String> fields = new ArrayList<>();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField f = req.getSchema().getFieldOrNull(field);
            if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
                fields.add(field);
            }
        }
        fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    for (String field : fieldNames) {
        Collection<Object> fieldValues = doc.getFieldValues(field);
        if (fieldValues != null) {
            // Unwrap IndexableField values to their string form so MLT can
            // analyze them; other value types are passed through unchanged.
            Collection<Object> values = new ArrayList<>();
            for (Object val : fieldValues) {
                if (val instanceof IndexableField) {
                    values.add(((IndexableField) val).stringValue());
                } else {
                    values.add(val);
                }
            }
            filteredDocument.put(field, values);
        }
    }
    try {
        Query rawMLTQuery = mlt.like(filteredDocument);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            // Rebuild the query, multiplying any per-field boost from "qf"
            // into each clause's existing boost.
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                // assumes MLT clauses are TermQuery (possibly BoostQuery-wrapped) — TODO confirm
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        // Chain the cause instead of printStackTrace() so the real failure is
        // preserved in the response/log rather than lost on stderr.
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) SolrDocument(org.apache.solr.common.SolrDocument) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) BooleanClause(org.apache.lucene.search.BooleanClause) Collection(java.util.Collection)

Example 13 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

The parse method of the class SimpleMLTQParser.

public Query parse() {
    /*
     * Builds a MoreLikeThis query for the single document whose uniqueKey value
     * is given by the local "v" param. Per-field boosts from "qf" are re-applied
     * to the MLT clauses; the source document is excluded from the results.
     */
    String defaultField = req.getSchema().getUniqueKeyField().getName();
    String uniqueValue = localParams.get(QueryParsing.V);
    String[] qf = localParams.getParams("qf");
    SolrIndexSearcher searcher = req.getSearcher();
    Query docIdQuery = createIdQuery(defaultField, uniqueValue);
    Map<String, Float> boostFields = new HashMap<>();
    try {
        TopDocs td = searcher.search(docIdQuery, 1);
        if (td.totalHits != 1)
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]");
        ScoreDoc[] scoreDocs = td.scoreDocs;
        MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
        mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
        mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
        mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
        mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
        mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
        mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
        mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
        // Primitive boolean: getBool(name, default) never returns null.
        boolean boost = localParams.getBool("boost", false);
        mlt.setBoost(boost);
        String[] fieldNames;
        if (qf != null) {
            ArrayList<String> fields = new ArrayList<>();
            for (String fieldName : qf) {
                if (!StringUtils.isEmpty(fieldName)) {
                    String[] strings = splitList.split(fieldName);
                    for (String string : strings) {
                        if (!StringUtils.isEmpty(string)) {
                            fields.add(string);
                        }
                    }
                }
            }
            // Parse field names and boosts from the fields
            boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
            fieldNames = boostFields.keySet().toArray(new String[0]);
        } else {
            // No qf given: default to every indexed+stored, non-numeric field.
            Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
            ArrayList<String> fields = new ArrayList<>();
            for (String fieldName : fieldDefinitions.keySet()) {
                if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
                    if (fieldDefinitions.get(fieldName).getType().getNumberType() == null)
                        fields.add(fieldName);
            }
            fieldNames = fields.toArray(new String[0]);
        }
        if (fieldNames.length < 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
        }
        mlt.setFieldNames(fieldNames);
        mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
        Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            // Rebuild the query, multiplying any per-field boost from "qf"
            // into each clause's existing boost.
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                // assumes MLT clauses are TermQuery (possibly BoostQuery-wrapped) — TODO confirm
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        // Fix missing separator in the message and chain the cause so the
        // underlying I/O failure is not lost.
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request: " + e.getMessage(), e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause)

Example 14 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

The parse method of the class ChildFieldValueSourceParser.

@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
    final String sortFieldName = fp.parseArg();
    final Query query;
    if (fp.hasMoreArguments()) {
        query = fp.parseNestedQuery();
    } else {
        query = fp.subQuery(fp.getParam(CommonParams.Q), BlockJoinParentQParserPlugin.NAME).getQuery();
    }
    BitSetProducer parentFilter;
    BitSetProducer childFilter;
    SchemaField sf;
    try {
        AllParentsAware bjQ;
        if (!(query instanceof AllParentsAware)) {
            throw new SyntaxError("expect a reference to block join query " + AllParentsAware.class.getSimpleName() + " in " + fp.getString());
        }
        bjQ = (AllParentsAware) query;
        parentFilter = BlockJoinParentQParser.getCachedFilter(fp.getReq(), bjQ.getParentQuery()).filter;
        childFilter = BlockJoinParentQParser.getCachedFilter(fp.getReq(), bjQ.getChildQuery()).filter;
        if (sortFieldName == null || sortFieldName.equals("")) {
            throw new SyntaxError("field is omitted in " + fp.getString());
        }
        sf = fp.getReq().getSchema().getFieldOrNull(sortFieldName);
        if (null == sf) {
            throw new SyntaxError(NAME + " sort param field \"" + sortFieldName + "\" can't be found in schema");
        }
    } catch (SyntaxError e) {
        log.error("can't parse " + fp.getString(), e);
        throw e;
    }
    return new BlockJoinSortFieldValueSource(childFilter, parentFilter, sf);
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) Query(org.apache.lucene.search.Query) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) BitSetProducer(org.apache.lucene.search.join.BitSetProducer) SyntaxError(org.apache.solr.search.SyntaxError) AllParentsAware(org.apache.solr.search.join.BlockJoinParentQParser.AllParentsAware)

Example 15 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

The init method of the class CarrotClusteringEngine.

@Override
@SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
    /*
     * Initializes the Carrot2 clustering engine: resolves the clustering
     * algorithm class, loads optional Workbench-exported attribute XMLs by
     * component name, wires Solr-aware tokenizer/stemmer/lexical factories,
     * and initializes the Carrot2 controller under the core's class loader.
     * Returns the engine name reported by the superclass.
     */
    this.core = core;
    String result = super.init(config, core);
    final SolrParams initParams = SolrParams.toSolrParams(config);
    // Initialization attributes for Carrot2 controller.
    HashMap<String, Object> initAttributes = new HashMap<>();
    // Customize Carrot2's resource lookup to first look for resources
    // using Solr's resource loader. If that fails, try loading from the classpath.
    ResourceLookup resourceLookup = new ResourceLookup(// Solr-specific resource loading.
    new SolrResourceLocator(core, initParams), // Using the class loader directly because this time we want to omit the prefix
    new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(resourceLookup);
    // Make sure the requested Carrot2 clustering algorithm class is available
    String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
    try {
        this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
    } catch (SolrException s) {
        // Only swallow ClassNotFoundException; rethrow every other failure.
        if (!(s.getCause() instanceof ClassNotFoundException)) {
            throw s;
        }
    }
    // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
    // of this component. This by-name convention lookup is used to simplify configuring algorithms.
    String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
    log.info("Initializing Clustering Engine '" + MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
    if (!Strings.isNullOrEmpty(componentName)) {
        IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
        if (attributeXmls.length > 0) {
            if (attributeXmls.length > 1) {
                log.warn("More than one attribute file found, first one will be used: " + Arrays.toString(attributeXmls));
            }
            Thread ct = Thread.currentThread();
            ClassLoader prev = ct.getContextClassLoader();
            try {
                ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
                // try-with-resources: the opened stream was previously leaked.
                try (java.io.InputStream attributesStream = attributeXmls[0].open()) {
                    AttributeValueSets avs = AttributeValueSets.deserialize(attributesStream);
                    AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
                    initAttributes.putAll(defaultSet.getAttributeValues());
                }
            } catch (Exception e) {
                throw new SolrException(ErrorCode.SERVER_ERROR, "Could not read attributes XML for clustering component: " + componentName, e);
            } finally {
                ct.setContextClassLoader(prev);
            }
        }
    }
    // Extract solrconfig attributes, they take precedence.
    extractCarrotAttributes(initParams, initAttributes);
    // Customize the stemmer and tokenizer factories. The implementations we provide here
    // are included in the code base of Solr, so that it's possible to refactor
    // the Lucene APIs the factories rely on if needed.
    // Additionally, we set a custom lexical resource factory for Carrot2 that
    // will use both Carrot2 default stop words as well as stop words from
    // the StopFilter defined on the field.
    final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
    attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
    if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
        attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
    }
    if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
        attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
    }
    // Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
    initAttributes.put("solrCore", core);
    // Carrot2 uses current thread's context class loader to get
    // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
    // To make sure classes from contrib JARs are available,
    // we swap the context class loader for the time of clustering.
    Thread ct = Thread.currentThread();
    ClassLoader prev = ct.getContextClassLoader();
    try {
        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
        this.controller.init(initAttributes);
    } finally {
        ct.setContextClassLoader(prev);
    }
    // Clustering results are keyed by the schema's uniqueKey field; fail fast without one.
    SchemaField uniqueField = core.getLatestSchema().getUniqueKeyField();
    if (uniqueField == null) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotClusteringEngine.class.getSimpleName() + " requires the schema to have a uniqueKeyField");
    }
    this.idFieldName = uniqueField.getName();
    return result;
}
Also used : AttributeBuilder(org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder) HashMap(java.util.HashMap) AttributeValueSets(org.carrot2.util.attribute.AttributeValueSets) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException) ResourceLookup(org.carrot2.util.resource.ResourceLookup) AttributeValueSet(org.carrot2.util.attribute.AttributeValueSet) SchemaField(org.apache.solr.schema.SchemaField) ClassLoaderLocator(org.carrot2.util.resource.ClassLoaderLocator) IClusteringAlgorithm(org.carrot2.core.IClusteringAlgorithm) SolrParams(org.apache.solr.common.params.SolrParams) SolrException(org.apache.solr.common.SolrException) IResource(org.carrot2.util.resource.IResource)

Aggregations

SchemaField (org.apache.solr.schema.SchemaField)182 SolrException (org.apache.solr.common.SolrException)48 ArrayList (java.util.ArrayList)42 FieldType (org.apache.solr.schema.FieldType)41 IndexSchema (org.apache.solr.schema.IndexSchema)35 NamedList (org.apache.solr.common.util.NamedList)29 Query (org.apache.lucene.search.Query)23 IOException (java.io.IOException)22 BytesRef (org.apache.lucene.util.BytesRef)21 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)21 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)21 Document (org.apache.lucene.document.Document)20 SolrParams (org.apache.solr.common.params.SolrParams)19 IndexableField (org.apache.lucene.index.IndexableField)18 HashMap (java.util.HashMap)17 SolrInputDocument (org.apache.solr.common.SolrInputDocument)16 SolrDocument (org.apache.solr.common.SolrDocument)15 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)15 Map (java.util.Map)14 Term (org.apache.lucene.index.Term)14